In [170]:
import numpy as np
from scipy.stats import norm, signaltonoise

## Import the data

In [54]:
# Load the recorded (human) data used as the reference for every generator below.
# Later cells treat it as a 3-D array laid out as (sample, time step, coordinate);
# the hard-coded shapes further down imply 260 time steps x 6 coordinates.
data = np.load("gmail/clean_data.npy")

## Parameters

In [184]:
# Number of fake samples to produce with each generator; a count of 0 skips
# that generator entirely in the "Generate data" cell.
num_steps = 250     # samples from sample_steps()
num_diffs = 0       # samples from sample_diffs()
num_gaussian = 0    # samples from gaussian_noise()
num_unif = 0        # samples from unif_noise()

## Calculate summary stats in human data

In [179]:
# Per-entry statistics for all 6 coordinates at each time step, taken across
# the samples (axis 0). Each result has shape data.shape[1:].
step_mins = np.min(data, axis=0)
step_maxs = np.max(data, axis=0)
step_means = np.mean(data, axis=0)
step_vars = np.var(data, axis=0)

# |mean / std| per entry. Computed with numpy directly because
# scipy.stats.signaltonoise was removed in SciPy 1.0; this mirrors its
# definition (population std, result 0 wherever the std is 0).
step_stds = np.std(data, axis=0)
with np.errstate(divide='ignore', invalid='ignore'):
    step_snr = np.abs(np.where(step_stds == 0, 0, step_means / step_stds))

# Statistics of the forward step-to-step differences x[t+1] - x[t] along the
# time axis. The direction matters: sample_diffs() rebuilds a trajectory as
# x[t+1] = x[t] + diff, so the differences must be "next minus previous"
# (the earlier "previous minus next" form reversed the drift).
diffs = np.diff(data, axis=1)
diff_means = np.mean(diffs, axis=0)
diff_vars = np.var(diffs, axis=0)

## Data generation functions

In [None]:
def sample_steps(n):
    """Draw n fake samples with every entry sampled independently.

    Each (time step, coordinate) entry is drawn from a normal distribution
    whose mean and variance are the ones measured on the real data
    (``step_means`` / ``step_vars``).

    Parameters
    ----------
    n : int
        Number of samples to generate.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n,) + step_means.shape``.
    """
    # np.random.normal expects the standard deviation as `scale`, not the
    # variance, hence the square root.
    step_stds = np.sqrt(step_vars)
    return np.random.normal(step_means, step_stds, size=(n,) + step_means.shape)

def sample_diffs(n):
    """Draw n fake samples as random walks built from step-to-step differences.

    The first time step of each sample is drawn from the normal distribution
    fitted to the first step of the real data (``step_means[0]`` /
    ``step_vars[0]``). Every following step adds an increment drawn from the
    normal distribution fitted to the corresponding difference
    (``diff_means`` / ``diff_vars``).

    Parameters
    ----------
    n : int
        Number of samples to generate.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, diff_means.shape[0] + 1) + diff_means.shape[1:]``.
    """
    # np.random.normal takes a standard deviation, so convert the variances.
    start = np.random.normal(
        step_means[0], np.sqrt(step_vars[0]),
        size=(n, 1) + step_means.shape[1:])
    increments = np.random.normal(
        diff_means, np.sqrt(diff_vars),
        size=(n,) + diff_means.shape)

    # Cumulative sum over [x0, d0, d1, ...] along the time axis yields
    # x0, x0 + d0, x0 + d0 + d1, ... -- the same walk the step-by-step
    # append loop produced, without the repeated array copies.
    return np.cumsum(np.concatenate((start, increments), axis=1), axis=1)
    
def gaussian_noise(n):
    """Draw n real samples (with replacement) and add zero-mean Gaussian noise.

    The noise for each (time step, coordinate) entry has a scale equal to the
    matching entry of ``step_snr``.

    Parameters
    ----------
    n : int
        Number of samples to generate.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n,) + data.shape[1:]``.
    """
    # NOTE(review): the noise scale is the |mean/std| ratio itself, which is
    # kept as in the original code -- confirm this is the intended noise level.
    idx = np.random.randint(data.shape[0], size=n)
    samples = data[idx]

    # One vectorized draw; step_snr broadcasts across the sample axis.
    noise = np.random.normal(0.0, step_snr, size=samples.shape)
    return samples + noise
    
def unif_noise(n):
    """Draw n fake samples with every entry sampled uniformly.

    Each (time step, coordinate) entry is drawn from a uniform distribution
    spanning the minimum and maximum observed for that entry in the real
    data (``step_mins`` / ``step_maxs``).

    Parameters
    ----------
    n : int
        Number of samples to generate.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n,) + step_mins.shape``.
    """
    # The previous body referenced an undefined `mean_var` and drew from
    # normal(min, max); a uniform draw over [min, max) is what the name and
    # the min/max statistics call for.
    return np.random.uniform(step_mins, step_maxs, size=(n,) + step_mins.shape)

## Generate data

In [186]:
# Pair every generator with its requested sample count; generators whose
# count is 0 are skipped.
counts = [num_steps, num_diffs, num_gaussian, num_unif]
functions = [sample_steps, sample_diffs, gaussian_noise, unif_noise]

# Start from an empty batch shaped like the real data so the result is well
# defined (and shape-checked) even when every count is 0.
batches = [np.empty((0,) + data.shape[1:])]
for fun, count in zip(functions, counts):
    if count > 0:
        # Direct call: the apply() builtin no longer exists in Python 3.
        batches.append(fun(count))

# Single concatenation instead of re-copying the output on every append.
output = np.concatenate(batches, axis=0)

np.save('fake_data.npy', output)

# Display the shape of what was written as a quick sanity check.
output.shape

(250, 260, 6)
