In [1]:
import numpy as np
from scipy.stats import norm, signaltonoise
from hmmlearn import hmm

## Import the data

In [2]:
# Cleaned human recordings. Later cells treat this as a 3-D array indexed as
# (recording, time step, coordinate) and hard-code 260 time steps and 6 coordinates.
data = np.load("gmail/clean_data.npy")

## Parameters

In [3]:
# How many synthetic trajectories to draw from each generator.
# A count of 0 means that generator is skipped in the generation cell.
num_steps = num_diffs = num_gaussian = num_unif = 0
num_hmm = 1000

# Destination of the generated array (written with np.save).
save_file = 'more_hmm.npy'

## Calculate summary stats in human data

In [4]:
# Per-time-step statistics of each of the 6 coordinates, taken across all
# recordings (axis 0 of `data`).
step_mins = data.min(axis=0)
step_maxs = data.max(axis=0)
step_means = data.mean(axis=0)
step_vars = data.var(axis=0)
step_snr = np.abs(signaltonoise(data, axis=0))

# Average variance of each coordinate over the time steps.
print(step_vars.mean(axis=0))

# Statistics of the change between consecutive time steps.
# Note the sign: each difference is the earlier step minus the following one.
diffs = data[:, :-1] - data[:, 1:]
diff_means = diffs.mean(axis=0)
diff_vars = diffs.var(axis=0)

[   3.83188094   14.82577596   20.46925214  351.85945174  376.15781886
  353.2787744 ]


## Train HMM

In [5]:
import warnings

# Silence DeprecationWarnings (presumably raised by the libraries used below)
# so they do not flood the cell output.
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Every recording is one observation sequence. Its length is read from the
# data instead of being hard-coded to 260, so `lengths` always sums to the
# number of rows in X.
n_sequences, seq_len = data.shape[0], data.shape[1]
lengths = [seq_len] * n_sequences

# Lay all sequences end to end: (n_sequences * seq_len, n_coordinates).
X = np.concatenate(data, axis=0)

# NOTE(review): no random_state is passed, so the fitted model (and everything
# sampled from it) may differ from run to run.
model = hmm.GaussianHMM(n_components=5).fit(X, lengths)

## Data generation functions

In [6]:
def sample_steps(n, means=None, variances=None):
    """Draw ``n`` trajectories with every value sampled independently.

    Each (time step, coordinate) entry is drawn from a normal distribution
    with the given mean and variance.

    Parameters
    ----------
    n : int
        Number of trajectories to generate.
    means, variances : array_like, optional
        Mean and variance of every entry of a trajectory (same shape).
        Default to the module-level ``step_means`` and ``step_vars``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n,) + means.shape``.
    """
    if means is None:
        means = step_means
    if variances is None:
        variances = step_vars

    means = np.asarray(means, dtype=float)
    # np.random.normal expects a standard deviation as its scale, so the
    # variance has to be square-rooted first.
    stds = np.sqrt(np.asarray(variances, dtype=float))

    # One vectorized draw; means/stds broadcast over the leading sample axis.
    return np.random.normal(means, stds, size=(n,) + means.shape)

def sample_diffs(n, start_means=None, start_vars=None, delta_means=None, delta_vars=None):
    """Draw ``n`` random-walk trajectories from per-step difference statistics.

    The first step of each trajectory is sampled from a normal distribution,
    then every following step is obtained from the previous one and a sampled
    difference.

    Parameters
    ----------
    n : int
        Number of trajectories to generate.
    start_means, start_vars : array_like, optional
        Mean and variance of the first time step, one value per coordinate.
        Default to ``step_means[0]`` and ``step_vars[0]``.
    delta_means, delta_vars : array_like, optional
        Mean and variance of the consecutive-step differences, shape
        ``(steps - 1, coordinates)``.  The differences follow the convention
        of the module-level ``diffs``: earlier step minus the following step.
        Default to the module-level ``diff_means`` and ``diff_vars``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, steps, coordinates)``.
    """
    if start_means is None:
        start_means = step_means[0]
    if start_vars is None:
        start_vars = step_vars[0]
    if delta_means is None:
        delta_means = diff_means
    if delta_vars is None:
        delta_vars = diff_vars

    start_means = np.asarray(start_means, dtype=float)
    delta_means = np.asarray(delta_means, dtype=float)
    # np.random.normal takes standard deviations, not variances.
    start_stds = np.sqrt(np.asarray(start_vars, dtype=float))
    delta_stds = np.sqrt(np.asarray(delta_vars, dtype=float))

    first = np.random.normal(start_means, start_stds, size=(n,) + start_means.shape)
    deltas = np.random.normal(delta_means, delta_stds, size=(n,) + delta_means.shape)

    # A difference is x[t] - x[t+1], hence x[t+1] = x[t] - difference.
    # The running sum of the differences gives the total offset from the
    # first step, which replaces the step-by-step array growth.
    first = first[:, np.newaxis, :]
    later = first - np.cumsum(deltas, axis=1)
    return np.concatenate((first, later), axis=1)
    
def gaussian_noise(n, source=None, scale=None):
    """Pick ``n`` recordings at random (with replacement) and add normal noise.

    Parameters
    ----------
    n : int
        Number of noisy trajectories to generate.
    source : array_like, optional
        Recordings to resample, indexed by recording along axis 0.
        Defaults to the module-level ``data``.
    scale : array_like, optional
        Standard deviation of the zero-mean noise added to every entry of a
        trajectory.  Defaults to the module-level ``step_snr``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n,) + scale.shape``.
    """
    if source is None:
        source = data
    if scale is None:
        # NOTE(review): the default noise scale is the absolute
        # signal-to-noise ratio of each step, as in the original code;
        # confirm that this is the intended noise level.
        scale = step_snr

    source = np.asarray(source)
    scale = np.asarray(scale, dtype=float)

    idx = np.random.randint(source.shape[0], size=n)
    # A single vectorized draw replaces the nested per-element loops and does
    # not rely on Python 2's list-returning map().
    noise = np.random.normal(0.0, scale, size=(n,) + scale.shape)
    return source[idx] + noise
    
def unif_noise(n, lows=None, highs=None):
    """Draw ``n`` trajectories uniformly between per-entry bounds.

    Parameters
    ----------
    n : int
        Number of trajectories to generate.
    lows, highs : array_like, optional
        Lower and upper bound of every entry of a trajectory (same shape).
        Default to the module-level ``step_mins`` and ``step_maxs``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n,) + lows.shape``.
    """
    if lows is None:
        lows = step_mins
    if highs is None:
        highs = step_maxs

    lows = np.asarray(lows, dtype=float)
    highs = np.asarray(highs, dtype=float)

    # The bounds are a min/max pair, so sample uniformly between them; a
    # normal distribution would treat the maximum as a standard deviation.
    return np.random.uniform(lows, highs, size=(n,) + lows.shape)

def hmm(n, hmm_model=None, length=260):
    """Sample ``n`` trajectories from a fitted hidden Markov model.

    NOTE: this function's name shadows the ``hmm`` module imported from
    hmmlearn at the top of the notebook, so the training cell cannot be
    re-run after this cell has executed.  The name is kept because the
    generation cell refers to it.

    Parameters
    ----------
    n : int
        Number of trajectories to generate.
    hmm_model : object, optional
        Object whose ``sample(k)`` returns a pair whose first item is a
        ``(k, features)`` array.  Defaults to the module-level ``model``.
    length : int, optional
        Number of time steps per trajectory (260 by default).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, length, features)``; for ``n <= 0`` an
        empty array of shape ``(0, length, 6)``.
    """
    if hmm_model is None:
        hmm_model = model

    if n <= 0:
        # Nothing to sample: keep the original empty shape with 6 coordinates.
        return np.array([]).reshape(0, length, 6)

    # Collect the draws and build the array once; appending to an ndarray
    # inside the loop would copy everything gathered so far on each pass.
    draws = [hmm_model.sample(length)[0] for _ in range(n)]
    return np.array(draws, dtype=float)

## Generate data

In [7]:
# Requested sample count and the generator that produces it, paired by position.
counts = [num_steps, num_diffs, num_gaussian, num_unif, num_hmm]
functions = [sample_steps, sample_diffs, gaussian_noise, unif_noise, hmm]

# Start from an empty (0, 260, 6) block so the result keeps that layout even
# when every count is zero.
chunks = [np.empty((0, 260, 6))]
for count, fun in zip(counts, functions):
    if count > 0:
        # Call the generator directly; apply() is a deprecated builtin that
        # only exists on Python 2.
        chunks.append(fun(count))

# Join everything once instead of re-copying the array after each generator.
output = np.concatenate(chunks, axis=0)

print(output.shape)
np.save(save_file, output)

(1000, 260, 6)
