%load_ext autoreload

%autoreload 2


In [None]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import ClusterStats.cluster_stats as cs

In [None]:
%matplotlib inline

In [None]:
def generate_trial(N = 100, sig = 1):
    """Generate one synthetic trial: uniform noise plus a scaled Gaussian bump.

    Parameters
    ----------
    N : int
        Number of samples in the trial. The samples are laid out on an
        evenly spaced grid from -10 to 10.
    sig : float
        Upper bound on the bump amplitude. The actual amplitude is
        ``sig`` times a single uniform draw from [0, 1), so ``sig=0``
        yields a noise-only trial.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``N``.
    """
    # Draw order (noise vector first, then the amplitude scalar) is kept as in
    # the original so that seeded runs produce the same numbers for N == 100.
    noise = np.random.rand(N)
    x = np.linspace(-10, 10, N)
    bump = np.exp(-(x**2)/2) * np.random.rand() * sig
    return noise + bump

In [None]:
# Number of trials generated per condition.
N_trials = 1000
# Column labels for the samples: the 100 grid positions from -10 to 10,
# rendered as strings.
data_cols = list(map(str, np.linspace(-10, 10, 100).tolist()))

In [None]:
# Build the trial table in one shot instead of enlarging the frame row by row.
# The first N_trials rows carry the Gaussian bump (cond == 1); the next
# N_trials rows are noise only (cond == 0). Samples are stored under the string
# labels in `data_cols`, which are the labels every later cell selects by.
signal_trials = [generate_trial() for _ in range(N_trials)]
noise_trials = [generate_trial(sig=0) for _ in range(N_trials)]

df = pd.DataFrame(np.vstack(signal_trials + noise_trials), columns=data_cols)
df['cond'] = np.repeat([1, 0], N_trials)

In [None]:
# Persist the trials. NOTE: the file is plain CSV despite the '.h5' extension.
# index=False keeps the row index out of the file so that reading it back does
# not introduce an extra unnamed column.
df.to_csv('trials.h5', index=False)

In [None]:
# Reload the saved trials (CSV content, '.h5' name) and rebuild the list of
# sample column labels so this cell can be run without the generation cells.
df = pd.read_csv('trials.h5')
data_cols = list(map(str, np.linspace(-10, 10, 100).tolist()))

In [None]:
# Show the column labels of the reloaded frame.
df.columns

In [None]:
# Overlay every trial as one line: transpose so that the sample positions form
# the index (x axis) and each trial becomes a plotted column.
df.loc[:, data_cols].transpose().plot(figsize=(9, 6), legend=None)

In [None]:
# Split the frame into the sample matrix and the per-trial condition labels,
# then compute the per-site statistic and p-value between the two conditions.
labels = df.loc[:, 'cond'].values
data = df.loc[:, data_cols].values
unique_labels = np.array([1, 0])
stat, pval = cs.site_statistics_ttest_ind(data, labels, unique_labels)


In [None]:
# Log of the per-site p-values against the sample positions.
plt.plot(np.linspace(start=-10, stop=10, num=100), np.log(pval))

In [None]:
%%timeit
# Time the 1-D clustering routine on the site statistics at a 0.05 threshold.
# NOTE(review): nothing later in the notebook reads cluster_stats / clusters.
cluster_stats, clusters = cs.clust_stats_1d(stat, pval, threshold=0.05)

In [None]:
%%timeit
# Time the OpenCV-named clustering routine on the same inputs, for comparison
# with the clust_stats_1d timing.
cluster_stats, clusters = cs.clust_stats_opencv(stat, pval, threshold=0.05)

In [None]:
# Time a single Monte Carlo iteration on the array inputs.
%timeit sx = cs.monte_carlo_iteration(data, labels, unique_labels=unique_labels)

In [None]:
%lprun -f cs.site_statistics_ttest_ind tt = cs.site_statistics_ttest_ind(df, 'cond', data_cols)

In [None]:
import itertools

In [None]:
%load_ext line_profiler

In [None]:
def run_monte_carlo_map(df, col_groups, col_values, n_repetitions, connectivity='1d', site_alpha=0.05):
    """Run ``cs.monte_carlo_iteration`` serially via ``map`` and rank the results.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding one trial per row.
    col_groups : str
        Name of the column with the group label of each trial.
    col_values : list
        Names of the columns holding the per-site values.
    n_repetitions : int
        Number of Monte Carlo iterations to run.
    connectivity : str
        Passed through unchanged to ``cs.monte_carlo_iteration``.
    site_alpha : float
        Passed through unchanged to ``cs.monte_carlo_iteration``.

    Returns
    -------
    pandas.Series
        The iteration results sorted in descending order, indexed by
        ``n_repetitions`` evenly spaced values from 0 to 1.
    """
    data = df[col_values].values
    labels = df[col_groups].values
    # `np.int` was only an alias of the builtin and no longer exists in
    # NumPy >= 1.24; the builtin `int` gives the same dtype.
    unique_labels = np.unique(labels).astype(int)[::-1]

    # Every iteration receives the same five arguments; itertools.repeat
    # supplies each of them n_repetitions times to map().
    fixed_args = (data, labels, unique_labels, connectivity, site_alpha)
    repeated_args = [itertools.repeat(arg, n_repetitions) for arg in fixed_args]
    stats_mc = np.array(list(map(cs.monte_carlo_iteration, *repeated_args)))

    # Sort ascending in place, then reverse to get descending order.
    stats_mc.sort()
    stats_mc = stats_mc[::-1]
    idx = np.linspace(0, 1, n_repetitions)
    stats_mc = pd.Series(data=stats_mc, index=idx)
    return stats_mc


In [None]:
# Wall-clock a 1000-repetition run of the library's run_monte_carlo and keep
# the result in stats_mc for inspection below.
%time stats_mc = cs.run_monte_carlo(df, 'cond', data_cols, n_repetitions=1000)

In [None]:
# Sanity check: True if any Monte Carlo result is missing (NaN).
stats_mc.isna().any()

In [None]:
# Display the Monte Carlo results.
stats_mc

In [None]:
# Display the per-trial condition labels extracted from df['cond'] earlier.
labels