## Import All Dependencies

In [None]:
%matplotlib inline
# Syllable Statistics Dependencies
import matplotlib.pyplot as plt
from moseq2_viz.util import parse_index
from moseq2_viz.viz import plot_syll_stats_with_sem
from moseq2_viz.scalars.util import scalars_to_dataframe
from moseq2_viz.model.util import (compute_behavioral_statistics, 
                                   sort_syllables_by_stat, get_syllable_usages,
                                   compute_syllable_explained_variance,
                                   sort_syllables_by_stat_difference)

# Behavioral Distance Dependencies
from scipy.cluster.hierarchy import linkage, dendrogram
from moseq2_viz.model.dist import get_behavioral_distance

# Transition Matrices Compute/Graphing Dependencies
from moseq2_viz.model.util import parse_model_results, relabel_by_usage
from moseq2_viz.model.trans_graph import get_trans_graph_groups, get_group_trans_mats, graph_transition_matrix

# Hypothesis Testing Dependencies
import numpy as np
from scipy import stats
from sklearn import preprocessing
from statsmodels.stats.multitest import multipletests

## Set Paths to Model and Index Files

In [None]:
# Path to the index YAML file handed to parse_index below
index_path = './moseq2-index.yaml'
# Path to the model results file; later cells pass it to parse_model_results,
# scalars_to_dataframe and get_behavioral_distance
model_path = './saline-amphetamine/model.p'
# parse_index returns two values; only the second (kept as sorted_index) is used
_, sorted_index = parse_index(index_path)

## Compute Number Of Syllables that Explain 99% of the Data's Variance

In [None]:
# Load the model results stored at model_path
model_data = parse_model_results(model_path)
# Syllable cutoff reused by later cells as max_sylls / max_syllable.
# n_explained=99 presumably means "99% of explained variance", per the section
# heading — confirm against compute_syllable_explained_variance
max_syllable = compute_syllable_explained_variance(model_data, n_explained=99)

## Compute Syllable Statistics

In [None]:
# Same parse_index call as in the path-setting cell, repeated here
_, sorted_index = parse_index(index_path)

# compute session scalar data
scalar_df = scalars_to_dataframe(sorted_index, model_path=model_path)

# compute syllable usage and scalar statistics
# (grouped by 'group' and 'uuid'; later cells read the 'group', 'uuid',
# 'syllable' and 'usage' columns of df)
df = compute_behavioral_statistics(scalar_df, count='usage', groupby=['group', 'uuid'], usage_normalization=True)

## Compute Scalar Means (By Group) For Each Syllable

In [None]:
# statistic to plot (choose any of the df columns)
stat = 'usage'

# plot every group present in the dataframe
unique_groups = df.group.unique()

# ordering='stat' with no ctrl/exp group: syllables are not sorted by a
# group difference in this cell (see the ordering='diff' cell for that)
plot_syll_stats_with_sem(df,
                         stat=stat, # use the selection above rather than a hard-coded 'usage'
                         ordering='stat',
                         max_sylls=max_syllable,
                         groups=unique_groups,
                         ctrl_group=None,
                         exp_group=None,
                         colors=None,
                         join=True,
                         figsize=(15,7))

## Plot Mean Statistics Sorted By Group Difference

In [None]:
# statistic to plot
stat = 'usage'

# groups to get difference
group1 = 'Saline'
group2 = 'Amphetamine'

unique_groups = df.group.unique()

plot_syll_stats_with_sem(df, 
                         stat='usage', # choose any of the df columns
                         ordering='diff',
                         max_sylls=max_syllable,
                         groups=unique_groups,
                         ctrl_group=group1,
                         exp_group=group2,
                         colors=None,
                         join=True,
                         figsize=(15,7))

## Compute Scalar Means (By Session) For Each Syllable

In [None]:
%matplotlib inline

# you may switch 'uuid' with 'SessionName' or 'SubjectName' if they are all unique
session_mean_df = df.groupby(['syllable', 'uuid']).mean() 

stat = 'usage'

fig, ax = plt.subplots(figsize=(15,7))
session_mean_df[stat].unstack().plot(style='o-', ax=ax, legend=False)

## Compute Syllable Distance Matrices

In [None]:
# Get Pairwise distances
X = get_behavioral_distance(sorted_index,
                            model_path,
                            max_syllable=max_syllable,
                            distances=['ar[init]'])['ar[init]']

## Plot Syllable Distance Dendrogram

In [None]:
%matplotlib inline

fig, ax = plt.subplots(1, 1, figsize=(15, 7))

# Compute linkage matrix with respect to a selected precomputed metric
# (NaNs in X are replaced by np.nan_to_num before clustering; 'complete' linkage).
# NOTE(review): scipy's linkage interprets a 2-D input as an observation matrix,
# not as a precomputed distance matrix. If X is a square distance matrix it
# probably needs to be condensed first (scipy.spatial.distance.squareform) —
# confirm X's shape before relying on this tree
Z = linkage(np.nan_to_num(X), 'complete')
# get_leaves=True so that results['leaves'] is populated
results = dendrogram(Z, distance_sort=False, get_leaves=True, ax=ax, color_threshold=0)
# leaf order of the plotted dendrogram
similarity_ordering = results['leaves']

## Compute Syllable Transition Matrices

In [None]:
# load your model
model_data = parse_model_results(model_path)

# set maximum syllables to include
max_syllable = 40

# select a transition matrix normalization method
normalize = 'bigram' # other options: 'columns', 'rows'

# Get labels and relabel them by usage sorting
labels = model_data['labels']
labels = relabel_by_usage(labels, count='usage')[0]

# Get modeled session uuids to compute group-mean transition graph for
group, label_group, _ = get_trans_graph_groups(model_data, sorted_index)

# compute transition matrices and usages for each group
trans_mats, usages = get_group_trans_mats(labels, label_group, group, max_syllable, normalize=normalize)

## Plot Computed Transition Graphs

In [None]:
# Draw the transition graphs from the matrices, usages and group names computed
# in the previous cell; of the three returned values only the first (fig) is kept
fig, _, _ = graph_transition_matrix(trans_mats,
                                    layout='spring', # or: 'circular', 'spectral'
                                    usages=usages,
                                    groups=group,
                                    arrows=True,
                                    headless=True)

## Hypothesis Testing

### Get Mean Grouped Data

In [None]:
def get_session_mean_df(df, group, stat, max_syllable):
    """Collect one row of `stat` values per session for a single group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'group', 'uuid' and `stat`.
    group : str
        Value of the 'group' column to select.
    stat : str
        Name of the column to extract (e.g. 'usage').
    max_syllable : int
        Number of columns in the output; at most this many values are taken
        from each session.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_sessions, max_syllable), one row per unique uuid in
        the selected group, in order of first appearance. Sessions that have
        fewer than `max_syllable` rows keep 0 in the remaining columns.

    Notes
    -----
    Values are taken in the row order of `df`; this assumes each session's
    rows are already ordered by syllable label — confirm against the caller.
    """
    group_df = df[df['group'] == group]
    uuids = group_df['uuid'].unique()

    group_stat = np.zeros((len(uuids), max_syllable))
    for i, uuid in enumerate(uuids):
        values = np.asarray(group_df.loc[group_df['uuid'] == uuid, stat])[:max_syllable]
        # Assign only the available prefix: a session with fewer than
        # max_syllable rows would otherwise fail to broadcast into the row.
        group_stat[i, :len(values)] = values

    return group_stat
    

# select statistic to compute
statistic = 'usage'

# select groups to compare
group1 = 'Saline'
group2 = 'Amphetamine'

# get separated group variables
# (one row per session, max_syllable columns; note max_syllable is whatever was
# last assigned — 40 if the transition-matrix cell has been run)
group1_stat = get_session_mean_df(df, group1, statistic, max_syllable)
group2_stat = get_session_mean_df(df, group2, statistic, max_syllable)

### Bootstrap the Data

In [None]:
def bootstrap_group(lst, rng):
    """Draw a bootstrap resample of positions into `lst`.

    Samples len(lst) indices from range(len(lst)) with replacement using the
    supplied random generator and returns them as a list.
    """
    n_items = len(lst)
    sampled = rng.choice(n_items, size=n_items, replace=True)
    return list(sampled)

def bootstrap_me(usages, iters=10000):
    """Bootstrap the per-column mean of `usages` over its rows.

    For each of `iters` iterations, rows of `usages` are resampled with
    replacement (via bootstrap_group) and the NaN-ignoring mean over the
    resampled rows is recorded.

    Returns a list of length `iters`, one mean vector per iteration.
    """
    boot_means = []
    for seed in range(iters):
        # a fresh generator seeded with the iteration number keeps every
        # run of this function reproducible
        generator = np.random.RandomState(seed=seed)
        picked_rows = bootstrap_group(usages, generator)
        resampled = np.asarray([usages[row] for row in picked_rows])
        boot_means.append(np.nanmean(resampled, axis=0))

    return boot_means

# Bootstrap each group's per-session array with the default number of
# iterations; stacking the returned list gives one row per bootstrap iteration
group1_boot = np.array(bootstrap_me(group1_stat))
group2_boot = np.array(bootstrap_me(group2_stat))

### Perform Z-Test

In [None]:
def ztest(d1, d2, mu1=None, mu2=None):
    """Two-sided z-test p-value for the difference between two sample means.

    Parameters
    ----------
    d1, d2 : numpy.ndarray
        Samples to compare; their `.std()` supplies the spread of each mean.
    mu1, mu2 : float, optional
        Means to compare. Default to `d1.mean()` and `d2.mean()`.

    Returns
    -------
    float
        p-value capped at 1. When both samples have zero spread the statistic
        is undefined, so 1.0 is returned if the means are equal and 0.0 if
        they differ.
    """
    mu1 = d1.mean() if mu1 is None else mu1
    mu2 = d2.mean() if mu2 is None else mu2

    std = np.sqrt(d1.std() ** 2 + d2.std() ** 2)
    diff = np.abs(mu1 - mu2)

    if std == 0:
        # Avoid 0/0 -> NaN (and the divide-by-zero warnings): a single NaN
        # p-value would propagate through the later FDR correction.
        return np.float64(1.0 if diff == 0 else 0.0)

    return np.minimum(1., 2 * stats.norm.cdf(-diff / std))

# do a ztest on the bootstrap distributions of your 2 conditions
pvals_ztest_boots = np.array([ztest(group1_boot[:,i], group2_boot[:,i]) for i in range(group1_boot.shape[1])])

np.array(range(len(pvals_ztest_boots)))[pvals_ztest_boots < 0.05]

### Multiple Comparisons Test

In [None]:
# significant syllables (relabeled by time used)
np.where(multipletests(pvals_ztest_boots, alpha=0.10, method='fdr_bh')[0])[0]

### Perform T-Test

In [None]:
# independent two-sample t-test between the per-session arrays of both groups;
# p is reused by the multiple-comparisons cell below
st, p = stats.ttest_ind(group1_stat, group2_stat)

# indices of the entries whose uncorrected p-value is below 0.05
np.flatnonzero(p < 0.05)

### Multiple Comparisons Test

In [None]:
# significant syllables (relabeled by time used)
np.where(multipletests(p, alpha=0.10, method='fdr_bh')[0])[0]

***