In [None]:
pip install scikit-posthocs

In [None]:
import h5py
import numpy as np
import pandas as pd
from pathlib import Path
import seaborn as sns
import os
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
from scipy import stats
import scikit_posthocs as sp

### Import files

In [None]:
# Load the filenames table from the Tierpsy summary (the first 4 rows of the csv are skipped)
files = pd.read_csv(r'path to filenames.csv', skiprows=4)

# Lookup table used later to translate a file_id into its filename
dictionary = files.set_index('file_id')['filename'].to_dict()

files.head()

In [None]:
# Load the features table from the Tierpsy summary (the first row of the csv is skipped)
feat = pd.read_csv(r'path to features.csv', skiprows=1)

# Translate each file_id into its filename using the lookup table built from filenames.csv
feat['filename'] = feat['file_id'].map(dictionary)

# A file_id without an entry in the lookup table maps to NaN, which would make
# os.path.basename fail with an unhelpful TypeError - stop here with a clear message instead.
unmapped_ids = feat.loc[feat['filename'].isna(), 'file_id'].unique()
if len(unmapped_ids) > 0:
    raise ValueError(
        f"No filename found in filenames.csv for file_id(s): {list(unmapped_ids)}"
    )

# 'file' is the filename without its directory; 'strain' is the first 6 characters of 'file'
basenames = [os.path.basename(path) for path in feat['filename']]
feat['strain'] = [name[0:6] for name in basenames]
feat['file'] = basenames
#map a column for video timepoints - can be omitted if you dont have different videos for different timepoints, make sure to remove video_timepoint from all code after this point
def f(row):
    """Return the video timepoint label for one row.

    The label is chosen from the text of row['filename']: the first of the
    tags '0to5', '20to25', '40to45' found in it gives '5', '25' or '45'
    respectively; when none is present the label is '0'.
    """
    name = row['filename']
    # checked in this order; the first matching tag wins
    for tag, label in (('0to5', '5'), ('20to25', '25'), ('40to45', '45')):
        if tag in name:
            return label
    return '0'

# Label every row with its video timepoint by applying f() row by row
feat['video_timepoint'] = feat.apply(f, axis=1)

# 'unique_worm_index' identifies each worm as '<file>_<worm_index>'
feat['unique_worm_index'] = feat['file'].str.cat(feat['worm_index'].astype(str), sep='_')

feat.head()

In [None]:
# Number of rows (trajectories) recorded for each strain
feat.strain.value_counts()

In [None]:
# Mean of every numeric column per strain and video timepoint.
# numeric_only=True is required because feat also holds text columns
# (filename, file, unique_worm_index), on which mean() raises a TypeError
# in pandas >= 2.0.
feat_means = feat.groupby(['strain','video_timepoint']).mean(numeric_only=True)
feat_means.head()

In [None]:
# Timepoint of interest: 'newdata' keeps only the rows with this 'video_timepoint' label
timepoint = '45'
newdata = feat[feat['video_timepoint'] == timepoint]

# 'feat_plot' is a copy of feat without the columns that are not features.
# Remove anything you dont want to include before this point.
# NOTE(review): feat_plot is built from feat (all timepoints), while the
# statistics cell reads its values from newdata (selected timepoint only) -
# confirm this difference is intended.
feat_plot = feat.drop(
    columns=['video_timepoint', 'file_id', 'worm_index', 'n_skeletons', 'filename', 'file', 'unique_worm_index']
)

feat_plot.head()

In [None]:
# Plot every feature as a bar plot, one panel per feature with the strains on the x axis.
# Make sure max cols x max rows is at least the number of features you are planning
# to plot; default set up for 8 features (2 rows of 4 panels).
maxc = 4 #number of cols in the grid
maxr = 2 #number of rows in the grid

h = 9 #height in inches of the graph image
w = 16 #width in inches of the graph image

# 'strain' is the grouping variable, not a feature, so it must not get a panel of its own
plot_cols = [col for col in feat_plot.columns if col != 'strain']
if len(plot_cols) > maxc * maxr:
    raise ValueError(
        f"{len(plot_cols)} features to plot but the grid only has "
        f"{maxr} x {maxc} = {maxr * maxc} panels; increase maxr and/or maxc"
    )

# plt.subplots takes (nrows, ncols); squeeze=False keeps 'axes' two-dimensional
# even when the grid has a single row or a single column
fig, axes = plt.subplots(maxr, maxc, figsize=(w, h), sharey=False, constrained_layout=True, squeeze=False)
#fig.suptitle('Title')

# panels are filled row by row, left to right
panels = axes.ravel()
for ax, col in zip(panels, plot_cols):
    sns.barplot(ax=ax, x=feat_plot['strain'], y=feat_plot[col])
    ax.set_title(col)
    ax.tick_params(labelrotation=45)

# hide the panels that were not needed
for ax in panels[len(plot_cols):]:
    ax.set_visible(False)

In [None]:
# Kruskal-Wallis test for every feature, followed by Dunn's pairwise post-hoc test
# with Bonferroni correction. Results are stored per feature in 'kruskal_result' and
# 'dunn_bonferroni_result'; the Dunn tables are displayed at the end.
# The tests use 'newdata', i.e. only the rows of the selected timepoint.
strain_list = feat_plot.strain.unique()

#update these with your strain names, you can add more if you like by adding new entries
strains_to_compare = ["N2____", "lgc-41", "alh-11"]

# catch typos in the strain names before running any test
unknown_strains = [name for name in strains_to_compare if name not in set(strain_list)]
if unknown_strains:
    raise ValueError(
        f"Strain(s) not found in the data: {unknown_strains}; available strains: {list(strain_list)}"
    )

kruskal_result = {}
dunn_bonferroni_result = {}

for col in feat_plot:
    if col == 'strain':
        continue

    # keep only the compared strains and drop missing values, which would
    # otherwise turn the Kruskal-Wallis result into NaN
    subset = newdata.loc[newdata['strain'].isin(strains_to_compare), ['strain', col]].dropna()
    groups = [subset.loc[subset['strain'] == name, col] for name in strains_to_compare]

    if any(len(group) == 0 for group in groups):
        print(f"{col}: skipped, at least one strain has no data at the selected timepoint")
        continue

    kruskal_result[col] = stats.kruskal(*groups)
    #perform Dunn's test using a Bonferroni correction for the p-values;
    #passing the table with group_col labels the result by strain name
    dunn_bonferroni_result[col] = sp.posthoc_dunn(
        subset, val_col=col, group_col='strain', p_adjust='bonferroni'
    )

for feature, dunn_table in dunn_bonferroni_result.items():
    print(feature)
    print(dunn_table)

In [None]:
# Optional export: write the feat table, including the columns added above, to a csv file
export = feat
export.to_csv(path_or_buf=r'export path and file name.csv')