In [None]:
import scipy.stats as stats
import pandas as pd
import dask.dataframe as dd
import os
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.signal import savgol_filter

%matplotlib inline
#from google.colab import drive
#drive.mount('/content/drive')

# Reset every rcParam to matplotlib's built-in defaults so that style changes
# made earlier in this kernel do not carry over into the figures below.
plt.rcParams.update(plt.rcParamsDefault)

# Figure-export and font settings shared by all plots in this notebook.
plt.rcParams.update({
    'pdf.fonttype': 42,      # Type 42 (TrueType) fonts in PDF output
    'svg.fonttype': 'none',  # write SVG text as text instead of converting it to paths
    'font.family': 'Arial',
    'font.size': 11,
})

In [None]:
# Load the screen data table into `df`, which the cells below read from.
# Columns referenced further down in this notebook: ts, file_path, fly,
# genotype, pref and tm_bin_left.
# NOTE(review): the path is relative to the kernel's working directory.
df = pd.read_csv('screen_data.csv')

In [None]:
# --- Per-fly preference index -------------------------------------------------
# Mean `pref` over the three `ts` windows below (bounds inclusive, as in
# Series.between), giving one row per (file_path, fly, genotype).
# NOTE(review): the windows presumably correspond to stimulus epochs - confirm.
ANALYSIS_WINDOWS = [(30, 60), (90, 120), (150, 180)]
CONTROL_GENOTYPE = 'ctrl'
ALPHA = 0.01  # threshold applied to each raw (uncorrected) p-value

in_window = pd.Series(False, index=df.index, dtype=bool)
for t_start, t_stop in ANALYSIS_WINDOWS:
    in_window |= df['ts'].between(t_start, t_stop)

exp_idx = (
    df[in_window]
    .groupby(by=['file_path', 'fly', 'genotype'])['pref']
    .mean()
    .reset_index()
)

# --- Significance versus control (Mann-Whitney U) -------------------------------
genotypes = exp_idx['genotype'].unique()

# The control sample is identical for every comparison, so select it once
# instead of re-filtering inside the loop. The variable keeps its original
# name; it holds the rows whose genotype equals CONTROL_GENOTYPE ('ctrl').
emptyGal4_data = exp_idx.loc[exp_idx['genotype'] == CONTROL_GENOTYPE, 'pref']

results = []

for genotype in genotypes:
    if genotype == CONTROL_GENOTYPE:
        continue  # the control is not compared with itself

    group_data = exp_idx.loc[exp_idx['genotype'] == genotype, 'pref']

    # One-sided Mann-Whitney U test. alternative='less' tests whether the
    # distribution underlying `group_data` is stochastically less than the
    # distribution underlying the control sample (this is NOT a two-sided test).
    # NOTE(review): p-values are not corrected for multiple comparisons even
    # though one test is run per genotype - confirm this is intended.
    U_stat, p_value = stats.mannwhitneyu(group_data, emptyGal4_data, alternative='less')
    significance = 'Yes' if p_value < ALPHA else 'No'

    # Store the results
    results.append({
        'genotype': genotype,
        'U_stat': U_stat,
        'p_value': p_value,
        'significant': significance
    })

# Collect the results in a DataFrame; naming the columns explicitly keeps the
# layout stable even when no non-control genotype is present.
results_df = pd.DataFrame(results, columns=['genotype', 'U_stat', 'p_value', 'significant'])

print(results_df)

In [None]:
# Per-fly preference by genotype: faint individual points with a bold mean
# line per genotype drawn on top.
fig, ax = plt.subplots(figsize=(3, 5))

# Layer 1: one small, semi-transparent grey dot per row of `exp_idx`.
sns.stripplot(
    data=exp_idx,
    x='pref',
    y='genotype',
    jitter=True,
    color='grey',
    alpha=.2,
    size=2,
    ax=ax,
)

# Layer 2: a boxplot with box, whiskers, caps and fliers switched off, so that
# the thick black mean line is the only element styled here (the median line
# is not disabled and keeps its default appearance).
sns.boxplot(
    data=exp_idx,
    x='pref',
    y='genotype',
    showmeans=True,
    meanline=True,
    meanprops={'color': 'k', 'ls': '-', 'lw': 3},
    whiskerprops={'visible': False},
    showbox=False,
    showcaps=False,
    showfliers=False,
    zorder=10,
    ax=ax,
)

ax.set_xlim(-1, 1)
# NOTE(review): the outer ticks sit at +/-1.1, outside the limits set on the
# previous line; matplotlib may widen the view to include them, and the
# trimmed spine below then ends at those ticks - confirm this is intended.
ax.set_xticks([-1.1, -.5, 0, .5, 1.1])
ax.set_xlabel('Aversion Index (normalized)')

# Detach the remaining spines slightly and cut them at the outermost ticks.
sns.despine(offset=4, trim=True)

plt.show()

In [None]:
# Time course of `pref` for two selected genotypes.
d = df.loc[df['genotype'].isin(['40B01-splithalf', 'SS51024'])]

fig, trace_ax = plt.subplots(figsize=(2, 2))

# seaborn aggregates the rows sharing a `tm_bin_left` value into one line per
# genotype, with its default error band; only bins up to 90 are shown.
# NOTE(review): the two palette colours are assigned in the order seaborn
# encounters the genotype levels, not by name - pass hue_order if a fixed
# colour per genotype is required.
sns.lineplot(
    data=d.loc[d['tm_bin_left'] <= 90],
    x='tm_bin_left',
    y='pref',
    hue='genotype',
    palette=['black', '#99bfdd'],
    lw=1,
    ax=trace_ax,
)

trace_ax.set_xticks([0, 30, 60, 90])
trace_ax.set_yticks([-1, 0, 1.0])

sns.despine(trim=True, offset=4)

# Inward-pointing ticks of equal length on both axes.
trace_ax.tick_params(axis='y', direction='in', length=4)
trace_ax.tick_params(axis='x', direction='in', length=4)
trace_ax.set_xlabel('Time (s)')

plt.show()