In [None]:
pip install statannotations

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
import scipy as sp
import scikit_posthocs as sk
import tkinter as tk
from statsmodels.formula.api import ols
from tkinter.filedialog import askopenfilename
from statsmodels.formula.api import ols
from bioinfokit.analys import stat
from statannotations.Annotator import Annotator
#from statannotations.Annotator import add_stat_annotation

# Run this at the beginning before averaging repeats

In [None]:
# Empty dictionaries that accumulate the results of the experimental repeats.
# NOTE: re-running this cell discards everything collected so far.

# percent positive
percentRepeats, normalizedpercentRepeats = {}, {}

# intensity
intensityRepeats, normalizedintensityRepeats = {}, {}

# positive intensity
positiveintensityRepeats, normalizedpositiveintensityRepeats = {}, {}

# intensity, geometric mean
normalizedintensityGmeanRepeats, intensityGmeanRepeats = {}, {}

# intensity, arithmetic mean
normalizedintensityMeanRepeats, intensityMeanRepeats = {}, {}

In [None]:
# Open a file-selection dialog and load the chosen CSV into `dataOut`.
# An explicit, hidden Tk root is created so that askopenfilename() does not
# leave an empty "tk" window behind; it is destroyed once the dialog closes.
root = tk.Tk()
root.withdraw()
root.attributes('-topmost', True)  # keep the dialog in front of other windows
try:
    filename = askopenfilename(parent=root)
finally:
    root.destroy()

# askopenfilename() returns an empty value when the dialog is cancelled;
# fail here with a clear message instead of inside read_csv.
if not filename:
    raise FileNotFoundError('No file selected - re-run this cell and choose a CSV file.')

# keep_default_na=False: strings such as 'NA' are kept as text instead of being
# converted to NaN ('NA' is used as a group label further down).
dataOut = pd.read_csv(filename, keep_default_na=False)
dataOut.head()

In [None]:
# Metadata column whose values split one experimental run into groups.
# Alternatives:
# metadataname ='Metadata_Treatment'
# metadataname = 'Metadata_Concentration'
metadataname = 'Metadata_Cotreatment'

# Group labels in order of appearance, and the number of rows per group
# (unsorted, missing values counted). Check the printed counts against the
# expected layout of the experiment.
groupColumn = dataOut[metadataname]
uniqueorder = groupColumn.unique()
groups = groupColumn.value_counts(dropna=False, sort=False)

print(groups)

In [None]:


# Groups to plot, in left-to-right order. Uncomment the preset that matches the
# experiment; groups that are not listed are not drawn.
# axorder = [0,10,20,50,100]
# axorder = [0,100,300,500,1000]
# axorder = ['DMSO', 'Chloroquine','Control']
# axorder = ['Control', 'DMSO','Wortmannin','Ikarugamycin','Pyrenebutyrate','Chloroquine']
# axorder = ['Control','GFP11','BP100GFP11','TATGFP11','R9GFP11']
# axorder = ['Control','DMSO','Chloroquine','NA','4C']
# axorder = ['Control','4Hattached','4HRechallenge','24Hattached']
axorder = ['Control','NA','R9(1-1)','R9(3-1)']
# axorder = ['Control', 'GFP11WUSR9']
# axorder = ['Control','NA','4C']

# Theme and resolution are configured BEFORE the figure is created: rcParams
# only affect artists created afterwards, so setting them at the end of the
# cell made the first run look different from every re-run.
sns.set(style="ticks", font_scale=1)
plt.rcParams['figure.dpi'] = 420

# Strip plot of every individual ratio. catplot returns a FacetGrid (a single
# facet here); the name `ax` is kept for the cells that follow.
ax = sns.catplot(x=metadataname, y='Math_Ratio', kind='strip', order=axorder, s=2,
                 data=dataOut, palette='pastel')
# define the y-axis
ax.set(ylim=(0, 1.1))

# Add the group mean as a horizontal line: a box plot in which everything
# except the mean line is hidden, drawn explicitly on the strip plot's axes.
ax2 = sns.boxplot(showmeans=True,
            meanline=True,
            meanprops={'color': 'k', 'ls': '-', 'lw': 1},
            medianprops={'visible': False},
            whiskerprops={'visible': False},
            zorder=10,
            x=metadataname,
            y='Math_Ratio',
            data=dataOut,
            showfliers=False,
            showbox=False,
            showcaps=False,
            order=axorder,
            ax=ax.ax)

# Info for the title and the file names. The first row is read by position so
# this also works for one-row files and non-default indexes.
# NOTE(review): assumes every row of a run carries the same date/projection - confirm.
date = str(dataOut['Metadata_Date'].iloc[0])
projection = dataOut['Metadata_Projection'].iloc[0]

# String-labelled copy of the group sizes. A copy is required: assigning to the
# index of an alias would silently relabel `groups` itself.
Ngroups = groups.copy()
Ngroups.index = Ngroups.index.map(str)

plt.xticks(rotation=45)
savename = date + ' Scatter'
plt.title(savename)
plt.ylabel('Green/Red Ratio', fontsize=15)
plt.xlabel('')

# aesthetics
ax2.spines['left'].set_linewidth(2)
ax2.spines['bottom'].set_linewidth(2)
ax2.xaxis.set_tick_params(width=2)
ax2.yaxis.set_tick_params(width=2)
if metadataname == 'Metadata_Concentration':
    plt.xlabel('[R9-GFP11] μM', fontsize=15)



In [None]:
# Export the raw data, trimmed to the ratio and its metadata columns and
# sorted by the grouping column, as a CSV named after the run date.
ratioColumns = ['Metadata_Date','Math_Ratio', 'Metadata_Concentration','Metadata_Treatment','Metadata_Cotreatment','Metadata_CotreatmentConcentration']
# ratioColumns = ['Metadata_Date','Math_Ratio', 'Metadata_Concentration','Metadata_Treatment']

dataRatios = dataOut[ratioColumns].sort_values(by=[metadataname])
dataRatios.to_csv('G:/My Drive/Good Imaging Data/CSV/' + date + '.csv')


In [None]:
# --- Per-group summary statistics -------------------------------------------
# Ratio shifted by +1, used for the geometric mean below. Plain column
# arithmetic is used: a transform-like groupby.apply() returns a group-keyed
# index on pandas >= 2.0 that cannot be assigned back to the frame.
dataOut['Math_Ratio+1'] = dataOut['Math_Ratio'] + 1

# Reduce numeric columns only: the frame also carries text metadata columns,
# and pandas >= 2.0 raises on those instead of silently dropping them.
numericColumns = [c for c in dataOut.select_dtypes(include='number').columns
                  if c != metadataname]
groupedNumeric = dataOut.groupby(metadataname)[numericColumns]
dataMean = groupedNumeric.mean()
dataMedian = groupedNumeric.median()
dataSD = groupedNumeric.std()

# Geometric mean of (ratio + 1) per group, shifted back by 1
dataOutGrouped = dataOut.groupby(metadataname)['Math_Ratio+1'].apply(sp.stats.gmean) - 1
dataGmean = dataOutGrouped


# --- Positivity cutoff derived from the control group ------------------------
# define the control group
control = 'Control'
# control = 0
if control not in dataMean.index:
    raise KeyError(f"control group {control!r} is not a value of {metadataname}: {list(dataMean.index)}")

# Number of control SDs above the control mean at which a row counts as positive.
# z = 1.96 is the two-tailed 95% bound, i.e. ~97.5% one-tailed;
# 1.645 would be one-tailed 95% and 2.326 one-tailed 99%.
zCutoff = 1.96
sD = dataSD.loc[control, 'Math_Ratio']
mean = dataMean.loc[control, 'Math_Ratio']
cutoff = mean + zCutoff * sD


# --- Percent positive ---------------------------------------------------------
# Count the rows above the cutoff in every group (0 for groups without any)
groupLabels = dataOut[metadataname].unique()
positive = (dataOut.loc[dataOut['Math_Ratio'] > cutoff, metadataname]
            .value_counts(sort=False, dropna=False)
            .reindex(groupLabels, fill_value=0))

# Group sizes are recomputed here and matched to `positive` BY LABEL. Dividing
# by the bare values of a Series built in another cell only gives the right
# answer if both happen to list the groups in the same order.
groupSizes = (dataOut[metadataname]
              .value_counts(sort=False, dropna=False)
              .reindex(groupLabels))


# what to normalize by?
normval = 'NA'
if normval not in dataMean.index:
    raise KeyError(f"normalization group {normval!r} is not a value of {metadataname}: {list(dataMean.index)}")

# get the median ratio
intensity = dataMedian['Math_Ratio']
normalizedintensity = intensity / intensity.loc[normval]


# Mean Ratio
intensityMean = dataMean['Math_Ratio']
normalizedintensityMean = intensityMean / intensityMean.loc[normval]


# % Positive
percent = positive.div(groupSizes) * 100
percent = percent.fillna(0)
# NOTE: if the normalization group has 0 % positive, this division yields inf/NaN.
normalizedpercent = percent / percent.loc[normval]



In [None]:
# File the summaries of the run that was just analysed under its date.
# Running this cell again for the same date overwrites the earlier entry.
for repeatStore, runValues in (
    (percentRepeats, percent),                                   # % positive
    (normalizedpercentRepeats, normalizedpercent),
    (intensityMeanRepeats, intensityMean),                       # mean
    (normalizedintensityMeanRepeats, normalizedintensityMean),
):
    repeatStore[date] = runValues



In [None]:
# Plot the percent-positive values of the current run as a quick check.
# The style is selected before plotting because it only applies to axes that
# are created afterwards.
sns.set_style("ticks")
sns.barplot(x=percent.index, y=percent.values, order=axorder, palette='pastel')
sns.despine()
# rotation must be a number (or 'vertical'/'horizontal'); the string '45' is
# rejected by current matplotlib releases.
plt.xticks(rotation=45)
plt.ylabel('Percent Positive (%)')
plt.title(date)
plt.show()

# DO NOT RUN UNTIL YOU SET UP THE REPLICATES

In [None]:
#don't run until you've set up the replicates


def _melt_repeats(repeats, value_name, group_name):
    """Reshape one accumulator of repeats into long format.

    Parameters
    ----------
    repeats : dict
        Maps a run key to a Series of per-group values (as filled in by the
        cell that stores each run).
    value_name : str
        Name given to the value column of the long table.
    group_name : str
        Name given to the column holding the group labels.

    Returns
    -------
    tuple of DataFrame
        ``(wide, by_run, melted)``: groups x runs, its transpose (runs x
        groups), and the long table indexed by run key.

    Raises
    ------
    ValueError
        If no repeat has been stored yet.
    """
    if not repeats:
        raise ValueError("No repeats stored yet - run the analysis cells for at least one experiment first.")
    wide = pd.DataFrame(repeats)
    by_run = wide.transpose()
    # var_name is given explicitly: melt() only falls back to "variable" when
    # the columns axis is unnamed, so renaming "variable" afterwards is fragile.
    melted = (by_run.melt(ignore_index=False, var_name=group_name)
              .rename({"value": value_name}, axis="columns"))
    return wide, by_run, melted


# % Positive
apl0, finalAPL0, meltedapl0 = _melt_repeats(percentRepeats, "Positive", metadataname)
apl1, finalAPL1, meltedapl1 = _melt_repeats(normalizedpercentRepeats, "normPositive", metadataname)

#mean
apl4, finalAPL4, meltedapl4 = _melt_repeats(intensityMeanRepeats, "Mean", metadataname)
apl5, finalAPL5, meltedapl5 = _melt_repeats(normalizedintensityMeanRepeats, "normMean", metadataname)




In [None]:
# Prefix for the summary CSV file names written by the next cell.
# What do you want to save the data as?
savename = ""

In [None]:
# Write every long-format summary table to the summary folder as CSV, for
# plotting in Prism. The file name is the chosen prefix plus a fixed suffix
# (the spelling 'Perecent' is left as is so file names do not change).
summaryFolder = 'G:/My Drive/Good Imaging Data/CSV/Summary/Labeled Better/'

for summaryTable, fileSuffix in (
    (meltedapl0, ' Percent Positive.csv'),
    (meltedapl1, ' normalized Perecent Positive' + '.csv'),
    (meltedapl4, ' Intensity Mean.csv'),
    (meltedapl5, ' normalized Intensity Mean' + '.csv'),
):
    summaryTable.to_csv(summaryFolder + savename + fileSuffix)

