# Import Data

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import functions
import numpy as np

In [2]:
# raw data
df = pd.read_csv('dataframes/pos_df.csv')
df.sample(3)

Unnamed: 0,Sample Name,CAS9-A,CAS9-A.1,CAS9-B,CAS9-B.1,CAV_A,CAV_A.1,CAV_B,CAV_B.1,CAVIN_A,...,SPTLC_B,SPTLC_B.1,UGCG-A,UGCG-A.1,UGCG-B,UGCG-B.1,WT_A,WT_A.1,WT_B,WT_B.1
3927,SGalCer 28:4;3 (LCB 18:0;2-2H2O),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.7087,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2440,PS 32:5 (PS),30.5043,22.9981,23.4306,21.9285,7.738,24.5139,0.0,7.286,7.5452,...,0.0,0.0,0.0,0.0,14.5263,29.0,7.9593,0.0,16.7356,0.0
6151,DAG 34:2+NH4 (-FA 20:1 (NH4)),0.0,0.0,74.0,0.0,0.0,0.0,57.0,47.0,0.0,...,0.0,0.0,0.0,61.0,34.0,50.0,0.0,0.0,25.0,0.0


In [3]:
# experiment metadata
df_exps = pd.read_csv('dataframes/pos_df_exps.csv')
df_exps.sample(3)

Unnamed: 0,Exp,Mutation
33,SPTLC_B.1,SPTLC
4,CAV_A,CAV
20,CERS5-A,CERS5


In [4]:
# lipid metadata
df_meta2 = pd.read_csv('dataframes/pos_lipids_df_meta2.csv')
df_meta2.sample(3)

Unnamed: 0,Sample Name,Head Group,Acyl Chain Length,Unsaturation,Head Group 2,Unsaturation 2
5032,MADAG 55:2+NH4 (-FA 18:0 (NH4)),MADAG,55,2,MADAG,2
2656,PC O-34:1 (PC 104),PC O,34,1,PC,1
2389,PS O-40:1 (FA 19:1),PS O,40,1,PS,1


In [5]:
# reformat columns
df_p = functions.df_p(df, df_exps)
df_p.head()

Mutation,CAS9,CAS9,CAS9,CAS9,CAV,CAV,CAV,CAV,CAVIN,CAVIN,...,SPTLC,SPTLC,UGCG,UGCG,UGCG,UGCG,WT,WT,WT,WT
Sample Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Hex2Cer 26:3;2 (LCB 18:0;2-2H2O),167.0,143.0,125.0,132.0,171.6091,182.9007,120.0,120.0,281.3523,184.3462,...,130.0,142.0,68.0529,46.4729,88.0,91.0,229.5233,278.1255,206.0,182.0
Hex2Cer 26:2;2 (LCB 18:0;2-2H2O),53.2158,52.8015,47.4908,76.2783,27.6563,23.0569,9.0712,24.0712,0.0,13.8507,...,0.0,2.1176,47.256,62.7063,11.1856,0.0,10.8768,6.0672,0.0,34.4747
Hex2Cer 26:2;2 (LCB 18:0;2-2H2O),167.8122,131.3388,220.0689,128.3439,342.976,331.4596,268.9979,289.4925,295.0869,224.9824,...,290.0353,223.3067,187.9922,219.2679,231.3212,219.2373,329.9421,313.2847,251.0406,226.8066
Hex2Cer 26:2;2 (LCB 18:0;2-H2O),54.0,47.0,39.0,44.0,74.0,64.0,84.5341,88.7361,70.0,51.0,...,44.0,37.0,48.6404,51.9843,38.9995,52.0,62.0,57.0,51.0,40.0
Hex2Cer 26:0;2 (LCB 18:0;2-2H2O),391.224,326.4641,444.772,385.3202,175.0126,296.5953,238.6997,214.9442,215.1018,214.6494,...,309.7805,272.9937,373.2111,292.2522,348.9607,348.3176,304.3199,271.4133,303.6544,253.3654


# One-Way ANOVA - Odds

In [6]:
# %pip install scipy

In [7]:
from scipy.stats import f_oneway
from scipy.stats import dunnett

In [8]:
# import odds data
df_odds = pd.read_csv('dataframes/pos_df_odds.csv')
df_odds.head()

Unnamed: 0,Mutation,odd,even
0,CAS9,0.025079,0.974921
1,CAS9,0.035813,0.964187
2,CAS9,0.025719,0.974281
3,CAS9,0.040462,0.959538
4,CAV,0.019239,0.980761


In [9]:
# reformat data into arrays for each mutation
df_odds_agg = df_odds[['Mutation','odd']].groupby('Mutation', as_index=True).odd.apply(np.array)
df_odds_agg.head()

Mutation
CAS9        [0.0250790916946102, 0.0358128216430987, 0.025...
CAV         [0.0192387792134248, 0.0127705079775463, 0.020...
CAVIN       [0.0332286531888394, 0.0333678006749146, 0.036...
CERS2-1g    [0.0249991304634732, 0.0199930855972587, 0.021...
CERS2_mg    [0.0133687904173157, 0.0266619565989788, 0.028...
Name: odd, dtype: object

In [10]:
print(*df_odds_agg)

[0.02507909 0.03581282 0.02571877 0.04046224] [0.01923878 0.01277051 0.02022906 0.02078622] [0.03322865 0.0333678  0.03625956 0.03178837] [0.02499913 0.01999309 0.02185855 0.02062182] [0.01336879 0.02666196 0.02873482 0.02687406] [0.02294828 0.01912686 0.02374499 0.02365047] [0.0289843  0.03073564 0.03233896 0.03463304] [0.03144209 0.01281677 0.03160594 0.03039399] [0.02852444 0.03214696 0.03941527 0.04029798] [0.03583028 0.03781113 0.04232463 0.04344322]


In [11]:
print('F-statistic and p-value for percentage of odd chain lengths between mutations')
f_oneway(*df_odds_agg)

F-statistic and p-value for percentage of odd chain lengths between mutations


F_onewayResult(statistic=7.076233841749475, pvalue=2.000814631749214e-05)

In [12]:
df_odds_agg

Mutation
CAS9        [0.0250790916946102, 0.0358128216430987, 0.025...
CAV         [0.0192387792134248, 0.0127705079775463, 0.020...
CAVIN       [0.0332286531888394, 0.0333678006749146, 0.036...
CERS2-1g    [0.0249991304634732, 0.0199930855972587, 0.021...
CERS2_mg    [0.0133687904173157, 0.0266619565989788, 0.028...
CERS5       [0.0229482838125045, 0.0191268620830812, 0.023...
Flot2       [0.0289843045966665, 0.0307356383153398, 0.032...
SPTLC       [0.0314420874791157, 0.0128167684186993, 0.031...
UGCG        [0.0285244414306175, 0.0321469599164273, 0.039...
WT          [0.0358302751094197, 0.0378111289452934, 0.042...
Name: odd, dtype: object

In [13]:
# dunnett test
control=df_odds_agg['CAS9']
print(dunnett(*df_odds_agg, control=control))
print('There is a significant difference between the percentage of odds in CAVIN and CAS9')

Dunnett's test (95.0% Confidence Interval)
Comparison               Statistic  p-value  Lower CI  Upper CI
 (Sample 0 - Control)      0.000     1.000    -0.011     0.011
 (Sample 1 - Control)     -3.497     0.011    -0.025    -0.002
 (Sample 2 - Control)      0.490     0.999    -0.009     0.013
 (Sample 3 - Control)     -2.562     0.100    -0.021     0.001
 (Sample 4 - Control)     -2.034     0.276    -0.019     0.003
 (Sample 5 - Control)     -2.433     0.130    -0.021     0.002
 (Sample 6 - Control)     -0.025     1.000    -0.011     0.011
 (Sample 7 - Control)     -1.347     0.718    -0.016     0.006
 (Sample 8 - Control)      0.861     0.965    -0.008     0.014
 (Sample 9 - Control)      2.092     0.250    -0.003     0.019

There is a significant difference between the percentage of odds in CAVIN and CAS9


# Two-Way ANOVA - Head Group

In [17]:
# 
functions.norm_long(df_meta2, 
                    df_p, 
                    var='Head Group 2',
                    renamed_var='Head Group',
                    drop_mutation=['WT', 'RAJU'],
                   )

Unnamed: 0,Mutation,Head Group,Fraction
0,CAS9,"DAG,TAG,MAG",0.063226
1,CAS9,"DAG,TAG,MAG",0.077570
2,CAS9,"DAG,TAG,MAG",0.070757
3,CAS9,"DAG,TAG,MAG",0.084759
4,CAV,"DAG,TAG,MAG",0.055945
...,...,...,...
715,SPTLC,SM,0.177090
716,UGCG,SM,0.211858
717,UGCG,SM,0.217901
718,UGCG,SM,0.205151


In [22]:
import statsmodels.api as sm
from statsmodels.formula.api import ols