In [278]:
import pandas as pd
import pymssql
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from matplotlib_venn import venn2, venn2_circles, venn2_unweighted
from matplotlib_venn import venn3, venn3_circles, venn3_unweighted
from matplotlib import pyplot as plt
import scipy.stats as stats


In [279]:
from config import database
from config import Mouse
from config import Virus
from config import Protein
from config import Biotin
from config import Fraction
from config import Protein_Id
from config import Gene
from config import Protein_Description
from config import Peptide
from config import username
from config import password
from config import server

In [280]:
try:
    conn = pymssql.connect(server,username, password,database)

    cursor = conn.cursor()
    query = f"""
    SELECT 
        P.Protein_Name,
        V.Virus_Label,
        B.Biotin_Label,
        F.Fraction_Label,
        MP.Hits
        FROM Mouse_Protein AS MP 
    INNER JOIN {Mouse} AS M on MP.Mouse_ID = M.Mouse_ID
    INNER JOIN {Protein} AS P on MP.Protein_ID = P.Protein_ID
    INNER JOIN {Virus} AS V on M.Virus_ID = V.Virus_ID
    INNER JOIN {Biotin} AS B on M.Biotin_ID = B.Biotin_ID
    INNER JOIN {Fraction} AS F on M.Fraction_ID = F.Fraction_ID
    INNER JOIN {Protein_Id} AS PI on P.Protein_Id_ID = PI.Protein_Id_ID
    INNER JOIN {Gene} AS G on P.Gene_ID = G.Gene_ID
    INNER JOIN {Protein_Description} AS PD on P.Description_ID = PD.Description_ID
    INNER JOIN {Peptide} AS PE on P.Peptide_ID = PE.Peptide_ID
    ORDER BY MP.Hits DESC
    """
    main_df = pd.read_sql(query, conn)
except Exception as e:
    print(e)
main_df.head()

Unnamed: 0,Protein_Name,Virus_Label,Biotin_Label,Fraction_Label,Hits
0,NFH_MOUSE,TDP43-TurboID,72,LP1,5225510000.0
1,NFL_MOUSE,TDP43-TurboID,72,LP1,4174620000.0
2,H2A1B_MOUSE;H2A1C_MOUSE;H2A1D_MOUSE;H2A1E_MOUS...,TDP43-TurboID,72,LP1,3127040000.0
3,TBA1A_MOUSE,TDP43-TurboID,72,LP1,3018330000.0
4,PLEC-6_MOUSE;PLEC-7_MOUSE;PLEC-8_MOUSE,TDP43-TurboID,72,LP1,2856640000.0


In [281]:
print("Virus types:", list(main_df["Virus_Label"].unique()))
print("Biotin types:", list(main_df["Biotin_Label"].unique()))
print("Fraction types:", list(main_df["Fraction_Label"].unique()))
print("Total rows of data:", len(main_df), "\nWith 24 data frame combos")

Virus types: ['TDP43-TurboID', 'TurboID']
Biotin types: ['72', '1.5', 'saline']
Fraction types: ['LP1', 'LS1', 'whole brain', 'Nuclear']
Total rows of data: 115317 
With 24 data frame combos


In [282]:
# Virus type _ Biotin type _ Fraction Type  =  labeling scheme for dfs

# TurboID virus & 72 Biotin combonations
TurboID_72_LP1 = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "72"][main_df["Fraction_Label"] == "LP1"]
TurboID_72_wholebrain = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "72"][main_df["Fraction_Label"] == "whole brain"]
TurboID_72_LS1 = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "72"][main_df["Fraction_Label"] == "LS1"]
TurboID_72_Nuclear = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "72"][main_df["Fraction_Label"] == "Nuclear"]

# TurboID virus & 1.5 Biotin combonations
TurboID_15_LP1 = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "1.5"][main_df["Fraction_Label"] == "LP1"]
TurboID_15_wholebrain = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "1.5"][main_df["Fraction_Label"] == "whole brain"]
TurboID_15_LS1 = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "1.5"][main_df["Fraction_Label"] == "LS1"]
TurboID_15_Nuclear = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "1.5"][main_df["Fraction_Label"] == "Nuclear"]

# TurboID virus & saline Biotin combonations
TurboID_saline_LP1 = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "saline"][main_df["Fraction_Label"] == "LP1"]
TurboID_saline_wholebrain = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "saline"][main_df["Fraction_Label"] == "whole brain"]
TurboID_saline_LS1 = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "saline"][main_df["Fraction_Label"] == "LS1"]
TurboID_saline_Nuclear = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "saline"][main_df["Fraction_Label"] == "Nuclear"]

# TDP43-TurboID virus & 72 Biotin combonations
TDP43TurboID_72_LP1 = main_df[main_df["Virus_Label"] == "TDP43-TurboID"][main_df["Biotin_Label"] == "72"][main_df["Fraction_Label"] == "LP1"]
TDP43TurboID_72_wholebrain = main_df[main_df["Virus_Label"] == "TDP43-TurboID"][main_df["Biotin_Label"] == "72"][main_df["Fraction_Label"] == "whole brain"]
TDP43TurboID_72_LS1 = main_df[main_df["Virus_Label"] == "TDP43-TurboID"][main_df["Biotin_Label"] == "72"][main_df["Fraction_Label"] == "LS1"]
TDP43TurboID_72_Nuclear = main_df[main_df["Virus_Label"] == "TDP43-TurboID"][main_df["Biotin_Label"] == "72"][main_df["Fraction_Label"] == "Nuclear"]

# TDP43-TurboID virus & 1.5 Biotin combonations
TDP43TurboID_15_LP1 = main_df[main_df["Virus_Label"] == "TDP43-TurboID"][main_df["Biotin_Label"] == "1.5"][main_df["Fraction_Label"] == "LP1"]
TDP43TurboID_15_wholebrain = main_df[main_df["Virus_Label"] == "TDP43-TurboID"][main_df["Biotin_Label"] == "1.5"][main_df["Fraction_Label"] == "whole brain"]
TDP43TurboID_15_LS1 = main_df[main_df["Virus_Label"] == "TDP43-TurboID"][main_df["Biotin_Label"] == "1.5"][main_df["Fraction_Label"] == "LS1"]
TDP43TurboID_15_Nuclear = main_df[main_df["Virus_Label"] == "TDP43-TurboID"][main_df["Biotin_Label"] == "1.5"][main_df["Fraction_Label"] == "Nuclear"]

# TDP43-TurboID virus & saline Biotin combonations
TDP43TurboID_saline_LP1 = main_df[main_df["Virus_Label"] == "TDP43-TurboID"][main_df["Biotin_Label"] == "saline"][main_df["Fraction_Label"] == "LP1"]
TDP43TurboID_saline_wholebrain = main_df[main_df["Virus_Label"] == "TDP43-TurboID"][main_df["Biotin_Label"] == "saline"][main_df["Fraction_Label"] == "whole brain"]
TDP43TurboID_saline_LS1 = main_df[main_df["Virus_Label"] == "TDP43-TurboID"][main_df["Biotin_Label"] == "saline"][main_df["Fraction_Label"] == "LS1"]
TDP43TurboID_saline_Nuclear = main_df[main_df["Virus_Label"] == "TDP43-TurboID"][main_df["Biotin_Label"] == "saline"][main_df["Fraction_Label"] == "Nuclear"]

# Total amount of row captured check, should get a total of zero
print("Should be zero:", len(main_df) - (len(TurboID_72_LP1) + len(TurboID_72_wholebrain) + len(TurboID_72_LS1) + len(TurboID_72_Nuclear)
                    + len(TurboID_15_LP1) + len(TurboID_15_wholebrain) + len(TurboID_15_LS1) + len(TurboID_15_Nuclear)
                    + len(TurboID_saline_LP1) + len(TurboID_saline_wholebrain) + len(TurboID_saline_LS1) + len(TurboID_saline_Nuclear)
                    
                    + len(TDP43TurboID_72_LP1) + len(TDP43TurboID_72_wholebrain) + len(TDP43TurboID_72_LS1) + len(TDP43TurboID_72_Nuclear)
                    + len(TDP43TurboID_15_LP1) + len(TDP43TurboID_15_wholebrain) + len(TDP43TurboID_15_LS1) + len(TDP43TurboID_15_Nuclear)
                    + len(TDP43TurboID_saline_LP1) + len(TDP43TurboID_saline_wholebrain) + len(TDP43TurboID_saline_LS1) + len(TDP43TurboID_saline_Nuclear)))

  TurboID_72_LP1 = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "72"][main_df["Fraction_Label"] == "LP1"]
  TurboID_72_wholebrain = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "72"][main_df["Fraction_Label"] == "whole brain"]
  TurboID_72_LS1 = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "72"][main_df["Fraction_Label"] == "LS1"]
  TurboID_72_Nuclear = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "72"][main_df["Fraction_Label"] == "Nuclear"]
  TurboID_15_LP1 = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "1.5"][main_df["Fraction_Label"] == "LP1"]
  TurboID_15_wholebrain = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "1.5"][main_df["Fraction_Label"] == "whole brain"]
  TurboID_15_LS1 = main_df[main_df["Virus_Label"] == "TurboID"][main_df["Biotin_Label"] == "1.5"][main_df["Fraction_Label"] == "LS1"]
  TurboID_15_Nuclear = main_

Should be zero: 0


In [283]:
TurboID_72_LP1 = TurboID_72_LP1[["Protein_Name", "Hits"]].rename(columns = {"Hits": "TurboID_72_LP1"})
TurboID_72_wholebrain = TurboID_72_wholebrain[["Protein_Name", "Hits"]].rename(columns = {"Hits": "TurboID_72_wholebrain"})
TurboID_72_LS1 = TurboID_72_LS1[["Protein_Name", "Hits"]].rename(columns = {"Hits": "TurboID_72_LS1"})
TurboID_15_wholebrain = TurboID_15_wholebrain[["Protein_Name", "Hits"]].rename(columns = {"Hits": "TurboID_15_wholebrain"})

# Told this experiment went poorly
# TDP43TurboID_72_LP1 = TDP43TurboID_72_LP1.rename(columns = {"Hits": "TDP43TurboID_72_LP1"})

TDP43TurboID_72_wholebrain = TDP43TurboID_72_wholebrain[["Protein_Name", "Hits"]].rename(columns = {"Hits": "TDP43TurboID_72_wholebrain"})
TDP43TurboID_72_LS1 = TDP43TurboID_72_LS1[["Protein_Name", "Hits"]].rename(columns = {"Hits": "TDP43TurboID_72_LS1"})
TDP43TurboID_15_LP1 = TDP43TurboID_15_LP1[["Protein_Name", "Hits"]].rename(columns = {"Hits": "TDP43TurboID_15_LP1"})
TDP43TurboID_15_wholebrain = TDP43TurboID_15_wholebrain[["Protein_Name", "Hits"]].rename(columns = {"Hits": "TDP43TurboID_15_wholebrain"})
TDP43TurboID_15_LS1 = TDP43TurboID_15_LS1[["Protein_Name", "Hits"]].rename(columns = {"Hits": "TDP43TurboID_15_LS1"})
TDP43TurboID_15_Nuclear = TDP43TurboID_15_Nuclear[["Protein_Name", "Hits"]].rename(columns = {"Hits": "TDP43TurboID_15_Nuclear"})
TDP43TurboID_saline_LP1 = TDP43TurboID_saline_LP1[["Protein_Name", "Hits"]].rename(columns = {"Hits": "TDP43TurboID_saline_LP1"})
TDP43TurboID_saline_LS1 = TDP43TurboID_saline_LS1[["Protein_Name", "Hits"]].rename(columns = {"Hits": "TDP43TurboID_saline_LS1"})

print("Should be zero:", len(main_df) - (len(TurboID_72_LP1) + len(TurboID_72_wholebrain) + len(TurboID_72_LS1) 
                    + len(TurboID_15_wholebrain) + len(TDP43TurboID_72_wholebrain) + len(TDP43TurboID_72_LS1) 
                    + len(TDP43TurboID_15_LP1) + len(TDP43TurboID_15_wholebrain) + len(TDP43TurboID_15_LS1) 
                     + len(TDP43TurboID_15_Nuclear) + len(TDP43TurboID_saline_LP1) + len(TDP43TurboID_saline_LS1))    - len(TDP43TurboID_72_LP1))


Should be zero: 0


In [284]:
merge_df = TurboID_72_LP1.merge(TurboID_72_wholebrain, how = "inner", on = "Protein_Name")
merge_df = merge_df.merge(TurboID_72_LS1, how = "inner", on = "Protein_Name")
merge_df = merge_df.merge(TurboID_15_wholebrain, how = "inner", on = "Protein_Name")
# merge_df = merge_df.merge(TDP43TurboID_72_LP1, how = "inner", on = "Protein_Name")
merge_df = merge_df.merge(TDP43TurboID_72_wholebrain, how = "inner", on = "Protein_Name")
merge_df = merge_df.merge(TDP43TurboID_72_LS1, how = "inner", on = "Protein_Name")
merge_df = merge_df.merge(TDP43TurboID_15_LP1, how = "inner", on = "Protein_Name")
merge_df = merge_df.merge(TDP43TurboID_15_wholebrain, how = "inner", on = "Protein_Name")
merge_df = merge_df.merge(TDP43TurboID_15_LS1, how = "inner", on = "Protein_Name")
merge_df = merge_df.merge(TDP43TurboID_15_Nuclear, how = "inner", on = "Protein_Name")
merge_df = merge_df.merge(TDP43TurboID_saline_LP1, how = "inner", on = "Protein_Name")
merge_df = merge_df.merge(TDP43TurboID_saline_LS1, how = "inner", on = "Protein_Name")
merge_df

Unnamed: 0,Protein_Name,TurboID_72_LP1,TurboID_72_wholebrain,TurboID_72_LS1,TurboID_15_wholebrain,TDP43TurboID_72_wholebrain,TDP43TurboID_72_LS1,TDP43TurboID_15_LP1,TDP43TurboID_15_wholebrain,TDP43TurboID_15_LS1,TDP43TurboID_15_Nuclear,TDP43TurboID_saline_LP1,TDP43TurboID_saline_LS1
0,NFH_MOUSE,377871000.0,31793600.0,2131870.0,4852130.0,15509000.0,16590700.0,36090300.0,3218000.0,17141700.0,1197810.0,32875900.0,4808820.0
1,NFH_MOUSE,377871000.0,31793600.0,2131870.0,4852130.0,11760800.0,16590700.0,36090300.0,3218000.0,17141700.0,1197810.0,32875900.0,4808820.0
2,NFH_MOUSE,377871000.0,31793600.0,2131870.0,4852130.0,2382530.0,16590700.0,36090300.0,3218000.0,17141700.0,1197810.0,32875900.0,4808820.0
3,NFH_MOUSE,377871000.0,31793600.0,2131870.0,4852130.0,2382530.0,16590700.0,36090300.0,3218000.0,17141700.0,1197810.0,32875900.0,4808820.0
4,NFH_MOUSE,377871000.0,31793600.0,2131870.0,4852130.0,2382530.0,16590700.0,36090300.0,3218000.0,17141700.0,1197810.0,32875900.0,4808820.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
185959,NOE2_MOUSE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
185960,GARL3-2_MOUSE;GARL3_MOUSE,0.0,0.0,0.0,0.0,97530.7,93561.1,108678.0,0.0,490093.0,0.0,268032.0,0.0
185961,GARL3-2_MOUSE;GARL3_MOUSE,0.0,0.0,0.0,0.0,0.0,93561.1,108678.0,0.0,490093.0,0.0,268032.0,0.0
185962,GARL3-2_MOUSE;GARL3_MOUSE,0.0,0.0,0.0,0.0,97530.7,93561.1,108678.0,0.0,490093.0,0.0,268032.0,0.0


In [285]:
merge_df.describe()

Unnamed: 0,TurboID_72_LP1,TurboID_72_wholebrain,TurboID_72_LS1,TurboID_15_wholebrain,TDP43TurboID_72_wholebrain,TDP43TurboID_72_LS1,TDP43TurboID_15_LP1,TDP43TurboID_15_wholebrain,TDP43TurboID_15_LS1,TDP43TurboID_15_Nuclear,TDP43TurboID_saline_LP1,TDP43TurboID_saline_LS1
count,185964.0,185964.0,185964.0,185964.0,185964.0,185964.0,185964.0,185964.0,185964.0,185964.0,185964.0,185964.0
mean,1709280.0,446535.0,341834.0,440675.0,729570.8,932073.4,2469001.0,527283.8,7236318.0,629438.9,3464203.0,181417.4
std,14601370.0,5688293.0,4007230.0,3289367.0,5129691.0,4229897.0,8952643.0,3787972.0,32440630.0,6654092.0,12620720.0,1531939.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,33257.4,0.0,218700.0,0.0,413148.0,0.0,315507.0,0.0
50%,167123.0,20940.9,0.0,0.0,84161.6,178504.0,604325.0,0.0,1368370.0,0.0,881447.0,0.0
75%,543706.0,73704.1,0.0,46074.8,256992.0,504050.0,1601930.0,80012.4,4662490.0,0.0,2266070.0,0.0
max,377871000.0,274785000.0,152748000.0,98376600.0,273462000.0,100687000.0,161783000.0,90748200.0,899569000.0,171290000.0,248236000.0,57498600.0


In [286]:
merge_df["TurboID_72_LP1_pval"] = [stats.norm.sf(abs(x)) for x in stats.zscore(merge_df["TurboID_72_LP1"].to_list())]
merge_df["TurboID_72_wholebrain_pval"] = [stats.norm.sf(abs(x)) for x in stats.zscore(merge_df["TurboID_72_wholebrain"].to_list())]
merge_df["TurboID_72_LS1_pval"] = [stats.norm.sf(abs(x)) for x in stats.zscore(merge_df["TurboID_72_LS1"].to_list())]
merge_df["TurboID_15_wholebrain_pval"] = [stats.norm.sf(abs(x)) for x in stats.zscore(merge_df["TurboID_15_wholebrain"].to_list())]
merge_df["TDP43TurboID_72_wholebrain_pval"] = [stats.norm.sf(abs(x)) for x in stats.zscore(merge_df["TDP43TurboID_72_wholebrain"].to_list())]
merge_df["TDP43TurboID_72_LS1_pval"] = [stats.norm.sf(abs(x)) for x in stats.zscore(merge_df["TDP43TurboID_72_LS1"].to_list())]
merge_df["TDP43TurboID_15_LP1_pval"] = [stats.norm.sf(abs(x)) for x in stats.zscore(merge_df["TDP43TurboID_15_LP1"].to_list())]
merge_df["TDP43TurboID_15_wholebrain_pval"] = [stats.norm.sf(abs(x)) for x in stats.zscore(merge_df["TDP43TurboID_15_wholebrain"].to_list())]
merge_df["TDP43TurboID_15_LS1_pval"] = [stats.norm.sf(abs(x)) for x in stats.zscore(merge_df["TDP43TurboID_15_LS1"].to_list())]
merge_df["TDP43TurboID_15_Nuclear_pval"] = [stats.norm.sf(abs(x)) for x in stats.zscore(merge_df["TDP43TurboID_15_Nuclear"].to_list())]
merge_df["TDP43TurboID_saline_LP1_pval"] = [stats.norm.sf(abs(x)) for x in stats.zscore(merge_df["TDP43TurboID_saline_LP1"].to_list())]
merge_df["TDP43TurboID_saline_LS1_pval"] = [stats.norm.sf(abs(x)) for x in stats.zscore(merge_df["TDP43TurboID_saline_LS1"].to_list())]

merge_df

Unnamed: 0,Protein_Name,TurboID_72_LP1,TurboID_72_wholebrain,TurboID_72_LS1,TurboID_15_wholebrain,TDP43TurboID_72_wholebrain,TDP43TurboID_72_LS1,TDP43TurboID_15_LP1,TDP43TurboID_15_wholebrain,TDP43TurboID_15_LS1,...,TurboID_72_LS1_pval,TurboID_15_wholebrain_pval,TDP43TurboID_72_wholebrain_pval,TDP43TurboID_72_LS1_pval,TDP43TurboID_15_LP1_pval,TDP43TurboID_15_wholebrain_pval,TDP43TurboID_15_LS1_pval,TDP43TurboID_15_Nuclear_pval,TDP43TurboID_saline_LP1_pval,TDP43TurboID_saline_LS1_pval
0,NFH_MOUSE,377871000.0,31793600.0,2131870.0,4852130.0,15509000.0,16590700.0,36090300.0,3218000.0,17141700.0,...,0.327545,0.089939,0.001981,0.000107,0.000087,0.238749,0.380054,0.465965,0.009892,0.001261
1,NFH_MOUSE,377871000.0,31793600.0,2131870.0,4852130.0,11760800.0,16590700.0,36090300.0,3218000.0,17141700.0,...,0.327545,0.089939,0.015759,0.000107,0.000087,0.238749,0.380054,0.465965,0.009892,0.001261
2,NFH_MOUSE,377871000.0,31793600.0,2131870.0,4852130.0,2382530.0,16590700.0,36090300.0,3218000.0,17141700.0,...,0.327545,0.089939,0.373638,0.000107,0.000087,0.238749,0.380054,0.465965,0.009892,0.001261
3,NFH_MOUSE,377871000.0,31793600.0,2131870.0,4852130.0,2382530.0,16590700.0,36090300.0,3218000.0,17141700.0,...,0.327545,0.089939,0.373638,0.000107,0.000087,0.238749,0.380054,0.465965,0.009892,0.001261
4,NFH_MOUSE,377871000.0,31793600.0,2131870.0,4852130.0,2382530.0,16590700.0,36090300.0,3218000.0,17141700.0,...,0.327545,0.089939,0.373638,0.000107,0.000087,0.238749,0.380054,0.465965,0.009892,0.001261
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185959,NOE2_MOUSE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.466010,0.446713,0.443451,0.412798,0.391356,0.444646,0.411743,0.462318,0.391856,0.452866
185960,GARL3-2_MOUSE;GARL3_MOUSE,0.0,0.0,0.0,0.0,97530.7,93561.1,108678.0,0.0,490093.0,...,0.466010,0.446713,0.450969,0.421431,0.396026,0.444646,0.417631,0.462318,0.400038,0.452866
185961,GARL3-2_MOUSE;GARL3_MOUSE,0.0,0.0,0.0,0.0,0.0,93561.1,108678.0,0.0,490093.0,...,0.466010,0.446713,0.443451,0.421431,0.396026,0.444646,0.417631,0.462318,0.400038,0.452866
185962,GARL3-2_MOUSE;GARL3_MOUSE,0.0,0.0,0.0,0.0,97530.7,93561.1,108678.0,0.0,490093.0,...,0.466010,0.446713,0.450969,0.421431,0.396026,0.444646,0.417631,0.462318,0.400038,0.452866


In [303]:
def reset_columns(df):
    columns_list = df.columns.tolist()
    columns_list.remove("Protein_Name")
    columns_list.sort()
    columns_list = ["Protein_Name"] + columns_list
    return df[columns_list]

In [305]:
merge_df = reset_columns(merge_df)
merge_df

Unnamed: 0,Protein_Name,TDP43TurboID_15_LP1,TDP43TurboID_15_LP1_pval,TDP43TurboID_15_LS1,TDP43TurboID_15_LS1_pval,TDP43TurboID_15_Nuclear,TDP43TurboID_15_Nuclear_pval,TDP43TurboID_15_wholebrain,TDP43TurboID_15_wholebrain_pval,TDP43TurboID_72_LS1,...,TDP43TurboID_saline_LS1,TDP43TurboID_saline_LS1_pval,TurboID_15_wholebrain,TurboID_15_wholebrain_pval,TurboID_72_LP1,TurboID_72_LP1_pval,TurboID_72_LS1,TurboID_72_LS1_pval,TurboID_72_wholebrain,TurboID_72_wholebrain_pval
0,NFH_MOUSE,36090300.0,0.000087,17141700.0,0.380054,1197810.0,0.465965,3218000.0,0.238749,16590700.0,...,4808820.0,0.001261,4852130.0,0.089939,377871000.0,1.177892e-146,2131870.0,0.327545,31793600.0,1.785842e-08
1,NFH_MOUSE,36090300.0,0.000087,17141700.0,0.380054,1197810.0,0.465965,3218000.0,0.238749,16590700.0,...,4808820.0,0.001261,4852130.0,0.089939,377871000.0,1.177892e-146,2131870.0,0.327545,31793600.0,1.785842e-08
2,NFH_MOUSE,36090300.0,0.000087,17141700.0,0.380054,1197810.0,0.465965,3218000.0,0.238749,16590700.0,...,4808820.0,0.001261,4852130.0,0.089939,377871000.0,1.177892e-146,2131870.0,0.327545,31793600.0,1.785842e-08
3,NFH_MOUSE,36090300.0,0.000087,17141700.0,0.380054,1197810.0,0.465965,3218000.0,0.238749,16590700.0,...,4808820.0,0.001261,4852130.0,0.089939,377871000.0,1.177892e-146,2131870.0,0.327545,31793600.0,1.785842e-08
4,NFH_MOUSE,36090300.0,0.000087,17141700.0,0.380054,1197810.0,0.465965,3218000.0,0.238749,16590700.0,...,4808820.0,0.001261,4852130.0,0.089939,377871000.0,1.177892e-146,2131870.0,0.327545,31793600.0,1.785842e-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185959,NOE2_MOUSE,0.0,0.391356,0.0,0.411743,0.0,0.462318,0.0,0.444646,0.0,...,0.0,0.452866,0.0,0.446713,0.0,4.534050e-01,0.0,0.466010,0.0,4.687148e-01
185960,GARL3-2_MOUSE;GARL3_MOUSE,108678.0,0.396026,490093.0,0.417631,0.0,0.462318,0.0,0.444646,93561.1,...,0.0,0.452866,0.0,0.446713,0.0,4.534050e-01,0.0,0.466010,0.0,4.687148e-01
185961,GARL3-2_MOUSE;GARL3_MOUSE,108678.0,0.396026,490093.0,0.417631,0.0,0.462318,0.0,0.444646,93561.1,...,0.0,0.452866,0.0,0.446713,0.0,4.534050e-01,0.0,0.466010,0.0,4.687148e-01
185962,GARL3-2_MOUSE;GARL3_MOUSE,108678.0,0.396026,490093.0,0.417631,0.0,0.462318,0.0,0.444646,93561.1,...,0.0,0.452866,0.0,0.446713,0.0,4.534050e-01,0.0,0.466010,0.0,4.687148e-01


In [287]:
# test

l = [1,2,3,4,5]

# z score
stats.zscore(l)

# p score
p = [stats.norm.sf(abs(x)) for x in l]
p

[0.15865525393145707,
 0.022750131948179195,
 0.0013498980316300933,
 3.167124183311986e-05,
 2.866515718791933e-07]