In [1]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import preprocessing

In [2]:
# Read in csv (already has a geometric mean normalization applied)
# NOTE(review): this is an absolute, user-specific path and the recorded run of this
# cell ended in FileNotFoundError; consider a path relative to a configurable data dir.
df = pd.read_csv("/Users/jordanberg/Desktop/Sara/sara_collab_7nov18/MtFAS_raw.csv",sep=",",index_col=0)

# Drop rows that contain NA. The index is moved into a regular column first so that a
# row whose index label itself is missing is dropped too, then restored as the index.
# (The former bare `df.reset_index()` statement discarded its result and was removed.)
df_dropped = df.reset_index().dropna().set_index('index')

FileNotFoundError: File b'/Users/jordanberg/Desktop/Sara/sara_collab_7nov18/MtFAS_raw.csv' does not exist

In [None]:
# Remove the two O2.9 sample columns, add a 1e-7 offset and log10-transform.
# NOTE(review): the offset presumably keeps log10 finite for zero intensities -- confirm.
df_dropped = np.log10(df_dropped.drop(columns=['O2.9-1', 'O2.9-2']) + 1e-7)

#Scale proteins
# Standardise every row (one protein across all samples) to zero mean / unit variance.
# A new frame is built instead of assigning into a transposed copy: that assignment can
# share memory with df_dropped and breaks when a protein label occurs more than once.
df_scaled = pd.DataFrame(
    preprocessing.scale(df_dropped, axis=1),
    index=df_dropped.index,
    columns=df_dropped.columns,
)


In [None]:
# Select proteins of interest
# Each list holds the row labels of one protein group. The nested groups are
# consistent with each other: pp_module == pp1_module + pp2_module,
# pd_module == pd1_module + pd2_module, and p_module has the same members as
# pp_module + pd_module.
n_module = ['Ndufv1','Ndufv2','Ndufs1','Ndufa2','Ndufs4',
            'Ndufa12','Ndufs6','Ndufv3']
q_module = ['Ndufa6','Ndufa7','Ndufa5','Ndufs2',
            'Ndufs3','Ndufs7','Ndufs8','Ndufa9']
# 'Ndufs5' used to be listed twice here; the trailing duplicate was removed so that
# selecting by this list no longer returns that row twice.
p_module = ['Mtnd1','Ndufa3','Ndufa8','Ndufa13','Ndufa1',
            'Mtnd3','Mtnd4l','Ndufc2','Ndufa10','Ndufs5',
            'Ndufa11','Ndufb6','Ndufb5','Ndufb10','Ndufb11',
            'Mtnd4','Ndufb4','Ndufb7','Ndufb3','Ndufb8',
            'Ndufb9','Ndufb2']
pp_module = ['Mtnd1','Ndufa3','Ndufa8','Ndufa13','Ndufa1',
             'Mtnd4l','Mtnd3','Ndufc2','Ndufa10','Ndufs5',
             'Ndufa11']
pp1_module = ['Mtnd1','Ndufa3','Ndufa8','Ndufa13','Ndufa1']
pp2_module = ['Mtnd4l','Mtnd3','Ndufc2','Ndufa10','Ndufs5',
              'Ndufa11']
pd_module = ['Ndufb6','Ndufb5','Ndufb10','Ndufb11','Mtnd4',
             'Ndufb4','Ndufb7','Ndufb3','Ndufb8','Ndufb9',
             'Ndufb2']
pd1_module = ['Ndufb6','Ndufb5','Ndufb10','Ndufb11','Mtnd4',
             'Ndufb4']
pd2_module = ['Ndufb7','Ndufb3','Ndufb8','Ndufb9','Ndufb2']
complex_ii = ['Sdha','Sdhb','Sdhc']
# NOTE(review): 'Mt-CyB' is spelled differently from the other 'Mt...' labels
# (e.g. 'Mtnd1', 'Mtco1') -- confirm it matches the index of the input table.
complex_iii = ['Uqcrc2','Uqcrc1','Uqcrb','Uqcrq','Cyc1','Mt-CyB',
               'Uqcr10','Uqcrh','Uqcrfs1']
complex_iv = ['Mtco1','Cox5a','Cox5b','Cox6c','Mtco2',
              'Cox7a2','Cox7c','Cox6b1','Cox7a2l','Ndufa4',
              'Cox15']
complex_v = ['Atp5a1','Atp5b','Atp5c1','Atp5e','Atp5g2',
             'Mtatp6','Mtatp8','Atp5h','Atp5i','Atp5f1',
             'Atp5d','Atp5j2','Atp5j','Atp5l','Atp5o',
             'Atpif1']
fes_cluster = ['Lyrm4','Nfs1','Iscu']
lyr_proteins = ['Ndufa6','Ndufb9','Lyrm4']
lyr_targets = ['Sdhb','Uqcrfs1']

# Flatten the selected groups, in display order, into one list of row labels and
# pull those rows out of the scaled table.
protein_list = [
    protein
    for group in (
        n_module, q_module, pp_module, pd_module,
        complex_ii, complex_iii, complex_iv, complex_v,
        fes_cluster,
    )
    for protein in group
]
df_mtfas = df_scaled.loc[protein_list, :]

In [None]:
# Heatmap
# Tall 5x25 figure; diverging palette centred on 0, every column labelled and thin
# light-grey lines drawn between cells.
plt.figure(figsize=(5, 25))
ax = sns.heatmap(
    data=df_mtfas,
    center=0,
    cmap="RdBu_r",
    linecolor='#DCDCDC',
    linewidths=.1,
    xticklabels=True,
)

In [None]:
import plotfun as pf

In [None]:
# NA-free copy of the raw table for the volcano analysis. It is rebuilt from `df`,
# so it still contains every sample column of the input file.
# (The former bare `df.reset_index()` statement discarded its result and was removed.)
df_volc = df.reset_index().dropna().set_index('index')

In [None]:
# Average every run of four consecutive columns (one run per cell line).
# NOTE(review): assumes the columns are ordered GFP x4, MECR x4, OXSM x4; any further
# columns land in an extra group that keeps its integer name 3 -- confirm against the input.
replicate_group = np.arange(len(df_volc.columns)) // 4
# Group the transposed frame by row instead of using groupby(axis=1), which is
# deprecated in recent pandas; transposing back restores proteins as rows.
df_scaled_avg = df_volc.T.groupby(replicate_group).mean().T
# Cast the index labels to str and give the averaged columns their cell-line names.
df_scaled_avg = df_scaled_avg.rename(index=str, columns={0: "GFP", 1: "MECR",2:"OXSM"})

In [None]:
# Take data of interest
# Keep the first row for each index label, then drop proteins with NA values.
# Removing rows cannot re-introduce duplicate labels, so one de-duplication pass
# gives the same table as de-duplicating again afterwards.
is_first_occurrence = ~df_scaled_avg.index.duplicated()
df_scaled_avg2 = df_scaled_avg.loc[is_first_occurrence].dropna()

In [None]:
import scipy.stats as stats
import sys


def _neg_log10_anova_p(treated, control):
    """Return -log10 of the row-wise one-way ANOVA p-value of `treated` vs `control`.

    Parameters
    ----------
    treated, control : pandas.DataFrame
        Same rows in the same order, one column per replicate.

    Returns
    -------
    pandas.Series
        Float series indexed like `treated`; NaN where the test is undefined.
    """
    p_values = [
        stats.f_oneway(treated_row, control_row)[1]
        for treated_row, control_row in zip(treated.values.tolist(), control.values.tolist())
    ]
    return pd.Series(-np.log10(np.asarray(p_values, dtype=float)), index=treated.index)


# Calculate fold change using average
df_mecr = np.log2(df_scaled_avg2["MECR"] / df_scaled_avg2["GFP"]).to_frame('log2 Fold Change')
df_oxsm = np.log2(df_scaled_avg2["OXSM"] / df_scaled_avg2["GFP"]).to_frame('log2 Fold Change')

# Calculate p-value using 1-way ANOVA with replicates and append to df_...
# The replicate table gets str labels and keeps only the first row per label, exactly
# like the averaged table, so each p-value comes from the same row as its fold change
# (setting values label-by-label let the last duplicate row overwrite the first).
replicates = df_volc.rename(index=str)
replicates = replicates.loc[~replicates.index.duplicated()]
gfp_reps = replicates.iloc[:, 0:4]
mecr_reps = replicates.iloc[:, 4:8]
# Columns 8-11 are the four columns averaged into "OXSM"; an open-ended slice would
# also pull in any further sample columns that are not part of that average.
# NOTE(review): confirm the column layout of the input table.
oxsm_reps = replicates.iloc[:, 8:12]

# Assigning a Series aligns on the index and yields a float column (NaN where no
# p-value exists) instead of an object column pre-filled with empty strings.
df_mecr['-log10 P-Value'] = _neg_log10_anova_p(mecr_reps, gfp_reps)
df_oxsm['-log10 P-Value'] = _neg_log10_anova_p(oxsm_reps, gfp_reps)

# Drop proteins with an undefined fold change or p-value, then write the full table
# and the subset listed in protein_list for each comparison.
df_mecr = df_mecr.dropna()
df_mecr.to_csv('df_mecr_volcano.csv',sep=",")
df_mecr_resp = df_mecr.loc[protein_list]
df_mecr_resp.to_csv('df_mecr_resp_volcano.csv',sep=",")
df_oxsm = df_oxsm.dropna()
df_oxsm.to_csv('df_oxsm_volcano.csv',sep=",")
df_oxsm_resp = df_oxsm.loc[protein_list]
df_oxsm_resp.to_csv('df_oxsm_resp_volcano.csv',sep=",")

In [None]:
import seaborn as sns
# Import pyplot, not the bare matplotlib package, so `plt` stays the pyplot interface.
import matplotlib.pyplot as plt

# NOTE(review): volcano_etc and volcano_stem are not read anywhere in this cell; they
# are kept unchanged in case another cell relies on them.
volcano_etc = [
    'Ndufv1', 'Ndufv2', 'Ndufs1', 'Ndufa2', 'Ndufs4',
    'Ndufa12', 'Ndufs6', 'Ndufa6', 'Ndufa7', 'Ndufa5',
    'Ndufs2', 'Ndufs3', 'Ndufs7', 'Ndufs8', 'Ndufa8',
    'Ndufa1', 'Ndufa11', 'Ndufb6', 'Ndufb4', 'Ndufb3',
    'Ndufb8', 'Ndufb9', 'Ndufb2', 'Sdha', 'Sdhb',
    'Sdhc', 'Uqcrfs1', 'Cox5a', 'Cox5b', 'Cox6b1',
    'Atpif1', 'Lyrm4', 'Nfs1', 'Iscu',
]

volcano_stem = [
    'Cdh15', 'Cd34', 'MyoD', 'Tnnt3', 'Tnni2',
    'CD44', 'Tnnt2', 'Tnni1', 'Tnnc1', 'Myh1',
    'Mb', 'Chdh', 'Mylpf', 'Myh3', 'Acta1',
    'Myl1', 'Myl4', 'Myl6b', 'Sorbs2', 'Csrp3',
    'Nrap', 'Klh141', 'Ccdc141', 'Neb', 'Bin1',
    'Ckm', 'Ckb', 'Ank1', 'Trim72', 'Itga7',
    'Akap6', 'Dusp27',
]


# Scatter plot log2FoldChange vs. log10P-Value
# Both layers are drawn on one explicit Axes: all proteins in grey, then the rows
# listed in protein_list on top in dark red.
fig, ax = plt.subplots()
sns.scatterplot(x='log2 Fold Change', y='-log10 P-Value', data=df_mecr,
                color='LightGray', label="All Proteins", ax=ax)
sns.scatterplot(x='log2 Fold Change', y='-log10 P-Value', data=df_mecr.loc[protein_list],
                color='DarkRed', label="Respiratory Complexes", ax=ax)

# set_title returns the title Text, so its result is not assigned back to `ax`.
ax.set_title('MECRvWT_ETC')
fig.savefig("MECRvWT_ETC_volcano_mono", dpi=600)

In [None]:
import seaborn as sns
# Import pyplot, not the bare matplotlib package, so `plt` stays the pyplot interface.
import matplotlib.pyplot as plt

# NOTE(review): volcano_etc and volcano_stem are not read anywhere in this cell; they
# are kept unchanged in case another cell relies on them.
volcano_etc = [
    'Ndufv1', 'Ndufv2', 'Ndufs1', 'Ndufa2', 'Ndufs4',
    'Ndufa12', 'Ndufs6', 'Ndufa6', 'Ndufa7', 'Ndufa5',
    'Ndufs2', 'Ndufs3', 'Ndufs7', 'Ndufs8', 'Ndufa8',
    'Ndufa1', 'Ndufa11', 'Ndufb6', 'Ndufb4', 'Ndufb3',
    'Ndufb8', 'Ndufb9', 'Ndufb2', 'Sdha', 'Sdhb',
    'Sdhc', 'Uqcrfs1', 'Cox5a', 'Cox5b', 'Cox6b1',
    'Atpif1', 'Lyrm4', 'Nfs1', 'Iscu',
]

volcano_stem = [
    'Cdh15', 'Cd34', 'MyoD', 'Tnnt3', 'Tnni2',
    'CD44', 'Tnnt2', 'Tnni1', 'Tnnc1', 'Myh1',
    'Mb', 'Chdh', 'Mylpf', 'Myh3', 'Acta1',
    'Myl1', 'Myl4', 'Myl6b', 'Sorbs2', 'Csrp3',
    'Nrap', 'Klh141', 'Ccdc141', 'Neb', 'Bin1',
    'Ckm', 'Ckb', 'Ank1', 'Trim72', 'Itga7',
    'Akap6', 'Dusp27',
]


# Scatter plot log2FoldChange vs. log10P-Value
# Both layers are drawn on one explicit Axes: all proteins in grey, then the rows
# listed in protein_list on top in dark red.
fig, ax = plt.subplots()
sns.scatterplot(x='log2 Fold Change', y='-log10 P-Value', data=df_oxsm,
                color='LightGray', label="All Proteins", ax=ax)
sns.scatterplot(x='log2 Fold Change', y='-log10 P-Value', data=df_oxsm.loc[protein_list],
                color='DarkRed', label="Respiratory Complexes", ax=ax)

# set_title returns the title Text, so its result is not assigned back to `ax`.
ax.set_title('OXSMvWT_ETC')
fig.savefig("OXSMvWT_ETC_volcano_mono", dpi=600)

In [None]:
import seaborn as sns
# Import pyplot, not the bare matplotlib package, so `plt` stays the pyplot interface.
import matplotlib.pyplot as plt

# NOTE(review): volcano_etc and volcano_stem are not read anywhere in this cell; they
# are kept unchanged in case another cell relies on them.
volcano_etc = [
    'Ndufv1', 'Ndufv2', 'Ndufs1', 'Ndufa2', 'Ndufs4',
    'Ndufa12', 'Ndufs6', 'Ndufa6', 'Ndufa7', 'Ndufa5',
    'Ndufs2', 'Ndufs3', 'Ndufs7', 'Ndufs8', 'Ndufa8',
    'Ndufa1', 'Ndufa11', 'Ndufb6', 'Ndufb4', 'Ndufb3',
    'Ndufb8', 'Ndufb9', 'Ndufb2', 'Sdha', 'Sdhb',
    'Sdhc', 'Uqcrfs1', 'Cox5a', 'Cox5b', 'Cox6b1',
    'Atpif1', 'Lyrm4', 'Nfs1', 'Iscu',
]

volcano_stem = [
    'Cdh15', 'Cd34', 'MyoD', 'Tnnt3', 'Tnni2',
    'CD44', 'Tnnt2', 'Tnni1', 'Tnnc1', 'Myh1',
    'Mb', 'Chdh', 'Mylpf', 'Myh3', 'Acta1',
    'Myl1', 'Myl4', 'Myl6b', 'Sorbs2', 'Csrp3',
    'Nrap', 'Klh141', 'Ccdc141', 'Neb', 'Bin1',
    'Ckm', 'Ckb', 'Ank1', 'Trim72', 'Itga7',
    'Akap6', 'Dusp27',
]

# The module lists overlap (p_module repeats the pp*/pd* members), so the combined
# selection is de-duplicated, order preserved, and each protein is drawn once.
respiratory_proteins = list(dict.fromkeys(
    n_module + q_module + p_module
    + pp_module + pp1_module + pp2_module
    + pd_module + pd1_module + pd2_module
    + complex_ii + complex_iii + complex_iv + complex_v
))

# Scatter plot log2FoldChange vs. log10P-Value
# Layers are drawn bottom to top on one explicit Axes: all proteins, the respiratory
# selection, then the three highlighted groups.
fig, ax = plt.subplots()
sns.scatterplot(x='log2 Fold Change', y='-log10 P-Value', data=df_mecr,
                color='LightGray', label="All Proteins", ax=ax)
sns.scatterplot(x='log2 Fold Change', y='-log10 P-Value', data=df_mecr.loc[respiratory_proteins],
                color='DarkRed', label="Respiratory Complexes", ax=ax)

# Legend label spelling corrected from "Sulfer" to "Sulfur".
sns.scatterplot(x='log2 Fold Change', y='-log10 P-Value', data=df_mecr.loc[fes_cluster],
                color='Green', label="Iron Sulfur Cluster", ax=ax)
sns.scatterplot(x='log2 Fold Change', y='-log10 P-Value', data=df_mecr.loc[lyr_proteins],
                color='Blue', label="LYR Proteins", ax=ax)
sns.scatterplot(x='log2 Fold Change', y='-log10 P-Value', data=df_mecr.loc[lyr_targets],
                color='Orange', label="LYR Targets", ax=ax)

# set_title returns the title Text, so its result is not assigned back to `ax`.
ax.set_title('MECRvWT_ETC_wothers')
fig.savefig("MECRvWT_ETC_volcano_wothers_mono", dpi=600)

In [None]:
import seaborn as sns
# Import pyplot, not the bare matplotlib package, so `plt` stays the pyplot interface.
import matplotlib.pyplot as plt

# NOTE(review): volcano_etc and volcano_stem are not read anywhere in this cell; they
# are kept unchanged in case another cell relies on them.
volcano_etc = [
    'Ndufv1', 'Ndufv2', 'Ndufs1', 'Ndufa2', 'Ndufs4',
    'Ndufa12', 'Ndufs6', 'Ndufa6', 'Ndufa7', 'Ndufa5',
    'Ndufs2', 'Ndufs3', 'Ndufs7', 'Ndufs8', 'Ndufa8',
    'Ndufa1', 'Ndufa11', 'Ndufb6', 'Ndufb4', 'Ndufb3',
    'Ndufb8', 'Ndufb9', 'Ndufb2', 'Sdha', 'Sdhb',
    'Sdhc', 'Uqcrfs1', 'Cox5a', 'Cox5b', 'Cox6b1',
    'Atpif1', 'Lyrm4', 'Nfs1', 'Iscu',
]

volcano_stem = [
    'Cdh15', 'Cd34', 'MyoD', 'Tnnt3', 'Tnni2',
    'CD44', 'Tnnt2', 'Tnni1', 'Tnnc1', 'Myh1',
    'Mb', 'Chdh', 'Mylpf', 'Myh3', 'Acta1',
    'Myl1', 'Myl4', 'Myl6b', 'Sorbs2', 'Csrp3',
    'Nrap', 'Klh141', 'Ccdc141', 'Neb', 'Bin1',
    'Ckm', 'Ckb', 'Ank1', 'Trim72', 'Itga7',
    'Akap6', 'Dusp27',
]

# The module lists overlap (p_module repeats the pp*/pd* members), so the combined
# selection is de-duplicated, order preserved, and each protein is drawn once.
respiratory_proteins = list(dict.fromkeys(
    n_module + q_module + p_module
    + pp_module + pp1_module + pp2_module
    + pd_module + pd1_module + pd2_module
    + complex_ii + complex_iii + complex_iv + complex_v
))

# Scatter plot log2FoldChange vs. log10P-Value
# Layers are drawn bottom to top on one explicit Axes: all proteins, the respiratory
# selection, then the three highlighted groups.
fig, ax = plt.subplots()
sns.scatterplot(x='log2 Fold Change', y='-log10 P-Value', data=df_oxsm,
                color='LightGray', label="All Proteins", ax=ax)
sns.scatterplot(x='log2 Fold Change', y='-log10 P-Value', data=df_oxsm.loc[respiratory_proteins],
                color='DarkRed', label="Respiratory Complexes", ax=ax)

# Legend label spelling corrected from "Sulfer" to "Sulfur".
sns.scatterplot(x='log2 Fold Change', y='-log10 P-Value', data=df_oxsm.loc[fes_cluster],
                color='Green', label="Iron Sulfur Cluster", ax=ax)
sns.scatterplot(x='log2 Fold Change', y='-log10 P-Value', data=df_oxsm.loc[lyr_proteins],
                color='Blue', label="LYR Proteins", ax=ax)
sns.scatterplot(x='log2 Fold Change', y='-log10 P-Value', data=df_oxsm.loc[lyr_targets],
                color='Orange', label="LYR Targets", ax=ax)

# set_title returns the title Text, so its result is not assigned back to `ax`.
ax.set_title('OXSMvWT_ETC_wothers')
fig.savefig("OXSMvWT_ETC_volcano_wothers_mono", dpi=600)