![Cardiotoxicity](HeartFailureProjectTitle.jpg)
#Import Modules

In [None]:
import os
import sys
import time

import numpy as np
import pandas as pd
import xlrd
from scipy import stats

#Extract MALDI Raw Data

In [None]:
#Import MALDI File///////////////////////////////////////
# NOTE(review): xlrd 2.0+ no longer reads .xlsx workbooks; this cell presumably
# runs against xlrd<2.0 -- confirm the installed version.
filename='MALDI Raw.xlsx'
book=xlrd.open_workbook(filename)
# Report the absolute location of the workbook. The relative name is resolved
# against the working directory, so build the path from there rather than
# gluing sys.path[0] and the file name together without a separator.
print('Source file: '+os.path.abspath(filename)+' loaded!')


#Extraction
nsheets=book.nsheets
sheet_name='Grouped'
sheet_names=book.sheet_names()
# Look the worksheet up once and reuse it for every row read below.
sheet=book.sheet_by_name(sheet_name)
nrows=sheet.nrows

# Row 0 supplies the column names; every following row is a data record.
header=sheet.row_values(0)
data=[sheet.row_values(i) for i in range(1, nrows)]
df=pd.DataFrame(data, columns=header)
df=df.set_index('ID')
# Empty-string cells are treated as missing values.
df=df.replace('', np.nan)

#Extract LC Raw Data

In [None]:
#Import LC File/////////////////////////////////////////////
# NOTE(review): xlrd 2.0+ no longer reads .xlsx workbooks; this cell presumably
# runs against xlrd<2.0 -- confirm the installed version.
filename='LC Raw.xlsx'
book=xlrd.open_workbook(filename)
# Report the absolute location of the workbook. The relative name is resolved
# against the working directory, so build the path from there rather than
# gluing sys.path[0] and the file name together without a separator.
print('Source file: '+os.path.abspath(filename)+' loaded!')

#Extraction
nsheets=book.nsheets
sheet_name='Sheet1'
sheet_names=book.sheet_names()
# Look the worksheet up once and reuse it for every row read below.
sheet=book.sheet_by_name(sheet_name)
nrows=sheet.nrows

# Row 0 supplies the column names; every following row is a data record.
header=sheet.row_values(0)
data=[sheet.row_values(i) for i in range(1, nrows)]
dflc_raw=pd.DataFrame(data, columns=header)

# Keep only rows that carry both a peptide sequence and a protein accession.
dflc=dflc_raw[(dflc_raw['Sequence']!='') & (dflc_raw['Protein Group Accessions']!='')]

# Empty-string cells are treated as missing values from here on.
dflc=dflc.replace('', np.nan)

# Keep a row when at least one of the ion-score columns reaches the threshold.
min_ion_score=20
ion_score_columns=['IonScore A2', 'IonScore B2', 'IonScore C2', 'IonScore D2']
passes_score=dflc[ion_score_columns[0]]>=min_ion_score
for score_column in ion_score_columns[1:]:
    passes_score=passes_score | (dflc[score_column]>=min_ion_score)
dflc=dflc[passes_score]
# reset_index() keeps the previous row labels as an 'index' column.
dflc=dflc.reset_index()

#Matching
- Match MALDI data with LC data by molecular weights

In [None]:
# Every column after the first three of the MALDI table is treated as a peak.
peaks=list(df.columns)[3:]

mhs=list(dflc['MH+ [Da]'])

# Pair each LC mass with every MALDI peak that lies less than 0.5 away from it.
# Pairs are ordered by LC mass first, then by peak.
matched_combo=[
    (peak, mh)
    for mh in mhs
    for peak in peaks
    if abs(mh-peak)<0.5
]

# Parallel lists holding the two halves of every matched pair.
matched_peaks=[pair[0] for pair in matched_combo]
matched_mh=[pair[1] for pair in matched_combo]

#Group and T-test
- We use different clinical indices to group the samples for each peak;
- Then we run a t-test for each peak

In [None]:
print('Parsing...')
# One result dictionary per clinical grouping, keyed by peak. Each entry holds
# the t statistic ('t'), the p-value ('p') and the LC annotation of the peak
# ('Sequence', 'Group', 'Modifications').
ttest_rehospitalization={}
ttest_mortality={}
ttest_transplantation={}
ttest_triple={}

ttest_rm={}
ttest_rt={}
ttest_mt={}


def _outcome_masks(frame, outcomes):
    """Build the two row masks that a t-test compares for a set of outcomes.

    Parameters:
        frame: table whose `outcomes` columns hold 'Yes'/'No' labels.
        outcomes: non-empty list of column names in `frame`.

    Returns:
        (positive, negative) boolean Series. `positive` marks rows where at
        least one outcome equals 'Yes'; `negative` marks rows where every
        outcome equals 'No'. Rows that satisfy neither are in no group.
    """
    positive=frame[outcomes[0]]=='Yes'
    negative=frame[outcomes[0]]=='No'
    for outcome in outcomes[1:]:
        positive=positive | (frame[outcome]=='Yes')
        negative=negative & (frame[outcome]=='No')
    return positive, negative


# The row masks do not depend on the peak, so build them once up front.
groupings=[
    (ttest_rehospitalization, _outcome_masks(df, ['Rehospitalization'])),
    (ttest_mortality, _outcome_masks(df, ['Mortality'])),
    (ttest_transplantation, _outcome_masks(df, ['Transplantation'])),
    (ttest_rm, _outcome_masks(df, ['Rehospitalization', 'Mortality'])),
    (ttest_rt, _outcome_masks(df, ['Rehospitalization', 'Transplantation'])),
    (ttest_mt, _outcome_masks(df, ['Mortality', 'Transplantation'])),
    (ttest_triple, _outcome_masks(df, ['Rehospitalization', 'Mortality', 'Transplantation'])),
]

for peak, mh in matched_combo:
    # The first LC row with this exact MH+ value supplies the annotation;
    # look it up once per pair instead of once per field per grouping.
    lc_rows=dflc[dflc['MH+ [Da]']==mh]
    sequence=lc_rows['Sequence'].iloc[0]
    accession=lc_rows['Protein Group Accessions'].iloc[0]
    modifications=lc_rows['Modifications'].iloc[0]

    for results, (positive, negative) in groupings:
        # NOTE(review): ttest_ind propagates NaN by default, so a peak with a
        # missing intensity in either group yields nan for t and p -- confirm
        # whether nan_policy='omit' is wanted here.
        t_stat, p_value=stats.ttest_ind(df.loc[positive, peak], df.loc[negative, peak])
        # A peak matched by several LC masses is overwritten; the last one wins.
        results[peak]={
            't': t_stat,
            'p': p_value,
            'Sequence': sequence,
            'Group': accession,
            'Modifications': modifications,
        }

#Gather the t-test of the matched peaks
- dfr: dataframe of **Rehospitalization**
- dfm: dataframe of **Mortality**
- dft: dataframe of **Transplantation**

- dfrm: dataframe of **Rehospitalization** or **Mortality** (either one is positive)
- dfrt: dataframe of **Rehospitalization** or **Transplantation**
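- dfmt: dataframe of **Mortality** or **Transplantation**
- dftriple: dataframe of **Rehospitalization**, **Mortality** or **Transplantation** (any one is positive)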

In [None]:
# Turn every result dictionary into a table with one row per peak: the outer
# keys (peaks) become columns first and are then transposed into rows.
dfr, dfm, dft, dftriple=(
    pd.DataFrame(results).transpose()
    for results in (ttest_rehospitalization, ttest_mortality,
                    ttest_transplantation, ttest_triple)
)

# Same conversion for the pairwise groupings.
dfrm, dfrt, dfmt=(
    pd.DataFrame(results).transpose()
    for results in (ttest_rm, ttest_rt, ttest_mt)
)

In [None]:
# One row per matched (peak, MH+) pair; the two columns keep pandas' default labels.
dfmatched=pd.DataFrame(data=matched_combo)

#Merge the dataframes above together

In [None]:
# Place the per-grouping tables side by side; `keys` adds a top column level
# naming the grouping each block of columns came from. The labels are listed
# in the same order as the tables.
dfmerged=pd.concat(
    [dfr, dfm, dft, dfrm, dfrt, dfmt, dftriple],
    axis=1,
    keys=[
        'Rehospitalization',
        'Mortality',  # was misspelled 'Mortility' in the exported header
        'Transplantation',
        'Rehospitalization + Mortality',
        'Rehospitalization + Transplantation',
        'Mortality + Transplantation',
        'Triple',
    ])

#Export the file

In [None]:
# Write each table to its CSV file in the working directory.
for table, csv_name in ((dfmatched, 'Matched Peaks.csv'),
                        (dfmerged, 'Ttest Results.csv')):
    table.to_csv(csv_name)

print('File Exported!')