# Repeated Measures Correlations

Versions:
- pandas 0.24.2
- pingouin 0.2.5

In [1]:
import pandas as pd
import pingouin as pg

%matplotlib inline

In [2]:
# Read the two tab-separated input tables. '#SampleID' is parsed as a string
# and then becomes the index of each frame.
mapping = pd.read_csv(
    'Data_Files/Metadata_Metabolome.txt', sep='\t', dtype={'#SampleID': str}
).set_index('#SampleID')

distances_d0 = pd.read_csv(
    'Data_Files/FirstDifferences_Baseline.txt', sep='\t', dtype={'#SampleID': str}
).set_index('#SampleID')

In [3]:
# Rename the 'Distance' column to 'Distance_d0'; all other columns keep their names.
distances_d0 = distances_d0.rename(columns={'Distance': 'Distance_d0'})

In [4]:
# Index-on-index merge with the default inner join: only index labels present
# in both frames are kept.
mapping_dist = mapping.merge(distances_d0, left_index=True, right_index=True)

In [5]:
# Keep only the rows whose 'time_point' is one of 2-9, for both the plain
# table and the table merged with the distances.
kept_time_points = [2, 3, 4, 5, 6, 7, 8, 9]
met_subset = mapping.loc[mapping['time_point'].isin(kept_time_points)]
met_subset_dist = mapping_dist.loc[mapping_dist['time_point'].isin(kept_time_points)]

## Correlation with Alpha Diversity

In [6]:
# Columns that are each correlated against 'observed_otus'.
metabolites = [
    'time_point',
    'creatinine',
    'CAF_caffeine',
    'DEX_dextromethorphan',
    'MDZ_midazolam',
    'CAF_methylxanthine',
    'DEX_3hydroxymorphinan',
    'DEX_3methoxymorphinan',
    'MDZ_5hydroxyMDZ',
    'MDZ_hydroxyMDZ_glucuronide',
    'OME1_Omeprazole',
    'OME3_Omeprazole_sulfide',
    'OME4_Omeprazole_sulfone',
    'OME5_Hydroxyomeprazole',
    'OME7_Hydroxyomeprazole_sulfide',
    'OME11_Carboxyomeprazole',
    'OME13_Carboxyomeprazole_sulfide',
    'OME15_Carboxyomeprazole_sulfone',
    'OME17_5_O_desmethylomeprazole',
    'OME1920_O_desmethylomeprazole_sulfide',
    'OME27_Omeprazole_sulfide_O_glucuronide',
    'OME29_Omeprazole_sulfide_4_O_glucuronide',
]

In [7]:
# Empty results frame with one column per result field; rows are added by the
# correlation loop that follows.
df_subset = pd.DataFrame({column: [] for column in ('r', 'dof', 'pval', 'CI95%', 'power')})

In [8]:
# Repeated-measures correlation between 'observed_otus' and every variable in
# `metabolites`, with 'host_subject_id' as the subject.
# The per-variable results are gathered first and concatenated in one call:
# `DataFrame.append` is deprecated since pandas 1.4 and removed in 2.0, and
# appending inside the loop re-copies the growing frame on every iteration.
# `df_subset` stays first in the list so its columns (and any rows it already
# holds) are preserved exactly as they were with the repeated appends.
df_subset = pd.concat(
    [df_subset]
    + [
        pg.rm_corr(data=met_subset, x='observed_otus', y=metabolite, subject='host_subject_id')
        for metabolite in metabolites
    ]
)

In [9]:
# Label each result row with its variable; rows were produced in the same
# order as `metabolites`, so the assignment is positional.
df_subset['name'] = metabolites

In [10]:
# Flag each correlation as 'TRUE' when its p-value is <= 0.05 and 'FALSE'
# otherwise. The comparison is written as `p <= 0.05` (not "else of p > 0.05")
# so that a NaN p-value is labelled 'FALSE' rather than being reported as
# significant. A plain list is assigned, so the values are placed by position.
df_subset['sig'] = ['TRUE' if p <= 0.05 else 'FALSE' for p in df_subset['pval']]

In [11]:
# Preview the first rows of the results table.
df_subset.head()

Unnamed: 0,r,dof,pval,CI95%,power,name,sig
rm_corr,-0.607,75.0,4.86519e-09,"[-0.73, -0.44]",1.0,time_point,True
rm_corr,-0.158,75.0,0.1699088,"[-0.37, 0.07]",0.281,creatinine,False
rm_corr,-0.373,75.0,0.0008305643,"[-0.55, -0.16]",0.924,CAF_caffeine,True
rm_corr,-0.132,75.0,0.2529104,"[-0.35, 0.09]",0.209,DEX_dextromethorphan,False
rm_corr,-0.422,75.0,0.0001295494,"[-0.59, -0.22]",0.974,MDZ_midazolam,True


In [12]:
# Save the results as a tab-separated file; `index` keeps its default, so the
# frame's index is written as the first column.
df_subset.to_csv(
    path_or_buf='Final_Figures/RM_Corr/Correlation_Observed_OTUs.txt',
    sep='\t',
)

## Correlation with Time

In [13]:
# Columns that are each correlated against 'time_point'.
metabolites_time = [
    'observed_otus',
    'creatinine',
    'CAF_caffeine',
    'DEX_dextromethorphan',
    'MDZ_midazolam',
    'CAF_methylxanthine',
    'DEX_3hydroxymorphinan',
    'DEX_3methoxymorphinan',
    'MDZ_5hydroxyMDZ',
    'MDZ_hydroxyMDZ_glucuronide',
    'OME1_Omeprazole',
    'OME3_Omeprazole_sulfide',
    'OME4_Omeprazole_sulfone',
    'OME5_Hydroxyomeprazole',
    'OME7_Hydroxyomeprazole_sulfide',
    'OME11_Carboxyomeprazole',
    'OME13_Carboxyomeprazole_sulfide',
    'OME15_Carboxyomeprazole_sulfone',
    'OME17_5_O_desmethylomeprazole',
    'OME1920_O_desmethylomeprazole_sulfide',
    'OME27_Omeprazole_sulfide_O_glucuronide',
    'OME29_Omeprazole_sulfide_4_O_glucuronide',
]

In [14]:
# Empty results frame with one column per result field; rows are added by the
# correlation loop that follows.
df_time = pd.DataFrame({column: [] for column in ('r', 'dof', 'pval', 'CI95%', 'power')})

In [15]:
# Repeated-measures correlation between 'time_point' and every variable in
# `metabolites_time`, with 'host_subject_id' as the subject.
# The per-variable results are gathered first and concatenated in one call:
# `DataFrame.append` is deprecated since pandas 1.4 and removed in 2.0, and
# appending inside the loop re-copies the growing frame on every iteration.
# `df_time` stays first in the list so its columns (and any rows it already
# holds) are preserved exactly as they were with the repeated appends.
df_time = pd.concat(
    [df_time]
    + [
        pg.rm_corr(data=met_subset, x='time_point', y=metabolite, subject='host_subject_id')
        for metabolite in metabolites_time
    ]
)

In [16]:
# Label each result row with its variable; rows were produced in the same
# order as `metabolites_time`, so the assignment is positional.
df_time['name'] = metabolites_time

In [17]:
# Flag each correlation as 'TRUE' when its p-value is <= 0.05 and 'FALSE'
# otherwise. The comparison is written as `p <= 0.05` (not "else of p > 0.05")
# so that a NaN p-value is labelled 'FALSE' rather than being reported as
# significant. A plain list is assigned, so the values are placed by position.
df_time['sig'] = ['TRUE' if p <= 0.05 else 'FALSE' for p in df_time['pval']]

In [18]:
# Save the results as a tab-separated file; `index` keeps its default, so the
# frame's index is written as the first column.
df_time.to_csv(
    path_or_buf='Final_Figures/RM_Corr/Correlation_Time_Point_ObsOTU.txt',
    sep='\t',
)

## Correlation with Distance to Baseline

In [19]:
# Columns that are each correlated against 'Distance_d0'.
metabolites_dist = [
    'time_point',
    'observed_otus',
    'creatinine',
    'CAF_caffeine',
    'DEX_dextromethorphan',
    'MDZ_midazolam',
    'CAF_methylxanthine',
    'DEX_3hydroxymorphinan',
    'DEX_3methoxymorphinan',
    'MDZ_5hydroxyMDZ',
    'MDZ_hydroxyMDZ_glucuronide',
    'OME1_Omeprazole',
    'OME3_Omeprazole_sulfide',
    'OME4_Omeprazole_sulfone',
    'OME5_Hydroxyomeprazole',
    'OME7_Hydroxyomeprazole_sulfide',
    'OME11_Carboxyomeprazole',
    'OME13_Carboxyomeprazole_sulfide',
    'OME15_Carboxyomeprazole_sulfone',
    'OME17_5_O_desmethylomeprazole',
    'OME1920_O_desmethylomeprazole_sulfide',
    'OME27_Omeprazole_sulfide_O_glucuronide',
    'OME29_Omeprazole_sulfide_4_O_glucuronide',
]

In [20]:
# Empty results frame with one column per result field; rows are added by the
# correlation loop that follows.
df_dist = pd.DataFrame({column: [] for column in ('r', 'dof', 'pval', 'CI95%', 'power')})

In [21]:
# Repeated-measures correlation between 'Distance_d0' and every variable in
# `metabolites_dist`, with 'host_subject_id' as the subject.
# The per-variable results are gathered first and concatenated in one call:
# `DataFrame.append` is deprecated since pandas 1.4 and removed in 2.0, and
# appending inside the loop re-copies the growing frame on every iteration.
# `df_dist` stays first in the list so its columns (and any rows it already
# holds) are preserved exactly as they were with the repeated appends.
df_dist = pd.concat(
    [df_dist]
    + [
        pg.rm_corr(data=met_subset_dist, x='Distance_d0', y=metabolite, subject='host_subject_id')
        for metabolite in metabolites_dist
    ]
)

In [22]:
# Label each result row with its variable; rows were produced in the same
# order as `metabolites_dist`, so the assignment is positional.
df_dist['name'] = metabolites_dist

In [23]:
# Flag each correlation as 'TRUE' when its p-value is <= 0.05 and 'FALSE'
# otherwise. The comparison is written as `p <= 0.05` (not "else of p > 0.05")
# so that a NaN p-value is labelled 'FALSE' rather than being reported as
# significant. A plain list is assigned, so the values are placed by position.
df_dist['sig'] = ['TRUE' if p <= 0.05 else 'FALSE' for p in df_dist['pval']]

In [24]:
# Save the results as a tab-separated file; `index` keeps its default, so the
# frame's index is written as the first column.
df_dist.to_csv(
    path_or_buf='Final_Figures/RM_Corr/Correlation_unweighted_uni_ObsOTU.txt',
    sep='\t',
)

In [25]:
# Preview the first rows of the results table.
df_dist.head()

Unnamed: 0,r,dof,pval,CI95%,power,name,sig
rm_corr,0.578,62.0,5.779251e-07,"[0.39, 0.72]",0.999,time_point,True
rm_corr,-0.725,62.0,1.279305e-11,"[-0.82, -0.58]",1.0,observed_otus,True
rm_corr,0.177,62.0,0.1618573,"[-0.07, 0.41]",0.29,creatinine,False
rm_corr,0.399,62.0,0.001086172,"[0.17, 0.59]",0.914,CAF_caffeine,True
rm_corr,0.073,62.0,0.5663662,"[-0.18, 0.31]",0.088,DEX_dextromethorphan,False
