This script was used to generate some miscellaneous extra thesis figures.

### Bar graph showing clinical data methods

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the processed metabolomics table; the first CSV column becomes the row index.
df = pd.read_csv('../Data/Su_COVID_metabolomics_processed_commoncases.csv', index_col=0)

# Row labels of the table, and the same labels with a '-1' suffix appended.
samples = df.index.tolist()
sample_list = [sample_id + '-1' for sample_id in samples]
print(sample_list)

In [None]:
# Split the samples by WHO status. The last two columns are dropped in both
# subsets (metadata, per the original author's note).
who_status = df["WHO_status"]

# Mild group: WHO status '1-2' (45 samples according to the original note).
df_mild = df.loc[who_status == '1-2'].iloc[:, :-2]
mild_samples = df_mild.index.tolist()
mild_sample_list = [sample_id + '-1' for sample_id in mild_samples]

# Severe group: WHO status '3-4' or '5-7' (83 samples according to the original note).
df_severe = df.loc[who_status.isin(['3-4', '5-7'])].iloc[:, :-2]
severe_samples = df_severe.index.tolist()
severe_sample_list = [sample_id + '-1' for sample_id in severe_samples]

In [None]:
# Load the clinical data table; the first CSV column is used as the row index.
clinical_df = pd.read_csv('../Data/Goldman_clinical_data.csv', index_col=0)

In [None]:
# Preview the first five rows of the clinical table.
clinical_df.head(5)

In [None]:
# Number of suffixed sample IDs in the mild group, then in the severe group (one per line).
print(len(mild_sample_list), len(severe_sample_list), sep='\n')

In [None]:
# Keep only the clinical rows whose 'Sample ID' belongs to each severity group.
clinical_sample_ids = clinical_df['Sample ID']
mild_factors = clinical_df.loc[clinical_sample_ids.isin(mild_sample_list)]
severe_factors = clinical_df.loc[clinical_sample_ids.isin(severe_sample_list)]

Helper that prints, for a given clinical factor, the value counts in the mild and severe groups (e.g. how many samples do or do not have a comorbidity), followed by the number of missing values in each group:

In [None]:
def clinical_factor_stat(variable):
    """Print summary counts of one clinical column for the mild and severe groups.

    Reads the module-level ``mild_factors`` and ``severe_factors`` tables and
    prints, in this order: the value counts for the mild group, the value
    counts for the severe group, the number of missing entries in the mild
    group, and the number of missing entries in the severe group.

    Parameters
    ----------
    variable
        Name of the column to summarise in both tables.

    Returns
    -------
    None
        Everything is printed; nothing is returned.
    """
    cohorts = (mild_factors, severe_factors)

    # Category frequencies first (mild, then severe)...
    for cohort in cohorts:
        print(cohort[variable].value_counts())

    # ...then the count of missing values, in the same group order.
    for cohort in cohorts:
        print(cohort[variable].isna().sum())

In [None]:
# clinical_factor_stat prints its own results and returns None, so it is called
# directly; wrapping the call in print() would add a stray "None" line.
clinical_factor_stat('Chronic Hypertension')

For continuous (non-binary) factors, take the mean and standard deviation instead:

In [None]:
# Pull the 'Age' and 'BMI' columns of each severity group out as plain lists.
mild_age = mild_factors['Age'].tolist()
severe_age = severe_factors['Age'].tolist()

mild_BMI = mild_factors['BMI'].tolist()  # nearly all NaN (original author's note)
severe_BMI = severe_factors['BMI'].tolist()

In [None]:
# NaN-ignoring mean, then NaN-ignoring standard deviation, of BMI.
# Within each statistic the mild group is printed before the severe group.
for nan_aware_stat in (np.nanmean, np.nanstd):
    print(nan_aware_stat(mild_BMI))
    print(nan_aware_stat(severe_BMI))

In [None]:
# For each group (mild first, then severe) print how many ages are <= 50 and
# how many are > 50. A NaN age satisfies neither comparison, so it is counted
# in neither band.
for group_ages in (mild_age, severe_age):
    print(sum(age <= 50 for age in group_ages))
    print(sum(age > 50 for age in group_ages))

### Small heatmaps to put in the introduction

In [None]:
import pandas as pd
import seaborn as sns
import sspa
import scipy

import numpy as np
import networkx as nx
import random
import matplotlib.pyplot as plt

In [None]:
#Load the common cases dataset (first CSV column is used as the row index)
df = pd.read_csv('../Data/Su_COVID_metabolomics_processed_commoncases.csv', index_col=0)


In [None]:
# Illustrative 'metabolomic' dataset: an arbitrary but fixed window of df
# (row positions 35-44, column positions 45-57).
df_sample = df.iloc[35:45, 45:58]

# Plain heatmap: no clustering on either axis and no tick labels.
heatmap_options = dict(
    row_cluster=False,
    col_cluster=False,
    xticklabels=False,
    yticklabels=False,
    cmap='RdBu_r',
    figsize=(9, 7),
    cbar=False,
)
g = sns.clustermap(df_sample, **heatmap_options)

g.cax.set_visible(False)  # hide the colour-bar axes
ax = g.ax_heatmap
ax.set_ylabel("")  # blank out the heatmap's y-axis label

In [None]:
# Illustrative 'proteomic' dataset: an arbitrary but fixed window of df
# (row positions 35-44, column positions 80-96).
df_sample = df.iloc[35:45, 80:97]

# Plain heatmap: no clustering on either axis and no tick labels.
heatmap_options = dict(
    row_cluster=False,
    col_cluster=False,
    xticklabels=False,
    yticklabels=False,
    cmap='RdBu_r',
    figsize=(9, 5.4),
    cbar=False,
)
g = sns.clustermap(df_sample, **heatmap_options)

g.cax.set_visible(False)  # hide the colour-bar axes
ax = g.ax_heatmap
ax.set_ylabel("")  # blank out the heatmap's y-axis label

In [None]:
# Pathway definitions from the Reactome GMT file, then kPCA pathway scores
# computed on df without its last two columns.
reactome_pathways = sspa.process_gmt("../Data/Reactome_Homo_sapiens_pathways_compounds_R84.gmt")
kpca_scores = sspa.sspa_kpca(df.iloc[:, :-2], reactome_pathways)

#Convert pathway ID to name
# The spreadsheet has no header row: column 0 supplies the keys, column 1 the values.
root_path = pd.read_excel('../Data/Root_pathways.xlsx', header=None)
root_pathway_dict = dict(zip(root_path[0], root_path[1]))

root_pathway_names = list(root_pathway_dict)
#Using Sara's code, remove root pathways
# Only root pathways that actually appear among the score columns are dropped.
root_columns_present = [column for column in kpca_scores.columns if column in root_pathway_dict]
kpca_scores = kpca_scores.drop(columns=root_columns_present)

In [None]:
# Display the pathway score table after the root pathways were dropped.
kpca_scores

In [None]:
# Illustrative metabolite pathway scores: an arbitrary but fixed window of
# kpca_scores (row positions 35-44, column positions 50-61, i.e. 12 pathways).
kpca_scores_sample = kpca_scores.iloc[35:45, 50:62]

# Plain heatmap: no clustering on either axis and no tick labels.
heatmap_options = dict(
    row_cluster=False,
    col_cluster=False,
    xticklabels=False,
    yticklabels=False,
    cmap='viridis',
    figsize=(9, 7.5),
    cbar=False,
)
g = sns.clustermap(kpca_scores_sample, **heatmap_options)

g.cax.set_visible(False)  # hide the colour-bar axes
ax = g.ax_heatmap
ax.set_ylabel("")  # blank out the heatmap's y-axis label

In [None]:
# Illustrative protein pathway scores: an arbitrary but fixed window of
# kpca_scores (row positions 35-44, column positions 65-84, i.e. 20 pathways).
kpca_scores_sample = kpca_scores.iloc[35:45, 65:85]

# Plain heatmap: no clustering on either axis and no tick labels.
heatmap_options = dict(
    row_cluster=False,
    col_cluster=False,
    xticklabels=False,
    yticklabels=False,
    cmap='viridis',
    figsize=(9, 4.5),
    cbar=False,
)
g = sns.clustermap(kpca_scores_sample, **heatmap_options)

g.cax.set_visible(False)  # hide the colour-bar axes
ax = g.ax_heatmap
ax.set_ylabel("")  # blank out the heatmap's y-axis label

In [None]:
# Illustrative integrated pathway scores: an arbitrary but fixed window of
# kpca_scores (row positions 35-44, column positions 85-107, i.e. 23 pathways).
kpca_scores_sample = kpca_scores.iloc[35:45, 85:108]

# Plain heatmap: no clustering on either axis and no tick labels.
heatmap_options = dict(
    row_cluster=False,
    col_cluster=False,
    xticklabels=False,
    yticklabels=False,
    cmap='viridis',
    figsize=(9, 4.5),
    cbar=False,
)
g = sns.clustermap(kpca_scores_sample, **heatmap_options)

g.cax.set_visible(False)  # hide the colour-bar axes
ax = g.ax_heatmap
ax.set_ylabel("")  # blank out the heatmap's y-axis label

In [None]:
# Spearman rank correlation computed on the pathway score table.
spearman_results = scipy.stats.spearmanr(kpca_scores)

# Element 1 of the result holds the p-values, element 0 the correlation coefficients.
spearman_pvals = spearman_results[1]

# Wrap the coefficients in a DataFrame. No labels are supplied, so rows and
# columns are addressed by integer position from here on.
spearman_coef = pd.DataFrame(spearman_results[0])

In [None]:
# 12x12 window of the Spearman coefficient matrix (positions 35-46), used as
# the illustrative 'metabolomic' correlation map.
spearman_coef_sample = pd.DataFrame(spearman_coef).iloc[35:47, 35:47]

# Upper-triangle array; only needed by the mask option, which is currently disabled.
corr = spearman_coef_sample.corr()
matrix = np.triu(corr)

# Clustered heatmap (rows and columns) without tick labels; add mask=matrix to re-enable masking.
clustermap_options = dict(
    row_cluster=True,
    col_cluster=True,
    xticklabels=False,
    yticklabels=False,
    cmap="coolwarm",
    figsize=(9, 9),
    cbar=False,
)
g = sns.clustermap(spearman_coef_sample, **clustermap_options)

g.cax.set_visible(False)  # hide the colour-bar axes
ax = g.ax_heatmap
ax.set_ylabel("")  # blank out the heatmap's y-axis label

In [None]:
# 20x20 window of the Spearman coefficient matrix (positions 50-69), used as
# the illustrative 'proteomic' correlation map.
spearman_coef_sample = pd.DataFrame(spearman_coef).iloc[50:70, 50:70]

# Upper-triangle array; only needed by the mask option, which is currently disabled.
corr = spearman_coef_sample.corr()
matrix = np.triu(corr)

# Clustered heatmap (rows and columns) without tick labels; add mask=matrix to re-enable masking.
clustermap_options = dict(
    row_cluster=True,
    col_cluster=True,
    xticklabels=False,
    yticklabels=False,
    cmap="coolwarm",
    figsize=(9, 9),
    cbar=False,
)
g = sns.clustermap(spearman_coef_sample, **clustermap_options)

g.cax.set_visible(False)  # hide the colour-bar axes
ax = g.ax_heatmap
ax.set_ylabel("")  # blank out the heatmap's y-axis label

In [None]:
# 23x23 window of the Spearman coefficient matrix (positions 100-122), used as
# the illustrative 'integrated' correlation map.
spearman_coef_sample = pd.DataFrame(spearman_coef).iloc[100:123, 100:123]

# Upper-triangle array; only needed by the mask option, which is currently disabled.
corr = spearman_coef_sample.corr()
matrix = np.triu(corr)

# Clustered heatmap (rows and columns) without tick labels; add mask=matrix to re-enable masking.
clustermap_options = dict(
    row_cluster=True,
    col_cluster=True,
    xticklabels=False,
    yticklabels=False,
    cmap="coolwarm",
    figsize=(9, 9),
    cbar=False,
)
g = sns.clustermap(spearman_coef_sample, **clustermap_options)

g.cax.set_visible(False)  # hide the colour-bar axes
ax = g.ax_heatmap
ax.set_ylabel("")  # blank out the heatmap's y-axis label

In [None]:
#https://stackoverflow.com/questions/61958360/how-to-create-random-graph-where-each-node-has-at-least-1-edge-using-networkx
#Generate random graph

nodes = 16
seed = random.randint(1,10)
probability = 0.6
# The drawn seed is now passed to the generator (previously it was computed but
# never used), so the graph is fully determined by `seed`.
# To reproduce one exact figure across kernel restarts, replace the randint
# draw above with a fixed integer.
G = nx.gnp_random_graph(nodes, probability, seed=seed)

plt.figure(figsize=(8,6))
nx.draw(G,node_color='#e69f00',node_size=500)

#plt.savefig( '../Figures/random_fig1.png' , dpi=300,bbox_inches = 'tight' , pad_inches = 0.2 , facecolor='w')

In [None]:
#https://stackoverflow.com/questions/61958360/how-to-create-random-graph-where-each-node-has-at-least-1-edge-using-networkx
#Generate random graph, then remove most edges but keep same layout

nodes = 16
seed = random.randint(1,10)
probability = 0.4
# The drawn seed is now passed to the generator (previously it was computed but
# never used), so the graph is fully determined by `seed`.
# NOTE(review): the hard-coded edge lists used further down to prune and colour
# this graph look hand-picked from one particular draw of G. Pin `seed` to a
# fixed integer (and re-derive those lists) to make the figure reproducible.
G = nx.gnp_random_graph(nodes, probability, seed=seed)

#https://cambiocteach.com/accessibility/colourchoice/   for colour palette
#orange #e69f00     sky blue #56b4e9    blue-green #009e73    (yellow #FFD580)

In [None]:
# Random 2-D node positions for G, computed once and stored so that the later
# drawing cells can all be passed the same `pos`.
pos = nx.random_layout(G, dim=2, center=None)

In [None]:
# Draw the complete random graph at the stored node positions.
plt.figure(figsize=(8, 6))
full_graph_style = {'node_color': '#56b4e9', 'node_size': 500}
nx.draw(G, pos, **full_graph_style)
#plt.savefig( '../Figures/random_fig4.png' , dpi=300,bbox_inches = 'tight' , pad_inches = 0.2 , facecolor='w')

In [None]:
#Choose some edges to remove
# Displays every edge of G so that a subset can be picked by hand.
G.edges()  

In [None]:
# Work on a copy so that G itself keeps all of its edges.
H = G.copy()

# Hand-picked edges to delete from H. Exactly one `to_remove` list should be
# active at a time; the commented-out lists below are alternative selections
# for the other network variants named in their headings.

#Differential network
to_remove=[(0, 12), (0, 14), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7),(2, 11), (3, 4), (3, 9), (3, 11),(4, 13), (4, 14), (4, 15), (5, 6), (5, 9),(6, 12), (6, 15), (7, 8), (7, 12), (7, 13), (7, 15),(9, 15), (10, 11), (10, 12), (10, 14),(11, 14), (12, 13), (13, 14), (13, 15)]

#Mild network
#to_remove = [(0, 2), (0, 3), (0, 7), (0, 10), (0, 12), (0, 14),(1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 12), (1, 13), (1, 15), (2, 6),  (2, 7), (2, 8),(2, 11), (3, 4), (3, 9), (3, 11),(4, 13), (4, 14),(4, 15), (5, 6), (5, 9), (5, 15), (6, 7), (6, 9), (6, 10),(7, 15), (8, 13),(9, 15), (10, 11), (10, 12), (10, 14), (11, 12), (11, 13),(13, 14), (13, 15) ]

#Severe network
#to_remove = [(0, 12), (0, 14), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 12), (1, 13),(3, 4), (3, 9),  (4, 13), (4, 14), (4, 15), (5, 6), (5, 9), (5, 15), (6, 7), (6, 9), (6, 10), (6, 11), (6, 12),(8, 13), (8, 15), (9, 10), (9, 11), (13, 14), (13, 15) ]

#Naive network
#to_remove = [(0, 12), (0, 14), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 12), (1, 13),(3, 4), (3, 9), (3, 11), (3, 12), (3, 15), (4, 5), (4,8), (4, 13), (4, 14),(4, 15), (5, 6), (5, 9), (5, 15), (6, 7), (6, 9), (6, 10),  (6, 15), (7, 8), (7, 12), (7, 13), (8, 13),  (9,14),(11, 14), (12, 13), (13, 14), (13, 15)]



In [None]:
# Hand-written edge lists for the differential-network figure.
severe_not_mild = [
    (0, 2), (0, 3), (0, 7), (0, 10),
    (1, 15),
    (2, 6), (2, 7), (2, 8), (2, 11),
    (7, 15),
    (9, 15),
    (10, 11), (10, 12), (10, 14),
    (11, 12), (11, 13),
]
mild_not_severe = [
    (6, 11), (6, 12),
    (8, 15),
    (9, 10), (9, 11),
]
diff_col = [
    (0, 2), (0, 3), (0, 7), (0, 10),
    (1, 8), (1, 12), (1, 13), (1, 15),
    (2, 6), (2, 7), (2, 8),
    (3, 12), (3, 15),
    (4, 5), (4, 8),
    (5, 15),
    (6, 7), (6, 9), (6, 10), (6, 11),
    (8, 13), (8, 15),
    (9, 10), (9, 11), (9, 14),
    (11, 12), (11, 13),
]

# One colour per entry of diff_col: the listed positions are blue, all others red.
blue_positions = {10, 11, 12, 13, 17, 22}
edges_col = [
    "blue" if position in blue_positions else "red"
    for position in range(len(diff_col))
]

len(diff_col)

In [None]:
# Prune H in place: delete the selected edges, then any node left without edges.
H.remove_edges_from(to_remove)
isolated_nodes = list(nx.isolates(H))
H.remove_nodes_from(isolated_nodes)

# Draw what remains at the stored positions, with one colour per edge taken from edges_col.
pruned_graph_style = {
    'node_color': '#56b4e9',
    'edge_color': edges_col,
    'node_size': 350,
    'width': 2,
}
nx.draw(H, pos, **pruned_graph_style)  #,with_labels=True)
#plt.savefig( '../Figures/random_fig5_diff.png' , dpi=300,bbox_inches = 'tight' , pad_inches = 0.2 , facecolor='w')

### Venn diagram to show the number of common pathways between metabolomic, proteomic and integrated

In [None]:
#https://www.geeksforgeeks.org/how-to-create-and-customize-venn-diagrams-in-python/
#Example code

from matplotlib_venn import venn3, venn3_circles
from matplotlib import pyplot as plt
  

# Example three-set Venn diagram; the same region sizes feed both the filled
# diagram and the circle outlines.
example_subsets = (20, 10, 12, 10, 9, 4, 3)

venn3(
    subsets=example_subsets,
    set_labels=('Group A', 'Group B', 'Group C'),
    set_colors=("orange", "blue", "red"),
    alpha=0.7,
)

#Circle line style and width
venn3_circles(subsets=example_subsets, linestyle="dashed", linewidth=2)

plt.title("Venn Diagram in geeks for geeks")
plt.show()

In [None]:
#From matplotlib_venn documentation
from matplotlib import pyplot as plt
from matplotlib_venn import venn3, venn3_circles

plt.figure(figsize=(4, 4))

# Seven equally sized regions, shared by the diagram and its outlines.
unit_subsets = (1, 1, 1, 1, 1, 1, 1)
v = venn3(subsets=unit_subsets, set_labels=('A', 'B', 'C'))

# Restyle region '100': opaque white patch with the label text 'Unknown'.
patch_100 = v.get_patch_by_id('100')
patch_100.set_alpha(1.0)
patch_100.set_color('white')
label_100 = v.get_label_by_id('100')
label_100.set_text('Unknown')
v.get_label_by_id('A').set_text('Set "A"')

# Dashed outlines for all circles, then a thinner dotted one for the first circle.
c = venn3_circles(subsets=unit_subsets, linestyle='dashed')
first_circle = c[0]
first_circle.set_lw(1.0)
first_circle.set_ls('dotted')

plt.title("Sample Venn diagram")

# Arrowed call-out pointing just below the '100' region label.
callout_target = label_100.get_position() - np.array([0, 0.05])
plt.annotate(
    'Unknown set',
    xy=callout_target,
    xytext=(-70, -70),
    ha='center',
    textcoords='offset points',
    bbox=dict(boxstyle='round,pad=0.5', fc='gray', alpha=0.1),
    arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0.5', color='gray'),
)
plt.show()

In [None]:
#For WEIGHTED Venn diagram

from matplotlib_venn import venn3, venn3_circles
from matplotlib import pyplot as plt

plt.figure(figsize=(6, 6))

# Region sizes, in matplotlib_venn's order (100, 010, 110, 001, 101, 011, 111).
# The same tuple feeds both the filled diagram and the circle outlines.
overlap_sizes = (0, 0, 0, 35, 97, 531, 47)

#Draw Venn diagram
vd = venn3(
    subsets=overlap_sizes,
    set_labels=('', '', ''),
    set_colors=('Red', 'Yellow', 'Blue'),
    alpha=0.8,   #, alpha=0.1
)

#Circle outline: the first two circles are dashed
c = venn3_circles(subsets=overlap_sizes, linewidth=1)
for circle_index in (0, 1):
    c[circle_index].set_ls('dashed')

#Colours are washed out because they're overlapping, so colour by patch instead
#Difficulty knowing which circle is which omic
region_colours = {
    "001": '#febb81',   #big circle
    "101": '#f8765c',
    "011": '#d3436e',
    "111": '#982d80',   #most overlap
}
for region_id, hex_colour in region_colours.items():
    vd.get_patch_by_id(region_id).set_color(hex_colour)

#Move circle number labels
label_101 = vd.get_label_by_id("101")
label_101.set_x(-0.5)
label_001 = vd.get_label_by_id("001")
label_001.set_x(-0.004)
label_001.set_y(-0.53)

#Changing font size
#for text in vd.set_labels:  #Omic labels
#    text.set_fontsize(15)

#Circle number fontsize (entries that are None are skipped)
for region_label in vd.subset_labels:
    if region_label is not None:
        region_label.set_fontsize(14)

#plt.annotate('144 edges',xy=[-0.7,.2])  #doesn't show up past the circle
#plt.annotate('578 edges',xy=[0.4,0.45])
#plt.annotate('710 edges',xy=[-0.4,-0.45])

plt.title("Pathway overlap between omics",fontsize=18)    #'#5f187f', '#982d80', '#d3436e', '#f8765c', '#febb81']

In [None]:
# Show the current (x, y) position of the '001' region label, as a reference
# when nudging labels with set_x / set_y.
vd.get_label_by_id('001').get_position()

In [None]:
#For UNWEIGHTED Venn diagram

import matplotlib.pyplot as plt
import matplotlib_venn as mv

#Draw venn diagram (unweighted layout)
pathway_overlap_sizes = (0, 0, 0, 35, 97, 531, 47)
vd = mv.venn3_unweighted(
    subsets=pathway_overlap_sizes,
    #set_labels=('Metabolomic', 'Proteomic', 'Integrated'),
    set_labels=('', '', ''),
    set_colors=('#e34933', '#fee99d', '#51B0DF'),   #        '#e34933', '#fee99d', '#588cc0'/'#51B0DF'
    alpha=0.7,
)

#Change font size
#for text in vd.set_labels:  #Omic labels
#    text.set_fontsize(16)

#Change patch colour
vd.get_patch_by_id("011").set_color('#C7DFD9')

#Move circle labels a bit (region id -> new x coordinate)
label_x_positions = {"011": 0.31, "101": -0.3}
for region_id, new_x in label_x_positions.items():
    vd.get_label_by_id(region_id).set_x(new_x)

#Circle number fontsize (entries that are None are skipped)
for region_label in vd.subset_labels:
    if region_label is not None:
        region_label.set_fontsize(21)

#plt.title("Pathway overlap between omics",fontsize=18)

#plt.savefig( '../Figures/random_fig6.png' , dpi=300,bbox_inches = 'tight' , pad_inches = 0.2 , facecolor='w')

In [None]:
import seaborn as sns
#Obtain hex codes for sns colour palette
#https://www.practicalpythonfordatascience.com/ap_seaborn_palette
magma_palette = sns.color_palette("magma")  #'Spectral','RdYlBu'
print(list(magma_palette.as_hex()))
# The bare last expression displays the palette itself as the cell output.
magma_palette

### Venn diagram to show the number of differential edges between metabolomic, proteomic and integrated

In [None]:
#For UNWEIGHTED Venn diagram

import matplotlib.pyplot as plt
import matplotlib_venn as mv

#Draw venn diagram (unweighted layout)
edge_overlap_sizes = (15, 599, 0, 1437, 2, 993, 0)
vd = mv.venn3_unweighted(
    subsets=edge_overlap_sizes,
    #set_labels=('Metabolomic', 'Proteomic', 'Integrated'),
    set_labels=('', '', ''),
    set_colors=('#e34933', '#fee99d', '#51B0DF'),   #        '#e34933', '#fee99d', '#588cc0'
    alpha=0.7,
)

#for text in vd.set_labels:  #Omic labels
#    text.set_fontsize(16)

#Change patch colour
vd.get_patch_by_id("011").set_color('#C7DFD9')

#Move circle labels a bit (region id -> new x coordinate)
label_x_positions = {"010": 0.4, "011": 0.296, "101": -0.3}
for region_id, new_x in label_x_positions.items():
    vd.get_label_by_id(region_id).set_x(new_x)

#Circle number fontsize (entries that are None are skipped)
for region_label in vd.subset_labels:
    if region_label is not None:
        region_label.set_fontsize(21)

#plt.title("Pathway overlap between omics",fontsize=18)

#plt.savefig( '../Figures/random_fig7.png' , dpi=300,bbox_inches = 'tight' , pad_inches = 0.2 , facecolor='w')