In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
from scipy.stats import spearmanr
from statsmodels.stats.multitest import multipletests
from sklearn.preprocessing import StandardScaler
from matplotlib import colors
import networkx as nx

In [2]:
# Read the three Excel workbooks, in order, into df1 / df2 / df3.
df1, df2, df3 = (
    pd.read_excel(workbook_path)
    for workbook_path in ('T18C.xlsx', 'T115C.xlsx', 'T122C.xlsx')
)

In [3]:
# Stack the three frames row-wise; ignore_index renumbers the rows 0..n-1
# so the original per-file indices are discarded.
combined_data = [df1, df2, df3]
df = pd.concat(objs=combined_data, axis='index', ignore_index=True)

In [4]:
# Preview the combined frame: report its dimensions, then show the first rows
# through the notebook's rich table display instead of text-dumping the frame.
print(df.shape)
df.head()

In [5]:
# Count missing entries per column (isna is the same check as isnull).
df.isna().sum()

In [6]:
# Summary statistics for the combined frame, shown as the cell's output.
df.describe()

In [7]:
# Pairwise Spearman rank correlation between every pair of columns in df.
# Two square, label-indexed frames are filled: one with the correlation
# coefficients and one with the matching p-values.
columns = df.columns
spearman_corr_matrix = pd.DataFrame(index=columns, columns=columns)
p_values_matrix = pd.DataFrame(index=columns, columns=columns)

for first_pos, first_name in enumerate(columns):
    # Start at the diagonal: each unordered pair is computed once and the
    # result is mirrored into both (row, col) and (col, row).
    for second_name in columns[first_pos:]:
        rho, pval = spearmanr(df[first_name], df[second_name])
        for matrix, value in ((spearman_corr_matrix, rho), (p_values_matrix, pval)):
            matrix.loc[first_name, second_name] = value
            matrix.loc[second_name, first_name] = value

In [8]:
# The matrix was filled cell by cell, so coerce every column to a numeric
# dtype (unparseable entries become NaN) before handing it to the heatmap.
spearman_corr_matrix = spearman_corr_matrix.apply(pd.to_numeric, errors='coerce')

fig, ax = plt.subplots(figsize=(60, 30))
sns.heatmap(
    data=spearman_corr_matrix,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    linewidths=.5,
    ax=ax,
)
ax.set_title('Spearman Correlation Matrix')
plt.show()

In [9]:
# Heatmap of the raw (uncorrected) Spearman p-values.
# Coerce to numeric first: the matrix was filled cell by cell, and entries
# that cannot be parsed become NaN.
p_values_matrix = p_values_matrix.apply(pd.to_numeric, errors='coerce')
plt.figure(figsize=(60, 30))
sns.heatmap(p_values_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=.5)
# This figure shows p-values, not correlations, so the title says so.
plt.title('Spearman Correlation P-Values Matrix')
# Render explicitly, as the correlation heatmap cell does, so the cell
# output is the figure rather than the title object's repr.
plt.show()


In [10]:
# Bonferroni correction of the pairwise Spearman p-values.
#
# p_values_matrix is symmetric and its diagonal holds each column tested
# against itself. Correcting the flattened full matrix would count every pair
# twice and include those self-tests, roughly doubling the Bonferroni penalty.
# Only the unique off-diagonal pairs with a defined p-value are corrected here.
#
# Outputs (same names and flat n*n layout as before, so they can still be
# reshaped to p_values_matrix.shape):
#   p_values_flat      - raw p-values, flattened
#   rejected           - boolean flags, mirrored across the diagonal
#   corrected_p_values - corrected p-values, mirrored; NaN on the diagonal
#                        and wherever the raw p-value was NaN
#   alpha_corrected    - per-test threshold reported by multipletests
alpha = 0.05

p_values_array = p_values_matrix.apply(pd.to_numeric, errors='coerce').to_numpy(dtype=float)
p_values_flat = p_values_array.flatten()

# Strict upper triangle = one entry per unordered pair of columns.
pair_rows, pair_cols = np.triu_indices_from(p_values_array, k=1)
pair_p_values = p_values_array[pair_rows, pair_cols]

# Undefined p-values are not tests and must not inflate the test count.
is_testable = ~np.isnan(pair_p_values)
pair_rows, pair_cols = pair_rows[is_testable], pair_cols[is_testable]

rejected_square = np.zeros(p_values_array.shape, dtype=bool)
corrected_square = np.full(p_values_array.shape, np.nan)
n_rejected_pairs = 0
alpha_corrected = alpha  # fallback when there is nothing to correct

if is_testable.any():
    pair_rejected, pair_corrected, _, alpha_corrected = multipletests(
        pair_p_values[is_testable], alpha=alpha, method='bonferroni'
    )
    # Mirror the per-pair results back into symmetric square arrays.
    rejected_square[pair_rows, pair_cols] = pair_rejected
    rejected_square[pair_cols, pair_rows] = pair_rejected
    corrected_square[pair_rows, pair_cols] = pair_corrected
    corrected_square[pair_cols, pair_rows] = pair_corrected
    n_rejected_pairs = int(pair_rejected.sum())

rejected = rejected_square.flatten()
corrected_p_values = corrected_square.flatten()

# Number of unique column pairs still significant after correction.
print(n_rejected_pairs)
print(alpha_corrected)

In [11]:
# Put the flat array of Bonferroni-corrected p-values back into a square,
# label-indexed frame with the same shape as the raw p-value matrix.
corrected_p_values_matrix = pd.DataFrame(
    corrected_p_values.reshape(p_values_matrix.shape),
    index=columns,
    columns=columns,
)
plt.figure(figsize=(60, 30))
sns.heatmap(corrected_p_values_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=.5)
# The title previously duplicated the raw p-value figure's title; label this
# one as the corrected values so the two heatmaps can be told apart.
plt.title('Bonferroni-Corrected P-Values Matrix')
# Render explicitly so the cell output is the figure, not the title repr.
plt.show()



In [12]:
# Build a table of column pairs whose Spearman p-value is below the threshold
# and whose correlation magnitude is at least 0.5.
# NOTE(review): the filter reads the raw p_values_matrix, not the
# Bonferroni-corrected values - confirm that this is intended.
significance_level = 0.05

# True wherever the p-value falls under the threshold.
significant_correlations = p_values_matrix.abs() < significance_level

# 1.0 strictly above the diagonal, 0.0 elsewhere: keeps one entry per pair
# and drops the self-pairs.
upper_triangle = np.triu(np.ones(p_values_matrix.shape), k=1)

# A pair survives only if it is significant AND sits in the upper triangle.
significant_upper_triangle = np.logical_and(significant_correlations, upper_triangle)

# (row positions, column positions) of the surviving cells.
result_indices = np.where(significant_upper_triangle)
row_positions, col_positions = result_indices

# Variable1 is named from the column position and Variable2 from the row
# position of each surviving cell.
pair_labels = spearman_corr_matrix.columns
results = pd.DataFrame({
    'Variable1': pair_labels[col_positions],
    'Variable2': pair_labels[row_positions],
    'Correlation': spearman_corr_matrix.values[result_indices],
    'P-Value': p_values_matrix.values[result_indices],
})

# Ascending by correlation: most negative first.
sorted_results = results.sort_values(by='Correlation')

# Keep correlations of magnitude 0.5 or more, in either direction.
is_strong = sorted_results['Correlation'].abs() >= 0.5
significant_results = sorted_results[is_strong]

print(significant_results)

In [13]:
# keep=False marks every row that has an exact twin (all columns equal),
# not only the second and later occurrences.
is_repeated_row = significant_results.duplicated(keep=False)
duplicates_all_columns = significant_results.loc[is_repeated_row]
print("Rows with Duplicates Across All Columns:")
print(duplicates_all_columns)


# Correlation Between Cytokines and Metabolites at Baseline1 

In [14]:
# Column labels at positions 0-28 of df.
# NOTE(review): assumes exactly these 29 leading columns are the cytokine
# measurements - confirm against the input workbooks.
cytokines = [label for label in df.columns[:29]]

In [15]:
# Column labels at positions 29-112 of df.
# NOTE(review): assumes these 84 columns are the metabolite measurements -
# confirm against the input workbooks.
metabolites = [label for label in df.columns[29:113]]

In [16]:
# Keep only cross-group pairs: Variable1 must be one of the metabolite
# columns and Variable2 one of the cytokine columns.
variable1_is_metabolite = significant_results['Variable1'].isin(metabolites)
variable2_is_cytokine = significant_results['Variable2'].isin(cytokines)
mask = variable1_is_metabolite & variable2_is_cytokine

# Rows of significant_results that satisfy both conditions.
filtered_df = significant_results.loc[mask]

print(filtered_df)

In [17]:
# Hand-entered edge list for the chord diagram, one (source, target,
# correlation) record per link. It is unpacked into the column-oriented
# `corr` dict and then into the `links_df` frame.
_link_records = [
    ("Cholesterol", "IL-10 (56)_T1", 0.636364),
    ("Cholesterol", "Osteopontin (OPN)", 0.692073),
    ("Lysine", "BAFF/TNFSF13B (37)_T1", 0.705170),
    ("2,3-Butanediol", "Pentraxin-3", 0.729487),
    ("2-Aminoheptanedioic acid", "IL-29/IFN-lambda1", 0.762209),
    ("4-Aminobutanoic acid", "IL-29/IFN-lambda1", 0.766871),
    ("Arachidonic acid", "sTNF-R1", 0.780260),
    ("Butanoic acid, 2,4-bis[(trimethylsilyl)oxy]", "BAFF/TNFSF13B", 0.819576),
    ("Indole-3-Latic Acid", 'BAFF/TNFSF13B', 0.820672),
    ('Diethanolamine', 'sCD163', 1),
]

_sources, _targets, _strengths = zip(*_link_records)
corr = {
    'Source': list(_sources),
    'Target': list(_targets),
    'Correlation': list(_strengths),
}

links_df = pd.DataFrame(corr)


In [18]:
import holoviews as hv
from holoviews import opts, dim
import holoviews.plotting.bokeh
hv.extension('bokeh')
hv.output(size=250)

# Chord diagram of the Source -> Target links.
# The edge table `links_df` (columns Source, Target, Correlation) built in the
# preceding cell is reused rather than re-declared, so the two cannot drift.

# Derive the node list from the edge endpoints themselves: every Source, then
# every Target, each name once, in order of first appearance. This replaces a
# hand-typed list that repeated several names and had to be kept in sync with
# the edges manually.
nodes = (
    pd.concat([links_df['Source'], links_df['Target']], ignore_index=True)
    .drop_duplicates()
    .tolist()
)
nodes_df = pd.DataFrame({'node': nodes})
node = hv.Dataset(nodes_df, 'node')

# Create HoloViews Chord Diagram.
# NOTE(review): the select() range is open on both ends, so it presumably
# filters nothing - confirm before removing it.
chord = hv.Chord((links_df, node)).select(value=(None, None))

# Customize chart options: edges coloured by their Source name, nodes coloured
# and labelled by node name.
chord.opts(opts.Chord(cmap='Category20',
                edge_cmap='Category20',
                edge_color=dim('Source').str(),
                labels='node',
                edge_line_width=4,
                node_color=dim('node').str(),
                label_text_font_size='12pt'))
