In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
from scipy.stats import spearmanr
from statsmodels.stats.multitest import multipletests
from sklearn.preprocessing import StandardScaler
from matplotlib import colors
import networkx as nx

In [2]:
# Load the two workbooks into df1 (T1.xlsx) and df2 (T2.xlsx).
# NOTE(review): presumably two measurement time points of the same subjects,
# with identical row order and column layout - confirm, since later cells
# subtract the frames from each other.
df1, df2 = (pd.read_excel(workbook) for workbook in ('T1.xlsx', 'T2.xlsx'))

In [3]:
# The first ten columns of the T1 sheet are used as the fitness table
# (selected by position, not by label).
fitness = df1.iloc[:, 0:10]
fitness

In [4]:
# Positional column ranges shared by both sheets.
# NOTE(review): the variable names suggest columns 17-45 are cytokines and
# 46-129 are metabolites - confirm against the spreadsheet layout.
CYTOKINE_COLS = slice(17, 46)
METABOLITE_COLS = slice(46, 130)
METACYTE_COLS = slice(17, 130)  # both ranges together

# Per-row change from T1 to T2 (df2 minus df1) for each group of columns.
# pandas aligns the subtraction on index and column labels.
metabolites = df2.iloc[:, METABOLITE_COLS] - df1.iloc[:, METABOLITE_COLS]
cytokines = df2.iloc[:, CYTOKINE_COLS] - df1.iloc[:, CYTOKINE_COLS]
metacyte = df2.iloc[:, METACYTE_COLS] - df1.iloc[:, METACYTE_COLS]

In [5]:
# Fitness columns carried into the correlation analysis (taken from the T1
# fitness table, i.e. these are not T2-T1 differences).
selected = [
    'BMI (kg·m²)',
    'Body fat %',
    'VO2max',
    'Max Watts',
]
fit = fitness.loc[:, selected]

In [6]:
# Place the T2-T1 difference columns and the selected fitness columns side by
# side (fitness columns last), aligned on the row index.
concat = [metacyte, fit]
metacyte_fit = pd.concat(concat, axis='columns')

In [7]:
# Pairwise Spearman rank correlation between every pair of columns in
# metacyte_fit. Produces two square, symmetric, float64 DataFrames indexed by
# column label on both axes:
#   spearman_corr_matrix - correlation coefficient (rho)
#   p_values_matrix      - p-value returned by spearmanr for that pair
columns = metacyte_fit.columns
n_columns = len(columns)

# Fill plain float arrays by position and wrap them in DataFrames once at the
# end. This keeps BOTH matrices numeric (previously p_values_matrix was left
# as object dtype) and avoids per-cell label-based .loc writes, which are slow
# and ambiguous when column labels repeat.
corr_values = np.full((n_columns, n_columns), np.nan)
p_values = np.full((n_columns, n_columns), np.nan)

for i in range(n_columns):
    left = metacyte_fit.iloc[:, i]
    # Start at j = i: the matrix is symmetric, so each pair is computed once
    # and mirrored; the diagonal (a column against itself) is included.
    for j in range(i, n_columns):
        # spearmanr is called with its default nan_policy, exactly as before;
        # see the scipy documentation for how missing values are treated.
        spearman_corr, p_value = spearmanr(left, metacyte_fit.iloc[:, j])
        corr_values[i, j] = corr_values[j, i] = spearman_corr
        p_values[i, j] = p_values[j, i] = p_value

spearman_corr_matrix = pd.DataFrame(corr_values, index=columns, columns=columns)
p_values_matrix = pd.DataFrame(p_values, index=columns, columns=columns)

In [8]:
# Threshold applied to the raw (uncorrected) p-values.
# NOTE(review): multipletests is imported at the top of the notebook but no
# multiple-comparison correction is applied here - confirm this is intended.
significance_level = 0.05
significant_correlations = np.abs(p_values_matrix).lt(significance_level)

# Strict upper triangle (k=1): drops the diagonal and keeps each unordered
# pair of variables exactly once.
upper_triangle = np.triu(np.ones(p_values_matrix.shape), k=1)

# A cell survives only if it is significant AND lies above the diagonal.
significant_upper_triangle = np.logical_and(significant_correlations, upper_triangle)

# (row positions, column positions) of the surviving cells.
significant_indices = np.where(significant_upper_triangle)
row_positions, col_positions = significant_indices

# One row per significant pair. Variable1 is the label at the COLUMN position
# and Variable2 the label at the ROW position; because only the upper triangle
# is kept, Variable1 is always the later of the two columns in the matrix.
significant_results = pd.DataFrame({
    'Variable1': spearman_corr_matrix.columns[col_positions],
    'Variable2': spearman_corr_matrix.columns[row_positions],
    'Correlation': spearman_corr_matrix.to_numpy()[significant_indices],
    'P-Value': p_values_matrix.to_numpy()[significant_indices],
})

In [9]:
# Keep pairs whose correlation magnitude is at least 0.5, in either direction
# (rho >= 0.5 or rho <= -0.5).
strong_mask = significant_results['Correlation'].abs() >= 0.5
filtered_df = significant_results.loc[strong_mask]

# Order from the most positive to the most negative correlation.
sorted_df = filtered_df.sort_values('Correlation', ascending=False)

# Show the sorted table.
print(sorted_df)

In [10]:
# Keep only fitness-versus-analyte pairs: Variable1 must be one of the
# selected fitness columns and Variable2 one of the T2-T1 difference columns.
# (Passing a DataFrame to isin tests against its column labels; .columns
# states that explicitly.)
is_fitness = sorted_df['Variable1'].isin(fit.columns)
is_analyte = sorted_df['Variable2'].isin(metacyte.columns)
mask = is_fitness & is_analyte

# Note: this rebinds filtered_df, replacing the table built in the previous cell.
filtered_df = sorted_df.loc[mask]

# Show the filtered table.
print(filtered_df)

In [11]:
# Edge list for the chord diagram, one (source, target, correlation) record
# per link. The values are hard-coded in this cell.
# NOTE(review): presumably copied from the filtered table printed above -
# confirm they still match whenever the input data changes.
_edges = [
    ('BMI (kg·m²)', 'IL-35', 0.6045632544321692),
    ('BMI (kg·m²)', 'IL-29/IFN-lambda1', 0.5400981781331247),
    ('BMI (kg·m²)', 'MMP-1', 0.5295442371260954),
    ('BMI (kg·m²)', 'IL-12(p70)', 0.5191717490872948),
    ('Body fat %', "N-Acetylglutamine,N,N',O,O'- tetrakis(trimethylsilyl)-", 0.5172413793103449),
    ('Max Watts', 'Methyl galactoside', 0.5203465869380726),
    ('Max Watts', 'L-Aspartic acid', 0.5485842135377754),
]

# Column-oriented dict with the same keys and ordering the plotting cell uses.
corr = {
    'Source': [source for source, _, _ in _edges],
    'Target': [target for _, target, _ in _edges],
    'Correlation': [rho for _, _, rho in _edges],
}

links_df = pd.DataFrame(corr)

In [12]:
import holoviews as hv
from holoviews import opts, dim
import holoviews.plotting.bokeh
hv.extension('bokeh')
hv.output(size=200)

# Node table for the chord diagram: every label that appears as an edge
# endpoint in links_df, listed exactly once (sources first, then targets, in
# order of first appearance). Deriving it from links_df keeps nodes and edges
# in sync and avoids the repeated entries a hand-typed list had.
# dict.fromkeys de-duplicates while preserving insertion order.
nodes = list(dict.fromkeys([*links_df['Source'], *links_df['Target']]))
nodes_df = pd.DataFrame({'node': nodes})
node = hv.Dataset(nodes_df, 'node')

# Build the chord diagram from the (edges, nodes) pair. links_df's first two
# columns (Source, Target) are the edge endpoints.
chord = hv.Chord((links_df, node))

# Styling: edges are coloured by their Source label and nodes by their own
# label, both using the Category20 palette; node labels come from 'node'.
chord.opts(opts.Chord(cmap='Category20', 
                edge_cmap='Category20',
                edge_color=dim('Source').str(),
                labels='node',
                edge_line_width= 4,
                node_color=dim('node').str(),
                label_text_font_size='12pt'))