In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import KFold
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import warnings
warnings.filterwarnings("ignore")

# Difference in metabolomic profile at T1

In [None]:

# Load the T1 workbook; the path is relative to the notebook's working directory.
d = pd.read_excel(io='T-One.xlsx')

In [None]:
# Standardise every column except the 'Group' label (StandardScaler defaults:
# centre on the column mean, divide by the column standard deviation).
# The scaler is fitted on all rows of `d`.
df1 = d.drop(columns='Group')
scaler = StandardScaler()
scaled_data = scaler.fit(df1).transform(df1)
X = pd.DataFrame(data=scaled_data, columns=df1.columns)

In [None]:
# Encode the temperature groups as integer class labels 0, 1, 2.
# `Series.map` with an explicit check replaces the former `Series.replace`:
# `replace` leaves any unexpected label in place as a string without complaint
# and depends on an implicit object -> int downcast that newer pandas
# releases deprecate, which would hand sklearn an object-dtype target.
group_codes = {'8°C': 0, '15°C/Control': 1, '22°C': 2}
y = d['Group'].map(group_codes)
if y.isna().any():
    # Fail loudly instead of passing partially encoded labels downstream.
    unknown = sorted(d.loc[y.isna(), 'Group'].astype(str).unique())
    raise ValueError(f"Unmapped 'Group' labels: {unknown}")
y = y.astype('int64')

In [None]:
from sklearn.feature_selection import SelectKBest, chi2, f_classif

# Score every standardised feature against the class labels with f_classif
# (ANOVA F-value). k='all' keeps all features, so this step only ranks them.
k_best = SelectKBest(score_func=f_classif, k='all')
fit = k_best.fit(X, y)

# One row per feature, ordered from the highest to the lowest score.
feature_scores = (
    pd.DataFrame({'Feature': X.columns, 'Score': fit.scores_})
    .sort_values(by='Score', ascending=False)
)

# Horizontal bar chart of the ranking.
plt.figure(figsize=(20, 15))
sns.barplot(data=feature_scores, x='Score', y='Feature')
plt.title('Feature Scores')
plt.show()

In [None]:
# feature_scores is sorted by descending score, so the first eight rows are
# the eight best-scoring features.
features_list = feature_scores['Feature'].iloc[:8].tolist()
x = X.loc[:, features_list]


In [None]:
# Reduce the selected features to their first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(x)

# NOTE: `X` is rebound here from the standardised feature table to the
# two-column PCA scores (columns 0 and 1). Cells that index `X` by feature
# name must run before this one.
X = pd.DataFrame(data=X_pca)

In [None]:
# Hold out 20% of the samples for testing; random_state pins the partition.
# NOTE(review): the scaler, the feature scoring and the PCA were all fitted on
# every row before this split, so the test accuracy may be optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit LDA on the two training-set components and draw its decision regions
# together with the training samples.

# X_train holds the PCA scores; the first two columns are the two components.
lda = LDA(n_components=2)
lda.fit(X_train.iloc[:, :2], y_train)

# Predictions for the TRAINING samples (the plot below does not use them).
y_pred = lda.predict(X_train.iloc[:, :2])

# Regular grid over the training data with a margin of 1 on every side.
x_min, x_max = X_train.iloc[:, 0].min() - 1, X_train.iloc[:, 0].max() + 1
y_min, y_max = X_train.iloc[:, 1].min() - 1, X_train.iloc[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01),
                     np.arange(y_min, y_max, 0.01))

# Predicted class for every grid point, reshaped back onto the grid.
Z = lda.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Marker and colour per class; both lists are indexed by the integer label.
markers = ['o', 's', '^']
colors = ['red', 'blue', 'green']

plt.figure(figsize=(4, 3))

# Decision regions. Z holds discrete labels, so the levels are placed halfway
# between consecutive labels (-0.5, 0.5, 1.5, 2.5) and each band is filled
# with that class's own colour. With automatic levels and the default
# colormap the region colours would not match the scatter colours.
region_levels = np.arange(len(colors) + 1) - 0.5
plt.contourf(xx, yy, Z, levels=region_levels, colors=colors, alpha=0.3)

# Training samples, one scatter call per class.
for class_label in np.unique(y_train):
    class_mask = (y_train == class_label).to_numpy()
    plt.scatter(
        X_train.iloc[class_mask, 0],
        X_train.iloc[class_mask, 1],
        marker=markers[class_label],
        color=colors[class_label],
        label=f'Class {class_label}',
    )

# Legend and labels
plt.legend()
plt.xlabel('component 1')
plt.ylabel('component 2')
plt.title('LDA with Decision Boundary and Class Overlap')
plt.show()


In [None]:
# accuracy_score is re-imported here, so this cell does not rely on the
# import cell for it.
from sklearn.metrics import accuracy_score

# Predictions of the fitted LDA model on both partitions.
y_train_pred = lda.predict(X_train.iloc[:, :2])
y_test_pred = lda.predict(X_test.iloc[:, :2])

# Fraction of correctly classified samples in each partition.
train_accuracy = accuracy_score(y_true=y_train, y_pred=y_train_pred)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)

print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)


# Difference in metabolomic profile at T2

In [None]:

# Load the T2 workbook.
# NOTE(review): this is an absolute, machine-specific path, whereas the T1
# workbook is read from a relative path. Confirm the intended location.
df2 = pd.read_excel(io='C:/Users/onifa/Downloads/rest/T-Two.xlsx')

# Cytokines/metabolites significantly altered after immersion.
selected = [
    'IL-11 (39)_T1',
    'IL-26 (22)_T1',
    'gp130/sIL-6Rb (14)_T1',
    'MMP-1 (43)_T1',
    'Valine (2TMS)',
    'Arachidonic acid, TMS derivative',
    'Dodecanoic acid, TMS derivative',
    'Indole-3-Latic Acid (3TMS)',
    'L-Aspartic acid, 2TMS derivative',
    'IL-35 (34)_T1',
    'Aminomalonic acid, tris(trimethylsilyl)-',
    'L-Norvaline, 2TMS derivative',
    'Oxalic acid, 2TMS derivative',
]
df = df2.loc[:, selected]

In [None]:
# Standardise the selected T2 columns (StandardScaler defaults); the scaler is
# fitted on all rows of `df`.
scaler = StandardScaler()
scaled_data = scaler.fit(df).transform(df)
X = pd.DataFrame(data=scaled_data, columns=df.columns)

In [None]:
# Encode the temperature groups as integer class labels 0, 1, 2.
# `Series.map` with an explicit check replaces the former `Series.replace`:
# `replace` leaves any unexpected label in place as a string without complaint
# and depends on an implicit object -> int downcast that newer pandas
# releases deprecate, which would hand sklearn an object-dtype target.
group_codes = {'8°C': 0, '15°C/Control': 1, '22°C': 2}
y = df2['Group'].map(group_codes)
if y.isna().any():
    # Fail loudly instead of passing partially encoded labels downstream.
    unknown = sorted(df2.loc[y.isna(), 'Group'].astype(str).unique())
    raise ValueError(f"Unmapped 'Group' labels: {unknown}")
y = y.astype('int64')

In [None]:
from sklearn.feature_selection import SelectKBest, chi2, f_classif

# Score each standardised T2 feature against the class labels with f_classif
# (ANOVA F-value). k='all' keeps all features, so this step only ranks them.
k_best = SelectKBest(score_func=f_classif, k='all')
fit = k_best.fit(X, y)

# One row per feature, ordered from the highest to the lowest score.
feature_scores = (
    pd.DataFrame({'Feature': X.columns, 'Score': fit.scores_})
    .sort_values(by='Score', ascending=False)
)

# Horizontal bar chart of the ranking.
plt.figure(figsize=(20, 15))
sns.barplot(data=feature_scores, x='Score', y='Feature')
plt.title('Feature Scores')
plt.show()

In [None]:
# feature_scores is sorted by descending score, so the first nine rows are
# the nine best-scoring features.
features_list = feature_scores['Feature'].iloc[:9].tolist()
x = X.loc[:, features_list]

In [None]:
# Reduce the selected T2 features to their first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(x)

# NOTE: `X` is rebound here from the standardised feature table to the
# two-column PCA scores (columns 0 and 1). Cells that index `X` by feature
# name must run before this one.
X = pd.DataFrame(data=X_pca)

In [None]:
# Hold out 20% of the samples for testing; random_state pins the partition.
# NOTE(review): the scaler, the feature scoring and the PCA were all fitted on
# every row before this split, so the test accuracy may be optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit LDA on the two training-set components and draw its decision regions
# together with the training samples.

# X_train holds the PCA scores; the first two columns are the two components.
lda = LDA(n_components=2)
lda.fit(X_train.iloc[:, :2], y_train)

# Predictions for the TRAINING samples (the plot below does not use them).
y_pred = lda.predict(X_train.iloc[:, :2])

# Regular grid over the training data with a margin of 1 on every side.
x_min, x_max = X_train.iloc[:, 0].min() - 1, X_train.iloc[:, 0].max() + 1
y_min, y_max = X_train.iloc[:, 1].min() - 1, X_train.iloc[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01),
                     np.arange(y_min, y_max, 0.01))

# Predicted class for every grid point, reshaped back onto the grid.
Z = lda.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Marker and colour per class; both lists are indexed by the integer label.
markers = ['o', 's', '^']
colors = ['red', 'blue', 'green']

plt.figure(figsize=(4, 3))

# Decision regions. Z holds discrete labels, so the levels are placed halfway
# between consecutive labels (-0.5, 0.5, 1.5, 2.5) and each band is filled
# with that class's own colour. With automatic levels and the default
# colormap the region colours would not match the scatter colours.
region_levels = np.arange(len(colors) + 1) - 0.5
plt.contourf(xx, yy, Z, levels=region_levels, colors=colors, alpha=0.3)

# Training samples, one scatter call per class.
for class_label in np.unique(y_train):
    class_mask = (y_train == class_label).to_numpy()
    plt.scatter(
        X_train.iloc[class_mask, 0],
        X_train.iloc[class_mask, 1],
        marker=markers[class_label],
        color=colors[class_label],
        label=f'Class {class_label}',
    )

# Legend and labels
plt.legend()
plt.xlabel('component 1')
plt.ylabel('component 2')
plt.title('LDA with Decision Boundary and Class Overlap')
plt.show()


In [None]:
# accuracy_score is re-imported here, so this cell does not rely on the
# import cell for it.
from sklearn.metrics import accuracy_score

# Predictions of the fitted LDA model on both partitions.
y_train_pred = lda.predict(X_train.iloc[:, :2])
y_test_pred = lda.predict(X_test.iloc[:, :2])

# Fraction of correctly classified samples in each partition.
train_accuracy = accuracy_score(y_true=y_train, y_pred=y_train_pred)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)

print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)
