In [None]:
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
import pandas as pd

In [None]:
# Load the .mat file and pull out the two arrays this cell works with.
data = sio.loadmat('../data/ML_Optimal_method.mat')
Sij = data['Sij']
eddy_double_TBNN = data['eddy_double_TBNN']

# Column indices of Sij used as features. Declared once so that the slice
# below and the labels printed in the loop can never disagree.
FEATURE_COLS = [0, 1, 3, 4]
X = Sij[:, FEATURE_COLS]
# Flatten the target to 1-D so it can be paired row-for-row with X.
Y = eddy_double_TBNN.ravel()

# np.corrcoef needs both inputs to have the same number of observations.
# Check up front so a mismatch produces a readable message instead of an
# error raised from inside NumPy.
if Y.shape[0] != X.shape[0]:
    raise ValueError(
        f"Target has {Y.shape[0]} values after flattening but Sij has "
        f"{X.shape[0]} rows; they must match."
    )

# Pearson correlation of each selected feature column against the target.
print("Pairwise Pearson correlations between each feature and eddy_double_TBNN:")
for i, col in enumerate(FEATURE_COLS):
    # corrcoef returns a 2x2 matrix; the off-diagonal entry is the
    # feature/target correlation.
    corr = np.corrcoef(X[:, i], Y)[0, 1]
    print(f"Feature {i+1} (Sij[:, {col}]) vs. eddy_double_TBNN: {corr:.3f}")

# Collect the feature matrix and the target in a single frame so that the
# plotting code below can address columns by name.
features = ['Feature_1', 'Feature_2', 'Feature_3', 'Feature_4']
df = pd.DataFrame(X, columns=features)
df['eddy_double_TBNN'] = Y

# One scatter panel per feature, laid out on a 2x2 grid. Flattening the axes
# array walks the panels row by row, matching the order of `features`.
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
for ax, feature in zip(axs.ravel(), features):
    ax.scatter(df[feature], df['eddy_double_TBNN'], alpha=0.5, color='blue')
    ax.set(
        xlabel=feature,
        ylabel='eddy_double_TBNN',
        title=f"Scatter plot: {feature} vs eddy_double_TBNN",
    )
fig.tight_layout()
plt.show()

# Rebuild the frame with the target stored under 'Label'; this is the frame
# used by both the regression and the pairplot below.
df = pd.DataFrame(
    X, columns=['Feature_1', 'Feature_2', 'Feature_3', 'Feature_4']
).assign(Label=Y)

# Design matrix: the four feature columns plus a constant column so that the
# fitted model includes an intercept.
X_with_const = sm.add_constant(df.drop(columns='Label'))

# Ordinary least squares of Label on the features, followed by the full
# statsmodels summary table.
model = sm.OLS(endog=df['Label'], exog=X_with_const).fit()
print(model.summary())

# Pairplot of every column in the frame (features and Label) to show the
# relationships between all variables.
sns.pairplot(df)
plt.show()