In [38]:
from scipy.io import loadmat
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from tqdm import tqdm

from master_thesis.classification_models import VectorModel, BaseModel

# Location of the MATLAB file that is read with `loadmat` further down.
# NOTE(review): this is an absolute, machine-specific path — the notebook will not
# run on another machine without editing it; consider a configurable data directory.
MAT_PATH = "/Users/wciezobka/sano/projects/masters-thesis/Datasets/Stroke-difference/mean_ed.mat"
# Number of splits used by the k-fold cross-validation.
K_FOLDS = 10

## Converting matlab matrix to numpy array
The resulting array should be 400x130, representing 400 ROIs and 130 subjects (of which the first 26 are controls).

In [21]:
# Expected layout of the matrix: one row per ROI, one column per subject,
# with the control subjects occupying the leading columns.
n_nodes, n_total, n_control = 400, 130, 26

# Read the MATLAB file and pick the variable holding the matrix.
sd = loadmat(MAT_PATH)["EDtocontrol_before"]
print(f"Shape of the mat file: {sd.shape}")

# Stop early if the file does not have the layout the following cells rely on.
assert sd.shape == (n_nodes, n_total), "Shape of the mat file is not as expected"

# Work subject-major from here on: transpose first, then cut at the control/stroke boundary.
control_np = sd.T[:n_control]
stroke_np = sd.T[n_control:]

Shape of the mat file: (400, 130)


In [34]:
# Feature matrix with one row per subject (transpose of the loaded matrix).
X = sd.T

# Labels: 0 for the leading `n_control` subjects, 1 for every subject after them.
y = np.zeros(n_total, dtype=np.int32)
y[n_control:] = 1

# Shuffle features and labels with one shared, seeded permutation so that
# rows and labels stay aligned and the order is reproducible.
np.random.seed(42)
shuffle_index = np.random.permutation(n_total)
X = X[shuffle_index]
y = y[shuffle_index]

In [37]:
# Accumulators: one entry per fold, holding the gold labels and the predictions
# for the training part and the held-out part of that fold.
y_gold_train_acc, y_hat_train_acc = [], []
y_gold_test_acc, y_hat_test_acc = [], []

# K-fold cross-validation with a fixed seed so the folds are reproducible.
kfold = KFold(n_splits=K_FOLDS, shuffle=True, random_state=3346)

# The fold number is not needed, so iterate over the splits directly; `total`
# is passed explicitly because the split generator has no length for tqdm to read.
for train_index, test_index in tqdm(kfold.split(X), total=K_FOLDS, desc="Cross-validation"):

    # Split data (kept as plain lists of rows / labels, as the model was called before)
    X_train, X_test = [X[idx] for idx in train_index], [X[idx] for idx in test_index]
    y_train, y_test = [y[idx] for idx in train_index], [y[idx] for idx in test_index]

    # Train a fresh model on this fold's training part
    model = VectorModel()
    model.fit(X_train, y_train)

    # Predict on the training part (to measure fit) ...
    y_hat_train = model.predict(X_train)
    y_hat_train_acc.append(y_hat_train)
    y_gold_train_acc.append(y_train)

    # ... and on the held-out part (to measure generalisation)
    y_hat_test = model.predict(X_test)
    y_hat_test_acc.append(y_hat_test)
    y_gold_test_acc.append(y_test)

# Concatenate the per-fold results into flat arrays for evaluation
y_hat_train = np.concatenate(y_hat_train_acc)
y_gold_train = np.concatenate(y_gold_train_acc)

y_hat_test = np.concatenate(y_hat_test_acc)
y_gold_test = np.concatenate(y_gold_test_acc)

# Every sample is held out in exactly one fold, so the pooled test labels
# must cover the whole dataset exactly once.
assert len(y_gold_test) == len(y), "Each sample should appear in exactly one test fold"


Cross-validation: 100%|██████████| 10/10 [00:00<00:00, 10.91it/s]


In [39]:
# Report the metrics for the pooled cross-validation predictions, first on the
# training parts and then on the held-out parts of the folds. `save_path` is handed
# to `BaseModel.evaluate` (presumably for a confusion-matrix image, judging by the
# file names — confirm against the implementation).
# NOTE(review): in the recorded run the test metrics (accuracy 0.80, recall 1.00,
# AUC 0.50) look consistent with every test sample being predicted as class 1
# (104 of 130 subjects) — confirm with the saved test confusion matrix.
for split_header, gold_labels, predicted_labels, figure_path in (
    ("=== Evaluating model on train data ===", y_gold_train, y_hat_train, "ltp_cm_train.png"),
    ("=== Evaluating model on test data ====", y_gold_test, y_hat_test, "ltp_cm_test.png"),
):
    print(split_header)
    print(BaseModel.evaluate(gold_labels, predicted_labels, save_path=figure_path))

=== Evaluating model on train data ===
Accuracy:  1.00
Recall:    1.00
Precision: 1.00
f1 score:  1.00
AUC score: 1.00

=== Evaluating model on test data ====
Accuracy:  0.80
Recall:    1.00
Precision: 0.80
f1 score:  0.89
AUC score: 0.50



<Figure size 640x480 with 0 Axes>

In [40]:
# Values of the first feature (column 0, i.e. the first ROI) across all control subjects;
# `control_np` has one row per control subject after the transpose above.
control_np_feature_0 = control_np[:, 0]

In [41]:
# Sanity check: there should be one value per control subject.
control_np_feature_0.shape

(26,)