### 1 is left unilateral
### 2 is right unilateral
### 3 is bilateral

In [1]:
import pandas as pd

df = pd.read_csv('complete_vol_visual_jun24.csv')

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
import joblib

# Load and filter the data
PCA = df
PCA = PCA[PCA['PCA'] != 0]

X = pd.concat([PCA['PCAL_x'], PCA['PCAR_x']], axis=1)
y = PCA['PCA']

# Standardize features:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Set up cross-validation:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Initialize the logistic regression model
model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, class_weight='balanced')

# Perform cross-validation:
cv_scores = cross_val_score(model, X_scaled, y, cv=kf)

# Print cross-validated scores:
print("Cross-validated scores:", cv_scores)
print("Average cross-validation score:", cv_scores.mean())

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate the model:
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy of the model on the test set:", accuracy)
print("")
print("Confusion Matrix:\n", conf_matrix)

# Reset the index of y_test to avoid index mismatch errors
y_test = y_test.reset_index(drop=True)
y_pred = pd.Series(y_pred)

# Create a DataFrame with the test set and predictions, and undo the scaling:
results_df = pd.DataFrame(scaler.inverse_transform(X_test), columns=['PCAL_x', 'PCAR_x'])
results_df = results_df.reset_index(drop=True)  # Reset the index to align with y_test
results_df['Actual'] = y_test
results_df['Predicted'] = y_pred

# Filter the DataFrame to only include misclassified instances:
misclassified = results_df[results_df['Actual'] != results_df['Predicted']]

print("")
print("Misclassified instances:")
print(misclassified)

# Save the model and scaler for later use
joblib.dump(model, 'PCA_logistic_regression_model.pkl')
joblib.dump(scaler, 'PCA_scaler.pkl')


Cross-validated scores: [0.89285714 0.92857143 1.         0.7037037  0.88888889]
Average cross-validation score: 0.8828042328042329
Accuracy of the model on the test set: 0.9285714285714286

Confusion Matrix:
 [[13  0  0]
 [ 0 13  0]
 [ 2  0  0]]

Misclassified instances:
    PCAL_x  PCAR_x  Actual  Predicted
4   2564.0  2301.0       3          1
24  2587.0  2444.0       3          1


['PCA_scaler.pkl']

### Misclassified PCA IDs
#### sub-2bd4552c
#### sub-1b7deaf6

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
import joblib

# Load and filter the data
Thalamus = df
Thalamus = Thalamus[Thalamus['Thalamus'] != 0]

X = pd.concat([Thalamus['ThalamusL_x'], Thalamus['ThalamusR_x']], axis=1)
y = Thalamus['Thalamus']

# Standardize features:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Set up cross-validation:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Initialize the logistic regression model
model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, class_weight='balanced')

# Perform cross-validation:
cv_scores = cross_val_score(model, X_scaled, y, cv=kf)

# Print cross-validated scores:
print("Cross-validated scores:", cv_scores)
print("Average cross-validation score:", cv_scores.mean())

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate the model:
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy of the model on the test set:", accuracy)
print("")
print("Confusion Matrix:\n", conf_matrix)

# Reset the index of y_test to avoid index mismatch errors
y_test = y_test.reset_index(drop=True)
y_pred = pd.Series(y_pred)

# Create a DataFrame with the test set and predictions, and undo the scaling:
results_df = pd.DataFrame(scaler.inverse_transform(X_test), columns=['ThalamusL_x', 'ThalamusR_x'])
results_df = results_df.reset_index(drop=True)  # Reset the index to align with y_test
results_df['Actual'] = y_test
results_df['Predicted'] = y_pred

# Filter the DataFrame to only include misclassified instances:
misclassified = results_df[results_df['Actual'] != results_df['Predicted']]

print("")
print("Misclassified instances:")
print(misclassified)

# Save the model and scaler for later use
joblib.dump(model, 'Thalamus_logistic_regression_model.pkl')
joblib.dump(scaler, 'Thalamus_scaler.pkl')


Cross-validated scores: [0.96875 0.9375  0.96875 0.96875 1.     ]
Average cross-validation score: 0.96875
Accuracy of the model on the test set: 0.96875

Confusion Matrix:
 [[16  0]
 [ 1 15]]

Misclassified instances:
    ThalamusL_x  ThalamusR_x  Actual  Predicted
25          0.0         70.0       2          1


['Thalamus_scaler.pkl']

# 2b191b6c

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
import joblib

# Load and filter the data
cerebellum = df
cerebellum = cerebellum[cerebellum['cerebellum'] != 0]

X = pd.concat([cerebellum['cerebellumL_x'], cerebellum['cerebellumR_x']], axis=1)
y = cerebellum['cerebellum']

# Standardize features:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Set up cross-validation:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Initialize the logistic regression model
model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, class_weight='balanced')

# Perform cross-validation:
cv_scores = cross_val_score(model, X_scaled, y, cv=kf)

# Print cross-validated scores:
print("Cross-validated scores:", cv_scores)
print("Average cross-validation score:", cv_scores.mean())

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate the model:
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy of the model on the test set:", accuracy)
print("")
print("Confusion Matrix:\n", conf_matrix)

# Reset the index of y_test to avoid index mismatch errors
y_test = y_test.reset_index(drop=True)
y_pred = pd.Series(y_pred)

# Create a DataFrame with the test set and predictions, and undo the scaling:
results_df = pd.DataFrame(scaler.inverse_transform(X_test), columns=['cerebellumL_x', 'cerebellumR_x'])
results_df = results_df.reset_index(drop=True)  # Reset the index to align with y_test
results_df['Actual'] = y_test
results_df['Predicted'] = y_pred

# Filter the DataFrame to only include misclassified instances:
misclassified = results_df[results_df['Actual'] != results_df['Predicted']]

print("")
print("Misclassified instances:")
print(misclassified)

# Save the model and scaler for later use
joblib.dump(model, 'cerebellum_logistic_regression_model.pkl')
joblib.dump(scaler, 'cerebellum_scaler.pkl')


Cross-validated scores: [0.7        0.85       0.8        0.75       0.94736842]
Average cross-validation score: 0.8094736842105263
Accuracy of the model on the test set: 0.85

Confusion Matrix:
 [[6 2 1]
 [0 9 0]
 [0 0 2]]

Misclassified instances:
   cerebellumL_x  cerebellumR_x  Actual  Predicted
4            0.0          221.0       1          2
6            0.0          593.0       1          3
9            0.0           42.0       1          2


['cerebellum_scaler.pkl']

In [5]:
# accuracy, conf tables, etc for each region
# if cerebellum, thalamus, or pca, run bi or uni lateral
# give aspects points
# calculator accuracy, conf tables, etc one strict and one flexible (+- 1)
# calculate accuracy among evalutors 