In [2]:
import pandas as pd

df = pd.read_csv('complete_vol_visual_jun24.csv')

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Assuming 'df' is your DataFrame
PCA = df[df['PCA'] != 0]

# Recoding the target variable
PCA['PCA'] = PCA['PCA'].replace({1: 0, 2: 0, 3: 1})

X = pd.concat([PCA['PCAL_x'], PCA['PCAR_x']], axis=1)
y = PCA['PCA']

# Set up cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Configure the Logistic Regression model for binary classification
model = SVC(kernel='rbf', class_weight='balanced', gamma='auto')

# Perform cross-validation
cv_scores = cross_val_score(model, X, y, cv=kf)

# Print cross-validated scores
print("Cross-validated scores:", cv_scores)
print("Average cross-validation score:", cv_scores.mean())

# Splitting the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Fitting the model
model.fit(X_train, y_train)

# Making predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy of the model on the test set:", accuracy)
print("")
print("Confusion Matrix:\n", conf_matrix)

# Create a DataFrame with the test set and predictions
results_df = pd.DataFrame(X_test)
results_df['Actual'] = y_test
results_df['Predicted'] = y_pred

# Filter the DataFrame to only include misclassified instances
misclassified = results_df[results_df['Actual'] != results_df['Predicted']]

print("")
print("Misclassified instances:")
print(misclassified)

Cross-validated scores: [0.92857143 0.92857143 1.         0.85185185 0.85185185]
Average cross-validation score: 0.9121693121693122
Accuracy of the model on the test set: 0.9285714285714286

Confusion Matrix:
 [[26  0]
 [ 2  0]]

Misclassified instances:
     PCAL_x  PCAR_x  Actual  Predicted
415    2564    2301       1          0
468    2587    2444       1          0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  PCA['PCA'] = PCA['PCA'].replace({1: 0, 2: 0, 3: 1})


### Misclassified PCA IDs
#### sub-2bd4552c
#### sub-1b7deaf6

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

Thalamus = df
Thalamus = Thalamus[Thalamus['Thalamus'] != 0]

X = pd.concat([Thalamus['ThalamusL_x'], Thalamus['ThalamusR_x']], axis=1)
y = Thalamus['Thalamus']

# Set up cross-validation:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, class_weight='balanced')

# Perform cross-validation:
cv_scores = cross_val_score(model, X, y, cv=kf)

# Print cross-validated scores:
print("Cross-validated scores:", cv_scores)
print("Average cross-validation score:", cv_scores.mean())

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate the model:
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy of the model on the test set:", accuracy)
print("")
print("Confusion Matrix:\n", conf_matrix)

# Create a DataFrame with the test set and predictions:
results_df = pd.DataFrame(X_test)
results_df['Actual'] = y_test
results_df['Predicted'] = y_pred

# Filter the DataFrame to only include misclassified instances:
misclassified = results_df[results_df['Actual'] != results_df['Predicted']]

print("")
print("Misclassified instances:")
print(misclassified)


Cross-validated scores: [0.90625 1.      1.      1.      1.     ]
Average cross-validation score: 0.98125
Accuracy of the model on the test set: 0.96875

Confusion Matrix:
 [[16  0  0]
 [ 0 15  0]
 [ 1  0  0]]

Misclassified instances:
     ThalamusL_x  ThalamusR_x  Actual  Predicted
465         2876          729       3          1


# 2b191b6c

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

cerebellum = df
cerebellum = cerebellum[cerebellum['cerebellum'] != 0]

X = pd.concat([cerebellum['cerebellumL_x'], cerebellum['cerebellumR_x']], axis=1)
y = cerebellum['cerebellum']

# Set up cross-validation:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000, class_weight='balanced')

# Perform cross-validation:
cv_scores = cross_val_score(model, X, y, cv=kf)

# Print cross-validated scores:
print("Cross-validated scores:", cv_scores)
print("Average cross-validation score:", cv_scores.mean())

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate the model:
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy of the model on the test set:", accuracy)
print("")
print("Confusion Matrix:\n", conf_matrix)

# Create a DataFrame with the test set and predictions:
results_df = pd.DataFrame(X_test)
results_df['Actual'] = y_test
results_df['Predicted'] = y_pred

# Filter the DataFrame to only include misclassified instances:
misclassified = results_df[results_df['Actual'] != results_df['Predicted']]

print("")
print("Misclassified instances:")
print(misclassified)

Cross-validated scores: [0.95       0.9        1.         1.         0.94736842]
Average cross-validation score: 0.9594736842105263
Accuracy of the model on the test set: 1.0

Confusion Matrix:
 [[9 0 0]
 [0 9 0]
 [0 0 2]]

Misclassified instances:
Empty DataFrame
Columns: [cerebellumL_x, cerebellumR_x, Actual, Predicted]
Index: []
