In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

In [3]:
# Feature table: the first CSV column is used as the row index. A handful of
# columns are renamed, then all columns are put in alphabetical order.
data = (
    pd.read_csv('data.csv', index_col=0)
    .rename(columns={"cerebellum_L": "cerebellumL", "cerebellum_R": "cerebellumR", "thalamus_L": "ThalamusL", "thalamus_R": "ThalamusR", "ponsmedulla": "pons"})
    .sort_index(axis=1)
)

# Label table: missing cells become 0 and cells holding "x" become 1.
# infer_objects() converts the resulting all-numeric object columns to a
# numeric dtype explicitly; relying on replace() to downcast them implicitly
# is deprecated in recent pandas releases.
output = (
    pd.read_csv('output.csv', index_col=0)
    .fillna(0)
    .replace("x", 1)
    .infer_objects()
    .sort_index(axis=1)
)

In [6]:
# Keep only the samples present in both tables and give them the same row
# order. train_test_split pairs features and labels by row position, not by
# index label, so the two frames must line up row for row.
data = data[data.index.isin(output.index)]
output = output.loc[data.index]
assert data.index.equals(output.index), "features and labels are not aligned row for row"


In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    output,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits so the test rows do not influence the scaling.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
# Stand-alone classifier instance; the loop below creates its own estimator
# per target column, so this one is not fitted in this cell.
clf = DecisionTreeClassifier(random_state=42)

# The label frame can hold several columns (one per part of the brain), so an
# independent decision tree is trained for each column and stored under that
# column's name.
models = {}
for column in y_train:
    # fit() returns the fitted estimator itself, so it can be assigned directly
    model = DecisionTreeClassifier(random_state=42).fit(X_train_scaled, y_train[column])
    models[column] = model

In [12]:
# Run each fitted per-column model on the scaled held-out features.
predictions = {
    target: fitted.predict(X_test_scaled)
    for target, fitted in models.items()
}

# Collect the predictions in a frame indexed like the test rows so each
# column can be compared directly with the matching column of y_test.
predictions_df = pd.DataFrame(predictions, index=X_test.index)

# Report accuracy and the per-class precision/recall/F1 for every target column
for column in predictions_df.columns:
    print(f"Evaluating salvageability for {column}:")
    accuracy = accuracy_score(y_test[column], predictions_df[column])
    print(f"Accuracy for {column}: {accuracy:.2f}")
    # zero_division=0 reports undefined precision/recall (a class with no
    # predicted or no true samples in this small test split) as 0 without
    # emitting an UndefinedMetricWarning for each such column.
    print(classification_report(y_test[column], predictions_df[column], zero_division=0))

Evaluating salvageability for PCAL:
Accuracy for PCAL: 0.62
              precision    recall  f1-score   support

           0       1.00      0.57      0.73         7
           1       0.25      1.00      0.40         1

    accuracy                           0.62         8
   macro avg       0.62      0.79      0.56         8
weighted avg       0.91      0.62      0.69         8

Evaluating salvageability for PCAR:
Accuracy for PCAR: 0.38
              precision    recall  f1-score   support

           0       0.67      0.33      0.44         6
           1       0.20      0.50      0.29         2

    accuracy                           0.38         8
   macro avg       0.43      0.42      0.37         8
weighted avg       0.55      0.38      0.40         8

Evaluating salvageability for ThalamusL:
Accuracy for ThalamusL: 1.00
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8

    accuracy                           1.00   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
