In [2]:
# Step 1: Import necessary libraries

import os  # To work with file paths
import nibabel as nib  # To load NIfTI format files
import pandas as pd  # To load and work with CSV files
import numpy as np  # To work with arrays and numerical data
from sklearn.model_selection import train_test_split  # To split the data into training and testing sets
from sklearn.tree import DecisionTreeClassifier  # To create and train the decision tree model
from sklearn.metrics import accuracy_score, confusion_matrix  # To evaluate the performance of the model
from graphviz import Source  # To visualize the decision tree
from nilearn.image import resample_img  # To resample the images to a common voxel size

In [3]:
# Step 2: Load the data

# Set the path to the folder containing the NIfTI images
data_folder = "C:/Users/Juan A. Arias/Desktop/TFM/PETmasked"
nifti_data = []

# Loop through all files in the folder and load the ones with the .hdr extension
for filename in os.listdir(data_folder):
    if filename.endswith(".hdr"):
        nifti_file = os.path.join(data_folder, filename)
        nifti_data.append(nib.load(nifti_file).get_fdata())
        
# Concatenate the image data into a single array along the last axis
nifti_array = np.concatenate(nifti_data, axis=-1)

In [4]:
print(nifti_array[12, 29, 100])

0.0


In [4]:
# Slice the 3D data to get the 2D data at Z=30
nifti_array_z30 = nifti_array[:, :, 30]

Necesario testear con varias coordenadas la correspondencia entre el nifti cargado por Jupyter con el nifti visto desde MRIcro

In [5]:
# Flatten each 2D image into a 1D array and transpose to create a single row per patient
nifti_array_flat = np.reshape(nifti_array_z30, (nifti_array_z30.shape[0] * nifti_array_z30.shape[1], -1)).T
nifti_array_flat.shape

(1, 7505)

In [6]:
# read in demographics data
demographic_data = pd.read_csv("Demographics.csv", sep=";")

# Keep only the 'Group' variable
demographic_data = demographic_data[["Group"]]
demographic_data["Group"] = pd.Categorical(demographic_data["Group"])

Cargar también la edad y el sexo como variables demográficas para darle un poco más de juego al DecisionTree

In [7]:
# Combine the demographic data with the flattened neuroimaging data
combined_data = pd.concat([demographic_data, pd.DataFrame(nifti_array_flat)], axis=1)

# Convert the 'Group' column to object type
combined_data['Group'] = combined_data['Group'].astype('object')

# Replace all NaN values with 0
combined_data = combined_data.fillna(0)

combined_data

Unnamed: 0,Group,0,1,2,3,4,5,6,7,8,...,7495,7496,7497,7498,7499,7500,7501,7502,7503,7504
0,AD,0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,-0.0,0.0
1,AD,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,AD,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,CN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,CN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121,CN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
122,CN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
123,CN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
124,CN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Step 3: Divide the data into train and test sets

X = combined_data.iloc[:, 1:]
y = combined_data["Group"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [9]:
# Step 4: Train the decision tree

# Create the decision tree model
clf = DecisionTreeClassifier(random_state=42)  # hyperparameters posibles de tuneo: max_depth, min_samples_split, etc.

# Train the model with the training data
clf.fit(X_train, y_train)

In [10]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

# Define los hiperparámetros y sus posibles valores
param_grid = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 10, 20, 30, 40, 50],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': [None, 'sqrt', 'log2']
}


In [25]:

# Crea un modelo de árbol de decisión
tree = DecisionTreeClassifier()

# Utiliza GridSearchCV para encontrar la mejor combinación de hiperparámetros
grid_search = GridSearchCV(tree, param_grid, cv=10, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Muestra los mejores hiperparámetros encontrados
print("Mejores hiperparámetros: ", grid_search.best_params_)


Mejores hiperparámetros:  {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'best'}


In [27]:

# Entrena el árbol de decisión con los mejores hiperparámetros
best_tree = grid_search.best_estimator_
best_tree.fit(X_train, y_train)


In [28]:
# Evalúa el rendimiento del modelo ajustado
y_pred = best_tree.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Accuracy: ", accuracy)
print("Confusion Matrix: \n", conf_matrix)

Accuracy:  0.5769230769230769
Confusion Matrix: 
 [[ 0 11]
 [ 0 15]]


[[Verdaderos Negativos (VN), Falsos Positivos (FP)],
 [Falsos Negativos (FN), Verdaderos Positivos (VP)]]
 
 Recuerda que es sobre los 26 pacientes de test.

In [29]:
from sklearn.tree import export_graphviz
from graphviz import Source

# Entrena el árbol de decisión con los mejores hiperparámetros
best_tree = grid_search.best_estimator_

# Exporta el árbol de decisión como un archivo Graphviz
dot_data = export_graphviz(best_tree, out_file=None, feature_names=X.columns, class_names=best_tree.classes_, filled=True, rounded=True)

# Crea un objeto Graphviz a partir de los datos del archivo DOT y visualízalo
graph = Source(dot_data)
graph.render("best_decision_tree")
graph.view()


Error: Could not open "best_decision_tree.pdf" for writing : Permission denied


CalledProcessError: Command '[WindowsPath('dot'), '-Kdot', '-Tpdf', '-O', 'best_decision_tree']' returned non-zero exit status 1. [stderr: b'Error: Could not open "best_decision_tree.pdf" for writing : Permission denied\r\n']