<a href="https://colab.research.google.com/github/davidarvai/MRI-Image-Viewer/blob/main/XGBOOSTKod.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; every data and
# result path used by this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import os
import xgboost as xgb
from sklearn.metrics import confusion_matrix
import pickle
import csv

# Paths (all located on the mounted Google Drive)
pathSection = '/content/drive/My Drive/Allamvizsga/Teszt_folder'  # walked recursively for evaluation CSVs
# CSV used to fit the model. NOTE(review): this file sits inside pathSection,
# so the evaluation loop will pick it up as a test volume as well.
pathSection1To7 = '/content/drive/My Drive/Allamvizsga/Teszt_folder/hg000.csv'
output_text_file = '/content/drive/My Drive/Allamvizsga/Eredmeny/output_metrics.txt'
output_csv_file = '/content/drive/My Drive/Allamvizsga/Eredmeny/output.csv'

# Header for CSV file; the column order must match the rows appended by
# calculate_metrics.
header = ['volumeName', 'tumorType', 'truePositive', 'trueNegative', 'falsePositive', 'falseNegative',
          'truePositiveRate', 'trueNegativeRate', 'positivePredictiveValue',
          'negativePredictiveValue', 'accuracy', 'diceScore']

# Shared state between evaluate_model (writes 'volumeName') and
# calculate_metrics (reads it back).
res_list_class_elso = {}

# Initialize output files: start the CSV with its header row ...
with open(output_csv_file, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)

# ... and truncate the text report as well. It is only ever opened in append
# mode afterwards, so without this reset every re-run would stack its output
# on top of the previous run's while the CSV only holds the latest run.
with open(output_text_file, 'w'):
    pass

# Gather all CSV paths below pathSection. os.walk yields entries in
# filesystem order, so sort to make the evaluation order (and therefore the
# order of the report rows) reproducible between runs.
pathSectionArray = sorted(
    os.path.join(dirpath, filename)
    for dirpath, _dirnames, filenames in os.walk(pathSection)
    for filename in filenames
    if filename.endswith('.csv')
)
counterArray = len(pathSectionArray)

# Read the training CSV and expose its contents as a plain NumPy array.
dataX = pd.read_csv(pathSection1To7)
dfX = pd.DataFrame(dataX)
arrayX = dfX.values

# Column 0 holds the class label; columns 5 and onward are the model inputs.
y_train = arrayX[:, 0]
X_train = arrayX[:, 5:]

# Relabel class 4 as class 3 (done in place on the label column).
y_train[y_train == 4] = 3

# No hold-out split is made here: the "test" names simply alias the training
# arrays (ideally a separate test set would be used instead).
X_test, y_test = X_train, y_train

# XGBoost model: histogram-based gradient-boosted trees that output per-class
# probabilities.
# NOTE(review): both random_state=42 and seed=27 are passed; they appear to
# configure the same random seed, and which one takes effect may depend on the
# installed XGBoost version -- confirm and keep only one.
xgb_model = xgb.XGBClassifier(
    objective="multi:softprob",  # Multi-class probabilities
    random_state=42,
    tree_method='hist',
    max_bin=16,
    learning_rate=0.05,
    n_estimators=2000,
    max_depth=3,
    min_child_weight=1,
    gamma=0.2,
    reg_alpha=0.1,
    subsample=0.5,
    colsample_bytree=0.95,
    seed=27
)
xgb_model.fit(X_train, y_train)

# Save the model. The context manager guarantees the file handle is flushed
# and closed even if pickling fails part-way, instead of leaving it to the
# garbage collector.
fileNameSaveModel = "/content/drive/My Drive/Allamvizsga/Eredmeny/XGBModel100KSection2To7Without1Data.pickle"
with open(fileNameSaveModel, "wb") as model_file:
    pickle.dump(xgb_model, model_file)

# Metrics calculation
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator rounded to 3 decimals, or 0 if the denominator is not positive."""
    return round(numerator / denominator, 3) if denominator > 0 else 0


def calculate_metrics(tumorType, TN, FP, FN, TP, counter):
    """Derive per-class metrics from confusion counts and append them to both reports.

    The volume name is read from the module-level ``res_list_class_elso``
    dict (key ``'volumeName'``), which the caller must have set beforehand.
    One row is appended to ``output_csv_file`` and one formatted section to
    ``output_text_file``.

    Args:
        tumorType: Display name of the class being reported.
        TN: True-negative count.
        FP: False-positive count.
        FN: False-negative count.
        TP: True-positive count.
        counter: Index of the evaluated file; accepted for call
            compatibility but not used here.

    Every ratio falls back to 0 when its denominator is 0. This now includes
    accuracy, which was previously the only unguarded division and failed
    (or produced NaN) when all four counts were zero.
    """
    TPR = _safe_ratio(TP, TP + FN)
    TNR = _safe_ratio(TN, TN + FP)
    PPV = _safe_ratio(TP, TP + FP)
    NPV = _safe_ratio(TN, TN + FN)
    ACC = _safe_ratio(TP + TN, TP + FP + FN + TN)
    DS = _safe_ratio(2 * TP, (2 * TP) + FP + FN)

    fileName = res_list_class_elso['volumeName']
    # Column order mirrors the CSV header written when the file is created.
    classRow = [fileName, tumorType, TP, TN, FP, FN, TPR, TNR, PPV, NPV, ACC, DS]

    with open(output_csv_file, 'a+', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(classRow)

    with open(output_text_file, 'a') as txt_file:
        txt_file.write(f"\n=== {tumorType} Metrics for file: {fileName} ===\n")
        txt_file.write(f"True positive (TP): {TP}\n")
        txt_file.write(f"True negative (TN): {TN}\n")
        txt_file.write(f"False positive (FP): {FP}\n")
        txt_file.write(f"False negative (FN): {FN}\n")
        txt_file.write(f"True positive rate (TPR): {TPR:.3f}\n")
        txt_file.write(f"True negative rate (TNR): {TNR:.3f}\n")
        txt_file.write(f"Positive predictive value (PPV): {PPV:.3f}\n")
        txt_file.write(f"Negative predictive value (NPV): {NPV:.3f}\n")
        txt_file.write(f"Accuracy (ACC): {ACC:.3f}\n")
        txt_file.write(f"Dice score (DS): {DS:.3f}\n\n")

def evaluate_model(counter):
    """Evaluate the trained model on one CSV file and report per-class metrics.

    Loads ``pathSectionArray[counter]``, predicts a class for every row with
    the module-level ``xgb_model``, prints the 4x4 confusion matrix and
    appends it to ``output_text_file``, then calls ``calculate_metrics`` once
    per class.

    Args:
        counter: Index into ``pathSectionArray`` of the file to evaluate.

    Side effects:
        Stores the file's base name in ``res_list_class_elso['volumeName']``
        so that ``calculate_metrics`` can label its output.
    """
    csv_path = pathSectionArray[counter]
    volumeName = os.path.basename(csv_path)
    res_list_class_elso['volumeName'] = volumeName

    with open(output_text_file, 'a') as txt_file:
        txt_file.write(f"FileName: {volumeName}\n")

    arrayY = pd.read_csv(csv_path).values

    y_true = arrayY[:, 0]  # True labels are in the first column
    y_true[y_true == 4] = 3  # Same relabelling as applied to the training labels
    features = arrayY[:, 5:]  # Same feature columns as used for training

    y_pred = xgb_model.predict(features)

    # Rows of CM are true labels, columns are predicted labels. Passing the
    # labels explicitly keeps the matrix 4x4 even if a class is absent here.
    CM = confusion_matrix(y_true, y_pred, labels=[0, 1, 2, 3])

    # Print and save confusion matrix
    print(f"Confusion Matrix for {volumeName}:\n{CM}\n")
    with open(output_text_file, 'a') as txt_file:
        txt_file.write(f"Confusion Matrix for {volumeName}:\n{CM}\n\n")

    # Display names, paired positionally with class labels 0..3.
    # NOTE(review): label 0 is reported as "Whole Tumor" -- confirm that this
    # naming matches the dataset's label encoding.
    tumor_types = ["Whole Tumor", "Edema", "Tumor Core", "Enhancing Core"]

    total = CM.sum()
    for i, tumorType in enumerate(tumor_types):
        TP = CM[i, i]
        # Row i = samples whose true class is i; those not on the diagonal
        # were predicted as something else, i.e. missed -> false negatives.
        FN = CM[i, :].sum() - TP
        # Column i = samples predicted as i; those not on the diagonal
        # actually belong to another class -> false positives.
        FP = CM[:, i].sum() - TP
        TN = total - TP - FP - FN

        calculate_metrics(tumorType, TN, FP, FN, TP, counter)


# Evaluate every discovered CSV in turn; evaluate_model looks the path up by
# its position in pathSectionArray.
for i, _csv_path in enumerate(pathSectionArray):
    evaluate_model(i)
