<h1><center>Evaluation: BERT Dementia Severity Classifier</center></h1>

In [None]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from transformers import DataCollatorWithPadding
from datasets import Dataset
import re
import numpy as np
from sklearn.metrics import confusion_matrix

In [17]:
# Read the raw dataset from disk
df = pd.read_csv("dementia_dataset_6.csv")

# Cast the Severity label column to integers so it can serve as a class id
df = df.assign(Severity=df["Severity"].astype(int))

In [18]:
# Enhanced text cleaning function
def clean_text(text):
    """Normalize a transcript for tokenization.

    The input is converted to a string, lower-cased, every run of whitespace
    is collapsed to a single space, and leading/trailing whitespace is removed.
    Punctuation is left untouched.

    Args:
        text: Value to clean; non-string values are converted with ``str``.

    Returns:
        The cleaned string.
    """
    lowered = str(text).lower()
    # Collapse tabs, newlines and repeated spaces into one space
    collapsed = re.sub(r"\s+", " ", lowered)
    return collapsed.strip()

In [19]:
# Clean each transcript column in turn; missing transcripts become empty strings
for transcript_column in ("Transcript_CTD", "Transcript_PFT", "Transcript_SFT"):
    df[transcript_column] = df[transcript_column].fillna("").apply(clean_text)

# Join the three cleaned transcripts, separated by single spaces, into one text column
df['Text'] = (
    df['Transcript_CTD']
    + " "
    + df['Transcript_PFT']
    + " "
    + df['Transcript_SFT']
)


In [20]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
# NOTE(review): the split is not stratified on Severity — consider passing
# stratify=df['Severity'] if the class proportions should match across splits.
split_parts = train_test_split(
    df['Text'],
    df['Severity'],
    test_size=0.2,
    random_state=42,
)
train_texts, test_texts, train_labels, test_labels = split_parts

In [21]:
# Tokenizer for the same "bert-base-uncased" checkpoint the classifier is loaded from
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Shared tokenization settings: truncate to at most 256 tokens and pad within each call
encode_options = {"truncation": True, "padding": True, "max_length": 256}

# Encode both splits with identical settings
train_encodings = tokenizer(list(train_texts), **encode_options)
test_encodings = tokenizer(list(test_texts), **encode_options)


In [22]:
# Convert the data into PyTorch datasets
class DementiaDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with one label per example.

    Args:
        encodings: Mapping from feature name to an indexable collection holding
            that feature's value for every example.
        labels: Iterable of labels, one per example. It is copied into a plain
            list so that lookups are always positional.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        # A pandas Series (e.g. straight out of train_test_split) keeps its
        # original, shuffled index, so `labels[idx]` would be a label-based
        # lookup and return the wrong row or raise KeyError. Materializing a
        # list guarantees position-based indexing for any iterable input.
        self.labels = list(labels)

    def __len__(self):
        """Return the number of examples, taken from the label count."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, including a ``labels`` entry."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item


In [23]:
# Wrap each split's encodings and labels; labels are passed as plain lists
train_dataset = DementiaDataset(encodings=train_encodings, labels=list(train_labels))
test_dataset = DementiaDataset(encodings=test_encodings, labels=list(test_labels))

# Pretrained BERT with a classification head for the 4 severity classes (0, 1, 2, 3)
num_severity_classes = 4
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=num_severity_classes,
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [24]:
# Define training arguments
training_args = TrainingArguments(
    # Set the checkpoint/log directory explicitly: older transformers releases
    # require this argument, while newer ones fall back to "trainer_output" when
    # it is omitted. Using that same value keeps the output location unchanged
    # on new releases and makes the cell run on old ones.
    output_dir="trainer_output",
    num_train_epochs=8,
    warmup_ratio=0.1,  # fraction of training steps spent warming up the learning rate
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,  # effective batch of 8 * 2 = 16 examples per device
    learning_rate=2e-5,
)

In [25]:
# Define a data collator that pads the examples of each batch using the tokenizer.
# NOTE(review): the encodings were already padded when they were tokenized
# (padding=True), so this collator presumably has little left to pad — confirm
# whether dynamic per-batch padding was intended instead.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Define the evaluation metric
def compute_metrics(eval_pred):
    """Compute accuracy, a per-class report and a confusion matrix.

    Args:
        eval_pred: Pair that unpacks into ``(logits, labels)``.

    Returns:
        Dict with keys ``"accuracy"``, ``"classification_report"`` (the
        ``output_dict=True`` form) and ``"confusion_matrix"`` (nested lists).
    """
    raw_scores, true_labels = eval_pred

    # Predicted class = index of the largest score along the last axis
    predicted = torch.tensor(raw_scores).argmax(dim=-1)

    metrics = {}
    metrics["accuracy"] = accuracy_score(true_labels, predicted)
    metrics["classification_report"] = classification_report(
        true_labels, predicted, output_dict=True
    )
    # Nested lists instead of an array so the value is JSON-serializable
    metrics["confusion_matrix"] = confusion_matrix(true_labels, predicted).tolist()
    return metrics

In [26]:
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    # `tokenizer=` is deprecated in recent transformers releases (it triggers a
    # FutureWarning at this call) in favor of `processing_class`.
    # NOTE: `processing_class` needs transformers >= 4.46.
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

  trainer = Trainer(


In [27]:
# Count how many examples of each label ended up in each split
train_label_counts = pd.Series(train_labels).value_counts()
test_label_counts = pd.Series(test_labels).value_counts()

# Report the per-label counts, training split first, then testing
for split_name, split_counts in (
    ("training", train_label_counts),
    ("testing", test_label_counts),
):
    print(f"Number of entries per cluster in {split_name}:")
    print(split_counts)


Number of entries per cluster in training:
Severity
0    288
1    210
2     81
3     41
Name: count, dtype: int64
Number of entries per cluster in testing:
Severity
0    72
1    56
2    18
3     9
Name: count, dtype: int64


In [28]:
# Fine-tune the model on the training dataset; keep the returned training summary
train_output = trainer.train()

# Score the fine-tuned model on the evaluation dataset given to the Trainer
results = trainer.evaluate()

Step,Training Loss


In [29]:
# Print each structured metric under its own heading, then the overall accuracy
for heading, metric_key in (
    ("Evaluation Results:", "eval_classification_report"),
    ("\nConfusion Matrix:", "eval_confusion_matrix"),
):
    print(heading)
    print(results[metric_key])
print(f"\nAccuracy: {results['eval_accuracy']}")


Evaluation Results:
{'0': {'precision': 0.9193548387096774, 'recall': 0.7916666666666666, 'f1-score': 0.8507462686567164, 'support': 72.0}, '1': {'precision': 0.8928571428571429, 'recall': 0.8928571428571429, 'f1-score': 0.8928571428571429, 'support': 56.0}, '2': {'precision': 0.48148148148148145, 'recall': 0.7222222222222222, 'f1-score': 0.5777777777777777, 'support': 18.0}, '3': {'precision': 0.6, 'recall': 0.6666666666666666, 'f1-score': 0.631578947368421, 'support': 9.0}, 'accuracy': 0.8129032258064516, 'macro avg': {'precision': 0.7234233657620754, 'recall': 0.7683531746031745, 'f1-score': 0.7382400341650146, 'support': 155.0}, 'weighted avg': {'precision': 0.8403884842178286, 'recall': 0.8129032258064516, 'f1-score': 0.8215351088361251, 'support': 155.0}}

Confusion Matrix:
[[57, 3, 12, 0], [1, 50, 1, 4], [4, 1, 13, 0], [0, 2, 1, 6]]

Accuracy: 0.8129032258064516
