# MiniLM model

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the full Fakeddit polarity table from Google Drive.
df = pd.read_csv(
    '/content/drive/MyDrive/Research/Dataset/Fakeddit/all_samples (also includes non multimodal)/Original dataset/polarity_results.csv'
)

# Column roles: one free-text column, five numeric columns, one label column.
text_col, target_col = 'title', '2_way_label'
tabular_cols = ['num_comments', 'score', 'upvote_ratio', 'polarity', 'emotion_score']

# A missing title becomes '' so the text encoder always receives a string.
df[text_col] = df[text_col].fillna('')

# Feature frame (text first, then the numeric columns) and label vector.
X = df[[text_col, *tabular_cols]]
y = df[target_col]

# Stage 1: hold out 20% of all rows as the test set, stratified on the label.
X_temp, X_test, y_temp, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# Stage 2: take 20% of the remaining 80% as validation -> 64/16/20 train/val/test overall.
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, stratify=y_temp, test_size=0.2, random_state=42)


In [None]:
!pip install transformers torch scikit-learn pandas


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

Encode text with MiniLM

In [None]:
from sentence_transformers import SentenceTransformer
import time

model = SentenceTransformer('all-MiniLM-L6-v2')

def get_embeddings(text_series):
    """Encode a series of strings with the module-level ``model``.

    ``text_series`` must offer ``.tolist()`` and ``len()`` (callers pass a
    DataFrame column). Prints the sample count and the wall-clock encoding
    time in minutes, then returns whatever ``model.encode`` produced.
    """
    t0 = time.time()
    texts = text_series.tolist()
    vectors = model.encode(texts, batch_size=256, show_progress_bar=True)
    elapsed_minutes = (time.time() - t0) / 60
    print(f"Embedding extraction for {len(text_series)} samples took {elapsed_minutes:.2f} minutes")
    return vectors

# Extract embeddings for each split
# (one call per split; each call prints its own sample count and elapsed minutes)
train_embeddings = get_embeddings(X_train[text_col])
val_embeddings = get_embeddings(X_val[text_col])
test_embeddings = get_embeddings(X_test[text_col])


Batches:   0%|          | 0/2430 [00:00<?, ?it/s]

Embedding extraction for 621955 samples took 96.58 minutes


Batches:   0%|          | 0/608 [00:00<?, ?it/s]

Embedding extraction for 155489 samples took 23.68 minutes


Batches:   0%|          | 0/760 [00:00<?, ?it/s]

Embedding extraction for 194362 samples took 29.72 minutes


Prepare Tabular Features

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, so nothing about
# the validation or test rows leaks into the transform.
scaler = StandardScaler().fit(X_train[tabular_cols])

# Apply the train-fitted scaling to every split.
train_tabular, val_tabular, test_tabular = (
    scaler.transform(split_frame[tabular_cols])
    for split_frame in (X_train, X_val, X_test)
)


Combine Features

In [None]:
import numpy as np

# Per split: embedding columns first, scaled numeric columns appended on the right.
X_train_all = np.concatenate((train_embeddings, train_tabular), axis=1)
X_val_all = np.concatenate((val_embeddings, val_tabular), axis=1)
X_test_all = np.concatenate((test_embeddings, test_tabular), axis=1)


Train and Validate Model - Random forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# 100-tree random forest, fixed seed, all CPU cores; fitted on the combined train features.
clf = RandomForestClassifier(n_jobs=-1, random_state=42, n_estimators=100).fit(X_train_all, y_train)

# Hard-label predictions on the validation split, summarised per class.
y_val_pred = clf.predict(X_val_all)
print("Validation Results:", classification_report(y_val, y_val_pred), sep="\n")


Validation Results:
              precision    recall  f1-score   support

           0       0.91      0.78      0.84     77934
           1       0.81      0.92      0.86     77555

    accuracy                           0.85    155489
   macro avg       0.86      0.85      0.85    155489
weighted avg       0.86      0.85      0.85    155489



In [None]:
from sklearn.metrics import roc_auc_score

# Column 1 of predict_proba holds the probability of the second entry of
# clf.classes_ (label 1 for this 0/1 target).
probs = clf.predict_proba(X_val_all)[:, 1]

# ROC AUC is computed from the continuous scores, not from thresholded labels
auc_score = roc_auc_score(y_val, probs)
print(f"Validation set AUC: {auc_score:.4f}")


Validation set AUC: 0.9412


Final test evaluation

In [None]:
# Score the held-out test split with the forest fitted on the training split
y_test_pred = clf.predict(X_test_all)
print("Test Results:")
print(classification_report(y_test, y_test_pred))


Test Results:
              precision    recall  f1-score   support

           0       0.91      0.79      0.84     97417
           1       0.81      0.92      0.86     96945

    accuracy                           0.85    194362
   macro avg       0.86      0.85      0.85    194362
weighted avg       0.86      0.85      0.85    194362



In [None]:
from sklearn.metrics import roc_auc_score

# Column 1 of predict_proba holds the probability of the second entry of
# clf.classes_ (label 1 for this 0/1 target). This overwrites the validation `probs`.
probs = clf.predict_proba(X_test_all)[:, 1]

# ROC AUC on the test split (overwrites the validation `auc_score`)
auc_score = roc_auc_score(y_test, probs)
print(f"AUC: {auc_score:.4f}")


AUC: 0.9417


# **Test on a 100,000-row sample with MiniLM embeddings**

MiniLM + Random forest

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the pre-sampled 100k file
df = pd.read_csv('/content/drive/MyDrive/Research/Dataset/Fakeddit/sampled_polarity_results_100k.csv')
# Draw 100,000 rows without replacement with a fixed seed
# (pandas raises ValueError if the file holds fewer rows than that).
df = df.sample(n=100000, random_state=42)

# Fill missing text with empty string
df['title'] = df['title'].fillna('')

# Select features
text_col = 'title'
tabular_cols = ['num_comments', 'score', 'upvote_ratio', 'polarity', 'emotion_score']
target_col = '2_way_label'

# Feature frame (text + numeric columns) and label vector
X = df[[text_col] + tabular_cols]
y = df[target_col]

# 75% train / 15% val / 10% test (about 75k / 15k / 10k rows for the 100k sample);
# 0.15/0.90 of the remaining 90% equals 15% of the total.
# NOTE(review): the next cell re-splits X, y with test_size=0.2 twice (64/16/20),
# which replaces the split made here.
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.10, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.15/0.90, random_state=42, stratify=y_temp)



In [None]:
# Re-split the same X, y into 64% train / 16% val / 20% test. This replaces any
# X_train / X_val / X_test assigned by an earlier cell; the recorded outputs
# below (64000 / 16000 / 20000 rows) come from this split.
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.2, random_state=42, stratify=y_temp)

In [None]:
from sentence_transformers import SentenceTransformer
import time
start_time = time.time()
model = SentenceTransformer('all-MiniLM-L6-v2')

def get_embeddings(text_series):
    """Encode a series of strings with the module-level ``model``.

    ``text_series`` must offer ``.tolist()`` and ``len()`` (callers pass a
    DataFrame column). Prints the sample count and the wall-clock encoding
    time in minutes, then returns whatever ``model.encode`` produced.
    """
    t0 = time.time()
    texts = text_series.tolist()
    vectors = model.encode(texts, batch_size=256, show_progress_bar=True)
    elapsed_minutes = (time.time() - t0) / 60
    print(f"Embedding extraction for {len(text_series)} samples took {elapsed_minutes:.2f} minutes")
    return vectors

# Extract embeddings for each split (each call prints its own timing)
train_embeddings = get_embeddings(X_train[text_col])
val_embeddings = get_embeddings(X_val[text_col])
test_embeddings = get_embeddings(X_test[text_col])
end_time = time.time()
# start_time was taken before the encoder was loaded, so this interval covers
# model loading plus all three encode calls. Nothing is trained in this cell,
# hence the message says "embedding time" rather than "training time".
print(f"⏱️ Total embedding time (model load + encoding): {end_time - start_time:.2f} seconds")


Batches:   0%|          | 0/250 [00:00<?, ?it/s]

Embedding extraction for 64000 samples took 9.03 minutes


Batches:   0%|          | 0/63 [00:00<?, ?it/s]

Embedding extraction for 16000 samples took 2.31 minutes


Batches:   0%|          | 0/79 [00:00<?, ?it/s]

Embedding extraction for 20000 samples took 2.88 minutes
⏱️ Total training time: 854.85 seconds


In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, so nothing about
# the validation or test rows leaks into the transform.
scaler = StandardScaler().fit(X_train[tabular_cols])

# Apply the train-fitted scaling to every split.
train_tabular, val_tabular, test_tabular = (
    scaler.transform(split_frame[tabular_cols])
    for split_frame in (X_train, X_val, X_test)
)


In [None]:
import numpy as np

# Per split: embedding columns first, scaled numeric columns appended on the right.
X_train_all = np.concatenate((train_embeddings, train_tabular), axis=1)
X_val_all = np.concatenate((val_embeddings, val_tabular), axis=1)
X_test_all = np.concatenate((test_embeddings, test_tabular), axis=1)


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# 100-tree random forest, fixed seed, all CPU cores; fitted on the combined train features.
clf = RandomForestClassifier(n_jobs=-1, random_state=42, n_estimators=100).fit(X_train_all, y_train)

# Hard-label predictions on the validation split, summarised per class.
y_val_pred = clf.predict(X_val_all)
print("Validation Results:", classification_report(y_val, y_val_pred), sep="\n")
from sklearn.metrics import roc_auc_score

# Scores for the second class in clf.classes_ (column 1 of predict_proba).
probs = clf.predict_proba(X_val_all)[:, 1]

# Threshold-free ranking quality on the validation split.
auc_score = roc_auc_score(y_val, probs)
print(f"Validation set AUC: {auc_score:.4f}")

Validation Results:
              precision    recall  f1-score   support

           0       0.89      0.76      0.82      7983
           1       0.79      0.91      0.85      8017

    accuracy                           0.83     16000
   macro avg       0.84      0.83      0.83     16000
weighted avg       0.84      0.83      0.83     16000

Validation set AUC: 0.9250


In [None]:
# Hard-label predictions on the held-out test split, summarised per class.
y_test_pred = clf.predict(X_test_all)
print("Test Results:", classification_report(y_test, y_test_pred), sep="\n")
from sklearn.metrics import roc_auc_score

# Scores for the second class in clf.classes_ (column 1 of predict_proba).
probs = clf.predict_proba(X_test_all)[:, 1]

# Threshold-free ranking quality on the test split.
auc_score = roc_auc_score(y_test, probs)
print(f"AUC: {auc_score:.4f}")


Test Results:
              precision    recall  f1-score   support

           0       0.89      0.76      0.82      9979
           1       0.79      0.91      0.84     10021

    accuracy                           0.83     20000
   macro avg       0.84      0.83      0.83     20000
weighted avg       0.84      0.83      0.83     20000

AUC: 0.9243


MiniLM + ReLU based neural network

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the pre-sampled 100k file (used as-is; no extra sampling or shuffling here)
df = pd.read_csv('/content/drive/MyDrive/Research/Dataset/Fakeddit/sampled_polarity_results_100k.csv')

# Fill missing text with empty string
df['title'] = df['title'].fillna('')

# Select features
text_col = 'title'
tabular_cols = ['num_comments', 'score', 'upvote_ratio', 'polarity', 'emotion_score']
target_col = '2_way_label'


# Feature frame (text + numeric columns) and label vector
X = df[[text_col] + tabular_cols]
y = df[target_col]

# 75% train / 15% val / 10% test (75000 / 15000 / 10000 rows in the recorded run);
# 0.15/0.90 of the remaining 90% equals 15% of the total.
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.10, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.15/0.90, random_state=42, stratify=y_temp)

In [None]:
from sentence_transformers import SentenceTransformer
import time
start_time = time.time()
model = SentenceTransformer('all-MiniLM-L6-v2')

def get_embeddings(text_series):
    """Encode a series of strings with the module-level ``model``.

    ``text_series`` must offer ``.tolist()`` and ``len()`` (callers pass a
    DataFrame column). Prints the sample count and the wall-clock encoding
    time in minutes, then returns whatever ``model.encode`` produced.
    """
    t0 = time.time()
    texts = text_series.tolist()
    vectors = model.encode(texts, batch_size=256, show_progress_bar=True)
    elapsed_minutes = (time.time() - t0) / 60
    print(f"Embedding extraction for {len(text_series)} samples took {elapsed_minutes:.2f} minutes")
    return vectors

# Extract embeddings for each split (each call prints its own timing)
train_embeddings = get_embeddings(X_train[text_col])
val_embeddings = get_embeddings(X_val[text_col])
test_embeddings = get_embeddings(X_test[text_col])
end_time = time.time()
# start_time was taken before the encoder was loaded, so this interval covers
# model loading plus all three encode calls. Nothing is trained in this cell,
# hence the message says "embedding time" rather than "training time".
print(f"⏱️ Total embedding time (model load + encoding): {end_time - start_time:.2f} seconds")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/293 [00:00<?, ?it/s]

Embedding extraction for 75000 samples took 11.17 minutes


Batches:   0%|          | 0/59 [00:00<?, ?it/s]

Embedding extraction for 15000 samples took 2.31 minutes


Batches:   0%|          | 0/40 [00:00<?, ?it/s]

Embedding extraction for 10000 samples took 1.48 minutes
⏱️ Total training time: 913.54 seconds


In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, so nothing about
# the validation or test rows leaks into the transform.
scaler = StandardScaler().fit(X_train[tabular_cols])

# Apply the train-fitted scaling to every split.
train_tabular, val_tabular, test_tabular = (
    scaler.transform(split_frame[tabular_cols])
    for split_frame in (X_train, X_val, X_test)
)


In [None]:
import numpy as np

# Per split: embedding columns first, scaled numeric columns appended on the right.
X_train_all = np.concatenate((train_embeddings, train_tabular), axis=1)
X_val_all = np.concatenate((val_embeddings, val_tabular), axis=1)
X_test_all = np.concatenate((test_embeddings, test_tabular), axis=1)


In [None]:
# Make every feature finite before it reaches the network:
# NaN -> 0.0, +inf -> 1e5, -inf -> -1e5 (same rule for all three splits).
X_train_all, X_val_all, X_test_all = (
    np.nan_to_num(features, nan=0.0, posinf=1e5, neginf=-1e5)
    for features in (X_train_all, X_val_all, X_test_all)
)


In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
# NOTE(review): no target cleaning is performed in this cell; y_train / y_val are
# passed to fit() exactly as produced by the split.


# Feed-forward classifier over the combined feature vector:
# Dense(128, relu) -> Dropout(0.3) -> Dense(64, relu) -> Dropout(0.2) -> Dense(1, sigmoid)
input_layer = Input(shape=(X_train_all.shape[1],))
x = Dense(128, activation='relu')(input_layer)
x = Dropout(0.3)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.2)(x)
output = Dense(1, activation='sigmoid')(x)

# Sigmoid output paired with binary cross-entropy; Adam with its default settings
relu_model = Model(inputs=input_layer, outputs=output)
relu_model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

# Train for 5 epochs, reporting loss/accuracy on the validation split after each epoch
relu_model.fit(X_train_all, y_train, validation_data=(X_val_all, y_val), epochs=5, batch_size=256)


Epoch 1/5
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.7496 - loss: 0.5151 - val_accuracy: 0.8363 - val_loss: 0.3658
Epoch 2/5
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.8425 - loss: 0.3593 - val_accuracy: 0.8525 - val_loss: 0.3364
Epoch 3/5
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.8548 - loss: 0.3303 - val_accuracy: 0.8607 - val_loss: 0.3207
Epoch 4/5
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.8675 - loss: 0.3058 - val_accuracy: 0.8666 - val_loss: 0.3120
Epoch 5/5
[1m293/293[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8767 - loss: 0.2913 - val_accuracy: 0.8722 - val_loss: 0.3038


<keras.src.callbacks.history.History at 0x7bbc717ccc90>

In [None]:
# Imported here so this section does not depend on metric imports made only in
# the random-forest cells of another section.
from sklearn.metrics import classification_report, roc_auc_score

# === Validation Set Evaluation ===
# predict() returns one sigmoid probability per row; 0.5 is the decision threshold.
val_probs = relu_model.predict(X_val_all, batch_size=256)
val_preds = (val_probs > 0.5).astype(int)

print("\nValidation Classification Report:")
print(classification_report(y_val, val_preds))

# roc_auc_score rejects NaN scores, so report the problem instead of raising.
if np.isnan(val_probs).any():
    print("⚠️ Warning: NaNs found in validation probabilities. Skipping AUC.")
else:
    val_auc = roc_auc_score(y_val, val_probs)
    print(f"Validation AUC: {val_auc:.4f}")

# === Test Set Evaluation ===
test_probs = relu_model.predict(X_test_all, batch_size=256)
test_preds = (test_probs > 0.5).astype(int)

print("\nTest Classification Report:")
print(classification_report(y_test, test_preds))

if np.isnan(test_probs).any():
    print("⚠️ Warning: NaNs found in test probabilities. Skipping AUC.")
else:
    test_auc = roc_auc_score(y_test, test_probs)
    print(f"Test AUC: {test_auc:.4f}")


[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step

Validation Classification Report:
              precision    recall  f1-score   support

         0.0       0.89      0.85      0.87      7484
         1.0       0.86      0.89      0.87      7516

    accuracy                           0.87     15000
   macro avg       0.87      0.87      0.87     15000
weighted avg       0.87      0.87      0.87     15000

Validation AUC: 0.9433
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step

Test Classification Report:
              precision    recall  f1-score   support

         0.0       0.88      0.85      0.86      4990
         1.0       0.85      0.88      0.87      5010

    accuracy                           0.87     10000
   macro avg       0.87      0.87      0.87     10000
weighted avg       0.87      0.87      0.87     10000

Test AUC: 0.9409


# BERT + numeric hybrid model

In [None]:
import pandas as pd
import numpy as np
import torch
import time
from torch import nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel


In [None]:
# NOTE(review): the load below is commented out, so the preprocessing cell that
# follows operates on whatever `df` is already in the kernel (in this notebook,
# the frame loaded by the preceding section). Uncomment to run this section on its own.
# df = pd.read_csv('/content/drive/MyDrive/Research/Dataset/Fakeddit/all_samples (also includes non multimodal)/Original dataset/polarity_results.csv')
# df = df.sample(n=100000, random_state=42)

Preprocess Data

In [None]:
numeric_cols = ['num_comments', 'score', 'upvote_ratio', 'polarity', 'emotion_score']

# Drop rows without a title. .copy() yields an independent frame, so the column
# assignment below writes to it directly rather than to a possible view of the old df.
df = df.dropna(subset=['title']).copy()
# Missing or infinite numeric values become 0 so every feature fed to the network is finite.
df[numeric_cols] = df[numeric_cols].replace([np.inf, -np.inf], np.nan).fillna(0)

X_text = df['title'].values
X_numeric = df[numeric_cols].values
y = df['2_way_label'].values

# First: 85% train+val, 15% test
X_text_tv, X_text_test, X_num_tv, X_num_test, y_tv, y_test = train_test_split(
    X_text, X_numeric, y, test_size=0.15, stratify=y, random_state=42
)

# Then: 82.35% train, 17.65% val of the remainder (70/15 of the total)
X_text_train, X_text_val, X_num_train, X_num_val, y_train, y_val = train_test_split(
    X_text_tv, X_num_tv, y_tv, test_size=0.1765, stratify=y_tv, random_state=42
)

# Standardize the numeric features with statistics from the training rows only.
# Without this, the raw columns are concatenated unscaled with the BERT output
# inside the model.
numeric_scaler = StandardScaler().fit(X_num_train)
X_num_train = numeric_scaler.transform(X_num_train)
X_num_val = numeric_scaler.transform(X_num_val)
X_num_test = numeric_scaler.transform(X_num_test)


Tokenizer & Dataset Class

In [None]:
# Tokenizer for the 'bert-base-uncased' checkpoint (the same checkpoint name the
# model cell loads); FakeNewsDataset reads this module-level name.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

class FakeNewsDataset(Dataset):
    """Map-style dataset pairing a tokenized title with numeric features and a label.

    Args:
        texts: Indexable collection of titles, one per row.
        numerics: Indexable collection of per-row numeric feature vectors.
        labels: Indexable collection of per-row labels.
        tokenizer: Callable accepting ``(text, padding=, truncation=, max_length=,
            return_tensors=)`` and returning a mapping with ``'input_ids'`` and
            ``'attention_mask'`` tensors of shape ``(1, max_length)``. When omitted,
            the module-level ``tokenizer`` is looked up at item-access time, which
            is the original behaviour.
        max_length: Token length every title is padded or truncated to (default 32).

    Raises:
        ValueError: If the three collections do not have the same length.
    """

    def __init__(self, texts, numerics, labels, tokenizer=None, max_length=32):
        if not (len(texts) == len(numerics) == len(labels)):
            raise ValueError(
                f"texts, numerics and labels must have equal length, got "
                f"{len(texts)}, {len(numerics)}, {len(labels)}"
            )
        self.texts = texts
        self.numerics = numerics
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Prefer the injected tokenizer; otherwise fall back to the global one.
        encode = self.tokenizer if self.tokenizer is not None else tokenizer
        tokens = encode(
            # str() guards against non-str entries (e.g. numpy string scalars).
            str(self.texts[idx]),
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt"
        )
        return {
            # squeeze(0) removes the leading batch axis added by return_tensors="pt"
            'input_ids': tokens['input_ids'].squeeze(0),
            'attention_mask': tokens['attention_mask'].squeeze(0),
            'numerics': torch.tensor(self.numerics[idx], dtype=torch.float32),
            # float label, as required by the BCE loss used in training
            'label': torch.tensor(self.labels[idx], dtype=torch.float32)
        }




tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
# One dataset per split; tokenization happens lazily in __getitem__
train_dataset = FakeNewsDataset(X_text_train, X_num_train, y_train)
val_dataset   = FakeNewsDataset(X_text_val, X_num_val, y_val)
test_dataset  = FakeNewsDataset(X_text_test, X_num_test, y_test)

# Only the training loader shuffles; val/test keep dataset order
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=64)
test_loader  = DataLoader(test_dataset, batch_size=64)


Define Model

In [None]:
class HybridBERTModel(nn.Module):
    """BERT encoder fused with numeric features for binary classification.

    The position-0 hidden state of the last BERT layer is concatenated with the
    numeric feature vector, passed through dropout, a 128-unit ReLU layer and a
    single-unit output layer, and squashed with a sigmoid.

    Args:
        numeric_input_dim: Number of numeric features supplied per example.
    """

    def __init__(self, numeric_input_dim):
        super().__init__()
        # Sub-modules are created in the same order as the original definition.
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(p=0.3)
        # 768 is the width of the BERT vector concatenated in forward().
        self.fc1 = nn.Linear(in_features=768 + numeric_input_dim, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=1)
        self.relu = nn.ReLU()

    def forward(self, input_ids, attention_mask, numerics):
        """Return a ``(batch, 1)`` tensor of probabilities in [0, 1]."""
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Hidden state at sequence position 0 for every example in the batch.
        first_token = encoded.last_hidden_state[:, 0]
        fused = torch.cat([first_token, numerics], dim=1)
        hidden = self.relu(self.fc1(self.dropout(fused)))
        logit = self.fc2(hidden)
        return torch.sigmoid(logit)


Train Model with Time Analysis

In [None]:
# NOTE(review): this rebinds the global name `model`, which earlier sections use
# for the SentenceTransformer encoder read by get_embeddings(); re-running an
# embedding cell after this one would call encode() on the BERT classifier.
model = HybridBERTModel(numeric_input_dim=X_num_train.shape[1])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# The model already applies a sigmoid, so plain BCELoss (not the logits variant) is used
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

start_time = time.time()

# Three passes over the training loader; all model parameters (BERT included) are optimized
for epoch in range(3):
    model.train()
    epoch_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        numerics = batch['numerics'].to(device)
        # unsqueeze(1): labels become (batch, 1) to match the model's output shape
        labels = batch['label'].to(device).unsqueeze(1)

        outputs = model(input_ids, attention_mask, numerics)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # The printed value is the SUM of per-batch losses, not the mean per batch
    print(f"Epoch {epoch+1} Loss: {epoch_loss:.4f}")

end_time = time.time()
print(f"⏱️ Total training time: {end_time - start_time:.2f} seconds")


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Epoch 1 Loss: 375.0077
Epoch 2 Loss: 267.3652
Epoch 3 Loss: 186.0791
⏱️ Total training time: 1177.74 seconds


In [None]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score


In [None]:
def evaluate_model(model, dataloader, device):
    """Score ``model`` on every batch of ``dataloader`` and return summary metrics.

    Args:
        model: Module called as ``model(input_ids, attention_mask, numerics)`` and
            returning one probability per example.
        dataloader: Iterable of dict batches with keys ``'input_ids'``,
            ``'attention_mask'``, ``'numerics'`` and ``'label'``.
        device: Device the input tensors are moved to before the forward pass.

    Returns:
        dict with keys ``'Accuracy'``, ``'F1 Score'``, ``'AUC'``, ``'Precision'``
        and ``'Recall'``; predictions use a 0.5 threshold on the probabilities.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    model.eval()
    label_chunks = []
    prob_chunks = []

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            numerics = batch['numerics'].to(device)

            outputs = model(input_ids, attention_mask, numerics)
            # reshape(-1) always yields a 1-D tensor. A bare squeeze() collapses a
            # single-example batch to 0-d, which cannot be extended into a list.
            prob_chunks.append(outputs.reshape(-1).cpu())
            label_chunks.append(batch['label'].reshape(-1).cpu())

    if not prob_chunks:
        raise ValueError("dataloader produced no batches to evaluate")

    all_probs = torch.cat(prob_chunks).numpy()
    all_labels = torch.cat(label_chunks).numpy()
    all_preds = (all_probs > 0.5).astype(int)

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    auc = roc_auc_score(all_labels, all_probs)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)

    return {
        'Accuracy': acc,
        'F1 Score': f1,
        'AUC': auc,
        'Precision': precision,
        'Recall': recall
    }


In [None]:
# Evaluate validation first, then test, with the same trained model.
val_metrics, test_metrics = (
    evaluate_model(model, loader, device) for loader in (val_loader, test_loader)
)

# Print each metric dictionary under its heading, four decimals per value.
for heading, metrics in (
    ("\n📊 Validation Set Metrics:", val_metrics),
    ("\n📊 Test Set Metrics:", test_metrics),
):
    print(heading)
    for metric_name, metric_value in metrics.items():
        print(f"{metric_name}: {metric_value:.4f}")



📊 Validation Set Metrics:
Accuracy: 0.8905
F1 Score: 0.8927
AUC: 0.9560
Precision: 0.8768
Recall: 0.9091

📊 Test Set Metrics:
Accuracy: 0.8839
F1 Score: 0.8856
AUC: 0.9541
Precision: 0.8747
Recall: 0.8969
