# 1. Setup and Configuration

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from collections import Counter, defaultdict
from sklearn.model_selection import train_test_split 
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    classification_report,
    confusion_matrix
    
)
import time
import os
import joblib
import logging
import warnings
import gc
import psutil

# --- Basic Configuration ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

# --- Limit CPU Usage ---
p = psutil.Process()
p.cpu_affinity([1, 2, 3, 4, 5, 6, 7])

In [2]:
# --- Project Directory Structure ---
BASE_DIR = ".." 
DATA_DIR = os.path.join(BASE_DIR, "data", "processed")

MODEL_OUTPUT_BASE_DIR = os.path.join(BASE_DIR, "models", "dl")
RESULT_DIR = os.path.join(BASE_DIR, "result")

# --- Specific Dataset Paths ---
BOOK_REVIEW_DATA_DIR = os.path.join(DATA_DIR, "book_reviews")
FINANCIAL_NEWS_DATA_DIR = os.path.join(DATA_DIR, "financial_news")

# --- Model/Result Output Dirs (Ensure they exist) ---
BOOK_REVIEW_MODEL_DIR = os.path.join(MODEL_OUTPUT_BASE_DIR, "book_reviews")
FINANCIAL_NEWS_MODEL_DIR = os.path.join(MODEL_OUTPUT_BASE_DIR, "financial_news")
BOOK_REVIEW_RESULT_DIR = os.path.join(RESULT_DIR, "book_reviews")
FINANCIAL_NEWS_RESULT_DIR = os.path.join(RESULT_DIR, "financial_news")

os.makedirs(BOOK_REVIEW_MODEL_DIR, exist_ok=True)
os.makedirs(FINANCIAL_NEWS_MODEL_DIR, exist_ok=True)
os.makedirs(BOOK_REVIEW_RESULT_DIR, exist_ok=True)
os.makedirs(FINANCIAL_NEWS_RESULT_DIR, exist_ok=True)

# --- GloVe Path ---
GLOVE_PATH = os.path.join(BASE_DIR, "data", "embeddings", "glove.6B.100d.txt")

# --- File Names ---
TRAIN_FN = "train.csv"
VAL_FN = "val.csv"
TEST_FN = "test.csv"

# --- Column Names ---
TEXT_COLUMN = "text"
TARGET_COLUMN = "score"

In [6]:
# --- Model & Training Hyperparameters ---
RANDOM_STATE = 42
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logging.info(f"Using device: {DEVICE}")

# Vocabulary params
MIN_WORD_FREQ = 3 # Minimum frequency for a word to be included in the vocabulary

# Embedding params
EMBEDDING_DIM = 100 # Must match GloVe dimension if using pre-trained GloVe
LEARNED_EMBEDDING_DIM = 100 # Dimension for embeddings learned from scratch

# Model Arch params (can be tuned)
HIDDEN_DIM_RNN_LSTM = 128
N_LAYERS_RNN_LSTM = 2
DROPOUT = 0.3
N_FILTERS_CNN = 100
FILTER_SIZES_CNN = [3, 4, 5] # Kernel sizes for CNN

# Training params
LEARNING_RATE = 0.001
BATCH_SIZE = 1024
NUM_EPOCHS = 50 # Increase for better performance, but takes longer
GRADIENT_CLIP = 1.0 # Helps prevent exploding gradients in RNNs/LSTMs

# --- Evaluation Metrics ---
METRICS_TO_CALCULATE = [
    "Accuracy",
    "F1 (Macro)", "Precision (Macro)", "Recall (Macro)",
    "F1 (Weighted)", "Precision (Weighted)", "Recall (Weighted)",
    "Train Time (Epoch, s)", "Eval Time (s)" # Train time per epoch is more practical for DL
]

# --- Label Mapping (For PyTorch CrossEntropyLoss) ---
LABEL_MAP = {'negative': 0, 'neutral': 1, 'positive': 2} # Example mapping
NUM_CLASSES = len(LABEL_MAP)

# --- Datasets Configuration ---
DATASETS_TO_PROCESS = {
    # "Book Review": {
    #     "train_path": os.path.join(BOOK_REVIEW_DATA_DIR, f'book_reviews_{TRAIN_FN}'),
    #     "val_path": os.path.join(BOOK_REVIEW_DATA_DIR, f'book_reviews_{VAL_FN}'),
    #     "test_path": os.path.join(BOOK_REVIEW_DATA_DIR, f'book_reviews_{TEST_FN}'),
    #     "model_dir": BOOK_REVIEW_MODEL_DIR,
    #     "result_dir": BOOK_REVIEW_RESULT_DIR,
    #     "vocab_path": os.path.join(BOOK_REVIEW_MODEL_DIR, "vocab.pt"), # Save vocab per dataset
    # },
    "Financial News": {
        "train_path": os.path.join(FINANCIAL_NEWS_DATA_DIR, f'financial_news_{TRAIN_FN}'),
        "val_path": os.path.join(FINANCIAL_NEWS_DATA_DIR, f'financial_news_{VAL_FN}'),
        "test_path": os.path.join(FINANCIAL_NEWS_DATA_DIR, f'financial_news_{TEST_FN}'),
        "model_dir": FINANCIAL_NEWS_MODEL_DIR,
        "result_dir": FINANCIAL_NEWS_RESULT_DIR,
         "vocab_path": os.path.join(FINANCIAL_NEWS_MODEL_DIR, "vocab.pt"),
    }
}

2025-05-01 17:06:13,315 - INFO - Using device: cpu


# 2. Utility Functions and Classes

In [8]:
def load_data(path):
    """Loads data from CSV and handles basic cleaning."""
    try:
        df = pd.read_csv(path)
        df = df.dropna(subset=[TEXT_COLUMN, TARGET_COLUMN]) # Drop rows with NaNs in critical columns
        df[TEXT_COLUMN] = df[TEXT_COLUMN].astype(str) # Ensure text is string
        df[TARGET_COLUMN] = df[TARGET_COLUMN].astype(str) # Ensure labels are string before mapping
        # Map labels to integers
        df[TARGET_COLUMN] = df[TARGET_COLUMN].map(LABEL_MAP)
        # Verify mapping worked - check for NaNs introduced if a label wasn't in LABEL_MAP
        if df[TARGET_COLUMN].isnull().any():
            logging.warning(f"NaNs found in target column after mapping for {path}. Check LABEL_MAP and data labels.")
            # Option: Drop rows with unmapped labels
            original_count = len(df)
            df = df.dropna(subset=[TARGET_COLUMN])
            logging.warning(f"Dropped {original_count - len(df)} rows with unmappable labels.")
        df[TARGET_COLUMN] = df[TARGET_COLUMN].astype(int) # Convert to int after mapping
        return df
    except FileNotFoundError:
        logging.error(f"File not found: {path}")
        return None
    except Exception as e:
        logging.error(f"Error loading data from {path}: {e}")
        return None

def tokenize(text):
    """Simple whitespace tokenizer."""
    return text.lower().split()

def build_vocab(texts, min_freq=MIN_WORD_FREQ):
    """Builds a vocabulary from a list of texts."""
    word_counts = Counter()
    for text in texts:
        word_counts.update(tokenize(text))

    # Create vocab mapping: word -> index
    # Add special tokens: <pad> for padding, <unk> for unknown words
    vocab = {"<pad>": 0, "<unk>": 1}
    idx = 2
    for word, count in word_counts.items():
        if count >= min_freq:
            vocab[word] = idx
            idx += 1
    logging.info(f"Built vocabulary with {len(vocab)} words (min freq: {min_freq}).")
    return vocab

class SentimentDataset(Dataset):
    """PyTorch Dataset for sentiment analysis."""
    def __init__(self, texts, labels, vocab, max_len=None): # max_len can be added for truncation
        self.texts = texts
        self.labels = labels
        self.vocab = vocab
        self.vocab_stoi = vocab # word -> index
        self.vocab_itos = {i: w for w, i in vocab.items()} # index -> word
        self.unk_idx = vocab.get("<unk>", 1)
        # self.max_len = max_len # Optional: truncate sequences

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        tokens = tokenize(text)
        # Convert tokens to indices
        token_ids = [self.vocab_stoi.get(token, self.unk_idx) for token in tokens]

        # Optional Truncation:
        # if self.max_len:
        #     token_ids = token_ids[:self.max_len]

        return torch.tensor(token_ids, dtype=torch.long), torch.tensor(label, dtype=torch.long)

def collate_batch(batch):
    """Collates data samples into batches with padding."""
    label_list, text_list, lengths = [], [], []
    for (_text, _label) in batch:
        label_list.append(_label)
        processed_text = torch.tensor(_text, dtype=torch.long)
        text_list.append(processed_text)
        lengths.append(len(processed_text)) # Store original lengths

    # Pad sequences to the max length in this batch
    # batch_first=True means output shape is (batch_size, seq_len)
    text_list_padded = pad_sequence(text_list, batch_first=True, padding_value=0) # Use PAD index 0

    label_list = torch.tensor(label_list, dtype=torch.long)
    lengths = torch.tensor(lengths, dtype=torch.long) # Useful for packed sequences later if needed

    return text_list_padded, label_list, lengths


def load_glove_embeddings(glove_path, vocab_stoi, embedding_dim):
    """Loads GloVe embeddings for words in the vocabulary."""
    if not os.path.exists(glove_path):
        logging.error(f"GloVe file not found at: {glove_path}")
        return None

    logging.info(f"Loading GloVe embeddings from {glove_path}")
    embeddings_index = {}
    try:
        with open(glove_path, 'r', encoding='utf-8') as f:
            for line in f:
                values = line.split()
                word = values[0]
                try:
                    vector = np.asarray(values[1:], dtype='float32')
                    embeddings_index[word] = vector
                except ValueError:
                    logging.debug(f"Skipping line in GloVe file (could not parse vector): {line[:50]}...")
                    continue # Skip lines that might not parse correctly
    except Exception as e:
        logging.error(f"Error reading GloVe file: {e}")
        return None

    logging.info(f"Found {len(embeddings_index)} word vectors in GloVe file.")

    vocab_size = len(vocab_stoi)
    # Initialize embedding matrix with zeros or small random values
    # np.random.seed(RANDOM_STATE)
    embedding_matrix = np.zeros((vocab_size, embedding_dim))
    # Or random init: embedding_matrix = np.random.rand(vocab_size, embedding_dim) * 0.02 - 0.01

    found_count = 0
    for word, i in vocab_stoi.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # Words not found in embedding index will remain zeros (or random).
            embedding_matrix[i] = embedding_vector
            found_count += 1
        else:
            # Handle <unk> and <pad> specifically
            if word == "<unk>": # Initialize <unk> token vector (e.g., average or random)
                embedding_matrix[i] = np.random.rand(embedding_dim) * 0.02 - 0.01 # Small random
                # pass
            elif word == "<pad>":
                embedding_matrix[i] = np.zeros(embedding_dim) # Ensure PAD is zeros

    logging.info(f"Initialized embedding matrix. Shape: {embedding_matrix.shape}")
    logging.info(f"Found pre-trained vectors for {found_count}/{vocab_size} words in vocabulary.")
    return torch.tensor(embedding_matrix, dtype=torch.float)


def calculate_metrics(y_true, y_pred):
    """Calculates evaluation metrics."""
    accuracy = accuracy_score(y_true, y_pred)
    f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0)
    precision_macro = precision_score(y_true, y_pred, average='macro', zero_division=0)
    recall_macro = recall_score(y_true, y_pred, average='macro', zero_division=0)
    f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    precision_weighted = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall_weighted = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    return {
        "Accuracy": accuracy,
        "F1 (Macro)": f1_macro,
        "Precision (Macro)": precision_macro,
        "Recall (Macro)": recall_macro,
        "F1 (Weighted)": f1_weighted,
        "Precision (Weighted)": precision_weighted,
        "Recall (Weighted)": recall_weighted,
    }

# 3. Model Definitions

In [9]:
# --- Base Model with Embedding Handling ---
class BaseModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, output_dim, pad_idx, pretrained_embeddings=None, freeze_embeddings=False):
        super().__init__()
        if pretrained_embeddings is not None:
            self.embedding = nn.Embedding.from_pretrained(
                pretrained_embeddings,
                freeze=freeze_embeddings,
                padding_idx=pad_idx
            )
            logging.info(f"Using pre-trained embeddings. Freeze: {freeze_embeddings}")
        else:
            self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
            logging.info("Using learned embeddings.")
        self.output_dim = output_dim

In [10]:
# --- 1. MLP on Averaged Embeddings ---
# Note: This averages embeddings before passing to MLP, simpler than sequence processing.
class MLPAveraged(BaseModel):
     def __init__(self, vocab_size, embedding_dim, output_dim, pad_idx, hidden_dim1=64, hidden_dim2=32, dropout=DROPOUT, pretrained_embeddings=None, freeze_embeddings=False):
         # embedding_dim is input_dim for MLP part
         super().__init__(vocab_size, embedding_dim, output_dim, pad_idx, pretrained_embeddings, freeze_embeddings)
         self.fc1 = nn.Linear(embedding_dim, hidden_dim1)
         self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
         self.fc3 = nn.Linear(hidden_dim2, output_dim)
         self.dropout = nn.Dropout(dropout)
         self.relu = nn.ReLU()

     def forward(self, text, text_lengths=None): # text_lengths unused here but kept for consistency
         # text shape: (batch_size, seq_len)
         embedded = self.embedding(text)
         # embedded shape: (batch_size, seq_len, embedding_dim)

         # Average embeddings across sequence length dimension
         # Need to handle padding: Mask out pad tokens before averaging
         pad_mask = (text != self.embedding.padding_idx).float().unsqueeze(-1) # (batch_size, seq_len, 1)
         embedded = embedded * pad_mask # Zero out embeddings for pad tokens
         # Sum embeddings and divide by actual lengths (excluding pad tokens)
         # Calculate actual lengths (sum of non-pad tokens)
         actual_lengths = pad_mask.sum(dim=1)
         actual_lengths = torch.max(actual_lengths, torch.ones_like(actual_lengths)) # Avoid division by zero for empty sequences

         pooled = embedded.sum(dim=1) / actual_lengths # Shape: (batch_size, embedding_dim)

         x = self.dropout(self.relu(self.fc1(pooled)))
         x = self.dropout(self.relu(self.fc2(x)))
         output = self.fc3(x) # Shape: (batch_size, output_dim)
         return output

In [11]:
# --- 2. Basic RNN ---
class RNNModel(BaseModel):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, dropout, pad_idx, bidirectional=False, pretrained_embeddings=None, freeze_embeddings=False):
        super().__init__(vocab_size, embedding_dim, output_dim, pad_idx, pretrained_embeddings, freeze_embeddings)
        self.rnn = nn.RNN(embedding_dim,
                          hidden_dim,
                          num_layers=n_layers,
                          bidirectional=bidirectional,
                          batch_first=True, # Input shape: (batch_size, seq_len, embed_dim)
                          dropout=dropout if n_layers > 1 else 0) # Dropout only between layers
        # Adjust linear layer input size for bidirectional
        fc_in_dim = hidden_dim * 2 if bidirectional else hidden_dim
        self.fc = nn.Linear(fc_in_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths): # text_lengths useful for PackedSequence but not used here
        # text shape: (batch_size, seq_len)
        embedded = self.dropout(self.embedding(text))
        # embedded shape: (batch_size, seq_len, embedding_dim)

        # No packing used here for simplicity, RNN processes padded sequences
        # Output shape: (batch_size, seq_len, num_directions * hidden_dim)
        # Hidden shape: (n_layers * num_directions, batch_size, hidden_dim)
        rnn_output, hidden = self.rnn(embedded)

        # Get output from the last time step (or concatenate final forward/backward hidden states)
        # hidden[-1] is the hidden state of the last layer (forward)
        # hidden[-2] would be the last backward state if bidirectional
        if self.rnn.bidirectional:
            # Concatenate the final hidden states of the last layer from both directions
            # hidden shape: (n_layers * 2, batch, hidden_dim)
            # hidden[-2,:,:] is last layer's forward, hidden[-1,:,:] is last layer's backward
            hidden_fwd = hidden[-2,:,:]
            hidden_bwd = hidden[-1,:,:]
            hidden_cat = torch.cat((hidden_fwd, hidden_bwd), dim=1)
        else:
            # hidden shape: (n_layers * 1, batch, hidden_dim)
            hidden_cat = hidden[-1,:,:]

        # Apply dropout and final linear layer
        output = self.fc(self.dropout(hidden_cat)) # Shape: (batch_size, output_dim)
        return output

In [12]:
# --- 3. LSTM Model ---
class LSTMModel(BaseModel):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, dropout, pad_idx, bidirectional=True, pretrained_embeddings=None, freeze_embeddings=False):
        super().__init__(vocab_size, embedding_dim, output_dim, pad_idx, pretrained_embeddings, freeze_embeddings)
        self.lstm = nn.LSTM(embedding_dim,
                            hidden_dim,
                            num_layers=n_layers,
                            bidirectional=bidirectional,
                            batch_first=True,
                            dropout=dropout if n_layers > 1 else 0)
        fc_in_dim = hidden_dim * 2 if bidirectional else hidden_dim
        self.fc = nn.Linear(fc_in_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths): # text_lengths can be used with pack_padded_sequence
        embedded = self.dropout(self.embedding(text))

        # Optional: Use packed sequences for efficiency (handles padding)
        # packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.cpu(), batch_first=True, enforce_sorted=False)
        # packed_output, (hidden, cell) = self.lstm(packed_embedded)
        # output, _ = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
        # Using padded sequence directly (simpler for this example):
        lstm_output, (hidden, cell) = self.lstm(embedded)

        # hidden shape: (n_layers * num_directions, batch_size, hidden_dim)
        # cell shape: (n_layers * num_directions, batch_size, hidden_dim)
        if self.lstm.bidirectional:
            hidden_fwd = hidden[-2,:,:]
            hidden_bwd = hidden[-1,:,:]
            hidden_cat = torch.cat((hidden_fwd, hidden_bwd), dim=1)
        else:
            hidden_cat = hidden[-1,:,:]

        output = self.fc(self.dropout(hidden_cat))
        return output

In [13]:
# --- 4. CNN Model (1D Convolution) ---
class CNNModel(BaseModel):
    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, dropout, pad_idx, pretrained_embeddings=None, freeze_embeddings=False):
        super().__init__(vocab_size, embedding_dim, output_dim, pad_idx, pretrained_embeddings, freeze_embeddings)
        # Create multiple convolutional layers with different kernel sizes
        self.convs = nn.ModuleList([
            nn.Conv1d(in_channels=embedding_dim,
                      out_channels=n_filters,
                      kernel_size=fs)
            for fs in filter_sizes
        ])
        # The output dimension after concatenating pooled features from all kernel sizes
        fc_in_dim = len(filter_sizes) * n_filters
        self.fc = nn.Linear(fc_in_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()

    def forward(self, text, text_lengths=None):
        # text: [batch size, seq len]
        embedded = self.dropout(self.embedding(text))
        # embedded: [batch size, seq len, emb dim]

        # Conv1d expects input shape: (batch_size, channels, seq_len)
        # So, permute dimensions: (batch_size, emb dim, seq len)
        embedded = embedded.permute(0, 2, 1)

        # Apply convolutions and pooling
        conved = [self.relu(conv(embedded)) for conv in self.convs]
        # conved[n]: [batch size, n filters, seq len - filter_sizes[n] + 1]

        # Apply max pooling over time (sequence length dimension)
        # Pool size should cover the entire sequence length dimension after convolution
        pooled = [torch.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        # pooled[n]: [batch size, n filters]

        # Concatenate the pooled features from different filter sizes
        cat = self.dropout(torch.cat(pooled, dim=1))
        # cat: [batch size, n filters * len(filter_sizes)]

        return self.fc(cat)

In [14]:
# --- 5. CNN-LSTM Hybrid Model ---
class CNNLSTMModel(BaseModel):
    def __init__(self, vocab_size, embedding_dim, n_filters, filter_size_cnn, # Single filter size for simplicity here
                 hidden_dim_lstm, output_dim, n_layers_lstm, dropout, pad_idx,
                 pretrained_embeddings=None, freeze_embeddings=False):
        super().__init__(vocab_size, embedding_dim, output_dim, pad_idx, pretrained_embeddings, freeze_embeddings)
        self.conv = nn.Conv1d(in_channels=embedding_dim, out_channels=n_filters, kernel_size=filter_size_cnn)
        self.relu = nn.ReLU()
        # Input to LSTM is the output channels of CNN
        self.lstm = nn.LSTM(n_filters, # Input features = CNN output channels
                            hidden_dim_lstm,
                            num_layers=n_layers_lstm,
                            bidirectional=True, # Often good to use bidirectional
                            batch_first=True,
                            dropout=dropout if n_layers_lstm > 1 else 0)
        fc_in_dim = hidden_dim_lstm * 2 # Bidirectional LSTM
        self.fc = nn.Linear(fc_in_dim, output_dim)
        self.dropout_embed = nn.Dropout(dropout)
        self.dropout_final = nn.Dropout(dropout)

    def forward(self, text, text_lengths=None):
        # text: [batch size, seq len]
        embedded = self.dropout_embed(self.embedding(text))
        # embedded: [batch size, seq len, emb dim]

        # --- CNN Part ---
        # Permute for Conv1d: [batch size, emb dim, seq len]
        embedded_permuted = embedded.permute(0, 2, 1)
        conved = self.relu(self.conv(embedded_permuted))
        # conved: [batch size, n filters, new seq len]
        # Permute back for LSTM: [batch size, new seq len, n filters]
        conved_permuted = conved.permute(0, 2, 1)

        # --- LSTM Part ---
        lstm_output, (hidden, cell) = self.lstm(conved_permuted)
        # lstm_output: [batch size, seq len, num directions * hidden dim]
        # hidden: [n layers * num directions, batch size, hidden dim]

        # Concatenate final forward and backward hidden states
        hidden_fwd = hidden[-2,:,:]
        hidden_bwd = hidden[-1,:,:]
        hidden_cat = torch.cat((hidden_fwd, hidden_bwd), dim=1)

        # --- Final Output ---
        output = self.fc(self.dropout_final(hidden_cat))
        return output

# 4. Training and Evaluation Functions

In [15]:
def train_epoch(model, iterator, optimizer, criterion, device, grad_clip=None):
    """Trains the model for one epoch."""
    model.train()
    epoch_loss = 0
    start_time = time.time()

    for batch_idx, (text, labels, lengths) in enumerate(iterator):
        text, labels = text.to(device), labels.to(device)
        lengths = lengths.to('cpu') # lengths for pack_padded_sequence must be on CPU

        optimizer.zero_grad()

        # Forward pass
        predictions = model(text, lengths) # Pass lengths if model uses them

        # Calculate loss
        loss = criterion(predictions, labels)

        # Backward pass and optimization
        loss.backward()

        # Gradient clipping
        if grad_clip:
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)

        optimizer.step()

        epoch_loss += loss.item()

        # Optional: Print batch progress
        if batch_idx % 100 == 0:
            logging.debug(f"Batch {batch_idx}/{len(iterator)}, Loss: {loss.item():.4f}")

    end_time = time.time()
    train_time_epoch = end_time - start_time
    return epoch_loss / len(iterator), train_time_epoch


def evaluate(model, iterator, criterion, device):
    """Evaluates the model on a given dataset iterator."""
    model.eval()
    epoch_loss = 0
    all_preds = []
    all_labels = []
    start_time = time.time()

    with torch.no_grad():
        for batch_idx, (text, labels, lengths) in enumerate(iterator):
            text, labels = text.to(device), labels.to(device)
            lengths = lengths.to('cpu')

            predictions = model(text, lengths)
            loss = criterion(predictions, labels)
            epoch_loss += loss.item()

            # Get predicted labels
            preds = torch.argmax(predictions, dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    end_time = time.time()
    eval_time = end_time - start_time
    metrics = calculate_metrics(all_labels, all_preds)
    avg_loss = epoch_loss / len(iterator)

    conf_matrix = confusion_matrix(all_labels, all_preds)

    return avg_loss, metrics, eval_time, conf_matrix

# 5. Run Experiments

In [16]:
all_results = []

In [17]:
# --- Loop through each dataset defined in the configuration ---
for dataset_name, config in DATASETS_TO_PROCESS.items():
    print(f"\n{'='*20} Processing Dataset: {dataset_name} {'='*20}")
    logging.info(f"Processing Dataset: {dataset_name}")

    # 1. Load Data
    train_df = load_data(config['train_path'])
    val_df = load_data(config['val_path'])
    test_df = load_data(config['test_path'])

    if train_df is None or val_df is None or test_df is None:
        logging.error(f"Skipping dataset {dataset_name} due to data loading errors.")
        continue

    # 2. Build or Load Vocabulary
    if os.path.exists(config['vocab_path']):
        vocab = joblib.load(config['vocab_path'])
        logging.info(f"Loaded existing vocabulary from {config['vocab_path']}")
        # Check if special tokens exist, add if missing (backward compatibility)
        if '<pad>' not in vocab: vocab['<pad>'] = 0
        if '<unk>' not in vocab: vocab['<unk>'] = 1
    else:
        vocab = build_vocab(train_df[TEXT_COLUMN].tolist(), min_freq=MIN_WORD_FREQ)
        joblib.dump(vocab, config['vocab_path'])
        logging.info(f"Built and saved vocabulary to {config['vocab_path']}")

    vocab_size = len(vocab)
    pad_idx = vocab['<pad>']

    # 3. Create Datasets and DataLoaders
    train_dataset = SentimentDataset(train_df[TEXT_COLUMN].tolist(), train_df[TARGET_COLUMN].tolist(), vocab)
    val_dataset = SentimentDataset(val_df[TEXT_COLUMN].tolist(), val_df[TARGET_COLUMN].tolist(), vocab)
    test_dataset = SentimentDataset(test_df[TEXT_COLUMN].tolist(), test_df[TARGET_COLUMN].tolist(), vocab)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_batch)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_batch)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_batch)

    # 4. Load Pre-trained Embeddings (if needed)
    glove_embeddings = None
    if os.path.exists(GLOVE_PATH):
        glove_embeddings = load_glove_embeddings(GLOVE_PATH, vocab, EMBEDDING_DIM)
        if glove_embeddings is None:
            logging.warning("Failed to load GloVe embeddings. Models requiring them will use learned embeddings.")
    else:
        logging.warning(f"GloVe path not found: {GLOVE_PATH}. Pre-trained embeddings disabled.")


    # --- Define Models to Run ---
    models_to_run = {
        # --- Mid Level ---
        # Name: (ModelClass, {kwargs}, use_pretrained_embed, freeze_embed)
        "MLP (Avg Learned Emb)": (MLPAveraged, {'hidden_dim1': 64, 'hidden_dim2': 32, 'dropout': DROPOUT, 'embedding_dim': LEARNED_EMBEDDING_DIM}, False, False),
        "RNN (Learned Emb)": (RNNModel, {'hidden_dim': HIDDEN_DIM_RNN_LSTM, 'n_layers': N_LAYERS_RNN_LSTM, 'dropout': DROPOUT, 'bidirectional': False, 'embedding_dim': LEARNED_EMBEDDING_DIM}, False, False),
        "LSTM (Learned Emb)": (LSTMModel, {'hidden_dim': HIDDEN_DIM_RNN_LSTM, 'n_layers': N_LAYERS_RNN_LSTM, 'dropout': DROPOUT, 'bidirectional': False, 'embedding_dim': LEARNED_EMBEDDING_DIM}, False, False),
        "BiLSTM (Learned Emb)": (LSTMModel, {'hidden_dim': HIDDEN_DIM_RNN_LSTM, 'n_layers': N_LAYERS_RNN_LSTM, 'dropout': DROPOUT, 'bidirectional': True, 'embedding_dim': LEARNED_EMBEDDING_DIM}, False, False),
        "CNN (Learned Emb)": (CNNModel, {'n_filters': N_FILTERS_CNN, 'filter_sizes': FILTER_SIZES_CNN, 'dropout': DROPOUT, 'embedding_dim': LEARNED_EMBEDDING_DIM}, False, False),

        # --- Advanced Level (Using Pre-trained) ---
        # Requires GloVe embeddings to be loaded successfully
        "MLP (Avg GloVe Emb)": (MLPAveraged, {'hidden_dim1': 64, 'hidden_dim2': 32, 'dropout': DROPOUT, 'embedding_dim': EMBEDDING_DIM}, True, True), # Freeze GloVe
        "CNN (GloVe Emb)": (CNNModel, {'n_filters': N_FILTERS_CNN, 'filter_sizes': FILTER_SIZES_CNN, 'dropout': DROPOUT, 'embedding_dim': EMBEDDING_DIM}, True, True), # Freeze GloVe
        "LSTM (GloVe Emb)": (LSTMModel, {'hidden_dim': HIDDEN_DIM_RNN_LSTM, 'n_layers': N_LAYERS_RNN_LSTM, 'dropout': DROPOUT, 'bidirectional': False, 'embedding_dim': EMBEDDING_DIM}, True, True), # Freeze GloVe
        "BiLSTM (GloVe Emb)": (LSTMModel, {'hidden_dim': HIDDEN_DIM_RNN_LSTM, 'n_layers': N_LAYERS_RNN_LSTM, 'dropout': DROPOUT, 'bidirectional': True, 'embedding_dim': EMBEDDING_DIM}, True, True), # Freeze GloVe
        "CNN-LSTM (GloVe Emb)": (CNNLSTMModel, {'n_filters': N_FILTERS_CNN, 'filter_size_cnn': 3, 'hidden_dim_lstm': HIDDEN_DIM_RNN_LSTM, 'n_layers_lstm': N_LAYERS_RNN_LSTM, 'dropout': DROPOUT, 'embedding_dim': EMBEDDING_DIM}, True, True), # Freeze GloVe
    }

    # --- Loop through each model configuration ---
    for model_name, (ModelClass, model_kwargs, use_pretrained, freeze_embed) in models_to_run.items():

        # Skip models requiring GloVe if loading failed
        if use_pretrained and glove_embeddings is None:
            logging.warning(f"Skipping model '{model_name}' as pre-trained GloVe embeddings were not loaded.")
            continue

        print(f"\n--- Training Model: {model_name} ---")
        logging.info(f"Starting training for {model_name} on {dataset_name}")
        results = {"Dataset": dataset_name, "Model": model_name}

        try:
            # Instantiate model
            current_embedding_dim = model_kwargs['embedding_dim'] # Get dim from kwargs
            current_pretrained_embeddings = glove_embeddings if use_pretrained else None

            model = ModelClass(
                vocab_size=vocab_size,
                output_dim=NUM_CLASSES,
                pad_idx=pad_idx,
                pretrained_embeddings=current_pretrained_embeddings,
                freeze_embeddings=freeze_embed,
                **model_kwargs # Pass specific model architecture args
            ).to(DEVICE)

            # Count parameters
            num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
            logging.info(f"Model: {model_name}, Trainable Parameters: {num_params:,}")

            # Define optimizer and criterion
            optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
            criterion = nn.CrossEntropyLoss().to(DEVICE) # Handles softmax internally

            best_val_loss = float('inf')
            total_train_time = 0
            model_save_path = os.path.join(config['model_dir'], f"{dataset_name.replace(' ', '')}_{model_name.replace(' ', '')}_best.pt")

            # Training loop
            for epoch in range(NUM_EPOCHS):
                start_epoch_time = time.time()

                train_loss, train_time_epoch = train_epoch(model, train_loader, optimizer, criterion, DEVICE, GRADIENT_CLIP)
                val_loss, val_metrics, _, _ = evaluate(model, val_loader, criterion, DEVICE)

                total_train_time += train_time_epoch
                end_epoch_time = time.time()
                epoch_mins, epoch_secs = divmod(end_epoch_time - start_epoch_time, 60)

                logging.info(f'Epoch: {epoch+1:02} | Time: {int(epoch_mins)}m {epoch_secs:.0f}s')
                logging.info(f'\tTrain Loss: {train_loss:.3f}')
                logging.info(f'\t Val. Loss: {val_loss:.3f} | Val. F1 (Macro): {val_metrics["F1 (Macro)"]:.4f}')

                # Save best model based on validation loss
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    torch.save(model.state_dict(), model_save_path)
                    logging.info(f"Saved best model to {model_save_path} (Epoch {epoch+1})")

            # model = ModelClass(
            #     vocab_size=vocab_size,
            #     output_dim=NUM_CLASSES,
            #     pad_idx=pad_idx,
            #     pretrained_embeddings=current_pretrained_embeddings,
            #     freeze_embeddings=freeze_embed,
            #     **model_kwargs # Pass specific model architecture args
            # ).to(DEVICE)

            # # Count parameters
            # num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
            # logging.info(f"Model: {model_name}, Trainable Parameters: {num_params:,}")

            # # Define optimizer and criterion
            # optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
            # criterion = nn.CrossEntropyLoss().to(DEVICE) # Handles softmax internally

            # best_val_loss = float('inf')
            # total_train_time = 0
            # model_save_path = os.path.join(config['model_dir'], f"{dataset_name.replace(' ', '_')}_{model_name.replace(' ', '_')}_best.pt")

            # # Training loop
            # for epoch in range(NUM_EPOCHS):
            #     start_epoch_time = time.time()

            #     train_loss, train_time_epoch = train_epoch(model, train_loader, optimizer, criterion, DEVICE, GRADIENT_CLIP)
            #     val_loss, val_metrics, _ = evaluate(model, val_loader, criterion, DEVICE)

            #     total_train_time += train_time_epoch
            #     end_epoch_time = time.time()
            #     epoch_mins, epoch_secs = divmod(end_epoch_time - start_epoch_time, 60)

            #     logging.info(f'Epoch: {epoch+1:02} | Time: {int(epoch_mins)}m {epoch_secs:.0f}s')
            #     logging.info(f'\tTrain Loss: {train_loss:.3f}')
            #     logging.info(f'\t Val. Loss: {val_loss:.3f} | Val. F1 (Macro): {val_metrics["F1 (Macro)"]:.4f}')

            #     # Save best model based on validation loss
            #     if val_loss < best_val_loss:
            #         best_val_loss = val_loss
            #         torch.save(model.state_dict(), model_save_path)
            #         logging.info(f"Saved best model to {model_save_path} (Epoch {epoch+1})")

            results["Train Time (Epoch, s)"] = round(total_train_time / NUM_EPOCHS, 3) # Avg time per epoch

            # Load best model and evaluate on Test set
            model.load_state_dict(torch.load(model_save_path))
            logging.info(f"Loaded best model from {model_save_path} for final test evaluation.")

            test_loss, test_metrics, test_eval_time, test_conf_matrix = evaluate(model, test_loader, criterion, DEVICE)
            results.update(test_metrics)
            results["Eval Time (s)"] = round(test_eval_time, 3)

            logging.info("Test Set Performance:")
            for key, value in test_metrics.items():
                logging.info(f"\t{key}: {value:.4f}")
            logging.info(f"\tTest Loss: {test_loss:.3f}")
            logging.info(f"\tEval Time: {test_eval_time:.3f}s")

            # --- Save Confusion Matrix CSV ---
            cm_filename = f"{dataset_name.replace(' ', '_')}_{model_name.replace(' ', '_')}_confusion_matrix.csv" # Change extension to .csv
            cm_save_path = os.path.join(config['result_dir'], cm_filename)
            try:
                # Convert numpy array to DataFrame for better CSV formatting with labels
                cm_df = pd.DataFrame(test_conf_matrix, 
                                    index=LABEL_MAP.keys(), # Rows are True Labels
                                    columns=LABEL_MAP.keys()) # Columns are Predicted Labels
                cm_df.index.name = 'True Label'
                cm_df.columns.name = 'Predicted Label'
                
                # Save to CSV
                cm_df.to_csv(cm_save_path, index=True, mode='w+') # index=True to include row/column names
                
                logging.info(f"Saved confusion matrix CSV to {cm_save_path}")
            except Exception as cm_save_e:
                logging.error(f"Failed to save confusion matrix CSV for {model_name}: {cm_save_e}")
        # --- End Save Confusion Matrix CSV ---


        except Exception as e:
            logging.error(f"!!! An error occurred while processing {model_name} for {dataset_name}: {e}", exc_info=True) # Log traceback
            # Record partial results if possible
            results["Accuracy"] = np.nan
            results["F1 (Macro)"] = np.nan
            # Fill other metrics with NaN or error messages
            for metric in METRICS_TO_CALCULATE:
                if metric not in results:
                    results[metric] = np.nan if metric not in ["Train Time (Epoch, s)", "Eval Time (s)"] else 0.0
        finally:
            all_results.append(results)
            # Clean up memory
            del model
            if 'optimizer' in locals(): del optimizer
            if 'criterion' in locals(): del criterion   
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

# --- Combine results into a DataFrame ---
results_df = pd.DataFrame(all_results)

2025-05-01 17:14:16,304 - INFO - Processing Dataset: Financial News





2025-05-01 17:14:16,532 - INFO - Built vocabulary with 2845 words (min freq: 3).
2025-05-01 17:14:16,592 - INFO - Built and saved vocabulary to ..\models\dl\financial_news\vocab.pt
2025-05-01 17:14:16,600 - INFO - Loading GloVe embeddings from ..\data\embeddings\glove.6B.100d.txt
2025-05-01 17:14:27,135 - INFO - Found 400000 word vectors in GloVe file.
2025-05-01 17:14:27,151 - INFO - Initialized embedding matrix. Shape: (2845, 100)
2025-05-01 17:14:27,151 - INFO - Found pre-trained vectors for 2672/2845 words in vocabulary.
2025-05-01 17:14:27,429 - INFO - Starting training for MLP (Avg Learned Emb) on Financial News
2025-05-01 17:14:27,451 - INFO - Using learned embeddings.
2025-05-01 17:14:27,457 - INFO - Model: MLP (Avg Learned Emb), Trainable Parameters: 293,143



--- Training Model: MLP (Avg Learned Emb) ---


2025-05-01 17:14:31,113 - INFO - Epoch: 01 | Time: 0m 1s
2025-05-01 17:14:31,116 - INFO - 	Train Loss: 1.133
2025-05-01 17:14:31,117 - INFO - 	 Val. Loss: 1.113 | Val. F1 (Macro): 0.1466
2025-05-01 17:14:31,126 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_MLP(AvgLearnedEmb)_best.pt (Epoch 1)
2025-05-01 17:14:31,746 - INFO - Epoch: 02 | Time: 0m 1s
2025-05-01 17:14:31,750 - INFO - 	Train Loss: 1.105
2025-05-01 17:14:31,751 - INFO - 	 Val. Loss: 1.085 | Val. F1 (Macro): 0.1468
2025-05-01 17:14:31,755 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_MLP(AvgLearnedEmb)_best.pt (Epoch 2)
2025-05-01 17:14:32,186 - INFO - Epoch: 03 | Time: 0m 0s
2025-05-01 17:14:32,189 - INFO - 	Train Loss: 1.077
2025-05-01 17:14:32,189 - INFO - 	 Val. Loss: 1.048 | Val. F1 (Macro): 0.2913
2025-05-01 17:14:32,196 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_MLP(AvgLearnedEmb)_best.pt (Epoch 3)
2025-05-01 17:14:32,538 - INFO - Epoch: 04 


--- Training Model: RNN (Learned Emb) ---


2025-05-01 17:14:54,440 - INFO - Epoch: 01 | Time: 0m 2s
2025-05-01 17:14:54,442 - INFO - 	Train Loss: 1.049
2025-05-01 17:14:54,442 - INFO - 	 Val. Loss: 0.936 | Val. F1 (Macro): 0.2485
2025-05-01 17:14:54,474 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_RNN(LearnedEmb)_best.pt (Epoch 1)
2025-05-01 17:14:56,935 - INFO - Epoch: 02 | Time: 0m 2s
2025-05-01 17:14:56,937 - INFO - 	Train Loss: 0.944
2025-05-01 17:14:56,939 - INFO - 	 Val. Loss: 0.933 | Val. F1 (Macro): 0.2485
2025-05-01 17:14:56,945 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_RNN(LearnedEmb)_best.pt (Epoch 2)
2025-05-01 17:14:59,699 - INFO - Epoch: 03 | Time: 0m 3s
2025-05-01 17:14:59,701 - INFO - 	Train Loss: 0.935
2025-05-01 17:14:59,701 - INFO - 	 Val. Loss: 0.930 | Val. F1 (Macro): 0.2485
2025-05-01 17:14:59,707 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_RNN(LearnedEmb)_best.pt (Epoch 3)
2025-05-01 17:15:02,151 - INFO - Epoch: 04 | Time: 0


--- Training Model: LSTM (Learned Emb) ---


2025-05-01 17:16:53,859 - INFO - Epoch: 01 | Time: 0m 6s
2025-05-01 17:16:53,863 - INFO - 	Train Loss: 1.067
2025-05-01 17:16:53,867 - INFO - 	 Val. Loss: 1.030 | Val. F1 (Macro): 0.2485
2025-05-01 17:16:53,896 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_LSTM(LearnedEmb)_best.pt (Epoch 1)
2025-05-01 17:17:01,289 - INFO - Epoch: 02 | Time: 0m 7s
2025-05-01 17:17:01,289 - INFO - 	Train Loss: 0.998
2025-05-01 17:17:01,291 - INFO - 	 Val. Loss: 0.945 | Val. F1 (Macro): 0.2485
2025-05-01 17:17:01,297 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_LSTM(LearnedEmb)_best.pt (Epoch 2)
2025-05-01 17:17:08,158 - INFO - Epoch: 03 | Time: 0m 7s
2025-05-01 17:17:08,158 - INFO - 	Train Loss: 0.941
2025-05-01 17:17:08,160 - INFO - 	 Val. Loss: 0.933 | Val. F1 (Macro): 0.2485
2025-05-01 17:17:08,167 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_LSTM(LearnedEmb)_best.pt (Epoch 3)
2025-05-01 17:17:14,189 - INFO - Epoch: 04 | Time


--- Training Model: BiLSTM (Learned Emb) ---


2025-05-01 17:21:41,925 - INFO - Epoch: 01 | Time: 0m 12s
2025-05-01 17:21:41,925 - INFO - 	Train Loss: 1.064
2025-05-01 17:21:41,927 - INFO - 	 Val. Loss: 0.985 | Val. F1 (Macro): 0.2485
2025-05-01 17:21:41,935 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_BiLSTM(LearnedEmb)_best.pt (Epoch 1)
2025-05-01 17:21:54,323 - INFO - Epoch: 02 | Time: 0m 12s
2025-05-01 17:21:54,323 - INFO - 	Train Loss: 0.955
2025-05-01 17:21:54,325 - INFO - 	 Val. Loss: 0.908 | Val. F1 (Macro): 0.2485
2025-05-01 17:21:54,337 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_BiLSTM(LearnedEmb)_best.pt (Epoch 2)
2025-05-01 17:22:06,894 - INFO - Epoch: 03 | Time: 0m 13s
2025-05-01 17:22:06,896 - INFO - 	Train Loss: 0.909
2025-05-01 17:22:06,896 - INFO - 	 Val. Loss: 0.888 | Val. F1 (Macro): 0.2485
2025-05-01 17:22:06,906 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_BiLSTM(LearnedEmb)_best.pt (Epoch 3)
2025-05-01 17:22:20,399 - INFO - Epoch: 


--- Training Model: CNN (Learned Emb) ---


2025-05-01 17:32:39,598 - INFO - Epoch: 01 | Time: 0m 4s
2025-05-01 17:32:39,602 - INFO - 	Train Loss: 1.060
2025-05-01 17:32:39,606 - INFO - 	 Val. Loss: 0.901 | Val. F1 (Macro): 0.2912
2025-05-01 17:32:39,612 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_CNN(LearnedEmb)_best.pt (Epoch 1)
2025-05-01 17:32:43,690 - INFO - Epoch: 02 | Time: 0m 4s
2025-05-01 17:32:43,692 - INFO - 	Train Loss: 0.943
2025-05-01 17:32:43,693 - INFO - 	 Val. Loss: 0.836 | Val. F1 (Macro): 0.4306
2025-05-01 17:32:43,699 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_CNN(LearnedEmb)_best.pt (Epoch 2)
2025-05-01 17:32:48,037 - INFO - Epoch: 03 | Time: 0m 4s
2025-05-01 17:32:48,038 - INFO - 	Train Loss: 0.897
2025-05-01 17:32:48,040 - INFO - 	 Val. Loss: 0.796 | Val. F1 (Macro): 0.4519
2025-05-01 17:32:48,048 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_CNN(LearnedEmb)_best.pt (Epoch 3)
2025-05-01 17:32:52,104 - INFO - Epoch: 04 | Time: 0


--- Training Model: MLP (Avg GloVe Emb) ---


2025-05-01 17:36:06,293 - INFO - Epoch: 01 | Time: 0m 0s
2025-05-01 17:36:06,295 - INFO - 	Train Loss: 1.047
2025-05-01 17:36:06,296 - INFO - 	 Val. Loss: 1.034 | Val. F1 (Macro): 0.2485
2025-05-01 17:36:06,300 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_MLP(AvgGloVeEmb)_best.pt (Epoch 1)
2025-05-01 17:36:06,649 - INFO - Epoch: 02 | Time: 0m 0s
2025-05-01 17:36:06,651 - INFO - 	Train Loss: 1.021
2025-05-01 17:36:06,653 - INFO - 	 Val. Loss: 1.010 | Val. F1 (Macro): 0.2485
2025-05-01 17:36:06,657 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_MLP(AvgGloVeEmb)_best.pt (Epoch 2)
2025-05-01 17:36:06,962 - INFO - Epoch: 03 | Time: 0m 0s
2025-05-01 17:36:06,966 - INFO - 	Train Loss: 0.997
2025-05-01 17:36:06,966 - INFO - 	 Val. Loss: 0.982 | Val. F1 (Macro): 0.2485
2025-05-01 17:36:06,972 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_MLP(AvgGloVeEmb)_best.pt (Epoch 3)
2025-05-01 17:36:07,348 - INFO - Epoch: 04 | Time


--- Training Model: CNN (GloVe Emb) ---


2025-05-01 17:36:27,258 - INFO - Epoch: 01 | Time: 0m 4s
2025-05-01 17:36:27,260 - INFO - 	Train Loss: 0.962
2025-05-01 17:36:27,263 - INFO - 	 Val. Loss: 0.888 | Val. F1 (Macro): 0.3278
2025-05-01 17:36:27,270 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_CNN(GloVeEmb)_best.pt (Epoch 1)
2025-05-01 17:36:30,096 - INFO - Epoch: 02 | Time: 0m 3s
2025-05-01 17:36:30,098 - INFO - 	Train Loss: 0.887
2025-05-01 17:36:30,099 - INFO - 	 Val. Loss: 0.853 | Val. F1 (Macro): 0.4072
2025-05-01 17:36:30,105 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_CNN(GloVeEmb)_best.pt (Epoch 2)
2025-05-01 17:36:33,785 - INFO - Epoch: 03 | Time: 0m 4s
2025-05-01 17:36:33,788 - INFO - 	Train Loss: 0.860
2025-05-01 17:36:33,789 - INFO - 	 Val. Loss: 0.825 | Val. F1 (Macro): 0.4030
2025-05-01 17:36:33,797 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_CNN(GloVeEmb)_best.pt (Epoch 3)
2025-05-01 17:36:36,543 - INFO - Epoch: 04 | Time: 0m 3s
2


--- Training Model: LSTM (GloVe Emb) ---


2025-05-01 17:38:54,400 - INFO - Epoch: 01 | Time: 0m 8s
2025-05-01 17:38:54,401 - INFO - 	Train Loss: 1.098
2025-05-01 17:38:54,402 - INFO - 	 Val. Loss: 1.060 | Val. F1 (Macro): 0.2485
2025-05-01 17:38:54,410 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_LSTM(GloVeEmb)_best.pt (Epoch 1)
2025-05-01 17:38:59,629 - INFO - Epoch: 02 | Time: 0m 5s
2025-05-01 17:38:59,631 - INFO - 	Train Loss: 1.031
2025-05-01 17:38:59,634 - INFO - 	 Val. Loss: 0.969 | Val. F1 (Macro): 0.2485
2025-05-01 17:38:59,641 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_LSTM(GloVeEmb)_best.pt (Epoch 2)
2025-05-01 17:39:04,728 - INFO - Epoch: 03 | Time: 0m 5s
2025-05-01 17:39:04,729 - INFO - 	Train Loss: 0.944
2025-05-01 17:39:04,730 - INFO - 	 Val. Loss: 0.941 | Val. F1 (Macro): 0.2485
2025-05-01 17:39:04,737 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_LSTM(GloVeEmb)_best.pt (Epoch 3)
2025-05-01 17:39:10,148 - INFO - Epoch: 04 | Time: 0m 5


--- Training Model: BiLSTM (GloVe Emb) ---


2025-05-01 18:27:13,296 - INFO - Epoch: 01 | Time: 0m 14s
2025-05-01 18:27:13,298 - INFO - 	Train Loss: 1.055
2025-05-01 18:27:13,300 - INFO - 	 Val. Loss: 0.961 | Val. F1 (Macro): 0.2485
2025-05-01 18:27:13,309 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_BiLSTM(GloVeEmb)_best.pt (Epoch 1)
2025-05-01 18:27:25,159 - INFO - Epoch: 02 | Time: 0m 12s
2025-05-01 18:27:25,161 - INFO - 	Train Loss: 0.949
2025-05-01 18:27:25,161 - INFO - 	 Val. Loss: 0.939 | Val. F1 (Macro): 0.2485
2025-05-01 18:27:25,171 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_BiLSTM(GloVeEmb)_best.pt (Epoch 2)
2025-05-01 18:27:37,769 - INFO - Epoch: 03 | Time: 0m 13s
2025-05-01 18:27:37,769 - INFO - 	Train Loss: 0.927
2025-05-01 18:27:37,771 - INFO - 	 Val. Loss: 0.913 | Val. F1 (Macro): 0.2485
2025-05-01 18:27:37,783 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_BiLSTM(GloVeEmb)_best.pt (Epoch 3)
2025-05-01 18:27:50,083 - INFO - Epoch: 04 | T


--- Training Model: CNN-LSTM (GloVe Emb) ---


2025-05-01 18:38:06,199 - INFO - Epoch: 01 | Time: 0m 14s
2025-05-01 18:38:06,201 - INFO - 	Train Loss: 1.034
2025-05-01 18:38:06,201 - INFO - 	 Val. Loss: 0.954 | Val. F1 (Macro): 0.2485
2025-05-01 18:38:06,211 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_CNN-LSTM(GloVeEmb)_best.pt (Epoch 1)
2025-05-01 18:38:19,300 - INFO - Epoch: 02 | Time: 0m 13s
2025-05-01 18:38:19,300 - INFO - 	Train Loss: 0.932
2025-05-01 18:38:19,302 - INFO - 	 Val. Loss: 0.928 | Val. F1 (Macro): 0.2485
2025-05-01 18:38:19,315 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_CNN-LSTM(GloVeEmb)_best.pt (Epoch 2)
2025-05-01 18:38:33,123 - INFO - Epoch: 03 | Time: 0m 14s
2025-05-01 18:38:33,125 - INFO - 	Train Loss: 0.925
2025-05-01 18:38:33,127 - INFO - 	 Val. Loss: 0.922 | Val. F1 (Macro): 0.2485
2025-05-01 18:38:33,138 - INFO - Saved best model to ..\models\dl\financial_news\FinancialNews_CNN-LSTM(GloVeEmb)_best.pt (Epoch 3)
2025-05-01 18:38:46,788 - INFO - Epoch: 

# 6. Results Summary and Saving

In [18]:
print("\n\n===== Overall Deep Learning Results Summary =====")
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1200)
pd.set_option('display.max_colwidth', 80) # Adjust if needed
pd.set_option('display.float_format', '{:.4f}'.format)

# Ensure all expected columns exist, fill with NaN if necessary
for col in METRICS_TO_CALCULATE:
    if col not in results_df.columns:
        results_df[col] = np.nan

# Reorder columns for clarity
column_order = ["Dataset", "Model"] + METRICS_TO_CALCULATE
# Filter out columns not present if something went wrong during creation
column_order = [col for col in column_order if col in results_df.columns]
results_df = results_df[column_order]


print(results_df)

# --- Save results to CSV for each dataset ---
for dataset_name, config in DATASETS_TO_PROCESS.items():
    dataset_results_df = results_df[results_df['Dataset'] == dataset_name]
    if not dataset_results_df.empty:
        results_filename = f"{dataset_name.replace(' ', '_')}_dl_pytorch_results.csv"
        results_save_path = os.path.join(config['result_dir'], results_filename)
        try:
            dataset_results_df.to_csv(results_save_path, index=False, mode='w+')
            print(f"\nResults for {dataset_name} saved to {results_save_path}")
        except Exception as e:
            print(f"\nError saving results for {dataset_name} to {results_save_path}: {e}")

# --- Save combined results ---
combined_results_path = os.path.join(RESULT_DIR, "combined_dl_pytorch_results.csv")
try:
    results_df.to_csv(combined_results_path, index=False, mode='w+')
    print(f"\nCombined results saved to {combined_results_path}")
except Exception as e:
    print(f"\nError saving combined results to {combined_results_path}: {e}")



===== Overall Deep Learning Results Summary =====
          Dataset                  Model  Accuracy  F1 (Macro)  Precision (Macro)  Recall (Macro)  F1 (Weighted)  Precision (Weighted)  Recall (Weighted)  Train Time (Epoch, s)  Eval Time (s)
0  Financial News  MLP (Avg Learned Emb)    0.6589      0.4333             0.4056          0.4653         0.6170                0.5804             0.6589                 0.3330         0.0840
1  Financial News      RNN (Learned Emb)    0.5942      0.2485             0.1981          0.3333         0.4430                0.3531             0.5942                 2.0420         0.2020
2  Financial News     LSTM (Learned Emb)    0.6630      0.4211             0.4031          0.4478         0.6069                0.5652             0.6630                 5.1800         0.2670
3  Financial News   BiLSTM (Learned Emb)    0.7166      0.6208             0.6583          0.6077         0.7015                0.7106             0.7166                12.3730    