In [1]:
import os
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score
from gensim.models import KeyedVectors
from sklearn.metrics import mean_squared_error, f1_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Set CUDA configuration for better debugging
# Debug-only setting: CUDA_LAUNCH_BLOCKING=1 asks the CUDA runtime to run kernel launches
# synchronously so that errors surface at the offending call.
# NOTE(review): this typically slows GPU execution — consider disabling it for full training runs.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [3]:
# Define Constants
BASE_DIR = '../'  # Root directory for accessing files
DATASET_DIR = os.path.join(BASE_DIR, 'dataset')
SAVE_DIR = os.path.join(BASE_DIR, 'result')
MODEL_NAME = "albert-base-v2"  # ALBERT model identifier
GLOVE_PATH = os.path.join(BASE_DIR, 'word_embeddings/glove.6B.300d.txt')
FASTTEXT_PATH = os.path.join(BASE_DIR, 'word_embeddings/wiki.en.vec')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU if available

# Load Tokenizer and ALBERT model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
albert_model = AutoModel.from_pretrained(MODEL_NAME).to(device)
# ALBERT is only used as a frozen feature extractor in this notebook (it is always called
# under torch.no_grad()), so keep it explicitly in inference mode.
albert_model.eval()

# Ensure directories exist.
# GLOVE_PATH and FASTTEXT_PATH share the same parent directory, so de-duplicate (order-preserving)
# to avoid checking and reporting the same directory twice.
# Note: this only creates the folders; the embedding files themselves must already be present.
directories = list(dict.fromkeys(
    [BASE_DIR, DATASET_DIR, SAVE_DIR, os.path.dirname(GLOVE_PATH), os.path.dirname(FASTTEXT_PATH)]
))
for directory in directories:
    if not os.path.exists(directory):
        # exist_ok=True keeps this safe if the directory appears between the check and the call
        os.makedirs(directory, exist_ok=True)
        print(f"Directory created: {directory}")
    else:
        print(f"Directory already exists: {directory}")

2024-11-20 16:47:44.551242: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732106864.565465 1730517 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732106864.569844 1730517 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-20 16:47:44.583846: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Directory already exists: ../
Directory already exists: ../dataset
Directory already exists: ../result
Directory already exists: ../word_embeddings
Directory already exists: ../word_embeddings


In [4]:
def load_glove_model(glove_file_path):
    """
    Load GloVe embeddings from a whitespace-separated text file into a dictionary.

    Every usable line holds a word followed by its vector components. Blank lines and
    lines that contain a token but no vector component are skipped instead of raising
    an IndexError / producing an empty vector.

    :param glove_file_path: Path to the GloVe embedding file (read as UTF-8).
    :return: Dictionary mapping each word to a float32 tensor moved to the global `device`.
    """
    embedding_dict = {}
    with open(glove_file_path, 'r', encoding="utf-8") as f:
        for line in f:
            values = line.split()
            # Need at least the word plus one component; guards against blank/truncated lines
            if len(values) < 2:
                continue
            word = values[0]
            vector = torch.tensor(np.asarray(values[1:], dtype='float32'))
            # Each word vector is stored as its own tensor on `device`
            embedding_dict[word] = vector.to(device)
    return embedding_dict

def load_fasttext_model(fasttext_file_path):
    """
    Read FastText vectors stored in the textual word2vec format and expose them as a dict.

    :param fasttext_file_path: Path to the FastText embedding file (text, not binary).
    :return: Dictionary mapping every vocabulary entry to a tensor moved to the global `device`.
    """
    keyed_vectors = KeyedVectors.load_word2vec_format(fasttext_file_path, binary=False)

    word_to_vector = {}
    for token in keyed_vectors.index_to_key:
        # One tensor per vocabulary entry, placed on the active device
        word_to_vector[token] = torch.tensor(keyed_vectors[token]).to(device)
    return word_to_vector

# Load embeddings
# Both loaders return a dict of word -> tensor, with every tensor already moved to `device`.
# NOTE(review): these files are presumably large, so this cell is likely slow and memory-hungry — confirm.
glove_model = load_glove_model(GLOVE_PATH)
fasttext_model = load_fasttext_model(FASTTEXT_PATH)

In [5]:
# Load the essay dataset, keep only rows that have all three target labels,
# and replace any remaining missing values with 0.
# NOTE(review): the CSV is read relative to the working directory, not from DATASET_DIR — confirm intended.
df = (
    pd.read_csv('processed_essay_dataset.csv', sep=',', encoding='ISO-8859-1')
    .dropna(subset=['language', 'prompt_adherence', 'narrativity'])
    .fillna(0)
)

df.head(2)

Unnamed: 0,essay_id,essay_set,essay,essay_type,domain1_score,content,organization,word_choice,sentence_fluency,conventions,language,prompt_adherence,narrativity,style,voice,normalized_score
3573,5978,3,The features of the setting affect the cyclist...,dependent,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,33.333333
3574,5979,3,The features of the setting affected the cycli...,dependent,2.0,3.0,0.0,0.0,0.0,0.0,2.0,3.0,2.0,0.0,0.0,66.666667


In [6]:
# Number of rows left after dropping essays with a missing target label
len(df)

5303

In [7]:
# Debug: show the class distribution (label value -> row count) of each target column
label_titles = {
    "Language": 'language',
    "Prompt Adherence": 'prompt_adherence',
    "Narrativity": 'narrativity',
}
print({title: df[column].value_counts().to_dict() for title, column in label_titles.items()})

{'Language': {2.0: 2014, 1.0: 1453, 0.0: 887, 3.0: 821, 4.0: 128}, 'Prompt Adherence': {2.0: 1875, 1.0: 1472, 0.0: 1012, 3.0: 825, 4.0: 119}, 'Narrativity': {2.0: 2117, 1.0: 1421, 0.0: 918, 3.0: 749, 4.0: 98}}


In [8]:
# Cast the three target columns to integers so they can be used as class indices
for label_column in ('language', 'prompt_adherence', 'narrativity'):
    df[label_column] = df[label_column].astype(int)

In [9]:
# Sanity check: distinct integer labels present in the 'language' column
df.language.unique()

array([1, 2, 0, 3, 4])

In [10]:
# Sanity check: distinct integer labels present in the 'prompt_adherence' column
df.prompt_adherence.unique()

array([0, 3, 2, 1, 4])

In [11]:
# Sanity check: distinct integer labels present in the 'narrativity' column
df.narrativity.unique()

array([1, 2, 0, 3, 4])

In [12]:
class MultiTaskDependent(nn.Module):
    """
    Multi-task classifier for essay attributes.

    A shared two-layer MLP trunk (Linear -> BatchNorm -> ReLU -> Dropout, twice) feeds three
    independent linear heads that emit class logits for 'language', 'prompt_adherence'
    and 'narrativity'.
    """
    def __init__(self, input_shape, num_classes):
        """
        Initialize the MultiTaskDependent model.

        :param input_shape: Number of input features per sample.
        :param num_classes: Dict with the keys 'language', 'prompt_adherence' and 'narrativity'
                            giving the number of output classes of each head.
        """
        super(MultiTaskDependent, self).__init__()
        # Shared fully connected layers for feature extraction
        self.fc1 = nn.Linear(input_shape, 256)  # First shared dense layer
        self.bn1 = nn.BatchNorm1d(256)  # Batch normalization after the first dense layer
        self.dropout1 = nn.Dropout(0.5)  # Dropout for regularization after the first dense layer
        self.fc2 = nn.Linear(256, 128)  # Second shared dense layer
        self.bn2 = nn.BatchNorm1d(128)  # Batch normalization after the second dense layer
        self.dropout2 = nn.Dropout(0.5)  # Dropout for regularization after the second dense layer

        # Task-specific heads for classification
        self.language_head = nn.Linear(128, num_classes['language'])  # Head for 'language'
        self.prompt_adherence_head = nn.Linear(128, num_classes['prompt_adherence'])  # Head for 'prompt adherence'
        self.narrativity_head = nn.Linear(128, num_classes['narrativity'])  # Head for 'narrativity'

        # One learnable uncertainty value per task, in the order
        # (language, prompt_adherence, narrativity). It is read by compute_uncertainty_loss,
        # which previously failed with AttributeError because this attribute did not exist.
        # Created last and with zeros so the random initialisation of the layers above is unchanged.
        self.task_uncertainty = nn.Parameter(torch.zeros(3))

    def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs):
        """
        Keep checkpoints saved before `task_uncertainty` existed loadable: when the key is
        absent, fall back to the current value instead of failing the strict key check.
        """
        key = prefix + 'task_uncertainty'
        if key not in state_dict:
            state_dict[key] = self.task_uncertainty.detach().clone()
        super()._load_from_state_dict(state_dict, prefix, *args, **kwargs)

    def forward(self, x):
        """
        Forward pass through the model.

        :param x: 2-D float tensor of input features (one row per sample).
        :return: Tuple of logits (language, prompt_adherence, narrativity).
        """
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)

        language_output = self.language_head(x)
        prompt_adherence_output = self.prompt_adherence_head(x)
        narrativity_output = self.narrativity_head(x)

        return language_output, prompt_adherence_output, narrativity_output

    def compute_uncertainty_loss(self, loss_language, loss_prompt_adherence, loss_narrativity):
        """
        Compute dynamically weighted loss using task uncertainty parameters.

        Each task loss is scaled by exp(-s_i) and s_i is added as a regulariser, where s_i is
        that task's own entry of `self.task_uncertainty`.

        :param loss_language: Scalar loss of the 'language' task.
        :param loss_prompt_adherence: Scalar loss of the 'prompt adherence' task.
        :param loss_narrativity: Scalar loss of the 'narrativity' task.
        :return: Total weighted loss.
        """
        # Each task uses its own slot (the original indexed slot 1 for all three tasks)
        s_language = self.task_uncertainty[0]
        s_prompt_adherence = self.task_uncertainty[1]
        s_narrativity = self.task_uncertainty[2]

        # Dynamic weighting of losses based on task uncertainty
        language_precision = torch.exp(-s_language)  # Precision for language
        prompt_adherence_precision = torch.exp(-s_prompt_adherence)  # Precision for prompt adherence
        narrativity_precision = torch.exp(-s_narrativity)  # Precision for narrativity

        # Weighted loss computation
        loss = (
            language_precision * loss_language + s_language +
            prompt_adherence_precision * loss_prompt_adherence + s_prompt_adherence +
            narrativity_precision * loss_narrativity + s_narrativity
        )
        return loss

    def compute_loss(self, pred_language, pred_prompt_adherence, pred_narrativity,
                     y_language, y_prompt_adherence, y_narrativity):
        """
        Compute the total loss across all tasks using CrossEntropyLoss.

        :param pred_language: Logits of the 'language' head.
        :param pred_prompt_adherence: Logits of the 'prompt adherence' head.
        :param pred_narrativity: Logits of the 'narrativity' head.
        :param y_language: Integer class targets for 'language'.
        :param y_prompt_adherence: Integer class targets for 'prompt adherence'.
        :param y_narrativity: Integer class targets for 'narrativity'.
        :return: Combined (unweighted sum) loss.
        """
        criterion = nn.CrossEntropyLoss()  # Standard cross-entropy loss

        # Compute individual cross-entropy losses for each task
        ce_loss_language = criterion(pred_language, y_language)
        ce_loss_prompt_adherence = criterion(pred_prompt_adherence, y_prompt_adherence)
        ce_loss_narrativity = criterion(pred_narrativity, y_narrativity)

        # Combine losses from all tasks
        return ce_loss_language + ce_loss_prompt_adherence + ce_loss_narrativity


class LabelSmoothingCrossEntropy(nn.Module):
    """
    Cross-entropy variant that blends the true-class term with a uniform term (label smoothing).

    The result is not reduced over the batch: one loss value is returned per sample.
    """
    def __init__(self, smoothing=0.1):
        super(LabelSmoothingCrossEntropy, self).__init__()
        # Share of the loss assigned to the uniform (smoothed) term
        self.smoothing = smoothing

    def forward(self, pred, target):
        """
        Evaluate the smoothed loss for a batch.

        :param pred: Logits, one row per sample.
        :param target: Integer class index per sample.
        :return: Tensor with one smoothed cross-entropy value per sample.
        """
        log_probabilities = F.log_softmax(pred, dim=-1)

        # Log-probability the model assigns to the correct class of every sample
        picked = log_probabilities.gather(dim=-1, index=target.unsqueeze(1)).squeeze(1)
        true_class_term = -picked

        # Average negative log-probability over all classes
        uniform_term = -log_probabilities.mean(dim=-1)

        confidence = 1 - self.smoothing
        return confidence * true_class_term + self.smoothing * uniform_term

In [13]:
def get_albert_embedding(text):
    """
    Encode a text with the global ALBERT model and return its first-token representation.

    Args:
        text (str): Input text. It is truncated to at most 256 tokens.

    Returns:
        numpy.ndarray: First-position ([CLS]) vector of the last hidden state, with the
        leading batch axis preserved.
    """
    # Tokenize, then move the encoded inputs to the active device
    encoded = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=256)
    encoded = encoded.to(device)

    # Inference only: no gradients are needed for feature extraction
    with torch.no_grad():
        last_hidden = albert_model(**encoded).last_hidden_state

    cls_vectors = last_hidden[:, 0, :]
    return cls_vectors.cpu().numpy()

def get_word_embedding(text, embedding_dict, default_dim=300):
    """
    Generate an averaged word embedding for a text using a pre-trained embedding dictionary.

    The text is lower-cased and split on whitespace; words missing from the dictionary are ignored.

    Args:
        text (str): Input text.
        embedding_dict (dict): Mapping word -> torch tensor (e.g., GloVe or FastText vectors).
        default_dim (int): Length of the zero vector returned when the dictionary is empty/None
            and therefore cannot tell us the embedding size. Defaults to 300.

    Returns:
        numpy.ndarray: The mean vector of all known words. If no word is known, a zero vector
        with the same shape and dtype as the dictionary's vectors (previously a hard-coded
        300-dimensional float64 vector, which mismatched other embedding sizes and dtypes).
    """
    # An empty or missing table simply means "no known words"
    if not embedding_dict:
        return np.zeros(default_dim, dtype=np.float32)

    # Split text into words and fetch embeddings for each word if available
    words = text.lower().split()
    vectors = [embedding_dict[word] for word in words if word in embedding_dict]

    # Compute the average embedding if vectors are found
    if vectors:
        return torch.mean(torch.stack(vectors), dim=0).cpu().numpy()

    # No known word: mirror the shape/dtype of the table's vectors so callers always get
    # a consistently sized result
    sample_vector = next(iter(embedding_dict.values()))
    return torch.zeros_like(sample_vector).cpu().numpy()

def create_attention_based_embedding(albert_emb, additional_emb):
    """
    Blend an ALBERT embedding with an additional embedding using softmax-normalised weights.

    NOTE(review): despite the name, nothing is learned here. The two weights are re-created
    as [0.5, 0.5] on every call, so the result is always the plain average of both inputs.
    When the shapes differ, the additional embedding is first passed through a freshly
    (randomly) initialised Linear layer, so the output is not reproducible across calls.
    A trainable version would need these layers to live in an nn.Module.

    Args:
        albert_emb (torch.Tensor): ALBERT embedding vector.
        additional_emb (torch.Tensor): Additional embedding vector (e.g., GloVe or FastText).

    Returns:
        torch.Tensor: Weighted sum of both embeddings, with the shape of `albert_emb`.

    Raises:
        RuntimeError: If the shapes still differ after the projection step.
    """
    # Project the additional embedding to the ALBERT size when the shapes differ
    if albert_emb.shape != additional_emb.shape:
        projection = torch.nn.Linear(additional_emb.shape[0], albert_emb.shape[0]).to(albert_emb.device)
        additional_emb = projection(additional_emb)

    # Explicit shape check replacing the unused concatenation that used to fail here implicitly
    if albert_emb.shape != additional_emb.shape:
        raise RuntimeError(
            f"Embedding shapes do not match: {tuple(albert_emb.shape)} vs {tuple(additional_emb.shape)}"
        )

    # Softmax over two equal logits -> 0.5 / 0.5
    attention_weights = torch.nn.Parameter(torch.tensor([0.5, 0.5], device=albert_emb.device), requires_grad=True)
    attention_scores = F.softmax(attention_weights, dim=0)

    # Compute the fused embedding as a weighted sum
    return attention_scores[0] * albert_emb + attention_scores[1] * additional_emb

def create_combined_embedding(text, embedding_type=None, _glove_model=None, _fasttext_model=None):
    """
    Build the feature vector of a text: the ALBERT embedding, optionally followed by an
    averaged GloVe/FastText embedding that is truncated or zero-padded to the ALBERT length.

    NOTE(review): with a shorter word embedding this yields 2x the ALBERT length, whereas the
    recorded run output reports 1068 input features for glove/fasttext — confirm which
    version of this function produced the saved checkpoints.

    Returns:
        tuple: (feature vector as a numpy array, number of features).
    """
    # ALBERT part, flattened to 1-D and moved to the active device
    albert_vec = torch.tensor(get_albert_embedding(text).flatten(), dtype=torch.float32).to(device)

    # Optional averaged word-embedding part
    if embedding_type == "glove":
        extra = get_word_embedding(text, _glove_model)
    elif embedding_type == "fasttext":
        extra = get_word_embedding(text, _fasttext_model)
    else:
        extra = np.array([])

    if extra.size == 0:
        # ALBERT-only features
        features = albert_vec
    else:
        extra_vec = torch.tensor(extra, dtype=torch.float32).to(device)
        target_len = albert_vec.size(0)
        shortfall = target_len - extra_vec.size(0)
        if shortfall < 0:
            # Longer than ALBERT: keep only the leading components
            extra_vec = extra_vec[:target_len]
        elif shortfall > 0:
            # Shorter than ALBERT: right-pad with zeros
            extra_vec = F.pad(extra_vec, (0, shortfall))
        features = torch.cat([albert_vec, extra_vec], dim=0)

    return features.cpu().numpy(), features.size(0)

In [14]:
def train_and_save_model(X_train_tensor, y_train_language_tensor, 
                         y_train_prompt_adherence_tensor, y_train_narrativity_tensor, 
                         input_shape, save_dir, 
                         embedding_type=None, epochs=10, batch_size=6, learning_rate=1e-4):
    """
    Trains a multi-task classification model and saves its state and metadata.

    Parameters:
    - X_train_tensor (torch.Tensor): Training features, one row per sample.
    - y_train_language_tensor / y_train_prompt_adherence_tensor / y_train_narrativity_tensor
      (torch.Tensor): Integer class labels per sample for each task.
    - input_shape (int): Number of input features per sample.
    - save_dir (str): Directory for the checkpoint (.pth) and embedding-size (.npy) files;
      created if it does not exist.
    - embedding_type (str, optional): Used only to name the saved files ('albert' when None).
    - epochs (int), batch_size (int), learning_rate (float): Training hyper-parameters.

    Returns:
    - tuple: 
        - model_path (str): Path to the saved model file.
        - train_losses (list): List of average training losses for each epoch.

    Raises:
    - ValueError: If fewer than two training samples are given (BatchNorm cannot train on one).
    """
    num_samples = X_train_tensor.shape[0]
    if num_samples < 2:
        raise ValueError("At least two training samples are required (the model uses BatchNorm1d).")

    # Determine the number of classes for each task based on the label tensors.
    # Labels are assumed to be 0-based class indices, hence max + 1.
    num_classes = {
        'language': int(y_train_language_tensor.max().item() + 1),  # Classes for 'language'
        'prompt_adherence': int(y_train_prompt_adherence_tensor.max().item() + 1),  # Classes for 'prompt adherence'
        'narrativity': int(y_train_narrativity_tensor.max().item() + 1),  # Classes for 'narrativity'
    }
    
    print("Number of classes:", num_classes)

    # Initialize the MultiTaskDependent model
    model = MultiTaskDependent(input_shape, num_classes).to(device)

    # Define an Adam optimizer with weight decay for regularization
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

    # BatchNorm1d raises in training mode when a batch holds a single sample. That happens for
    # the trailing batch whenever num_samples % batch_size == 1, so drop it in exactly that case.
    drop_single_tail = batch_size > 1 and num_samples % batch_size == 1

    # Prepare the DataLoader for batching and shuffling
    train_loader = DataLoader(
        TensorDataset(
            X_train_tensor,
            y_train_language_tensor, y_train_prompt_adherence_tensor, 
            y_train_narrativity_tensor
        ),
        batch_size=batch_size, shuffle=True,  # Shuffle data during training
        drop_last=drop_single_tail
    )

    # The criterion is stateless, so build it once instead of once per batch
    criterion = nn.CrossEntropyLoss()
    
    train_losses = []  # To store average loss per epoch

    # Training loop for multiple epochs
    for epoch in range(epochs):
        model.train()  # Set the model to training mode
        epoch_loss = 0  # Accumulate epoch loss

        # Iterate over each batch in the training DataLoader
        for X_batch, y_language_batch, y_prompt_adherence_batch, y_narrativity_batch in train_loader:
            # Move the data to the appropriate device (CPU/GPU)
            X_batch = X_batch.to(device)
            y_language_batch = y_language_batch.to(device)
            y_prompt_adherence_batch = y_prompt_adherence_batch.to(device)
            y_narrativity_batch = y_narrativity_batch.to(device)
            
            optimizer.zero_grad()  # Clear gradients from the previous step
            
            # Forward pass: Get predictions for each task
            pred_language, pred_prompt_adherence, pred_narrativity = model(X_batch)

            # Compute losses for each task using CrossEntropyLoss
            loss_language = criterion(pred_language, y_language_batch.long())
            loss_prompt_adherence = criterion(pred_prompt_adherence, y_prompt_adherence_batch.long())
            loss_narrativity = criterion(pred_narrativity, y_narrativity_batch.long())

            # Combine losses from all tasks (unweighted sum)
            total_loss = loss_language + loss_prompt_adherence + loss_narrativity
            
            # Backward pass: Compute gradients
            total_loss.backward()

            # Clip gradients to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.5)
            optimizer.step()  # Update the model parameters

            # Accumulate the total loss for the current batch
            epoch_loss += total_loss.item()

        # Compute and store the average loss for the epoch
        avg_epoch_loss = epoch_loss / len(train_loader)
        train_losses.append(avg_epoch_loss)
        print(f"Epoch {epoch + 1}/{epochs}, Total Epoch Loss: {avg_epoch_loss:.4f}")

    # Define filenames for saving the model and embedding size metadata
    model_filename = f"albert_dependent_model_{embedding_type or 'albert'}.pth"  # Model filename
    embedding_size_filename = f"albert_dependent_embedding_size_{embedding_type or 'albert'}.npy"  # Embedding size filename

    # Make sure the target directory exists before writing into it
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    model_path = os.path.join(save_dir, model_filename)

    # Save the trained model and its metadata.
    # input_shape is stored as a plain int so the checkpoint only holds tensors and builtins.
    torch.save({
        'model_state_dict': model.state_dict(),  # Save model weights
        'input_shape': int(input_shape),        # Save the input shape for reloading the model
        'num_classes': num_classes              # Save number of classes for each task
    }, model_path)

    # Save the input shape as a separate .npy file
    np.save(os.path.join(save_dir, embedding_size_filename), input_shape)

    # Return the path to the saved model and the list of average training losses
    return model_path, train_losses

In [15]:
def evaluate_model(model_path, X_test_tensor, y_test_language_tensor, 
                   y_test_prompt_adherence_tensor, y_test_narrativity_tensor, save_dir, model_name):
    """
    Evaluate the trained MultiTaskDependent model on the test data and compute performance metrics.

    Parameters:
    - model_path (str): Checkpoint written by train_and_save_model (holds 'model_state_dict',
      'input_shape' and 'num_classes').
    - X_test_tensor (torch.Tensor): Test features, one row per sample.
    - y_test_language_tensor / y_test_prompt_adherence_tensor / y_test_narrativity_tensor
      (torch.Tensor): Integer class labels per sample for each task.
    - save_dir, model_name: Currently unused; kept so existing callers keep working.

    Returns:
    - tuple: Kappa scores for the evaluated tasks:
        - kappa_language (float): Quadratic weighted Kappa for the 'language' task.
        - kappa_prompt_adherence (float): Quadratic weighted Kappa for the 'prompt adherence' task.
        - kappa_narrativity (float): Quadratic weighted Kappa for the 'narrativity' task.
    """
    # Load the model checkpoint.
    # weights_only=True restricts unpickling to tensors and plain containers, which is all the
    # checkpoint contains; it also silences the FutureWarning torch.load emits otherwise.
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
    input_shape = checkpoint['input_shape']  # Input shape used during model training
    print(input_shape)  # Debug: Print the input shape
    num_classes = checkpoint['num_classes']  # Number of classes for each task
    print(num_classes)  # Debug: Print the number of classes for each task

    # Initialize the model using the saved configuration
    model = MultiTaskDependent(input_shape, num_classes).to(device)  # Load model onto the device
    model.load_state_dict(checkpoint['model_state_dict'])  # Load the saved model weights
    model.eval()  # Set the model to evaluation mode (disable dropout, etc.)

    # Only the features are needed on the device; labels are consumed as NumPy arrays below
    X_test_tensor = X_test_tensor.to(device)

    with torch.no_grad():  # Disable gradient calculations during evaluation
        # Forward pass: Get predictions from the model
        pred_language, pred_prompt_adherence, pred_narrativity = model(X_test_tensor)

    def _quadratic_kappa(y_true_tensor, logits):
        # Predicted class = index of the largest logit per sample
        predicted = np.argmax(logits.cpu().numpy(), axis=1)
        return cohen_kappa_score(y_true_tensor.cpu().numpy(), predicted, weights='quadratic')

    # Compute Cohen's Kappa score for the attributes
    kappa_language = _quadratic_kappa(y_test_language_tensor, pred_language)
    kappa_prompt_adherence = _quadratic_kappa(y_test_prompt_adherence_tensor, pred_prompt_adherence)
    kappa_narrativity = _quadratic_kappa(y_test_narrativity_tensor, pred_narrativity)

    # Print evaluation results for debugging or analysis
    print(f"Kappa Language: {kappa_language:.4f}")
    print(f"Kappa Prompt Adherence: {kappa_prompt_adherence:.4f}")
    print(f"Kappa Narrativity: {kappa_narrativity:.4f}")

    return kappa_language, kappa_prompt_adherence, kappa_narrativity


In [16]:
# Main training and evaluation loop
# For every embedding variant: build features for all essays, split 80/20, train a fresh
# model, evaluate it, and record the three quadratic-weighted Kappa scores.
# NOTE(review): create_combined_embedding calls get_albert_embedding on every essay, so the
# ALBERT forward passes are repeated for each of the three variants — caching them once
# would make a full re-run much cheaper.
embedding_types = [None, "glove", "fasttext"]  # List of embedding types to evaluate: ALBERT, GloVe, and FastText
all_kappa_scores = []  # Initialize list to store Kappa scores for each model and embedding type

for embedding_type in embedding_types:
    # Set model_name based on the embedding type (default to 'albert' if None is specified)
    model_name = embedding_type or 'albert'

    # Generate combined embeddings for the essays using the current embedding type
    # The embeddings are created by combining ALBERT with optional GloVe or FastText embeddings
    embeddings_and_sizes = df['essay'].apply(
        lambda x: create_combined_embedding(
            x,
            embedding_type=embedding_type,  # Specify the embedding type
            _glove_model=glove_model if embedding_type == "glove" else None,  # Pass GloVe model if applicable
            _fasttext_model=fasttext_model if embedding_type == "fasttext" else None  # Pass FastText model if applicable
        )
    )    
    
    # Split the combined embeddings and their sizes into separate columns
    # Note: df['embeddings'] is overwritten on every iteration, so after the loop it holds the
    # features of the last embedding type only; embedding_sizes is not used again in this cell.
    df['embeddings'], embedding_sizes = zip(*embeddings_and_sizes)

    # Split the dataset into training and testing sets
    # Includes input features (embeddings) and target labels for each task
    # The fixed random_state gives every embedding type the same train/test partition.
    X_train, X_test, y_train_language, y_test_language, \
    y_train_prompt_adherence, y_test_prompt_adherence, \
    y_train_narrativity, y_test_narrativity = train_test_split(
        np.stack(df['embeddings'].values),  # Convert embeddings to a stacked NumPy array
        df['language'].values,             # Target labels for 'language'
        df['prompt_adherence'].values,     # Target labels for 'prompt adherence'
        df['narrativity'].values,          # Target labels for 'narrativity'
        test_size=0.2,                     # Use 20% of data for testing
        random_state=42                    # Ensure reproducibility of data splits
    )

    # Convert training data to PyTorch tensors
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_language_tensor = torch.tensor(y_train_language, dtype=torch.long)
    y_train_prompt_adherence_tensor = torch.tensor(y_train_prompt_adherence, dtype=torch.long)
    y_train_narrativity_tensor = torch.tensor(y_train_narrativity, dtype=torch.long)

    # Convert testing data to PyTorch tensors
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_language_tensor = torch.tensor(y_test_language, dtype=torch.long)
    y_test_prompt_adherence_tensor = torch.tensor(y_test_prompt_adherence, dtype=torch.long)
    y_test_narrativity_tensor = torch.tensor(y_test_narrativity, dtype=torch.long)

    # Train and save the model for the current embedding type
    # train_losses is overwritten each iteration (only the last variant's curve survives the loop)
    model_path, train_losses = train_and_save_model(
        X_train_tensor,                     # Training input features
        y_train_language_tensor,            # Training labels for 'language'
        y_train_prompt_adherence_tensor,    # Training labels for 'prompt adherence'
        y_train_narrativity_tensor,         # Training labels for 'narrativity'
        X_train_tensor.shape[1],            # Input shape (number of features in embeddings)
        SAVE_DIR,                           # Directory to save the trained model
        embedding_type                      # Embedding type (for model naming and metadata)
    )

    # Evaluate the trained model on the test set
    # Compute Kappa scores for all tasks: language, prompt adherence, and narrativity
    kappa_language, kappa_prompt_adherence, kappa_narrativity = evaluate_model(
        model_path,                          # Path to the trained model
        X_test_tensor,                       # Test input features
        y_test_language_tensor,              # Test labels for 'language'
        y_test_prompt_adherence_tensor,      # Test labels for 'prompt adherence'
        y_test_narrativity_tensor,           # Test labels for 'narrativity'
        SAVE_DIR,                            # Directory for additional outputs (if needed)
        model_name                           # Name of the model being evaluated
    )

    # Append the Kappa scores for each task to the results list for comparison
    # Row order follows embedding_types: [albert, glove, fasttext]
    all_kappa_scores.append([
        kappa_language, 
        kappa_prompt_adherence, 
        kappa_narrativity
    ])

Number of classes: {'language': 5, 'prompt_adherence': 5, 'narrativity': 5}
Epoch 1/10, Total Epoch Loss: 4.1325
Epoch 2/10, Total Epoch Loss: 3.5547
Epoch 3/10, Total Epoch Loss: 3.4231
Epoch 4/10, Total Epoch Loss: 3.3228
Epoch 5/10, Total Epoch Loss: 3.2579
Epoch 6/10, Total Epoch Loss: 3.1994
Epoch 7/10, Total Epoch Loss: 3.1945
Epoch 8/10, Total Epoch Loss: 3.1549
Epoch 9/10, Total Epoch Loss: 3.0842
Epoch 10/10, Total Epoch Loss: 3.0906
768
{'language': 5, 'prompt_adherence': 5, 'narrativity': 5}
Kappa Language: 0.7181
Kappa Prompt Adherence: 0.7259
Kappa Narrativity: 0.6893


  checkpoint = torch.load(model_path, map_location=device)


Number of classes: {'language': 5, 'prompt_adherence': 5, 'narrativity': 5}
Epoch 1/10, Total Epoch Loss: 4.1304
Epoch 2/10, Total Epoch Loss: 3.5630
Epoch 3/10, Total Epoch Loss: 3.3773
Epoch 4/10, Total Epoch Loss: 3.2933
Epoch 5/10, Total Epoch Loss: 3.1959
Epoch 6/10, Total Epoch Loss: 3.1982
Epoch 7/10, Total Epoch Loss: 3.1382
Epoch 8/10, Total Epoch Loss: 3.1475
Epoch 9/10, Total Epoch Loss: 3.1021
Epoch 10/10, Total Epoch Loss: 3.1194
1068
{'language': 5, 'prompt_adherence': 5, 'narrativity': 5}
Kappa Language: 0.7388
Kappa Prompt Adherence: 0.7562
Kappa Narrativity: 0.7212


  checkpoint = torch.load(model_path, map_location=device)


Number of classes: {'language': 5, 'prompt_adherence': 5, 'narrativity': 5}
Epoch 1/10, Total Epoch Loss: 4.1176
Epoch 2/10, Total Epoch Loss: 3.5626
Epoch 3/10, Total Epoch Loss: 3.3746
Epoch 4/10, Total Epoch Loss: 3.2955
Epoch 5/10, Total Epoch Loss: 3.2837
Epoch 6/10, Total Epoch Loss: 3.1598
Epoch 7/10, Total Epoch Loss: 3.1793
Epoch 8/10, Total Epoch Loss: 3.1612
Epoch 9/10, Total Epoch Loss: 3.0859
Epoch 10/10, Total Epoch Loss: 3.0518
1068
{'language': 5, 'prompt_adherence': 5, 'narrativity': 5}
Kappa Language: 0.7319
Kappa Prompt Adherence: 0.7399
Kappa Narrativity: 0.7059


  checkpoint = torch.load(model_path, map_location=device)


In [17]:
# Sample essay text used as a single-input smoke test for the prediction helper defined below
content = """
    In “Let there be dark,” Paul Bogard talks about the importance of darkness.
Darkness is essential to humans. Bogard states, “Our bodies need darkness to produce the hormone melatonin, which keeps certain cancers from developing, and our bodies need darkness for sleep, sleep. Sleep disorders have been linked to diabetes, obesity, cardiovascular disease and depression and recent research suggests are main cause of “short sleep” is “long light.” Whether we work at night or simply take our tablets, notebooks and smartphones to bed, there isn’t a place for this much artificial light in our lives.” (Bogard 2). Here, Bogard talks about the importance of darkness to humans. Humans need darkness to sleep in order to be healthy.
Animals also need darkness. Bogard states, “The rest of the world depends on darkness as well, including nocturnal and crepuscular species of birds, insects, mammals, fish and reptiles. Some examples are well known—the 400 species of birds that migrate at night in North America, the sea turtles that come ashore to lay their eggs—and some are not, such as the bats that save American farmers billions in pest control and the moths that pollinate 80% of the world’s flora. Ecological light pollution is like the bulldozer of the night, wrecking habitat and disrupting ecosystems several billion years in the making. Simply put, without darkness, Earth’s ecology would collapse...” (Bogard 2). Here Bogard explains that animals, too, need darkness to survive.
""" 

In [18]:
def testContent(content, embedding_type=None, SAVE_DIR=None, glove_model=None, fasttext_model=None):
    """
    Test the model on a single piece of content by generating predictions for multiple attributes.

    Parameters:
    - content (str): The essay content to evaluate.
    - embedding_type (str, optional): Type of embedding to use ('glove', 'fasttext', or None for ALBERT). Defaults to None.
    - SAVE_DIR (str): Directory where the model and embedding size files are stored.
    - glove_model (dict, optional): Pre-trained GloVe embeddings if 'glove' embedding type is used.
    - fasttext_model (dict, optional): Pre-trained FastText embeddings if 'fasttext' embedding type is used.

    Returns:
    - tuple: Predicted scores for the following attributes:
        - language_score: Predicted score for the 'language' attribute.
        - prompt_adherence_score: Predicted score for the 'word choice' attribute.
        - narrativity_score: Predicted score for the 'sentence fluency' attribute.
    """
    # Generate a combined embedding for the content
    # Combines ALBERT embeddings with optional GloVe or FastText embeddings if specified
    embedding, actual_embedding_size = create_combined_embedding(
        content,
        embedding_type=embedding_type,
        _glove_model=glove_model if embedding_type == "glove" else None,
        _fasttext_model=fasttext_model if embedding_type == "fasttext" else None
    )

    # Convert the embedding to a PyTorch tensor and add a batch dimension for model input
    embedding_tensor = torch.tensor(embedding, dtype=torch.float32).to(device).unsqueeze(0)

    # Define file paths for the model and embedding configuration files
    model_filename = f"albert_dependent_model_{embedding_type or 'albert'}.pth"  # Model filename
    model_path = os.path.join(SAVE_DIR, model_filename)  # Full path to the model file
    
    embedding_size_filename = f"albert_dependent_embedding_size_{embedding_type or 'albert'}.npy"  # Embedding size filename
    embedding_size_path = os.path.join(SAVE_DIR, embedding_size_filename)  # Full path to embedding size file

    # Load the expected embedding size from the configuration file
    expected_embedding_size = int(np.load(embedding_size_path))  # Read and convert to integer

    # Load the model checkpoint containing the weights and configuration
    checkpoint = torch.load(model_path, map_location=device)
    input_shape = checkpoint['input_shape']  # Input size used when training the model
    num_classes = checkpoint['num_classes']  # Number of output classes for each task

    # Initialize the model using the configuration stored in the checkpoint
    model = MultiTaskDependent(input_shape, num_classes).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])  # Load model weights into the initialized model
    model.eval()  # Set the model to evaluation mode to disable dropout and other training behaviors

    # Resize the input embedding if the current size exceeds the expected size
    embedding_resized = embedding_tensor[:, :expected_embedding_size]  # Trim embedding to match model's input size

    # Generate predictions using the trained model
    with torch.no_grad():  # Disable gradient calculations for inference
        # Forward pass through the model
        pred_language, pred_prompt_adherence, pred_narrativity = model(embedding_resized)

        # Extract the predicted class for each attribute (index of the highest probability)
        language_score = torch.argmax(pred_language, dim=1).item() + 1  # Add 1 to convert to 1-based index
        prompt_adherence_score = torch.argmax(pred_prompt_adherence, dim=1).item() + 1  # Add 1 to convert to 1-based index
        narrativity_score = torch.argmax(pred_narrativity, dim=1).item() + 1  # Add 1 to convert to 1-based index

    # Return the predicted scores for the attributes as a tuple
    return language_score, prompt_adherence_score, narrativity_score

In [21]:
# Evaluate the sample essay with each embedding configuration (ALBERT only,
# ALBERT + GloVe, ALBERT + FastText) and collect the predicted Language,
# Prompt Adherence and Narrativity scores per configuration in `results`,
# keyed by a readable configuration name.

# Human-readable label for every supported embedding_type value.
EMBEDDING_DISPLAY_NAMES = {
    None: "ALBERT",
    "glove": "ALBERT + GloVe",
    "fasttext": "ALBERT + FastText",
}

# Scores per embedding configuration: {display name: {score label: value}}
results = {}

# Embedding types to evaluate (None means ALBERT embeddings only)
embedding_types = [None, "glove", "fasttext"]

for embedding_type in embedding_types:
    # Direct lookup: an unsupported type fails loudly with a KeyError here
    # instead of silently reusing the label from the previous iteration.
    embedding_type_name = EMBEDDING_DISPLAY_NAMES[embedding_type]

    # Generate predictions for the sample content using the current embedding type
    language_score, prompt_adherence_score, narrativity_score = testContent(
        content=content,                # Content to evaluate
        embedding_type=embedding_type,  # Current embedding type (None, GloVe, or FastText)
        SAVE_DIR=SAVE_DIR,              # Directory containing saved model files
        glove_model=glove_model,        # Preloaded GloVe embeddings (if applicable)
        fasttext_model=fasttext_model   # Preloaded FastText embeddings (if applicable)
    )

    # testContent returns plain Python ints, so float() cannot raise here;
    # the values are stored as floats to match the ".2f" report format below.
    results[embedding_type_name] = {
        "Language Score": float(language_score),
        "Prompt Adherence Score": float(prompt_adherence_score),
        "Narrativity Score": float(narrativity_score),
    }

# Display the results for each embedding type
for embedding_name, result in results.items():
    print(f"Sample Essay Scores for {embedding_name}:")
    print(f"  Language Score: {result['Language Score']:.2f}")  # Format to 2 decimal places
    print(f"  Prompt Adherence Score: {result['Prompt Adherence Score']:.2f}")
    print(f"  Narrativity Score: {result['Narrativity Score']:.2f}")

Sample Essay Scores for ALBERT:
  Language Score: 4.00
  Prompt Adherence Score: 4.00
  Narrativity Score: 4.00
Sample Essay Scores for ALBERT + GloVe:
  Language Score: 4.00
  Prompt Adherence Score: 4.00
  Narrativity Score: 4.00
Sample Essay Scores for ALBERT + FastText:
  Language Score: 4.00
  Prompt Adherence Score: 4.00
  Narrativity Score: 4.00


  checkpoint = torch.load(model_path, map_location=device)
