In [1]:
import os
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score
from gensim.models import KeyedVectors
from sklearn.metrics import mean_squared_error, f1_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Debug aid: ask CUDA to launch kernels synchronously so that a device-side failure
# is reported at the statement that triggered it rather than at a later, unrelated call.
# NOTE(review): this is a debugging setting and is expected to slow GPU execution —
# consider removing it once the notebook is stable.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [3]:
# ---- Paths (all relative to the notebook's working directory) ----
BASE_DIR = '../'
DATASET_DIR = os.path.join(BASE_DIR, 'dataset')
SAVE_DIR = os.path.join(BASE_DIR, 'result')
GLOVE_PATH = os.path.join(BASE_DIR, 'word_embeddings/glove.6B.300d.txt')
FASTTEXT_PATH = os.path.join(BASE_DIR, 'word_embeddings/wiki.en.vec')

# ---- Encoder configuration ----
MODEL_NAME = "albert-base-v2"

# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Tokenizer and encoder are loaded once and shared by every embedding helper
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
albert_model = AutoModel.from_pretrained(MODEL_NAME)
albert_model = albert_model.to(device)

# ---- Make sure every directory the notebook reads from or writes to is present ----
directories = [BASE_DIR, DATASET_DIR, SAVE_DIR, os.path.dirname(GLOVE_PATH), os.path.dirname(FASTTEXT_PATH)]
for directory in directories:
    if os.path.exists(directory):
        print(f"Directory already exists: {directory}")
        continue
    os.makedirs(directory)
    print(f"Directory created: {directory}")

2024-11-20 21:22:10.988741: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-20 21:22:11.003214: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732123331.020759 1971534 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732123331.025996 1971534 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-20 21:22:11.043671: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

Directory already exists: ../
Directory already exists: ../dataset
Directory already exists: ../result
Directory already exists: ../word_embeddings
Directory already exists: ../word_embeddings


In [4]:
def load_glove_model(glove_file_path):
    """
    Load GloVe embeddings from a text file into a dictionary.

    Every usable line holds a token followed by its vector components, separated
    by whitespace. Blank lines and lines that carry a token but no components are
    skipped, so a stray empty line (for example at the end of the file) no longer
    raises ``IndexError`` or produces an empty vector.

    :param glove_file_path: Path to the GloVe embedding file (UTF-8 text).
    :return: Dictionary mapping each word to a float32 tensor placed on ``device``.
    """
    embedding_dict = {}
    with open(glove_file_path, 'r', encoding="utf-8") as f:
        for line in f:
            values = line.split()
            # Need at least the token plus one component to form a vector
            if len(values) < 2:
                continue
            word = values[0]
            vector = torch.tensor(np.asarray(values[1:], dtype='float32'))
            embedding_dict[word] = vector.to(device)
    return embedding_dict

def load_fasttext_model(fasttext_file_path):
    """
    Read FastText vectors stored in word2vec text format and expose them as a dict.

    :param fasttext_file_path: Path to the FastText embedding file (non-binary format).
    :return: Dictionary mapping each vocabulary word to a tensor placed on ``device``.
    """
    keyed_vectors = KeyedVectors.load_word2vec_format(fasttext_file_path, binary=False)

    embeddings = {}
    for token in keyed_vectors.index_to_key:
        embeddings[token] = torch.tensor(keyed_vectors[token]).to(device)
    return embeddings

# Load both static word-embedding tables once; they are passed to
# create_combined_embedding for every essay in the experiment loop.
# Both loaders place every vector on `device`.
glove_model = load_glove_model(GLOVE_PATH)
fasttext_model = load_fasttext_model(FASTTEXT_PATH)

In [5]:
# Read the essay table, keep only rows that have all four trait scores,
# then replace any remaining missing values with 0.
df = (
    pd.read_csv('processed_essay_dataset.csv', sep=',', encoding='ISO-8859-1')
    .dropna(subset=['organization', 'word_choice', 'sentence_fluency', 'conventions'])
    .fillna(0)
)

df.head(2)

Unnamed: 0,essay_id,essay_set,essay,essay_type,domain1_score,content,organization,word_choice,sentence_fluency,conventions,language,prompt_adherence,narrativity,style,voice,normalized_score
0,1,1,"Dear local newspaper, I think effects computer...",argumentative,8.0,4.0,3.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,60.0
1,2,1,"Dear @CAPS1 @CAPS2, I believe that using compu...",argumentative,9.0,4.0,4.0,4.0,3.0,4.0,0.0,0.0,0.0,0.0,0.0,70.0


In [6]:
# Number of essays left after dropping rows that lack any of the four trait scores
len(df)

4296

In [7]:
# Frequency of every score value, computed separately for each trait column
organization_counts, word_choice_counts, sentence_fluency_counts, conventions_counts = (
    df[column].value_counts()
    for column in ('organization', 'word_choice', 'sentence_fluency', 'conventions')
)

# Show the distributions as plain dictionaries keyed by a readable trait name
dict(zip(
    ("Organization", "Word Choice", "Sentence Fluency", "Conventions"),
    (
        counts.to_dict()
        for counts in (organization_counts, word_choice_counts,
                       sentence_fluency_counts, conventions_counts)
    ),
))

{'Organization': {3.0: 1198,
  4.0: 1068,
  2.0: 578,
  5.0: 548,
  8.0: 221,
  6.0: 171,
  1.0: 154,
  7.0: 135,
  9.0: 87,
  12.0: 69,
  10.0: 29,
  11.0: 19,
  13.0: 11,
  14.0: 3,
  15.0: 3,
  16.0: 2},
 'Word Choice': {3.0: 1283,
  4.0: 1069,
  2.0: 533,
  5.0: 479,
  8.0: 289,
  1.0: 145,
  6.0: 141,
  7.0: 127,
  9.0: 79,
  12.0: 73,
  10.0: 39,
  11.0: 15,
  13.0: 12,
  15.0: 7,
  14.0: 4,
  16.0: 1},
 'Sentence Fluency': {4.0: 1253,
  3.0: 1224,
  5.0: 574,
  2.0: 371,
  8.0: 222,
  6.0: 170,
  7.0: 149,
  1.0: 112,
  9.0: 85,
  12.0: 74,
  10.0: 28,
  11.0: 18,
  14.0: 6,
  13.0: 6,
  15.0: 4},
 'Conventions': {4.0: 1195,
  3.0: 1190,
  5.0: 529,
  2.0: 500,
  6.0: 235,
  8.0: 182,
  1.0: 143,
  7.0: 131,
  9.0: 71,
  10.0: 37,
  12.0: 36,
  11.0: 34,
  13.0: 8,
  14.0: 4,
  15.0: 1}}

In [8]:
class MultiTaskArgumentative(nn.Module):
    """
    A multitask neural network model for predicting classification scores
    for various essay attributes such as organization, word choice, etc.

    Two shared fully connected layers (each followed by batch normalisation,
    ReLU and dropout) feed four independent linear classification heads.
    """

    # Order in which the tasks map onto the entries of ``task_uncertainty``.
    TASK_NAMES = ('organization', 'word_choice', 'sentence_fluency', 'conventions')

    def __init__(self, input_shape, num_classes):
        """
        Initialize the MultiTaskArgumentative model.

        :param input_shape: Number of input features per sample.
        :param num_classes: Dict with the number of classes for each of the keys
            'organization', 'word_choice', 'sentence_fluency' and 'conventions'.
        """
        super().__init__()

        # Shared fully connected layers
        self.fc1 = nn.Linear(input_shape, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(0.5)

        # Task-specific classification heads
        self.organization_head = nn.Linear(128, num_classes['organization'])
        self.word_choice_head = nn.Linear(128, num_classes['word_choice'])
        self.sentence_fluency_head = nn.Linear(128, num_classes['sentence_fluency'])
        self.conventions_head = nn.Linear(128, num_classes['conventions'])

        # One uncertainty parameter per task (previously only two existed and a
        # single entry was shared by all four tasks).
        self.task_uncertainty = nn.Parameter(torch.zeros(len(self.TASK_NAMES)), requires_grad=True)

    def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs):
        """
        Accept checkpoints saved when ``task_uncertainty`` had two entries.

        Those checkpoints applied entry 1 to every task, so that value is
        replicated across all tasks to keep their behaviour unchanged.
        """
        key = prefix + 'task_uncertainty'
        stored = state_dict.get(key)
        if stored is not None and tuple(stored.shape) == (2,):
            state_dict[key] = stored[1:2].repeat(len(self.TASK_NAMES))
        super()._load_from_state_dict(state_dict, prefix, *args, **kwargs)

    def forward(self, x):
        """
        Forward pass through the shared and task-specific layers.

        :param x: Input tensor whose last dimension has ``input_shape`` features.
        :return: Tuple of logits (organization, word_choice, sentence_fluency, conventions).
        """
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)

        # Output for each feature
        organization_output = self.organization_head(x)
        word_choice_output = self.word_choice_head(x)
        sentence_fluency_output = self.sentence_fluency_head(x)
        conventions_output = self.conventions_head(x)

        return organization_output, word_choice_output, sentence_fluency_output, conventions_output

    def compute_uncertainty_loss(self, loss_organization, loss_word_choice, loss_sentence_fluency, loss_conventions):
        """
        Compute dynamically weighted loss using task uncertainty parameters.

        Each task loss ``L_i`` contributes ``exp(-s_i) * L_i + s_i`` where ``s_i`` is
        that task's own entry of ``task_uncertainty`` (ordered as in ``TASK_NAMES``).

        :return: Total weighted loss
        """
        task_losses = (loss_organization, loss_word_choice, loss_sentence_fluency, loss_conventions)

        loss = 0
        for log_variance, task_loss in zip(self.task_uncertainty, task_losses):
            precision = torch.exp(-log_variance)
            loss = loss + precision * task_loss + log_variance

        return loss

    def compute_loss(self, pred_organization, pred_word_choice, pred_sentence_fluency, pred_conventions,
                        y_organization, y_word_choice, y_sentence_fluency, y_conventions) :

        """
        Compute total loss across all tasks.

        The per-task loss is cross-entropy between the logits and the integer class
        targets; targets are cast to ``long`` as the training loop does.

        :return: Combined loss (unweighted sum of the four cross-entropy losses)
        """
        criterion = nn.CrossEntropyLoss()
        ce_loss_organization = criterion(pred_organization, y_organization.long())
        ce_loss_word_choice = criterion(pred_word_choice, y_word_choice.long())
        ce_loss_sentence_fluency = criterion(pred_sentence_fluency, y_sentence_fluency.long())
        ce_loss_conventions = criterion(pred_conventions, y_conventions.long())

        total_loss = ce_loss_organization + ce_loss_word_choice + ce_loss_sentence_fluency + ce_loss_conventions

        return total_loss

class LabelSmoothingCrossEntropy(nn.Module):
    """
    Custom loss function that incorporates label smoothing into the standard CrossEntropyLoss.

    By default the loss is returned per sample (unreduced), as before. Pass
    ``reduction='mean'`` or ``reduction='sum'`` to obtain a scalar that can be
    back-propagated directly.
    """
    def __init__(self, smoothing=0.1, reduction='none'):
        """
        :param smoothing: Weight given to the uniform (smoothed) term; ``1 - smoothing``
            is given to the negative log-likelihood of the target class.
        :param reduction: ``'none'`` (default, per-sample losses), ``'mean'`` or ``'sum'``.
        :raises ValueError: If ``reduction`` is not one of the supported values.
        """
        super().__init__()
        if reduction not in ('none', 'mean', 'sum'):
            raise ValueError(f"Unsupported reduction: {reduction!r}")
        self.smoothing = smoothing
        self.reduction = reduction

    def forward(self, pred, target):
        """
        Compute the label-smoothed cross-entropy loss.

        :param pred: Logits; the class dimension is the last one.
        :param target: Integer class indices, one per row of ``pred``.
        :return: Per-sample losses, or a scalar when ``reduction`` is 'mean' or 'sum'.
        """
        log_probs = F.log_softmax(pred, dim=-1)
        # Negative log-probability of the target class for each sample
        nll_loss = -log_probs.gather(dim=-1, index=target.long().unsqueeze(1)).squeeze(1)
        # Average negative log-probability over all classes (uniform target)
        smooth_loss = -log_probs.mean(dim=-1)
        loss = (1 - self.smoothing) * nll_loss + self.smoothing * smooth_loss

        if self.reduction == 'mean':
            return loss.mean()
        if self.reduction == 'sum':
            return loss.sum()
        return loss

In [9]:
def get_albert_embedding(text):
    """
    Encode a text with the shared ALBERT model and return its first-token vector.

    Args:
        text (str): Input text; it is truncated to at most 256 tokens.

    Returns:
        numpy.ndarray: The last-hidden-state vector at token position 0, with the
        batch dimension kept (callers flatten it).
    """
    encoded = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=256)
    encoded = encoded.to(device)

    # Inference only, so no autograd graph is built
    with torch.no_grad():
        hidden_states = albert_model(**encoded).last_hidden_state

    # Position 0 holds the [CLS] token
    first_token_vectors = hidden_states[:, 0, :]
    return first_token_vectors.cpu().numpy()

def get_word_embedding(text, embedding_dict):
    """
    Generate word embeddings for a given text using a pre-trained embedding dictionary.

    The text is lower-cased and split on whitespace; tokens missing from the
    dictionary are ignored and the remaining vectors are averaged.

    Args:
        text (str): Input text.
        embedding_dict (dict): Pre-trained word embedding dictionary (e.g., GloVe or FastText)
            mapping words to tensors.

    Returns:
        numpy.ndarray: The average word embedding vector for the input text. When no
        token is found, a float32 zero vector is returned whose length matches the
        vectors in ``embedding_dict`` (300 if the dictionary is empty or ``None``).
    """
    # Split text into words and fetch embeddings for each word if available
    words = text.lower().split()
    vectors = [embedding_dict[word] for word in words if word in embedding_dict]

    if vectors:
        return torch.mean(torch.stack(vectors), dim=0).cpu().numpy()

    # No known token: fall back to zeros sized like the dictionary's own vectors
    # instead of assuming 300 dimensions.
    dimension = 300
    if embedding_dict:
        dimension = int(next(iter(embedding_dict.values())).shape[-1])
    return np.zeros(dimension, dtype=np.float32)

def create_attention_based_embedding(albert_emb, additional_emb):
    """
    Fuse an ALBERT vector and an additional vector with equal softmax weights.

    The two weights are the softmax of ``[0.5, 0.5]``; they are fixed, not learned,
    because nothing outside this function can update them. When the vectors differ
    in length, the additional vector is zero-padded or truncated to the ALBERT
    length. This replaces a randomly initialised linear projection that was
    re-created on every call and therefore made the result non-deterministic.

    Args:
        albert_emb (torch.Tensor): 1-D ALBERT embedding vector.
        additional_emb (torch.Tensor): 1-D additional embedding vector (e.g., GloVe or FastText).

    Returns:
        torch.Tensor: Weighted sum of the two vectors, with the length of ``albert_emb``.
    """
    target_dim = albert_emb.shape[0]
    extra_dim = additional_emb.shape[0]

    # Bring the additional vector to the ALBERT length deterministically
    if extra_dim > target_dim:
        additional_emb = additional_emb[:target_dim]
    elif extra_dim < target_dim:
        additional_emb = F.pad(additional_emb, (0, target_dim - extra_dim))

    attention_scores = F.softmax(torch.tensor([0.5, 0.5], device=albert_emb.device), dim=0)

    # Weighted sum of the two aligned vectors
    return attention_scores[0] * albert_emb + attention_scores[1] * additional_emb


def create_combined_embedding(text, embedding_type=None, _glove_model=None, _fasttext_model=None):
    """
    Build the feature vector for one text: ALBERT alone, or ALBERT concatenated
    with an averaged GloVe/FastText vector resized to the ALBERT length.

    Args:
        text (str): Input text.
        embedding_type (str | None): "glove" or "fasttext" to append that word
            embedding; any other value yields the ALBERT vector only.
        _glove_model (dict | None): GloVe lookup, used when embedding_type is "glove".
        _fasttext_model (dict | None): FastText lookup, used when embedding_type is "fasttext".

    Returns:
        tuple: Combined embedding as a numpy array and its size.
    """
    albert_flat = get_albert_embedding(text).flatten()

    # Choose the word-level source, if one was requested
    if embedding_type in ("glove", "fasttext"):
        word_source = _glove_model if embedding_type == "glove" else _fasttext_model
        word_vector = get_word_embedding(text, word_source)
    else:
        word_vector = np.array([])

    albert_tensor = torch.tensor(albert_flat, dtype=torch.float32).to(device)

    if word_vector.size == 0:
        # Nothing to append
        fused = albert_tensor
    else:
        word_tensor = torch.tensor(word_vector, dtype=torch.float32).to(device)
        length_gap = albert_tensor.size(0) - word_tensor.size(0)
        if length_gap < 0:
            # Word vector is longer: cut it down to the ALBERT length
            word_tensor = word_tensor[:albert_tensor.size(0)]
        elif length_gap > 0:
            # Word vector is shorter: right-pad with zeros
            word_tensor = F.pad(word_tensor, (0, length_gap))
        fused = torch.cat([albert_tensor, word_tensor], dim=0)

    return fused.cpu().numpy(), fused.size(0)

In [10]:
def train_and_save_model(X_train_tensor, y_train_organization_tensor, 
                         y_train_word_choice_tensor, y_train_sentence_fluency_tensor, 
                         y_train_conventions_tensor, input_shape, save_dir, 
                         embedding_type=None, epochs=10, batch_size=6, learning_rate=1e-4):
    """
    Train a multi-task model for argumentative writing assessment and save the trained model and metadata.

    Args:
        X_train_tensor: Training features, one row per essay.
        y_train_organization_tensor, y_train_word_choice_tensor,
        y_train_sentence_fluency_tensor, y_train_conventions_tensor:
            0-based integer class labels for each task.
        input_shape (int): Number of features per sample.
        save_dir (str): Directory for the checkpoint and embedding-size file
            (created if missing).
        embedding_type (str | None): Used only to name the output files
            ('albert' when None).
        epochs (int): Number of passes over the training data.
        batch_size (int): Mini-batch size.
        learning_rate (float): Adam learning rate.

    Returns:
        - model_file_path (str): Path to the saved model file.
        - train_losses (list): List of average training losses per epoch.

    Raises:
        ValueError: If any label is negative, or if no training batch can be formed.
    """
    task_labels = {
        'organization': y_train_organization_tensor,
        'word_choice': y_train_word_choice_tensor,
        'sentence_fluency': y_train_sentence_fluency_tensor,
        'conventions': y_train_conventions_tensor,
    }

    # Fail early with a readable message instead of an opaque error inside the loss
    for task, labels in task_labels.items():
        if labels.min().item() < 0:
            raise ValueError(f"Negative class label found for task '{task}'; labels must be 0-based.")

    # Number of classes per task is the largest training label + 1
    num_classes = {task: int(labels.max().item() + 1) for task, labels in task_labels.items()}

    print("Number of classes:", num_classes)

    # Initialize the multi-task model and optimizer
    model = MultiTaskArgumentative(input_shape, num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

    # BatchNorm1d cannot process a batch of one sample in training mode, so a
    # trailing single-sample batch is dropped; otherwise every sample is used.
    num_samples = X_train_tensor.shape[0]
    drop_trailing_singleton = batch_size > 1 and num_samples % batch_size == 1

    train_loader = DataLoader(
        TensorDataset(
            X_train_tensor,
            y_train_organization_tensor, y_train_word_choice_tensor,
            y_train_sentence_fluency_tensor, y_train_conventions_tensor),
        batch_size=batch_size, shuffle=True, drop_last=drop_trailing_singleton
    )
    if len(train_loader) == 0:
        raise ValueError("Not enough training samples to form a batch.")

    # The criterion is stateless, so build it once rather than once per batch
    criterion = nn.CrossEntropyLoss()
    train_losses = []  # Average loss of each epoch

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0

        for X_batch, y_organization_batch, y_word_choice_batch, y_sentence_fluency_batch, y_conventions_batch in train_loader:
            # Move data to the specified device (e.g., GPU or CPU)
            X_batch = X_batch.to(device)
            y_organization_batch = y_organization_batch.to(device)
            y_word_choice_batch = y_word_choice_batch.to(device)
            y_sentence_fluency_batch = y_sentence_fluency_batch.to(device)
            y_conventions_batch = y_conventions_batch.to(device)

            optimizer.zero_grad()

            # Forward pass: logits for each task
            pred_organization, pred_word_choice, pred_sentence_fluency, pred_conventions = model(X_batch)

            loss_organization = criterion(pred_organization, y_organization_batch.long())
            loss_word_choice = criterion(pred_word_choice, y_word_choice_batch.long())
            loss_sentence_fluency = criterion(pred_sentence_fluency, y_sentence_fluency_batch.long())
            loss_conventions = criterion(pred_conventions, y_conventions_batch.long())

            # Total loss is the unweighted sum of the task losses
            total_loss = loss_organization + loss_word_choice + loss_sentence_fluency + loss_conventions

            total_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.5)  # Gradient clipping
            optimizer.step()
            epoch_loss += total_loss.item()

        # Average over the batches actually processed this epoch
        avg_epoch_loss = epoch_loss / len(train_loader)
        train_losses.append(avg_epoch_loss)
        print(f"Epoch {epoch + 1}/{epochs}, Total Epoch Loss: {avg_epoch_loss:.4f}")

    # Save the trained model and embedding size
    model_filename = f"albert_argumentative_model_{embedding_type or 'albert'}.pth"
    embedding_size_filename = f"albert_argumentative_embedding_size_{embedding_type or 'albert'}.npy"
    model_file_path = os.path.join(save_dir, model_filename)

    # Do not rely on another cell having created the output directory
    os.makedirs(save_dir, exist_ok=True)

    torch.save({
        'model_state_dict': model.state_dict(),  # Model weights
        'input_shape': input_shape,             # Needed to rebuild the model
        'num_classes': num_classes              # Needed to rebuild the heads
    }, model_file_path)
    np.save(os.path.join(save_dir, embedding_size_filename), input_shape)  # Save embedding size

    return model_file_path, train_losses

In [11]:
def evaluate_model(model_path, y_test_organization, y_test_word_choice, y_test_sentence_fluency,
                   y_test_conventions, save_dir, model_name, X_test=None):
    """
    Evaluate the trained MultiTaskArgumentative model on test data and calculate performance metrics.

    Args:
        model_path (str): Checkpoint written by train_and_save_model.
        y_test_organization, y_test_word_choice, y_test_sentence_fluency,
        y_test_conventions: 0-based integer label tensors for the test set.
        save_dir (str): Currently unused; kept for interface compatibility.
        model_name (str): Currently unused; kept for interface compatibility.
        X_test (torch.Tensor | None): Test features. When omitted, the notebook-level
            ``X_test_tensor`` is used, which is what this function always read before.

    Returns:
        - Tuple of kappa scores for all tasks:
            (kappa_organization, kappa_word_choice, kappa_sentence_fluency, kappa_conventions).

    Raises:
        ValueError: If the number of feature rows differs from the number of labels.
    """
    if X_test is None:
        # Backward compatibility for callers that do not pass the features explicitly
        X_test = X_test_tensor

    task_targets = (y_test_organization, y_test_word_choice,
                    y_test_sentence_fluency, y_test_conventions)
    for target in task_targets:
        # Guards against evaluating against a stale feature tensor
        if X_test.shape[0] != target.shape[0]:
            raise ValueError(
                f"Feature/label size mismatch: {X_test.shape[0]} rows vs {target.shape[0]} labels."
            )

    # The checkpoint holds only tensors and plain Python values, so it can be read
    # with the restricted unpickler.
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
    input_shape = checkpoint['input_shape']
    print(input_shape)  # Debug: Print input shape
    num_classes = checkpoint['num_classes']
    print(num_classes)  # Debug: Print number of classes

    # Rebuild the model and restore its weights
    model = MultiTaskArgumentative(input_shape, num_classes).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    with torch.no_grad():
        task_logits = model(X_test.to(device))

    # Quadratic-weighted Cohen's kappa between the labels and the arg-max class, per task
    kappas = []
    for logits, target in zip(task_logits, task_targets):
        predicted = np.argmax(logits.cpu().numpy(), axis=1).astype(int)
        kappas.append(cohen_kappa_score(target.cpu().numpy(), predicted, weights='quadratic'))

    kappa_organization, kappa_word_choice, kappa_sentence_fluency, kappa_conventions = kappas

    # Print evaluation results for all tasks
    print(f"Kappa for Organization: {kappa_organization:.5f}")
    print(f"Kappa for Word Choice: {kappa_word_choice:.5f}")
    print(f"Kappa for Sentence Fluency: {kappa_sentence_fluency:.5f}")
    print(f"Kappa for Conventions: {kappa_conventions:.5f}")

    return kappa_organization, kappa_word_choice, kappa_sentence_fluency, kappa_conventions

In [12]:
# Experiment driver: for each embedding configuration, rebuild the essay features,
# split them, train a fresh multi-task model, evaluate it, and record the kappas.
# None means ALBERT only; "glove"/"fasttext" append that word embedding.
embedding_types = [None, "glove", "fasttext"]
all_kappa_scores = []  # One [organization, word_choice, sentence_fluency, conventions] row per embedding type

for embedding_type in embedding_types:
    # Set model name based on the embedding type for clear tracking in logs and saved files
    model_name = embedding_type or 'albert'  # Default to 'albert' if embedding_type is None
    
    # Generate embeddings for each essay in the dataset using the specified embedding type.
    # Each call returns (vector, size); the ALBERT encoder is run again for every
    # embedding type, and df['embeddings'] is overwritten on every iteration.
    embeddings_and_sizes = df['essay'].apply(lambda x: create_combined_embedding(x, embedding_type, glove_model, fasttext_model))
    df['embeddings'], embedding_sizes = zip(*embeddings_and_sizes)  # Split embeddings and their sizes
    
    # NOTE(review): embedding_sizes is not used again in this cell — confirm whether a later cell needs it.
    embedding_sizes = np.array(embedding_sizes)  # Convert sizes to a NumPy array for consistency

    # Split features and all four label arrays together so rows stay aligned.
    # The fixed random_state gives every embedding type the same train/test partition.
    X_train, X_test, y_train_organization, y_test_organization, \
    y_train_word_choice, y_test_word_choice, y_train_sentence_fluency, y_test_sentence_fluency, \
    y_train_conventions, y_test_conventions = train_test_split(
        np.stack(df['embeddings'].values),  # Stack embeddings into a NumPy array
        df['organization'].values,         # Target labels for 'organization'
        df['word_choice'].values,          # Target labels for 'word choice'
        df['sentence_fluency'].values,     # Target labels for 'sentence fluency'
        df['conventions'].values,          # Target labels for 'conventions'
        test_size=0.2,                     # Use 20% of data for testing
        random_state=42                    # Ensure reproducibility
    )

    # Convert data into PyTorch tensors for training and testing.
    # Subtracting 1 turns each score into a 0-based class index.
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_organization_tensor = torch.tensor(y_train_organization, dtype=torch.long) - 1  # Adjust to 0-based index
    y_train_word_choice_tensor = torch.tensor(y_train_word_choice, dtype=torch.long) - 1
    y_train_sentence_fluency_tensor = torch.tensor(y_train_sentence_fluency, dtype=torch.long) - 1
    y_train_conventions_tensor = torch.tensor(y_train_conventions, dtype=torch.long) - 1

    # NOTE: evaluate_model reads X_test_tensor from notebook scope when it is not
    # given the features explicitly, so this name must be reassigned here before
    # every evaluate_model call.
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_organization_tensor = torch.tensor(y_test_organization, dtype=torch.long) - 1
    y_test_word_choice_tensor = torch.tensor(y_test_word_choice, dtype=torch.long) - 1
    y_test_sentence_fluency_tensor = torch.tensor(y_test_sentence_fluency, dtype=torch.long) - 1
    y_test_conventions_tensor = torch.tensor(y_test_conventions, dtype=torch.long) - 1

    # Train the model for the current embedding type and save the trained model.
    # The returned per-epoch losses are discarded here.
    print(f"\nTraining model for embedding type: {model_name}")
    model_path, _ = train_and_save_model(
        X_train_tensor,
        y_train_organization_tensor,
        y_train_word_choice_tensor,
        y_train_sentence_fluency_tensor,
        y_train_conventions_tensor,
        input_shape=X_train_tensor.shape[1],  # Input shape derived from embedding size
        save_dir=SAVE_DIR,                    # Directory to save the trained model
        embedding_type=embedding_type,        # Specify embedding type for naming
        epochs=10,                            # Number of training epochs
        batch_size=6,                         # Batch size for training
        learning_rate=1e-3                    # Overrides the function's default of 1e-4
    )

    # Evaluate the trained model on the test set and calculate Kappa scores
    kappa_organization, kappa_word_choice, kappa_sentence_fluency, kappa_conventions = evaluate_model(
        model_path,
        y_test_organization_tensor, y_test_word_choice_tensor, 
        y_test_sentence_fluency_tensor, y_test_conventions_tensor, SAVE_DIR, model_name
    )

    # Append the Kappa scores for the current model to the tracking list
    # (rows follow the order of embedding_types)
    all_kappa_scores.append([
        kappa_organization, 
        kappa_word_choice, 
        kappa_sentence_fluency, 
        kappa_conventions
    ])


Training model for embedding type: albert
Number of classes: {'organization': 16, 'word_choice': 16, 'sentence_fluency': 15, 'conventions': 14}
Epoch 1/10, Total Epoch Loss: 6.8648
Epoch 2/10, Total Epoch Loss: 6.0153
Epoch 3/10, Total Epoch Loss: 5.8470
Epoch 4/10, Total Epoch Loss: 5.8003
Epoch 5/10, Total Epoch Loss: 5.6624
Epoch 6/10, Total Epoch Loss: 5.6054
Epoch 7/10, Total Epoch Loss: 5.5796
Epoch 8/10, Total Epoch Loss: 5.4750
Epoch 9/10, Total Epoch Loss: 5.5534
Epoch 10/10, Total Epoch Loss: 5.4468
768
{'organization': 16, 'word_choice': 16, 'sentence_fluency': 15, 'conventions': 14}
Kappa for Organization: 0.81912
Kappa for Word Choice: 0.83570
Kappa for Sentence Fluency: 0.81907
Kappa for Conventions: 0.80901


  checkpoint = torch.load(model_path, map_location=device)



Training model for embedding type: glove
Number of classes: {'organization': 16, 'word_choice': 16, 'sentence_fluency': 15, 'conventions': 14}
Epoch 1/10, Total Epoch Loss: 6.8878
Epoch 2/10, Total Epoch Loss: 5.9994
Epoch 3/10, Total Epoch Loss: 5.8643
Epoch 4/10, Total Epoch Loss: 5.7843
Epoch 5/10, Total Epoch Loss: 5.6777
Epoch 6/10, Total Epoch Loss: 5.6068
Epoch 7/10, Total Epoch Loss: 5.5005
Epoch 8/10, Total Epoch Loss: 5.6156
Epoch 9/10, Total Epoch Loss: 5.5211
Epoch 10/10, Total Epoch Loss: 5.4536
1536
{'organization': 16, 'word_choice': 16, 'sentence_fluency': 15, 'conventions': 14}
Kappa for Organization: 0.82719
Kappa for Word Choice: 0.84765
Kappa for Sentence Fluency: 0.83533
Kappa for Conventions: 0.82068


  checkpoint = torch.load(model_path, map_location=device)



Training model for embedding type: fasttext
Number of classes: {'organization': 16, 'word_choice': 16, 'sentence_fluency': 15, 'conventions': 14}
Epoch 1/10, Total Epoch Loss: 6.8836
Epoch 2/10, Total Epoch Loss: 6.0192
Epoch 3/10, Total Epoch Loss: 5.8450
Epoch 4/10, Total Epoch Loss: 5.7370
Epoch 5/10, Total Epoch Loss: 5.6703
Epoch 6/10, Total Epoch Loss: 5.6177
Epoch 7/10, Total Epoch Loss: 5.5794
Epoch 8/10, Total Epoch Loss: 5.5525
Epoch 9/10, Total Epoch Loss: 5.4643
Epoch 10/10, Total Epoch Loss: 5.3614
1536
{'organization': 16, 'word_choice': 16, 'sentence_fluency': 15, 'conventions': 14}
Kappa for Organization: 0.82042
Kappa for Word Choice: 0.84034
Kappa for Sentence Fluency: 0.83988
Kappa for Conventions: 0.83799


  checkpoint = torch.load(model_path, map_location=device)


In [13]:
# Sample essay text kept as a notebook-level string.
# NOTE(review): presumably the input for the single-essay prediction helper
# (testContent) — confirm against the cell that calls it.
content = """
    In “Let there be dark,” Paul Bogard talks about the importance of darkness.
Darkness is essential to humans. Bogard states, “Our bodies need darkness to produce the hormone melatonin, which keeps certain cancers from developing, and our bodies need darkness for sleep, sleep. Sleep disorders have been linked to diabetes, obesity, cardiovascular disease and depression and recent research suggests are main cause of “short sleep” is “long light.” Whether we work at night or simply take our tablets, notebooks and smartphones to bed, there isn’t a place for this much artificial light in our lives.” (Bogard 2). Here, Bogard talks about the importance of darkness to humans. Humans need darkness to sleep in order to be healthy.
Animals also need darkness. Bogard states, “The rest of the world depends on darkness as well, including nocturnal and crepuscular species of birds, insects, mammals, fish and reptiles. Some examples are well known—the 400 species of birds that migrate at night in North America, the sea turtles that come ashore to lay their eggs—and some are not, such as the bats that save American farmers billions in pest control and the moths that pollinate 80% of the world’s flora. Ecological light pollution is like the bulldozer of the night, wrecking habitat and disrupting ecosystems several billion years in the making. Simply put, without darkness, Earth’s ecology would collapse...” (Bogard 2). Here Bogard explains that animals, too, need darkness to survive.
""" 

In [14]:
def testContent(content, embedding_type=None, SAVE_DIR=None, glove_model=None, fasttext_model=None):
    """
    Test the model on a single piece of content by generating predictions for multiple attributes.

    Returns:
        - tuple: Predicted scores for the following attributes:
        (organization_score, word_choice_score, sentence_fluency_score, conventions_score)
        where each score is an integer representing the predicted class.
    """
    # Generate a combined embedding for the given content
    embedding, actual_embedding_size = create_combined_embedding(
        content,
        embedding_type=embedding_type,
        _glove_model=glove_model if embedding_type == "glove" else None,
        _fasttext_model=fasttext_model if embedding_type == "fasttext" else None
    )

    # Convert the embedding to a PyTorch tensor and add a batch dimension
    embedding_tensor = torch.tensor(embedding, dtype=torch.float32).to(device).unsqueeze(0)

    # Define file paths for the saved model and embedding size metadata
    embedding_size_filename = f"albert_argumentative_embedding_size_{embedding_type or 'albert'}.npy"
    model_filename = f"albert_argumentative_model_{embedding_type or 'albert'}.pth"
    model_path = os.path.join(SAVE_DIR, model_filename)
    embedding_size_path = os.path.join(SAVE_DIR, embedding_size_filename)

    # Load the expected embedding size
    expected_embedding_size = int(np.load(embedding_size_path))

    # Load the model checkpoint
    checkpoint = torch.load(model_path, map_location=device)
    input_shape = checkpoint['input_shape']  # Input size of the model
    num_classes = checkpoint['num_classes']  # Number of classes for each task

    # Initialize the model and load the saved weights
    model = MultiTaskArgumentative(input_shape, num_classes).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()  # Set the model to evaluation mode

    # Resize the embedding if its size exceeds the expected size
    embedding_resized = embedding_tensor[:, :expected_embedding_size]

    # Generate predictions for all attributes
    with torch.no_grad():  # Disable gradient calculation for inference
        pred_organization, pred_word_choice, pred_sentence_fluency, pred_conventions = model(embedding_resized)

        # Extract predicted class indices (highest probability) and adjust to 1-based indexing
        organization_score = torch.argmax(pred_organization, dim=1).item() + 1
        word_choice_score = torch.argmax(pred_word_choice, dim=1).item() + 1
        sentence_fluency_score = torch.argmax(pred_sentence_fluency, dim=1).item() + 1
        conventions_score = torch.argmax(pred_conventions, dim=1).item() + 1

    # Return predictions as a tuple of scores
    return organization_score, word_choice_score, sentence_fluency_score, conventions_score

In [15]:
# Evaluate the sample essay with each embedding variant (ALBERT, ALBERT + GloVe,
# ALBERT + FastText) and collect the predictions in `results`, keyed by a readable
# variant name. Each entry holds the four attribute scores returned by testContent:
# Organization, Word Choice, Sentence Fluency and Conventions.

# Readable label for every embedding_type value passed to testContent. An unknown
# type fails with a KeyError here instead of silently reusing a previous label.
embedding_type_names = {
    None: "ALBERT",
    "glove": "ALBERT + GloVe",
    "fasttext": "ALBERT + FastText",
}

# Define the embedding types to evaluate
embedding_types = [None, "glove", "fasttext"]

# Scores per embedding variant
results = {}

for embedding_type in embedding_types:
    embedding_type_name = embedding_type_names[embedding_type]

    # Generate predictions for the sample content using the current embedding type
    organization_score, word_choice_score, sentence_fluency_score, conventions_score = testContent(
        content=content,                # Content to evaluate
        embedding_type=embedding_type,  # Current embedding type
        SAVE_DIR=SAVE_DIR,              # Directory containing model files
        glove_model=glove_model,        # GloVe embeddings (if applicable)
        fasttext_model=fasttext_model   # FastText embeddings (if applicable)
    )

    # testContent returns plain ints, so the float conversion cannot fail;
    # the values are stored as floats to keep the two-decimal report format below.
    results[embedding_type_name] = {
        "Organization Score": float(organization_score),
        "Word Choice Score": float(word_choice_score),
        "Sentence Fluency Score": float(sentence_fluency_score),
        "Conventions Score": float(conventions_score)
    }

# Display the results for each embedding type (dicts preserve insertion order,
# so the attributes print in the order they were stored above)
for embedding_name, result in results.items():
    print(f"Sample Essay Scores for {embedding_name}:")
    for score_name, score_value in result.items():
        print(f"  {score_name}: {score_value:.2f}")

  checkpoint = torch.load(model_path, map_location=device)


Sample Essay Scores for ALBERT:
  Organization Score: 5.00
  Word Choice Score: 5.00
  Sentence Fluency Score: 5.00
  Conventions Score: 5.00
Sample Essay Scores for ALBERT + GloVe:
  Organization Score: 4.00
  Word Choice Score: 4.00
  Sentence Fluency Score: 6.00
  Conventions Score: 4.00
Sample Essay Scores for ALBERT + FastText:
  Organization Score: 5.00
  Word Choice Score: 5.00
  Sentence Fluency Score: 5.00
  Conventions Score: 5.00
