In [4]:
import os
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score
from gensim.models import KeyedVectors
# from albert import *
from sklearn.metrics import mean_squared_error, f1_score, accuracy_score, cohen_kappa_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [6]:
# Constants
BASE_DIR = '../'  # Navigate one level up to access directories outside of albert_ira
DATASET_DIR = os.path.join(BASE_DIR, 'dataset')
SAVE_DIR = os.path.join(BASE_DIR, 'result')
MODEL_NAME = "albert-base-v2"
GLOVE_PATH = os.path.join(BASE_DIR, 'word_embeddings/glove.6B.300d.txt')
FASTTEXT_PATH = os.path.join(BASE_DIR, 'word_embeddings/wiki.en.vec')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
albert_model = AutoModel.from_pretrained(MODEL_NAME).to(device)

directories = [BASE_DIR, DATASET_DIR, SAVE_DIR, os.path.dirname(GLOVE_PATH), os.path.dirname(FASTTEXT_PATH)]

for directory in directories:
    if not os.path.exists(directory):
        os.makedirs(directory)
        print(f"Directory created: {directory}")
    else:
        print(f"Directory already exists: {directory}")

2024-11-15 08:51:53.078067: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731646313.091637 3875798 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731646313.095911 3875798 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-15 08:51:53.109174: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Directory already exists: ../
Directory already exists: ../dataset
Directory already exists: ../result
Directory already exists: ../word_embeddings
Directory already exists: ../word_embeddings


In [7]:
def load_glove_model(glove_file_path):
    """Load GloVe embeddings from file into a dictionary."""
    embedding_dict = {}
    with open(glove_file_path, 'r', encoding="utf-8") as f:
        for line in f:
            values = line.split()
            word = values[0]
            vector = torch.tensor(np.asarray(values[1:], dtype='float32'))
            embedding_dict[word] = vector.to(device)  # Move to device if necessary
    return embedding_dict

def load_fasttext_model(fasttext_file_path):
    """Load FastText embeddings from file into a dictionary."""
    model = KeyedVectors.load_word2vec_format(fasttext_file_path, binary=False)
    return {word: torch.tensor(model[word]).to(device) for word in model.index_to_key}

# Load embeddings
glove_model = load_glove_model(GLOVE_PATH)
fasttext_model = load_fasttext_model(FASTTEXT_PATH)

In [8]:
# Load and preprocess the dataset
df = pd.read_csv('processed_essay_dataset.csv', sep=',', encoding='ISO-8859-1')
df = df.dropna(subset=['normalized_score','content', 'organization', 'word_choice', 'sentence_fluency', 'conventions'])  # Ensure all required columns are present
df.fillna(0, inplace=True)

q1, q3 = df['normalized_score'].quantile([0.25, 0.75])
df['quality_label'] = pd.cut(df['normalized_score'], bins=[-1, q1, q3, 100], labels=[0, 1, 2]).astype(int)
df['quality_label'] = df['quality_label'].map({0: 0, 1: 1, 2: 2})

# Map essay types as before and filter
df['essay_type'] = df['essay_type'].map({'argumentative': 0, 'dependent': 1, 'narrative': 2})
df = df[df['essay_type'].isin([0, 1])]

df.head()


Unnamed: 0,essay_id,essay_set,essay,essay_type,domain1_score,content,organization,word_choice,sentence_fluency,conventions,language,prompt_adherence,narrativity,style,voice,normalized_score,quality_label
0,1,1,"Dear local newspaper, I think effects computer...",0,8.0,4.0,3.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,60.0,1
1,2,1,"Dear @CAPS1 @CAPS2, I believe that using compu...",0,9.0,4.0,4.0,4.0,3.0,4.0,0.0,0.0,0.0,0.0,0.0,70.0,2
2,3,1,"Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl...",0,7.0,3.0,3.0,3.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,50.0,1
3,4,1,"Dear Local Newspaper, @CAPS1 I have found that...",0,10.0,5.0,4.0,5.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,80.0,2
4,5,1,"Dear @LOCATION1, I know having computers has a...",0,8.0,4.0,3.0,4.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,60.0,1


In [9]:
class MultiTaskModel(nn.Module):
    """A multitask model for predicting scores for multiple features."""
    def __init__(self, input_shape):
        super(MultiTaskModel, self).__init__()
        # Shared layers
        self.fc1 = nn.Linear(input_shape, 256)
        self.bn1 = nn.BatchNorm1d(256)  # Batch normalization
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(0.5)

        # # Create separate regression heads for each feature
        # self.regression_head = nn.Linear(128, 1)

        self.content_head = nn.Linear(128, 1)
        self.organization_head = nn.Linear(128, 1)
        self.word_choice_head = nn.Linear(128, 1)
        self.sentence_fluency_head = nn.Linear(128, 1)
        self.conventions_head = nn.Linear(128, 1)

        # Optional task uncertainty parameter
        self.task_uncertainty = nn.Parameter(torch.tensor([0.0, 0.0]), requires_grad=True)     

    def forward(self, x):
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)
        
        # score_output = self.regression_head(x)

        # Output for each feature
        content_output = self.content_head(x)
        organization_output = self.organization_head(x)
        word_choice_output = self.word_choice_head(x)
        sentence_fluency_output = self.sentence_fluency_head(x)
        conventions_output = self.conventions_head(x)

        return content_output, organization_output, word_choice_output, \
            sentence_fluency_output, conventions_output

    def compute_uncertainty_loss(self, loss_content, loss_organization, loss_word_choice, loss_sentence_fluency, loss_conventions):
        """Compute the weighted uncertainty loss for each feature."""
        content_precision = torch.exp(-self.task_uncertainty[0])
        organization_precision = torch.exp(-self.task_uncertainty[1])
        word_choice_precision = torch.exp(-self.task_uncertainty[1])
        sentence_fluency_precision = torch.exp(-self.task_uncertainty[1])
        conventions_precision = torch.exp(-self.task_uncertainty[1])

        # Weighted loss calculation
        loss = (content_precision * loss_content + self.task_uncertainty[0]) + \
               (organization_precision * loss_organization + self.task_uncertainty[1]) + \
                    (word_choice_precision * loss_word_choice + self.task_uncertainty[1]) + \
                        (sentence_fluency_precision * loss_sentence_fluency + self.task_uncertainty[1]) + \
                            (conventions_precision * loss_conventions + self.task_uncertainty[1])
        
        return loss

    def compute_loss(self, pred_content, pred_organization, pred_word_choice, pred_sentence_fluency, pred_conventions,
                    y_content, y_organization, y_word_choice, y_sentence_fluency, y_conventions) :

        # mse_loss = nn.MSELoss()(pred_score, y_score)
        # cross_entropy_loss_quality = nn.CrossEntropyLoss()(pred_quality, y_quality)
        # cross_entropy_loss_essay_type = nn.CrossEntropyLoss()(pred_essay_type, y_essay_type)

        # MSE loss for the additional attributes
        mse_loss_content = nn.MSELoss()(pred_content, y_content)
        mse_loss_organization = nn.MSELoss()(pred_organization, y_organization)
        mse_loss_word_choice = nn.MSELoss()(pred_word_choice, y_word_choice)
        mse_loss_sentence_fluency = nn.MSELoss()(pred_sentence_fluency, y_sentence_fluency)
        mse_loss_conventions = nn.MSELoss()(pred_conventions, y_conventions)
        # mse_loss_language = nn.MSELoss()(pred_language, y_language)
        # mse_loss_prompt_adherence = nn.MSELoss()(pred_prompt_adherence, y_prompt_adherence)
        # mse_loss_narrativity = nn.MSELoss()(pred_narrativity, y_narrativity)
        # mse_loss_style = nn.MSELoss()(pred_style, y_style)
        # mse_loss_voice = nn.MSELoss()(pred_voice, y_voice)

        # Sum all the losses for total loss
        total_loss = mse_loss_content + mse_loss_organization + mse_loss_word_choice + \
                     mse_loss_sentence_fluency + mse_loss_conventions
        
        return total_loss


class LabelSmoothingCrossEntropy(nn.Module):
    def __init__(self, smoothing=0.1):
        super(LabelSmoothingCrossEntropy, self).__init__()
        self.smoothing = smoothing

    def forward(self, pred, target):
        log_probs = F.log_softmax(pred, dim=-1)
        nll_loss = -log_probs.gather(dim=-1, index=target.unsqueeze(1))
        nll_loss = nll_loss.squeeze(1)
        smooth_loss = -log_probs.mean(dim=-1)
        return (1 - self.smoothing) * nll_loss + self.smoothing * smooth_loss

In [11]:
def get_albert_embedding(text):
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=256).to(device)
    with torch.no_grad():
        outputs = albert_model(**inputs)
    return outputs.last_hidden_state[:, 0, :].cpu().numpy()

def get_word_embedding(text, embedding_dict):
    words = text.lower().split()
    vectors = [embedding_dict[word] for word in words if word in embedding_dict]
    if vectors:
        return torch.mean(torch.stack(vectors), dim=0).cpu().numpy()
    return np.zeros(300)

# Create attention-based embedding fusion
def create_attention_based_embedding(albert_emb, additional_emb):
    if albert_emb.shape != additional_emb.shape:
        additional_emb = torch.nn.Linear(additional_emb.shape[0], albert_emb.shape[0]).to(albert_emb.device)(additional_emb)
    combined_emb = torch.cat([albert_emb.unsqueeze(0), additional_emb.unsqueeze(0)], dim=0)
    attention_weights = torch.nn.Parameter(torch.tensor([0.5, 0.5], device=albert_emb.device), requires_grad=True)
    attention_scores = F.softmax(attention_weights, dim=0)
    fused_embedding = attention_scores[0] * albert_emb + attention_scores[1] * additional_emb
    return fused_embedding

def create_combined_embedding(text, embedding_type=None, _glove_model=None, _fasttext_model=None):
    albert_emb = get_albert_embedding(text).flatten()

    if embedding_type == "glove":
        additional_emb = get_word_embedding(text, _glove_model)
    elif embedding_type == "fasttext":
        additional_emb = get_word_embedding(text, _fasttext_model)
    else:
        additional_emb = np.array([])

    albert_emb_tensor = torch.tensor(albert_emb, dtype=torch.float32).to(device)

    if additional_emb.size != 0:
        additional_emb_tensor = torch.tensor(additional_emb, dtype=torch.float32).to(device)
        
        # Ensure both embeddings have the same size by truncating or padding
        if additional_emb_tensor.size(0) > albert_emb_tensor.size(0):
            additional_emb_tensor = additional_emb_tensor[:albert_emb_tensor.size(0)]  # Truncate
        elif additional_emb_tensor.size(0) < albert_emb_tensor.size(0):
            padding_size = albert_emb_tensor.size(0) - additional_emb_tensor.size(0)
            additional_emb_tensor = F.pad(additional_emb_tensor, (0, padding_size))  # Pad with zeros

        combined_emb = torch.cat([albert_emb_tensor, additional_emb_tensor], dim=0)
    else:
        combined_emb = albert_emb_tensor

    return combined_emb.cpu().numpy(), combined_emb.size(0)


In [12]:
def train_and_save_model(X_train_tensor, y_train_content_tensor, y_train_organization_tensor, 
                         y_train_word_choice_tensor, y_train_sentence_fluency_tensor, 
                         y_train_conventions_tensor, input_shape, save_dir, 
                         embedding_type=None, epochs=10, batch_size=8, learning_rate=1e-4):

    model = MultiTaskModel(input_shape).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    
    train_loader = DataLoader(TensorDataset(
            X_train_tensor, y_train_content_tensor, 
            y_train_organization_tensor, y_train_word_choice_tensor, 
            y_train_sentence_fluency_tensor, y_train_conventions_tensor), 
            batch_size=batch_size, shuffle=True
        )
    
    train_losses = []  # Initialize to store the loss for each epoch

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0

        for X_batch, y_content_batch, y_organization_batch, y_word_choice_batch, y_sentence_fluency_batch, y_conventions_batch in train_loader:
            # Move data to device
            X_batch = X_batch.to(device)
            y_content_batch = y_content_batch.to(device)
            y_organization_batch = y_organization_batch.to(device)
            y_word_choice_batch = y_word_choice_batch.to(device)
            y_sentence_fluency_batch = y_sentence_fluency_batch.to(device)
            y_conventions_batch = y_conventions_batch.to(device)
            
            optimizer.zero_grad()
            
            # Get model predictions
            pred_content, pred_organization, pred_word_choice, pred_sentence_fluency, pred_conventions = model(X_batch)

            # Compute the losses
            mse_loss_content = nn.MSELoss()(pred_content, y_content_batch)
            mse_loss_organization = nn.MSELoss()(pred_organization, y_organization_batch)
            mse_loss_word_choice = nn.MSELoss()(pred_word_choice, y_word_choice_batch)
            mse_loss_sentence_fluency = nn.MSELoss()(pred_sentence_fluency, y_sentence_fluency_batch)
            mse_loss_conventions = nn.MSELoss()(pred_conventions, y_conventions_batch)

            # Total loss
            total_loss = mse_loss_content + mse_loss_organization + mse_loss_word_choice + \
                         mse_loss_sentence_fluency + mse_loss_conventions

            total_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.5)
            optimizer.step()
            epoch_loss += total_loss.item()

        avg_epoch_loss = epoch_loss / len(train_loader)
        train_losses.append(avg_epoch_loss)  # Append the average epoch loss to train_losses
        print(f"Epoch {epoch + 1}/{epochs}, Total Epoch Loss: {avg_epoch_loss:.4f}")

    # Save the model
    model_filename = f"albert6_model_{embedding_type or 'albert'}.pth"
    embedding_size_filename = f"albert6_embedding_size_{embedding_type or 'albert'}.npy"
    torch.save(model.state_dict(), os.path.join(save_dir, model_filename))
    np.save(os.path.join(save_dir, embedding_size_filename), input_shape)
    
    return os.path.join(save_dir, model_filename), train_losses  # Return model_path and train_losses


In [13]:
def evaluate_model(model_path, y_test_content, y_test_organization, y_test_word_choice, y_test_sentence_fluency,
                   y_test_conventions, save_dir, model_name):

    # Load the model and move it to the appropriate device
    model = MultiTaskModel(X_test_tensor.shape[1]).to(device)  # Move model to device
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    # Move test tensors to the correct device
    y_test_content = y_test_content.to(device)
    y_test_organization = y_test_organization.to(device)
    y_test_word_choice = y_test_word_choice.to(device)
    y_test_sentence_fluency = y_test_sentence_fluency.to(device)
    y_test_conventions = y_test_conventions.to(device)

    with torch.no_grad():
        # Get model predictions (all outputs)
        pred_content, pred_organization, pred_word_choice, pred_sentence_fluency, pred_conventions = model(X_test_tensor.to(device))

        # Kappa scores for each attribute
        kappa_content = cohen_kappa_score(y_test_content.cpu().numpy(), np.round(pred_content.cpu().numpy().squeeze()).astype(int), weights='quadratic')
        kappa_organization = cohen_kappa_score(y_test_organization.cpu().numpy(), np.round(pred_organization.cpu().numpy().squeeze()).astype(int), weights='quadratic')
        kappa_word_choice = cohen_kappa_score(y_test_word_choice.cpu().numpy(), np.round(pred_word_choice.cpu().numpy().squeeze()).astype(int), weights='quadratic')
        kappa_sentence_fluency = cohen_kappa_score(y_test_sentence_fluency.cpu().numpy(), np.round(pred_sentence_fluency.cpu().numpy().squeeze()).astype(int), weights='quadratic')
        kappa_conventions = cohen_kappa_score(y_test_conventions.cpu().numpy(), np.round(pred_conventions.cpu().numpy().squeeze()).astype(int), weights='quadratic')

        # Print out the evaluation results
        print(f"Kappa for Content: {kappa_content:.5f}")
        print(f"Kappa for Organization: {kappa_organization:.5f}")
        print(f"Kappa for Word Choice: {kappa_word_choice:.5f}")
        print(f"Kappa for Sentence Fluency: {kappa_sentence_fluency:.5f}")
        print(f"Kappa for Conventions: {kappa_conventions:.5f}")

        return kappa_content, kappa_organization, kappa_word_choice, kappa_sentence_fluency, kappa_conventions


In [52]:
# Main training and evaluation loop
embedding_types = [None, "glove", "fasttext"]
all_train_losses = []  # To store training losses for each embedding type
embedding_labels = [embedding_type or 'albert' for embedding_type in embedding_types]
all_kappa_scores = []  # Initialize list to store Kappa scores for each model

for embedding_type in embedding_types:
    # Set model_name based on the embedding type for evaluation
    model_name = embedding_type or 'albert'
    
    embeddings_and_sizes = df['essay'].apply(lambda x: create_combined_embedding(x, embedding_type, glove_model, fasttext_model))
    df['embeddings'], embedding_sizes = zip(*embeddings_and_sizes)

    embedding_sizes = np.array(embedding_sizes)

    X_train, X_test, y_train_content, y_test_content, y_train_organization, y_test_organization, \
    y_train_word_choice, y_test_word_choice, y_train_sentence_fluency, y_test_sentence_fluency, \
    y_train_conventions, y_test_conventions = train_test_split(
        np.stack(df['embeddings'].values),
        df['content'].values,
        df['organization'].values,
        df['word_choice'].values,
        df['sentence_fluency'].values,
        df['conventions'].values,
        test_size=0.2,
        random_state=42
    )

    # Convert each subset to PyTorch tensors for compatibility with the training process
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)  # Reshape for regression target

    y_train_content_tensor = torch.tensor(y_train_content, dtype=torch.float32)
    y_train_organization_tensor = torch.tensor(y_train_organization, dtype=torch.float32)
    y_train_word_choice_tensor = torch.tensor(y_train_word_choice, dtype=torch.float32)
    y_train_sentence_fluency_tensor = torch.tensor(y_train_sentence_fluency, dtype=torch.float32)
    y_train_conventions_tensor = torch.tensor(y_train_conventions, dtype=torch.float32)

    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

    y_test_content_tensor = torch.tensor(y_test_content, dtype=torch.float32)
    y_test_organization_tensor = torch.tensor(y_test_organization, dtype=torch.float32)
    y_test_word_choice_tensor = torch.tensor(y_test_word_choice, dtype=torch.float32)
    y_test_sentence_fluency_tensor = torch.tensor(y_test_sentence_fluency, dtype=torch.float32)
    y_test_conventions_tensor = torch.tensor(y_test_conventions, dtype=torch.float32)

    # Train the model with the current embedding type and save it for future evaluation
    print(f"\nTraining model for embedding type: {model_name}")
    model_path, _ = train_and_save_model(
        X_train_tensor,
        y_train_content_tensor,
        y_train_organization_tensor,
        y_train_word_choice_tensor,
        y_train_sentence_fluency_tensor,
        y_train_conventions_tensor,
        input_shape=X_train_tensor.shape[1],  # Pass as keyword argument
        save_dir=SAVE_DIR,
        embedding_type=embedding_type,
        epochs=10,
        batch_size=8,
        learning_rate=1e-3
    )

    # Evaluate the trained model and collect metrics, particularly Kappa scores for each attribute
    kappa_content, kappa_organization, kappa_word_choice, kappa_sentence_fluency, kappa_conventions = evaluate_model(
        model_path,
        y_test_content_tensor, y_test_organization_tensor, y_test_word_choice_tensor, 
        y_test_sentence_fluency_tensor, y_test_conventions_tensor, SAVE_DIR, model_name
    )


    # Append Kappa scores for each attribute to all_kappa_scores to plot later
    all_kappa_scores.append([kappa_content, kappa_organization, kappa_word_choice, kappa_sentence_fluency, \
        kappa_conventions])

# # Plot a heatmap of Kappa scores for each model variant to visualize attribute-level performance
# plot_kappa_heatmap(
#     all_kappa_scores,
#     model_names=['Albert', 'Albert + Glove', 'Albert + Fasttext'],
#     attribute_names=[
#         'Content', 'Organization', 'Word Choice', 'Sentence Fluency', 'Conventions'
#     ]
# )


Training model for embedding type: albert
Epoch 1/10, Total Epoch Loss: 12.4695
Epoch 2/10, Total Epoch Loss: 7.6379
Epoch 3/10, Total Epoch Loss: 7.4841
Epoch 4/10, Total Epoch Loss: 7.2380
Epoch 5/10, Total Epoch Loss: 7.1337
Epoch 6/10, Total Epoch Loss: 7.0646
Epoch 7/10, Total Epoch Loss: 7.0196
Epoch 8/10, Total Epoch Loss: 6.9045
Epoch 9/10, Total Epoch Loss: 6.8418
Epoch 10/10, Total Epoch Loss: 6.7760
Kappa for Content: 0.01686
Kappa for Organization: 0.02689
Kappa for Word Choice: 0.01912
Kappa for Sentence Fluency: 0.03552
Kappa for Conventions: 0.02673


  model.load_state_dict(torch.load(model_path, map_location=device))



Training model for embedding type: glove


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/10, Total Epoch Loss: 13.3931
Epoch 2/10, Total Epoch Loss: 7.8071
Epoch 3/10, Total Epoch Loss: 7.5703
Epoch 4/10, Total Epoch Loss: 7.3461
Epoch 5/10, Total Epoch Loss: 7.1659
Epoch 6/10, Total Epoch Loss: 7.0112
Epoch 7/10, Total Epoch Loss: 6.9268
Epoch 8/10, Total Epoch Loss: 6.9814
Epoch 9/10, Total Epoch Loss: 6.8266
Epoch 10/10, Total Epoch Loss: 6.7712
Kappa for Content: -0.08098
Kappa for Organization: -0.06112
Kappa for Word Choice: -0.06509
Kappa for Sentence Fluency: -0.06885
Kappa for Conventions: -0.07251


  model.load_state_dict(torch.load(model_path, map_location=device))



Training model for embedding type: fasttext


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/10, Total Epoch Loss: 12.5424
Epoch 2/10, Total Epoch Loss: 7.6974
Epoch 3/10, Total Epoch Loss: 7.5291
Epoch 4/10, Total Epoch Loss: 7.2858
Epoch 5/10, Total Epoch Loss: 7.1851
Epoch 6/10, Total Epoch Loss: 7.0608
Epoch 7/10, Total Epoch Loss: 6.9573
Epoch 8/10, Total Epoch Loss: 6.9117
Epoch 9/10, Total Epoch Loss: 6.7950
Epoch 10/10, Total Epoch Loss: 6.7691
Kappa for Content: -0.04745
Kappa for Organization: -0.02561
Kappa for Word Choice: -0.03016
Kappa for Sentence Fluency: -0.07495
Kappa for Conventions: -0.04222


  model.load_state_dict(torch.load(model_path, map_location=device))


In [1]:
content = """
    In “Let there be dark,” Paul Bogard talks about the importance of darkness.
Darkness is essential to humans. Bogard states, “Our bodies need darkness to produce the hormone melatonin, which keeps certain cancers from developing, and our bodies need darkness for sleep, sleep. Sleep disorders have been linked to diabetes, obesity, cardiovascular disease and depression and recent research suggests are main cause of “short sleep” is “long light.” Whether we work at night or simply take our tablets, notebooks and smartphones to bed, there isn’t a place for this much artificial light in our lives.” (Bogard 2). Here, Bogard talks about the importance of darkness to humans. Humans need darkness to sleep in order to be healthy.
Animals also need darkness. Bogard states, “The rest of the world depends on darkness as well, including nocturnal and crepuscular species of birds, insects, mammals, fish and reptiles. Some examples are well known—the 400 species of birds that migrate at night in North America, the sea turtles that come ashore to lay their eggs—and some are not, such as the bats that save American farmers billions in pest control and the moths that pollinate 80% of the world’s flora. Ecological light pollution is like the bulldozer of the night, wrecking habitat and disrupting ecosystems several billion years in the making. Simply put, without darkness, Earth’s ecology would collapse...” (Bogard 2). Here Bogard explains that animals, too, need darkness to survive.
""" 

In [14]:
def testContent(content, embedding_type=None, SAVE_DIR=None, glove_model=None, fasttext_model=None):
    # Generate the combined embedding
    embedding, actual_embedding_size = create_combined_embedding(
        content,
        embedding_type=embedding_type,
        _glove_model=glove_model if embedding_type == "glove" else None,
        _fasttext_model=fasttext_model if embedding_type == "fasttext" else None
    )

    embedding_tensor = torch.tensor(embedding, dtype=torch.float32).to(device).unsqueeze(0)

    # Load model files
    embedding_size_filename = f"albert6_embedding_size_{embedding_type or 'albert'}.npy"
    model_filename = f"albert6_model_{embedding_type or 'albert'}.pth"

    # Load the expected embedding size and model
    embedding_size_path = os.path.join(SAVE_DIR, embedding_size_filename)
    expected_embedding_size = int(np.load(embedding_size_path))

    # Initialize model and load weights
    model = MultiTaskModel(expected_embedding_size).to(device)
    model_path = os.path.join(SAVE_DIR, model_filename)
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict, strict=False)
    model.eval()

    # Adjust embedding size if necessary
    embedding_resized = embedding_tensor[:, :expected_embedding_size]

    # Make predictions
    with torch.no_grad():
        pred_content, pred_organization, pred_word_choice, pred_sentence_fluency, pred_conventions = model(embedding_resized)
        content_score = pred_content.cpu().item()
        language_score = pred_organization.cpu().item()
        prompt_adherence_score = pred_word_choice.cpu().item()
        narrativity_score = pred_sentence_fluency.cpu().item()
        conventions_score = pred_sentence_fluency.cpu().item()

    return content_score, language_score, prompt_adherence_score, narrativity_score, conventions_score

In [15]:
# Initialize an empty dictionary to store results for each embedding type
results = {}

# Define the embedding types
embedding_types = [None, "glove", "fasttext"]

for embedding_type in embedding_types:
    # Define embedding type name for readability
    if embedding_type is None:
        embedding_type_name = "ALBERT"
    elif embedding_type == "glove":
        embedding_type_name = "ALBERT + GloVe"
    elif embedding_type == "fasttext":
        embedding_type_name = "ALBERT + FastText"

    # Generate the scores for the given content and embedding type
    content_score, organization_score, word_choice_score, sentence_fluency_score, conventions_score = testContent(
        content=content,
        embedding_type=embedding_type,
        SAVE_DIR=SAVE_DIR,
        glove_model=glove_model,
        fasttext_model=fasttext_model
    )
    print(f"Embedding Type: {embedding_type_name}")
    print(f"Returned Values: {content_score}, {organization_score}, {word_choice_score}, {sentence_fluency_score}, {conventions_score}")


    try:
        results[embedding_type_name] = {
            "Content Score": float(content_score),
            "Organization Score": float(organization_score),
            "Word Choice Score": float(word_choice_score),
            "Sentence Fluency Score": float(sentence_fluency_score),
            "Conventions Score": float(conventions_score)
        }
    except ValueError:
        print(f"Error: Unable to convert one or more values to float for embedding type: {embedding_type_name}")
        print(f"Values: {content_score}, {organization_score}, {word_choice_score}, {sentence_fluency_score}, {conventions_score}")

# Display the results for each embedding type
for embedding_name, result in results.items():
    print(f"Sample Essay Scores for {embedding_name}:")
    print(f"  Content Score: {result['Content Score']:.2f}")
    print(f"  Organization Score: {result['Organization Score']:.2f}")
    print(f"  Word Choice Score: {result['Word Choice Score']:.2f}")
    print(f"  Sentence Fluency Score: {result['Sentence Fluency Score']:.2f}")
    print(f"  Conventions Score: {result['Conventions Score']:.2f}")

  state_dict = torch.load(model_path, map_location=device)


Embedding Type: ALBERT
Returned Values: 3.431947946548462, 3.305612325668335, 3.2921910285949707, 3.3810877799987793, 3.3810877799987793
Embedding Type: ALBERT + GloVe
Returned Values: 3.2467987537384033, 3.1035304069519043, 3.12103533744812, 3.263153314590454, 3.263153314590454
Embedding Type: ALBERT + FastText
Returned Values: 3.2504689693450928, 3.059922456741333, 3.1035611629486084, 3.2877790927886963, 3.2877790927886963
Sample Essay Scores for ALBERT:
  Content Score: 3.43
  Organization Score: 3.31
  Word Choice Score: 3.29
  Sentence Fluency Score: 3.38
  Conventions Score: 3.38
Sample Essay Scores for ALBERT + GloVe:
  Content Score: 3.25
  Organization Score: 3.10
  Word Choice Score: 3.12
  Sentence Fluency Score: 3.26
  Conventions Score: 3.26
Sample Essay Scores for ALBERT + FastText:
  Content Score: 3.25
  Organization Score: 3.06
  Word Choice Score: 3.10
  Sentence Fluency Score: 3.29
  Conventions Score: 3.29
