## Classifier - Try 3

Classify article frames using aggregated sentence embedding

In [1]:
import os

try:
  import google.colab

  from google.colab import drive
  drive.mount('/content/drive')
  IN_COLAB = True
except:
  IN_COLAB = False

if IN_COLAB:
  os.chdir('drive/MyDrive/Git/MasterThesis/data')
else:
  os.chdir('../../data/')

labels_path = "data/en/train-labels-subtask-2.txt"
articles_path = "data/en/train-articles-subtask-2/"

In [2]:
import pandas as pd

# Read the dev-labels-subtask-2.txt file
labels_df = pd.read_csv(labels_path, sep="\t")

# Rename the columns for easier processing
labels_df.columns = ["article_id", "frames"]


labels_df.head()

Unnamed: 0,article_id,frames
0,832959523,"Morality,Security_and_defense,Policy_prescript..."
1,833039623,"Political,Crime_and_punishment,External_regula..."
2,833032367,"Political,Crime_and_punishment,Fairness_and_eq..."
3,814777937,"Political,Morality,Fairness_and_equality,Exter..."
4,821744708,"Policy_prescription_and_evaluation,Political,L..."


In [3]:
# A function to read the article text given its ID
def get_article_content(article_id):
    try:
        with open(f"{articles_path}/article{article_id}.txt", "r") as f:
            return f.read()
    except FileNotFoundError:
        return None

df = labels_df

# Apply the function to get the article content
df["content"] = df["article_id"].apply(get_article_content)

# Drop rows where content could not be found
df.dropna(subset=["content"], inplace=True)

df.head()


Unnamed: 0,article_id,frames,content
0,832959523,"Morality,Security_and_defense,Policy_prescript...",How Theresa May Botched\n\nThose were the time...
1,833039623,"Political,Crime_and_punishment,External_regula...",Robert Mueller III Rests His Case—Dems NEVER W...
2,833032367,"Political,Crime_and_punishment,Fairness_and_eq...",Robert Mueller Not Recommending Any More Indic...
3,814777937,"Political,Morality,Fairness_and_equality,Exter...",The Far Right Is Trying to Co-opt the Yellow V...
4,821744708,"Policy_prescription_and_evaluation,Political,L...",‘Special place in hell’ for those who promoted...


In [4]:
# Split the frames column into a list of frames
df["frames_list"] = df["frames"].str.split(",")

# create for each frame a new column with the frame as name and 1 if the frame is present in the article and 0 if not
for frame in df["frames_list"].explode().unique():
    df[frame] = df["frames_list"].apply(lambda x: 1 if frame in x else 0)

df.head()

Unnamed: 0,article_id,frames,content,frames_list,Morality,Security_and_defense,Policy_prescription_and_evaluation,Legality_Constitutionality_and_jurisprudence,Economic,Political,Crime_and_punishment,External_regulation_and_reputation,Public_opinion,Fairness_and_equality,Capacity_and_resources,Quality_of_life,Cultural_identity,Health_and_safety
0,832959523,"Morality,Security_and_defense,Policy_prescript...",How Theresa May Botched\n\nThose were the time...,"[Morality, Security_and_defense, Policy_prescr...",1,1,1,1,1,0,0,0,0,0,0,0,0,0
1,833039623,"Political,Crime_and_punishment,External_regula...",Robert Mueller III Rests His Case—Dems NEVER W...,"[Political, Crime_and_punishment, External_reg...",0,0,1,1,0,1,1,1,1,0,0,0,0,0
2,833032367,"Political,Crime_and_punishment,Fairness_and_eq...",Robert Mueller Not Recommending Any More Indic...,"[Political, Crime_and_punishment, Fairness_and...",0,0,0,1,0,1,1,1,0,1,0,0,0,0
3,814777937,"Political,Morality,Fairness_and_equality,Exter...",The Far Right Is Trying to Co-opt the Yellow V...,"[Political, Morality, Fairness_and_equality, E...",1,1,0,0,1,1,0,1,1,1,0,0,0,0
4,821744708,"Policy_prescription_and_evaluation,Political,L...",‘Special place in hell’ for those who promoted...,"[Policy_prescription_and_evaluation, Political...",0,0,1,1,0,1,0,1,0,0,0,0,0,0


In [5]:
X = df["content"]
y = df.drop(columns=["article_id", "frames", "frames_list", "content"])

In [6]:
X.head()

0    How Theresa May Botched\n\nThose were the time...
1    Robert Mueller III Rests His Case—Dems NEVER W...
2    Robert Mueller Not Recommending Any More Indic...
3    The Far Right Is Trying to Co-opt the Yellow V...
4    ‘Special place in hell’ for those who promoted...
Name: content, dtype: object

In [7]:
y.head()

Unnamed: 0,Morality,Security_and_defense,Policy_prescription_and_evaluation,Legality_Constitutionality_and_jurisprudence,Economic,Political,Crime_and_punishment,External_regulation_and_reputation,Public_opinion,Fairness_and_equality,Capacity_and_resources,Quality_of_life,Cultural_identity,Health_and_safety
0,1,1,1,1,1,0,0,0,0,0,0,0,0,0
1,0,0,1,1,0,1,1,1,1,0,0,0,0,0
2,0,0,0,1,0,1,1,1,0,1,0,0,0,0
3,1,1,0,0,1,1,0,1,1,1,0,0,0,0
4,0,0,1,1,0,1,0,1,0,0,0,0,0,0


In [8]:
len(X), len(y)

(432, 432)

### Create Dataset

In [9]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module='transformers')

### Extract SRL Embeddings from articles

In [10]:
#X_srl = optimized_extract_srl(X, predictor)
#X_srl.to_pickle("../notebooks/classifier/X_srl.pkl")

In [11]:
import pandas as pd
# Unpickle a pickled pandas Series object
X_srl = pd.read_pickle("../notebooks/classifier/X_srl.pkl")

# Print the unpickled series 
print(type(X_srl))
print(X_srl.head())

<class 'pandas.core.series.Series'>
0    [{'predicate': 'were', 'ARG0': '', 'ARG1': 'Th...
1    [{'predicate': 'Rests', 'ARG0': 'Robert Muelle...
2    [{'predicate': 'Recommending', 'ARG0': 'Robert...
3    [{'predicate': 'Trying', 'ARG0': 'The Far Righ...
4    [{'predicate': 'promoted', 'ARG0': 'those', 'A...
dtype: object


# GPU

In [12]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

def free_gpu():
    print(torch.cuda.mem_get_info())
    print(torch.cuda.memory_summary())
    
free_gpu()

Using device: cuda
(16721772544, 16891248640)
|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |       0 B  |       0 B  |       0 B  |       0 B  |
|       from large pool |       0 B  |       0 B  |       0 B  |       0 B  |
|       from small pool |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------------------|
| Active memory         |       0 B  |       0 B  |       0 B  |       0 B  |
|       from large pool |       0 B  |       0 B  |       0 B  |       0 B  |
|       from small pool |       0 B  |       0 B  |       0 B  |       0 B  |
|-----------------

In [13]:
import torch
import gc

def list_gpu_tensors():
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj):
                if obj.is_cuda:
                    obj = obj.cpu()
                    obj = obj.to("cpu")
                    print(type(obj), obj.size())
        except:
            pass

        
list_gpu_tensors()



# Dataset

In [14]:
from torch.utils.data import Dataset
from transformers import BertTokenizer
import pandas as pd
import nltk

class ArticleDataset(Dataset):
    def __init__(self, X, X_srl, labels, tokenizer, max_sentences_per_article=32, max_sentence_length=32, max_arg_length=16):
        self.X = X
        self.X_srl = X_srl
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_sentences_per_article = max_sentences_per_article
        self.max_sentence_length = max_sentence_length
        self.max_arg_length = max_arg_length
        nltk.download('punkt')  # Download the Punkt tokenizer model for sentence splitting
        
    def __len__(self):
        return len(self.X)
    
    def _truncate_or_pad(self, lst, target_length, pad_value=0):
        """
        Truncate or pad the input list to match the target length.
        """
        if len(lst) > target_length:
            return lst[:target_length]
        else:
            return lst + [pad_value] * (target_length - len(lst))

    
    def __getitem__(self, idx):
        article = self.X.iloc[idx]
        srl = self.X_srl.iloc[idx]
        labels = self.labels.iloc[idx]

        # Split the article into sentences
        sentences = nltk.sent_tokenize(article)
        # Limit the number of sentences per article
        sentences = sentences[:self.max_sentences_per_article]

        # Tokenize and pad/truncate the sentences
        sentence_ids = [self.tokenizer.encode(sentence, add_special_tokens=True, max_length=self.max_sentence_length, truncation=True, padding='max_length') for sentence in sentences]
        
        # Ensure that the length of sentence_ids is consistent across all articles
        while len(sentence_ids) < self.max_sentences_per_article:
            sentence_ids.append([0] * self.max_sentence_length)

        predicate_ids = []
        arg0_ids = []
        arg1_ids = []

        # Tokenize and pad/truncate the SRL items
        predicate_ids = [self.tokenizer.encode(predicate, add_special_tokens=True, max_length=self.max_arg_length, truncation=True, padding='max_length') for predicate in [item['predicate'] for item in srl]]
        arg0_ids = [self.tokenizer.encode(arg0, add_special_tokens=True, max_length=self.max_arg_length, truncation=True, padding='max_length') for arg0 in [item.get('arg0', '') for item in srl]]
        arg1_ids = [self.tokenizer.encode(arg1, add_special_tokens=True, max_length=self.max_arg_length, truncation=True, padding='max_length') for arg1 in [item.get('arg1', '') for item in srl]]
        
        # Limit the number of sentences per article
        predicate_ids = predicate_ids[:self.max_sentences_per_article]
        arg0_ids = arg0_ids[:self.max_sentences_per_article]
        arg1_ids = arg1_ids[:self.max_sentences_per_article]        
        
        # Ensure consistent length for predicate_ids, arg0_ids, and arg1_ids
        while len(predicate_ids) < self.max_sentences_per_article:
            predicate_ids.append([0] * self.max_arg_length)
        while len(arg0_ids) < self.max_sentences_per_article:
            arg0_ids.append([0] * self.max_arg_length)
        while len(arg1_ids) < self.max_sentences_per_article:
            arg1_ids.append([0] * self.max_arg_length)
        
        # Return the results
        return {
            'sentence_ids': torch.tensor(sentence_ids, dtype=torch.long),
            'predicate_ids': torch.tensor(predicate_ids, dtype=torch.long),
            'arg0_ids': torch.tensor(arg0_ids, dtype=torch.long),
            'arg1_ids': torch.tensor(arg1_ids, dtype=torch.long),
            'labels': labels
        }


In [15]:
from torch.utils.data import DataLoader, random_split

# Initialize the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Create the dataset
dataset = ArticleDataset(X, X_srl, y, tokenizer)

# Split the dataset into train and test sets (80/20 split)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Define batch size (adjust as per your memory availability)
batch_size = 4

def custom_collate_fn(batch):
    # Extract individual lists from the batch
    sentence_ids = [item['sentence_ids'] for item in batch]
    predicate_ids = [item['predicate_ids'] for item in batch]
    arg0_ids = [item['arg0_ids'] for item in batch]
    arg1_ids = [item['arg1_ids'] for item in batch]
    labels = [item['labels'] for item in batch]
    
    # Pad each list
    sentence_ids = torch.nn.utils.rnn.pad_sequence(sentence_ids, batch_first=True, padding_value=0)
    predicate_ids = torch.nn.utils.rnn.pad_sequence(predicate_ids, batch_first=True, padding_value=0)
    arg0_ids = torch.nn.utils.rnn.pad_sequence(arg0_ids, batch_first=True, padding_value=0)
    arg1_ids = torch.nn.utils.rnn.pad_sequence(arg1_ids, batch_first=True, padding_value=0)

    # Return a dictionary containing the padded tensors
    return {
        'sentence_ids': sentence_ids,
        'predicate_ids': predicate_ids,
        'arg0_ids': arg0_ids,
        'arg1_ids': arg1_ids,
        'labels': torch.Tensor(labels)
    }

# Update DataLoaders with custom collate function
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate_fn)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [16]:
dataset[0]["arg0_ids"].shape, dataset[1]["arg0_ids"].shape

(torch.Size([32, 16]), torch.Size([32, 16]))

In [17]:
dataset[123]["sentence_ids"].shape

torch.Size([32, 32])

In [18]:
(dataset[0]["labels"])

Morality                                        1
Security_and_defense                            1
Policy_prescription_and_evaluation              1
Legality_Constitutionality_and_jurisprudence    1
Economic                                        1
Political                                       0
Crime_and_punishment                            0
External_regulation_and_reputation              0
Public_opinion                                  0
Fairness_and_equality                           0
Capacity_and_resources                          0
Quality_of_life                                 0
Cultural_identity                               0
Health_and_safety                               0
Name: 0, dtype: int64

In [19]:
free_gpu()

(16721772544, 16891248640)
|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |       0 B  |       0 B  |       0 B  |       0 B  |
|       from large pool |       0 B  |       0 B  |       0 B  |       0 B  |
|       from small pool |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------------------|
| Active memory         |       0 B  |       0 B  |       0 B  |       0 B  |
|       from large pool |       0 B  |       0 B  |       0 B  |       0 B  |
|       from small pool |       0 B  |       0 B  |       0 B  |       0 B  |
|------------------------------------

# Model

In [20]:
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer
import torch.nn.functional as F

In [21]:
class EmbeddingLayer(nn.Module):
    def __init__(self, bert_model_name):
        super(EmbeddingLayer, self).__init__()
        self.bert_model = BertModel.from_pretrained(bert_model_name)

    def forward(self, x, max_items):
        batch_size = x.shape[0]
        
        # List to collect embeddings for each batch
        batch_embeddings = []

        # Loop through each item in the batch
        for i in range(batch_size):
            sequence_embeddings = []

            # Loop through each sequence in the item and obtain embeddings
            for j in range(max_items):
                sequence = x[i][j].unsqueeze(0)  # Adding an extra dimension for BERT
                embeddings = self.bert_model(sequence).last_hidden_state.squeeze(0)  # Removing the extra dimension after obtaining embeddings
                sequence_embeddings.append(embeddings)
            
            # Stack embeddings for each sequence in the item
            item_embeddings = torch.stack(sequence_embeddings)
            batch_embeddings.append(item_embeddings)
        
        # Stack embeddings for each item in the batch
        reshaped_embeddings = torch.stack(batch_embeddings)
        
        return reshaped_embeddings


In [22]:
class SentenceAttentionLayer(nn.Module):
    def __init__(self, embedding_dim, heads=8):
        super(SentenceAttentionLayer, self).__init__()
        
        self.embedding_dim = embedding_dim
        self.heads = heads
        self.head_dim = embedding_dim // heads

        # Q, K, V weight matrices
        self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)

        self.fc_out = nn.Linear(heads * self.head_dim, embedding_dim)
        
    def forward(self, values, keys=None, queries=None):
        if keys is None:
            keys = values
        if queries is None:
            queries = values

        N = queries.shape[0]  # batch size
        value_len, key_len, query_len = values.shape[1], keys.shape[1], queries.shape[1]

        # Split the embedding_dim into self.heads pieces
        values = values.reshape(N, value_len, self.heads, self.head_dim)
        keys = keys.reshape(N, key_len, self.heads, self.head_dim)
        queries = queries.reshape(N, query_len, self.heads, self.head_dim)

        values = self.values(values)
        keys = self.keys(keys)
        queries = self.queries(queries)

        # Scaled dot-product attention
        attention = torch.einsum("nqhd,nkhd->nhqk", [queries, keys]) 
        attention = attention / (self.embedding_dim ** (1/2))
        attention = F.softmax(attention, dim=3)

        out = torch.einsum("nhql,nlhd->nqhd", [attention, values]).reshape(N, query_len, self.heads*self.head_dim)
        
        # Combine heads
        out = self.fc_out(out)
        
        # Aggregate to single vector per instance
        out = out.sum(dim=1)
        return out

In [23]:
class AggregationModel(nn.Module):
    def __init__(self, max_sentences_per_article=32, max_sentence_length=32, max_srl_items=None, 
                 max_arg_length=16, bert_model_name="bert-base-uncased"):
        super(AggregationModel, self).__init__()
        self.embedding_layer = EmbeddingLayer(bert_model_name)
        
        embedding_dim = 768  # For bert-base-uncased
        self.attention = SentenceAttentionLayer(embedding_dim=embedding_dim)
        
        # Store the values as attributes
        self.max_sentences_per_article = max_sentences_per_article
        self.max_sentence_length = max_sentence_length
        self.max_srl_items = max_srl_items if max_srl_items is not None else max_sentences_per_article
        self.max_arg_length = max_arg_length
        
        # Classification layer
        self.classifier = nn.Linear(embedding_dim, 14)  # There are 14 frames

    def aggregate_word_embeddings(self, embeddings):
        """
        Apply the attention mechanism to the word embeddings to get the sentence embeddings.
        """
        N, num_sentences, num_words, embedding_dim = embeddings.shape
        embeddings_reshaped = embeddings.reshape(N*num_sentences, num_words, embedding_dim)
        sentence_embeddings = self.attention(embeddings_reshaped)
        return sentence_embeddings.reshape(N, num_sentences, embedding_dim)
    
    def forward(self, sentence_ids, predicate_ids, arg0_ids, arg1_ids):
        # Get embeddings
        sentence_embeddings = self.embedding_layer(sentence_ids, self.max_sentences_per_article)
        predicate_embeddings = self.embedding_layer(predicate_ids, self.max_srl_items)
        arg0_embeddings = self.embedding_layer(arg0_ids, self.max_srl_items)
        arg1_embeddings = self.embedding_layer(arg1_ids, self.max_srl_items)
        
        # Aggregate word embeddings into sentence embeddings
        sentence_embeddings = self.aggregate_word_embeddings(sentence_embeddings)
        predicate_embeddings = self.aggregate_word_embeddings(predicate_embeddings)
        arg0_embeddings = self.aggregate_word_embeddings(arg0_embeddings)
        arg1_embeddings = self.aggregate_word_embeddings(arg1_embeddings)
        
        # Get aggregated embeddings (if you want a second level of attention on the sentence embeddings)
        sentence_aggregated = self.attention(sentence_embeddings)
        predicate_aggregated = self.attention(predicate_embeddings)
        arg0_aggregated = self.attention(arg0_embeddings)
        arg1_aggregated = self.attention(arg1_embeddings)
        
        # Pass the aggregated sentence embedding through the classifier
        frame_predictions = self.classifier(sentence_aggregated)

        return frame_predictions

# Train Model

In [24]:
if 'model' in locals():
    model.to("cpu")
    model.cpu()
    print()
    free_gpu()

In [25]:
# Initialization
model = AggregationModel()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_function = nn.BCEWithLogitsLoss()  # Binary cross-entropy with logits for multi-label classification
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

print()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).





In [26]:
free_gpu()

(15744499712, 16891248640)
|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |  431236 KB |  431236 KB |  431236 KB |       0 B  |
|       from large pool |  430592 KB |  430592 KB |  430592 KB |       0 B  |
|       from small pool |     644 KB |     644 KB |     644 KB |       0 B  |
|---------------------------------------------------------------------------|
| Active memory         |  431236 KB |  431236 KB |  431236 KB |       0 B  |
|       from large pool |  430592 KB |  430592 KB |  430592 KB |       0 B  |
|       from small pool |     644 KB |     644 KB |     644 KB |       0 B  |
|------------------------------------

In [29]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, average_precision_score

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch in train_dataloader:
        # Zero the gradients
        optimizer.zero_grad()

        # Move data and labels to device
        sentence_ids = batch['sentence_ids'].to(device)
        predicate_ids = batch['predicate_ids'].to(device)
        arg0_ids = batch['arg0_ids'].to(device)
        arg1_ids = batch['arg1_ids'].to(device)
        labels = batch['labels'].to(device)

        # Forward pass
        frame_predictions = model(sentence_ids, predicate_ids, arg0_ids, arg1_ids)

        # Compute loss
        loss = loss_function(frame_predictions, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Clear GPU cache
        torch.cuda.empty_cache()

    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {total_loss/len(train_dataloader)}")

    # Validation
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch in test_dataloader:
            # Move data and labels to device
            sentence_ids = batch['sentence_ids'].to(device)
            predicate_ids = batch['predicate_ids'].to(device)
            arg0_ids = batch['arg0_ids'].to(device)
            arg1_ids = batch['arg1_ids'].to(device)
            labels = batch['labels'].to(device)

            # Forward pass
            frame_predictions = model(sentence_ids, predicate_ids, arg0_ids, arg1_ids)
            all_preds.append(torch.sigmoid(frame_predictions).cpu().numpy())  # Applying sigmoid to get probabilities
            all_labels.append(labels.cpu().numpy())

    # Convert lists to numpy arrays
    all_preds = np.vstack(all_preds)
    all_labels = np.vstack(all_labels)

    # Compute metrics
    accuracy = accuracy_score(all_labels, all_preds > 0.5)  # Convert probabilities to binary predictions with threshold 0.5
    f1 = f1_score(all_labels, all_preds > 0.5, average='micro')  # Using micro average
    avg_precision = average_precision_score(all_labels, all_preds)

    print(f"Epoch {epoch+1}/{num_epochs}, Validation Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}, Average Precision: {avg_precision:.4f}")

Epoch 1/10, Training Loss: 0.9452386875262205
Epoch 1/10, Validation Accuracy: 0.0000, F1 Score: 0.4561, Average Precision: 0.3064
Epoch 2/10, Training Loss: 0.6766577462355295
Epoch 2/10, Validation Accuracy: 0.0000, F1 Score: 0.4211, Average Precision: 0.3113
Epoch 3/10, Training Loss: 0.6154943064026449
Epoch 3/10, Validation Accuracy: 0.0000, F1 Score: 0.1976, Average Precision: 0.3482
Epoch 4/10, Training Loss: 0.5840342671706759
Epoch 4/10, Validation Accuracy: 0.0115, F1 Score: 0.4211, Average Precision: 0.3105
Epoch 5/10, Training Loss: 0.5828009049097697
Epoch 5/10, Validation Accuracy: 0.0000, F1 Score: 0.4379, Average Precision: 0.3101
Epoch 6/10, Training Loss: 0.5484196046988169
Epoch 6/10, Validation Accuracy: 0.0115, F1 Score: 0.4211, Average Precision: 0.3535
Epoch 7/10, Training Loss: 0.5341029088387544
Epoch 7/10, Validation Accuracy: 0.0000, F1 Score: 0.0000, Average Precision: 0.3179
Epoch 8/10, Training Loss: 0.5372727252971167
Epoch 8/10, Validation Accuracy: 0.00

In [30]:
torch.save(model.state_dict(), '../notebooks/classifier/model_weights.pth')

In [None]:
# Move data and embeddings back to CPU to free up GPU memory
sentence_ids = sentence_ids.cpu()
predicate_ids = predicate_ids.cpu()
arg0_ids = arg0_ids.cpu()
arg1_ids = arg1_ids.cpu()

if 'sentence_emb' in locals():
    sentence_emb = sentence_emb.cpu()

if 'predicate_emb' in locals():
    predicate_emb = predicate_emb.cpu()

if 'arg0_emb' in locals():
    arg0_emb = arg0_emb.cpu()

if 'arg1_emb' in locals():
    arg1_emb = arg1_emb.cpu()


In [None]:
import torch
torch.cuda.empty_cache()
free_gpu()

In [None]:
!nvidia-smi