In [8]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
import os

# Use the absolute path under the Drive mount point ('/content/drive', see the
# drive.mount call) so this cell can be re-run: a relative chdir only succeeds
# the first time, while the working directory is still the Colab default.
# NOTE(review): assumes the kernel starts in /content — confirm if run elsewhere.
os.chdir('/content/drive/MyDrive/Git/MasterThesis/data')

# Both paths are relative to the working directory set above.
labels_path = "data/en/dev-labels-subtask-3.txt"
articles_path = "data/en/dev-articles-subtask-3/"

In [10]:
import pandas as pd

# Read the dev-labels-subtask-3.txt file (labels_path): tab-separated, no header row.
# Columns are named here as (article_id, sentence_id, persuasion_technique);
# persuasion_technique holds a comma-separated list of techniques and is NaN
# for sentences that carry no label.
labels_df = pd.read_csv(labels_path, sep='\t', header=None, names=["article_id", "sentence_id", "persuasion_technique"])

labels_df.head()

Unnamed: 0,article_id,sentence_id,persuasion_technique
0,813452859,1,
1,813452859,3,
2,813452859,4,
3,813452859,5,
4,813452859,6,


In [11]:
# Get unique article IDs from the dev-labels data
unique_article_ids = labels_df['article_id'].unique()

# Keep only the sentences that carry at least one persuasion technique.
# Filtering once up front avoids re-scanning labels_df for every article.
labelled_rows = labels_df[labels_df['persuasion_technique'].notna()]

# One [article_id, sentence_id, sentence, technique] record per labelled sentence
results = []

# Problems are collected and reported at the end instead of being dropped silently
missing_articles = []
out_of_range_sentences = []

# sort=False keeps the articles in the order they appear in the labels file
for article_id, article_rows in labelled_rows.groupby('article_id', sort=False):
    # Construct the file path for the article
    file_path = f"{articles_path}/article{article_id}.txt"

    try:
        # Explicit encoding: the platform default is not guaranteed to be UTF-8
        with open(file_path, 'r', encoding='utf-8') as file:
            # One list entry per line of the article file
            sentences = file.readlines()
    except FileNotFoundError:
        # If the file for an article_id doesn't exist, record it and move on
        missing_articles.append(article_id)
        continue

    for sentence_id, technique in zip(article_rows['sentence_id'], article_rows['persuasion_technique']):
        # sentence_id is 1-based; an id of 0 would silently wrap around to the
        # last line and an id past the end would raise IndexError.
        if not 1 <= sentence_id <= len(sentences):
            out_of_range_sentences.append((article_id, sentence_id))
            continue
        sentence = sentences[sentence_id - 1].strip()
        results.append([article_id, sentence_id, sentence, technique])

if missing_articles:
    print(f"Skipped {len(missing_articles)} article(s) without a file: {missing_articles}")
if out_of_range_sentences:
    print(f"Skipped {len(out_of_range_sentences)} label row(s) with an out-of-range sentence_id: {out_of_range_sentences}")

# Convert the results list to a dataframe
df = pd.DataFrame(results, columns=['article_id', 'sentence_id', 'sentence', 'persuasion_technique'])

df.head()

Unnamed: 0,article_id,sentence_id,sentence,persuasion_technique
0,813452859,7,Michael Swadling: I guess her only chance is i...,"False_Dilemma-No_Choice,Loaded_Language"
1,813452859,9,There is a chance; as unfortunately there are ...,"False_Dilemma-No_Choice,Loaded_Language,Name_C..."
2,813452859,11,Michael Swadling: The EU withdrawal act is in ...,Conversation_Killer
3,813452859,12,I often use the example of an iPhone to people...,"Conversation_Killer,Red_Herring"
4,813452859,15,Michael Swadling: The EU makes a profit on its...,Obfuscation-Vagueness-Confusion


In [12]:
# Turn the comma-separated technique string into a list of technique names
df["persuasion_technique_list"] = df["persuasion_technique"].str.split(",")

# Every distinct technique that occurs anywhere in the data, in order of first appearance
technique_names = df["persuasion_technique_list"].explode().unique()

# Add one indicator column per technique: 1 if the sentence carries it, 0 otherwise
for technique in technique_names:
    df[technique] = [
        1 if technique in sentence_techniques else 0
        for sentence_techniques in df["persuasion_technique_list"]
    ]

df.head()

Unnamed: 0,article_id,sentence_id,sentence,persuasion_technique,persuasion_technique_list,False_Dilemma-No_Choice,Loaded_Language,Name_Calling-Labeling,Conversation_Killer,Red_Herring,...,Flag_Waving,Doubt,Whataboutism,Appeal_to_Fear-Prejudice,Causal_Oversimplification,Appeal_to_Hypocrisy,Appeal_to_Popularity,Appeal_to_Authority,Straw_Man,Guilt_by_Association
0,813452859,7,Michael Swadling: I guess her only chance is i...,"False_Dilemma-No_Choice,Loaded_Language","[False_Dilemma-No_Choice, Loaded_Language]",1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,813452859,9,There is a chance; as unfortunately there are ...,"False_Dilemma-No_Choice,Loaded_Language,Name_C...","[False_Dilemma-No_Choice, Loaded_Language, Nam...",1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,813452859,11,Michael Swadling: The EU withdrawal act is in ...,Conversation_Killer,[Conversation_Killer],0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,813452859,12,I often use the example of an iPhone to people...,"Conversation_Killer,Red_Herring","[Conversation_Killer, Red_Herring]",0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
4,813452859,15,Michael Swadling: The EU makes a profit on its...,Obfuscation-Vagueness-Confusion,[Obfuscation-Vagueness-Confusion],0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [13]:
# Model input: the sentence text
X = df.loc[:, "sentence"]
# Targets: whatever is left once the identifier/text columns are removed,
# i.e. the per-technique indicator columns
y = df.drop(
    ["article_id", "sentence_id", "sentence", "persuasion_technique", "persuasion_technique_list"],
    axis=1,
)

In [14]:
X.head()

0    Michael Swadling: I guess her only chance is i...
1    There is a chance; as unfortunately there are ...
2    Michael Swadling: The EU withdrawal act is in ...
3    I often use the example of an iPhone to people...
4    Michael Swadling: The EU makes a profit on its...
Name: sentence, dtype: object

In [15]:
y.head()

Unnamed: 0,False_Dilemma-No_Choice,Loaded_Language,Name_Calling-Labeling,Conversation_Killer,Red_Herring,Obfuscation-Vagueness-Confusion,Exaggeration-Minimisation,Repetition,Slogans,Flag_Waving,Doubt,Whataboutism,Appeal_to_Fear-Prejudice,Causal_Oversimplification,Appeal_to_Hypocrisy,Appeal_to_Popularity,Appeal_to_Authority,Straw_Man,Guilt_by_Association
0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [16]:
len(X), len(y)

(1120, 1120)

In [17]:
import torch
# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
!pip install allennlp_models

In [None]:
!pip install spacy==3.6.0


In [2]:
from allennlp.predictors.predictor import Predictor
from allennlp_models.structured_prediction.models import srl_bert

# Load the SRL predictor
predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
import random

def extract_srl_components(article, predictor):
    """
    Run semantic role labelling on a text and collect predicate/ARG0/ARG1 words.

    Args:
    - article (str): Text passed to the predictor as ``sentence``.
    - predictor: Object exposing ``predict(sentence=...)`` that returns a dict
      with a ``'words'`` list and a ``'verbs'`` list; each verb entry has a
      ``'verb'`` string and a ``'tags'`` list.

    Returns:
    - list[dict]: One dict per verb entry with keys ``'predicate'`` (a
      one-element list), ``'ARG0'`` and ``'ARG1'`` (lists of words whose tag is
      ``B-``/``I-`` of that role; empty when the role is absent).
    """
    parse = predictor.predict(sentence=article)

    def words_with_role(tags, role):
        # Select the words whose tag marks the beginning or inside of `role`
        wanted = (f'B-{role}', f'I-{role}')
        return [parse['words'][position] for position, tag in enumerate(tags) if tag in wanted]

    components = []
    for frame in parse['verbs']:
        verb = frame['verb']
        frame_tags = frame['tags']
        components.append({
            'predicate': [verb],
            'ARG0': words_with_role(frame_tags, 'ARG0'),
            'ARG1': words_with_role(frame_tags, 'ARG1'),
        })
    return components

In [4]:
extract_srl_components("The red horse simply turned around and fought off the fly with its tail.", predictor)

[{'predicate': ['turned'], 'ARG0': [], 'ARG1': ['The', 'red', 'horse']},
 {'predicate': ['fought'],
  'ARG0': ['The', 'red', 'horse'],
  'ARG1': ['the', 'fly']}]

In [18]:
X_srl = X.apply(lambda x: extract_srl_components(x, predictor))

In [19]:
X_srl

0       [{'predicate': ['guess'], 'ARG0': ['I'], 'ARG1...
1       [{'predicate': ['is'], 'ARG0': [], 'ARG1': ['a...
2       [{'predicate': ['is'], 'ARG0': [], 'ARG1': ['T...
3       [{'predicate': ['use'], 'ARG0': ['I'], 'ARG1':...
4       [{'predicate': ['makes'], 'ARG0': ['The', 'EU'...
                              ...                        
1115    [{'predicate': ['do'], 'ARG0': [], 'ARG1': []}...
1116    [{'predicate': ['are'], 'ARG0': [], 'ARG1': ['...
1117    [{'predicate': ['added'], 'ARG0': ['Trump', 'J...
1118    [{'predicate': ['seen'], 'ARG0': [], 'ARG1': [...
1119    [{'predicate': ['came'], 'ARG0': [], 'ARG1': [...
Name: sentence, Length: 1120, dtype: object

In [25]:
# pickle X_srl to disk
import pickle

with open("X_srl.pkl", "wb") as f:
    pickle.dump(X_srl, f)

In [151]:
X_srl

0       [{'predicate': ['guess'], 'ARG0': ['I'], 'ARG1...
1       [{'predicate': ['is'], 'ARG0': [], 'ARG1': ['a...
2       [{'predicate': ['is'], 'ARG0': [], 'ARG1': ['T...
3       [{'predicate': ['use'], 'ARG0': ['I'], 'ARG1':...
4       [{'predicate': ['makes'], 'ARG0': ['The', 'EU'...
                              ...                        
1115    [{'predicate': ['do'], 'ARG0': [], 'ARG1': []}...
1116    [{'predicate': ['are'], 'ARG0': [], 'ARG1': ['...
1117    [{'predicate': ['added'], 'ARG0': ['Trump', 'J...
1118    [{'predicate': ['seen'], 'ARG0': [], 'ARG1': [...
1119    [{'predicate': ['came'], 'ARG0': [], 'ARG1': [...
Name: sentence, Length: 1120, dtype: object

In [152]:
import torch
from transformers import BertTokenizer, BertModel

# Initialize the tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

def get_word_embedding(word):
    """
    Get the BERT embedding for a given word.

    The word is tokenized on its own (no surrounding sentence and no special
    tokens are added), passed through the module-level ``model``, and the
    last-hidden-state vectors of its word pieces are averaged.

    Args:
    - word (str): The input word.

    Returns:
    - torch.Tensor: 1-D embedding of size ``model.config.hidden_size``. A zero
      vector is returned when the tokenizer produces no tokens for ``word``
      (e.g. an empty or whitespace-only string), since an empty input cannot
      be embedded.
    """
    # Tokenize the word and get the corresponding IDs
    tokens = tokenizer.tokenize(word)
    if not tokens:
        # Nothing to embed: fall back to zeros rather than feeding the model
        # an empty sequence.
        return torch.zeros(model.config.hidden_size)
    token_ids = tokenizer.convert_tokens_to_ids(tokens)

    # Convert token IDs to a torch tensor and add batch dimension
    token_tensor = torch.tensor([token_ids])

    # Forward pass through the BERT model (inference only, no gradients)
    with torch.no_grad():
        outputs = model(token_tensor)
        embeddings = outputs.last_hidden_state

    # If the word was split into multiple tokens, average their embeddings
    embedding = embeddings.mean(dim=1)

    # Drop only the batch dimension
    return embedding.squeeze(0)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [153]:
# Parameters
max_articles = len(X_srl)  # one row per entry of X_srl
embedding_dim = 768        # size of the vectors returned by get_word_embedding

max_srls = 10   # SRL frames kept per entry; extra frames are dropped
max_words = 10  # words kept per ARG0/ARG1 span; extra words are dropped

# Pre-allocate tensors; unused frame/word slots stay zero (padding)
predicates_tensor = torch.zeros((max_articles, max_srls, embedding_dim))
arg0_tensor = torch.zeros((max_articles, max_srls, max_words, embedding_dim))
arg1_tensor = torch.zeros((max_articles, max_srls, max_words, embedding_dim))

# Fill the tensors for EVERY entry. The previous `X_srl[:100]` slice left all
# rows from index 100 onwards as zeros although the tensors are sized for the
# full dataset. This makes one BERT forward pass per word, so it is slow.
for i, article_srls in enumerate(X_srl):
    for j, srl_dict in enumerate(article_srls[:max_srls]):
        # Handle predicate (stored as a one-element list)
        predicates_tensor[i, j] = get_word_embedding(srl_dict['predicate'][0])

        # Handle ARG0
        if srl_dict['ARG0']:
            arg0_embeddings = torch.stack([get_word_embedding(word) for word in srl_dict['ARG0'][:max_words]])
            arg0_tensor[i, j, :arg0_embeddings.shape[0]] = arg0_embeddings

        # Handle ARG1
        if srl_dict['ARG1']:
            arg1_embeddings = torch.stack([get_word_embedding(word) for word in srl_dict['ARG1'][:max_words]])
            arg1_tensor[i, j, :arg1_embeddings.shape[0]] = arg1_embeddings


In [154]:
predicates_tensor.shape, arg0_tensor.shape, arg1_tensor.shape

(torch.Size([1120, 10, 768]),
 torch.Size([1120, 10, 10, 768]),
 torch.Size([1120, 10, 10, 768]))

In [155]:
import torch.nn as nn

# Define the Autoencoder for SRL embeddings
class SRLAutoencoder(nn.Module):
    """
    Symmetric fully-connected autoencoder.

    The encoder maps ``input_dim -> encoding_dim -> encoding_dim // 2`` and the
    decoder mirrors it back to ``input_dim``. Every linear layer, including the
    last decoder layer, is followed by an in-place ReLU, so the output of
    ``forward`` is non-negative.
    """

    def __init__(self, input_dim, encoding_dim):
        super().__init__()
        bottleneck_dim = encoding_dim // 2

        # Compress: input_dim -> encoding_dim -> bottleneck_dim
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, encoding_dim),
            nn.ReLU(inplace=True),
            nn.Linear(encoding_dim, bottleneck_dim),
            nn.ReLU(inplace=True),
        )

        # Expand: bottleneck_dim -> encoding_dim -> input_dim
        self.decoder = nn.Sequential(
            nn.Linear(bottleneck_dim, encoding_dim),
            nn.ReLU(inplace=True),
            nn.Linear(encoding_dim, input_dim),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Encode then decode ``x``; the last dimension of the result is ``input_dim``."""
        return self.decoder(self.encoder(x))

# Instantiate the SRL autoencoder
input_dim = 768  # Embedding dimension
encoding_dim = 384  # Reduced dimension after encoding
srl_autoencoder = SRLAutoencoder(input_dim, encoding_dim)

srl_autoencoder


SRLAutoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=768, out_features=384, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=384, out_features=192, bias=True)
    (3): ReLU(inplace=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=192, out_features=384, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=384, out_features=768, bias=True)
    (3): ReLU(inplace=True)
  )
)

In [162]:
class JointClassifierModel(nn.Module):
    """
    Multi-label classifier over a sentence embedding joined with SRL features.

    The sentence embedding is the first-token hidden state of ``self.roberta``.
    The predicate/ARG0/ARG1 embeddings are averaged and passed through the full
    ``SRLAutoencoder`` (encoder AND decoder), so each SRL feature comes back
    with ``hidden_size`` dimensions. The four ``hidden_size``-wide vectors are
    concatenated and fed to one linear layer.

    ``self.roberta`` is NOT created here: assign a loaded backbone to it before
    calling ``forward``.

    Args:
    - hidden_size (int): Width of the sentence and SRL word embeddings.
    - num_labels (int): Number of output logits (one per label).
    - hidden_dropout_prob (float): Dropout applied to the joint representation.
    - encoding_dim (int): First hidden width of the SRL autoencoder.
    """

    def __init__(self, hidden_size=768, num_labels=19, hidden_dropout_prob=0.1, encoding_dim=384):
        super(JointClassifierModel, self).__init__()

        # RoBERTa Model for sentence embeddings (must be assigned by the caller)
        self.roberta = None

        # SRL Autoencoder
        self.srl_autoencoder = SRLAutoencoder(input_dim=hidden_size, encoding_dim=encoding_dim)

        # Classifier head: forward() concatenates the sentence embedding with
        # the predicate, ARG0 and ARG1 features, each hidden_size wide because
        # the autoencoder decodes back to its input dimension.
        self.classifier_head_dim = hidden_size * 4
        self.classifier = nn.Linear(self.classifier_head_dim, num_labels)

        # Dropout layer
        self.dropout = nn.Dropout(hidden_dropout_prob)

        # Not used by forward(); kept so the attribute stays available
        self.log_softmax = nn.LogSoftmax(dim=1)

        # Loss for multi-label classification (expects raw logits)
        self.loss_fct = nn.BCEWithLogitsLoss()

    def forward(self, input_ids, attention_mask, predicates_tensor, arg0_tensor, arg1_tensor, labels=None):
        """
        Compute logits (and the loss when ``labels`` is given).

        Args:
        - input_ids, attention_mask: Tokenized sentences for ``self.roberta``.
        - predicates_tensor: Averaged over dim 1 here — assumes
          (batch, n_srls, hidden_size); TODO confirm against the caller.
        - arg0_tensor, arg1_tensor: Averaged over dim 2, then (after the
          autoencoder) over dim 1 — assumes (batch, n_srls, n_words, hidden_size).
        - labels: Optional (batch, num_labels) targets; cast to float for the loss.

        Returns:
        - ``(loss, logits)`` when ``labels`` is provided, otherwise ``logits``.

        Raises:
        - RuntimeError: if ``self.roberta`` has not been assigned.
        """
        if self.roberta is None:
            raise RuntimeError(
                "JointClassifierModel.roberta is not set; assign a loaded RoBERTa model before calling forward()."
            )

        outputs = self.roberta(input_ids, attention_mask=attention_mask)
        sentence_embeddings = outputs[0][:, 0, :]  # Hidden state of the first token

        # Average the SRL embeddings. Plain means: if the inputs are
        # zero-padded, the padded slots are included in the average.
        avg_predicates = predicates_tensor.mean(dim=1)  # over SRL frames
        avg_arg0 = arg0_tensor.mean(dim=2)              # over words
        avg_arg1 = arg1_tensor.mean(dim=2)              # over words

        # Pass SRL embeddings through the autoencoder (encode + decode)
        encoded_predicates = self.srl_autoencoder(avg_predicates)
        encoded_arg0 = self.srl_autoencoder(avg_arg0)
        encoded_arg1 = self.srl_autoencoder(avg_arg1)

        # Collapse the remaining SRL-frame dimension of the argument features
        encoded_arg0 = encoded_arg0.mean(dim=1)
        encoded_arg1 = encoded_arg1.mean(dim=1)

        # Concatenate SRL representations with sentence embeddings
        joint_representation = self.dropout(torch.cat([sentence_embeddings, encoded_predicates, encoded_arg0, encoded_arg1], dim=1))

        # Pass through classifier
        logits = self.classifier(joint_representation)

        # Compute Loss
        if labels is not None:
            loss = self.loss_fct(logits, labels.float())
            return loss, logits
        return logits


# Train Model

In [163]:
import torch
from torch.optim import Adam
from transformers import RobertaModel
from sklearn.metrics import accuracy_score

def _forward_joint_batch(model, batch, device):
    """Move one batch to ``device`` and run the model; returns (loss, logits, labels)."""
    # batch is (input_ids, attention_mask, predicates_tensor, arg0_tensor, arg1_tensor, labels)
    input_ids, attention_mask, predicates_tensor, arg0_tensor, arg1_tensor, labels = (
        tensor.to(device) for tensor in batch
    )
    loss, logits = model(input_ids, attention_mask, predicates_tensor, arg0_tensor, arg1_tensor, labels)
    return loss, logits, labels


def _batch_accuracy(logits, labels):
    """Accuracy of the 0.5-thresholded sigmoid predictions, as computed by ``accuracy_score``."""
    preds = (torch.sigmoid(logits) > 0.5).float()
    return accuracy_score(labels.cpu().numpy(), preds.cpu().numpy())


def train_joint_model(train_dataloader, validation_dataloader=None, epochs=3, learning_rate=3e-4):
    """
    Train a JointClassifierModel with a 'roberta-base' backbone.

    Args:
    - train_dataloader: Yields batches of
      (input_ids, attention_mask, predicates_tensor, arg0_tensor, arg1_tensor, labels).
    - validation_dataloader: Optional loader with the same batch layout; when
      given (and non-empty) it is evaluated after every epoch.
    - epochs (int): Number of passes over ``train_dataloader``.
    - learning_rate (float): Learning rate for Adam.

    Returns:
    - The trained model, on the GPU when one is available, otherwise on CPU.

    The number of labels is taken from the module-level ``y`` (``len(y.columns)``).
    Per-epoch loss and accuracy are printed; accuracy is the mean of the
    per-batch ``accuracy_score`` values.
    """
    # Initializing the model
    model = JointClassifierModel(hidden_size=768, num_labels=len(y.columns), hidden_dropout_prob=0.1)
    model.roberta = RobertaModel.from_pretrained('roberta-base')  # Load the RoBERTa model

    # Move model to GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Define the optimizer
    optimizer = Adam(model.parameters(), lr=learning_rate)

    # Training loop
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        total_accuracy = 0

        for batch in train_dataloader:
            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass
            loss, logits, labels = _forward_joint_batch(model, batch, device)

            # Calculate accuracy
            total_accuracy += _batch_accuracy(logits, labels)

            # Backward pass
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        avg_accuracy = total_accuracy / len(train_dataloader)
        print(f"Epoch {epoch+1}/{epochs}, Training Loss: {total_loss/len(train_dataloader)}, Training Accuracy: {avg_accuracy}")

        # Validation loop (optional)
        if validation_dataloader:
            model.eval()
            val_loss = 0
            val_accuracy = 0
            with torch.no_grad():
                for batch in validation_dataloader:
                    loss, logits, labels = _forward_joint_batch(model, batch, device)
                    val_loss += loss.item()

                    # Same metric as in training. accuracy_score takes no
                    # `average` argument; passing one raised a TypeError here.
                    val_accuracy += _batch_accuracy(logits, labels)

            avg_val_accuracy = val_accuracy / len(validation_dataloader)
            print(f"Epoch {epoch+1}/{epochs}, Validation Loss: {val_loss/len(validation_dataloader)}, Validation Accuracy: {avg_val_accuracy}")

    return model

In [164]:
from transformers import RobertaTokenizer
from torch.utils.data import Dataset, DataLoader, RandomSampler

class PersuasionTechniqueDataset(Dataset):
    """
    Dataset pairing each sentence with its SRL tensors and label vector.

    Args:
    - sentences: pandas Series or list of sentence strings. A Series is
      converted to a list so items are looked up by position, not index label.
    - predicates, arg0, arg1: Indexable containers (e.g. tensors) holding the
      SRL features; item ``idx`` belongs to sentence ``idx``.
    - labels: pandas DataFrame/Series (rows read by position via ``.iloc``) or
      any other container indexable by position.
    - tokenizer: Object exposing ``encode_plus`` as provided by Hugging Face
      tokenizers.

    Each item is the tuple
    ``(input_ids, attention_mask, predicate_tensor, arg0_tensor, arg1_tensor, label)``.
    """

    def __init__(self, sentences, predicates, arg0, arg1, labels, tokenizer):
        self.sentences = sentences.tolist() if isinstance(sentences, pd.Series) else sentences
        self.predicates = predicates
        self.arg0 = arg0
        self.arg1 = arg1
        self.labels = labels
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        # Tokenize the sentence, padded/truncated to a fixed length of 256
        encoded_sentence = self.tokenizer.encode_plus(
            self.sentences[idx], add_special_tokens=True, max_length=256,
            padding='max_length', return_tensors='pt', truncation=True
        )
        # Drop only the leading batch dimension added by return_tensors='pt'
        input_ids = encoded_sentence['input_ids'].squeeze(0)
        attention_mask = encoded_sentence['attention_mask'].squeeze(0)

        # Get SRL tensors
        predicate_tensor = self.predicates[idx]
        arg0_tensor = self.arg0[idx]
        arg1_tensor = self.arg1[idx]

        # Get labels. For pandas input, go through a NumPy array explicitly:
        # converting a Series directly depends on how a string-indexed Series
        # behaves as a sequence.
        if hasattr(self.labels, 'iloc'):
            label_row = self.labels.iloc[idx]
            if hasattr(label_row, 'to_numpy'):
                label_row = label_row.to_numpy()
        else:
            label_row = self.labels[idx]
        label = torch.as_tensor(label_row)

        return input_ids, attention_mask, predicate_tensor, arg0_tensor, arg1_tensor, label

In [165]:
from sklearn.model_selection import train_test_split

# Assuming your data is loaded into the variables: X, predicates_tensor, arg0_tensor, arg1_tensor, and y

# Split all five aligned containers with the same random partition
# (80% train, 20% test; fixed seed for reproducibility)
X_train, X_test, predicates_train, predicates_test, arg0_train, arg0_test, arg1_train, arg1_test, y_train, y_test = \
    train_test_split(X, predicates_tensor, arg0_tensor, arg1_tensor, y, test_size=0.2, random_state=42)

# RoBERTa tokenizer for the sentence branch. It gets its own name on purpose:
# the global `tokenizer` is the BERT tokenizer read by get_word_embedding, and
# rebinding it here would make a later re-run of the SRL embedding cell use
# the wrong vocabulary.
roberta_tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Create datasets for training and testing sets
train_dataset = PersuasionTechniqueDataset(X_train, predicates_train, arg0_train, arg1_train, y_train, roberta_tokenizer)
test_dataset = PersuasionTechniqueDataset(X_test, predicates_test, arg0_test, arg1_test, y_test, roberta_tokenizer)

# Create dataloaders for training and testing sets
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=8)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=8)

# Drop the split names from the namespace; the datasets keep their own references
del X_train, X_test, predicates_train, predicates_test, arg0_train, arg0_test, arg1_train, arg1_test, y_train, y_test

NameError: ignored

In [None]:
# 3. Call the Training Function
trained_model = train_joint_model(train_loader, epochs=10, learning_rate=3e-4)

In [None]:
def predict(model, input_ids, attention_mask, predicates_tensor, arg0_tensor, arg1_tensor, threshold=0.5):
    """
    Run the model in evaluation mode and binarize its outputs.

    Args:
    - model (torch.nn.Module): The trained model; it is moved to the selected
      device and switched to eval mode as a side effect.
    - input_ids (torch.Tensor): Tensor of token ids.
    - attention_mask (torch.Tensor): Tensor indicating which tokens are padding and which aren't.
    - predicates_tensor (torch.Tensor): Tensor of predicates embeddings.
    - arg0_tensor (torch.Tensor): Tensor of arg0 embeddings.
    - arg1_tensor (torch.Tensor): Tensor of arg1 embeddings.
    - threshold (float): A label is predicted (1.0) when its sigmoid
      probability is strictly greater than this value.

    Returns:
    - predictions (torch.Tensor): Float tensor of 0.0/1.0 with the shape of the
      model's logits, on the device used for inference.
    """
    # CUDA when available, CPU otherwise
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.to(target_device)
    model_inputs = [
        tensor.to(target_device)
        for tensor in (input_ids, attention_mask, predicates_tensor, arg0_tensor, arg1_tensor)
    ]

    # Inference only: eval mode, no gradient tracking
    model.eval()
    with torch.no_grad():
        probabilities = torch.sigmoid(model(*model_inputs))
        return (probabilities > threshold).float()

In [None]:
# Assuming `model` is your loaded trained JointClassifierModel

# Extracting a few samples from the test_loader (first batch only)
sample_data = next(iter(test_loader))
# NOTE(review): this unpacking rebinds the module-level names predicates_tensor,
# arg0_tensor and arg1_tensor (the dataset-wide SRL tensors built earlier) to a
# single batch. Re-running the train/test split cell after this one would then
# see batch-sized tensors instead of the full data.
input_ids, attention_mask, predicates_tensor, arg0_tensor, arg1_tensor, labels = sample_data

labels.shape

In [None]:
# Using the predict function to get predictions
predictions = predict(trained_model, input_ids, attention_mask, predicates_tensor, arg0_tensor, arg1_tensor)

# Converting tensor predictions to numpy for display
predictions_np = predictions.cpu().numpy()

predictions_np.shape


In [142]:
predictions_np

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]], dtype=float32)

In [118]:
print(f"Allocated Memory: {torch.cuda.memory_allocated() / (1024**2)} MB")
print(f"Reserved Memory: {torch.cuda.memory_reserved() / (1024**2)} MB")

Allocated Memory: 973.13330078125 MB
Reserved Memory: 14156.0 MB


In [113]:
def tensor_memory(tensor):
    """Return the size in bytes of the tensor's elements (element size x element count)."""
    bytes_per_element = tensor.element_size()
    element_count = tensor.nelement()
    return bytes_per_element * element_count

print(f"Tensor Memory Usage: {tensor_memory(arg1_embeddings) / (1024**2)} MB")

Tensor Memory Usage: 0.005859375 MB


In [101]:
torch.cuda.empty_cache()

In [117]:
!nvidia-smi

Wed Oct  4 20:13:55 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   64C    P0    30W /  70W |  15085MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import sys
def sizeof_fmt(num, suffix='B'):
    ''' by Fred Cirera,  https://stackoverflow.com/a/1094933/1870254, modified

    Format a byte count with binary (1024-based) unit prefixes, one decimal
    place, e.g. 1536 -> "1.5 KiB". Values of 1024**8 and above use "Yi".
    '''
    binary_prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    position = 0
    # Divide by 1024 until the value fits under the current prefix
    while position < len(binary_prefixes):
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, binary_prefixes[position], suffix)
        num /= 1024.0
        position += 1
    # Larger than every listed prefix
    return "%.1f %s%s" % (num, 'Yi', suffix)

# Print every name in the current namespace with its sys.getsizeof size,
# largest first (sorted on the negated size).
# NOTE(review): sys.getsizeof reports the size of the Python object itself;
# for torch tensors this presumably excludes the underlying storage, so large
# tensors may look tiny here — tensor_memory() is the better measure for them.
for name, size in sorted(((name, sys.getsizeof(value)) for name, value in list(
                          locals().items())), key= lambda x: -x[1]):
    print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))