In [1]:
import pandas as pd
import numpy as np
from collections import Counter
import json
from tqdm import tqdm
import random
import pickle
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data import SequentialSampler, TensorDataset, RandomSampler
from torch.cuda.amp import GradScaler
from torch.cuda.amp import autocast
from transformers import RobertaTokenizer, RobertaConfig, RobertaModel
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
import torch
import torch.nn as nn
from datasets import load_dataset
import time
from torch.utils.data import DataLoader
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW
import json


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
val_cats = json.load(open("/Users/aayush/Documents/IIITD/Assignments/NLP/Project/Final_Project/Project_Data/value-categories_aayush.json"))
tags = ["training", "validation"]
data_dict = {}
ratio_hard = 0.5

In [3]:
for tag in tags:
    data_dict[tag] = {}
    arg_df = pd.read_csv("/Users/aayush/Documents/IIITD/Assignments/NLP/Project/Final_Project/Project_Data/arguments-"+tag+".tsv", sep="\t")
    label_df = pd.read_csv("/Users/aayush/Documents/IIITD/Assignments/NLP/Project/Final_Project/Project_Data/labels-"+tag+".tsv", sep="\t")
    level1_label_df = pd.read_csv("/Users/aayush/Documents/IIITD/Assignments/NLP/Project/Final_Project/Project_Data/level1-labels-"+tag+".tsv", sep="\t")
    merged_df = arg_df.merge(label_df, how="inner", on ="Argument ID").merge(level1_label_df, 
                                                                             how="inner", 
                                                                             on ="Argument ID").reset_index(drop=True)
    print(arg_df.shape, label_df.shape, level1_label_df.shape, merged_df.shape)
#     merged_df.head(2)

    labels = [i for i in label_df.columns if i != 'Argument ID']
    level_1 = [i for i in level1_label_df.columns if i != 'Argument ID']
#     len(labels), len(level_1)

    option_map = {}
    for ix, row in merged_df.iterrows():
        options = {}
        used = []
        for l in labels:
            tmp = {}
            if row[l] == 1:
                for l1 in val_cats[l].keys():
                    if row[l1] == 1:
                        tmp[l1] = val_cats[l][l1]
                        used.extend([l, l1])
                options[l] = tmp
        all_tagged = set([c for c in labels + level_1 if row[c] == 1])
        assert len(all_tagged.difference(set(used))) == 0
        option_map[row["Argument ID"]] = options
#     len(option_map)

    for ix, row in tqdm(merged_df.iterrows()):
        dct = {"id": row["Argument ID"], "stance": row["Stance"], "premise": row["Premise"], 
               "conclusion": row["Conclusion"], "labels": list(option_map[row["Argument ID"]].keys())}
        stance = " against. " if dct["stance"] == "against" else " in favor of. "
        dct["sent"] = dct["premise"] + stance + dct["conclusion"]
        dct["opts"] = list(set([k2 + " by " + i for k, v in option_map[dct["id"]].items() for k2, v2 in v.items() for i in v2]))

        na_options_hard, na_options_easy = [], []
        for k, v in option_map[dct["id"]].items():
            l1_present = set(v.keys())
            l1_all = set(val_cats[k].keys())
            assert len(l1_all) >= len(l1_present)
            l1_not_present = l1_all.difference(l1_present)
            na_options_hard.extend([i + " by " + j for i in list(l1_not_present) for j in val_cats[k][i]])

        na_options_easy = [k + " by " + j for l in set(labels).difference(set(dct["labels"])) 
                           for k, v in val_cats[l].items() 
                           for j in v]
        random.shuffle(na_options_hard)
        random.shuffle(na_options_easy)

        hard_opts = na_options_hard[:int(len(dct["opts"])*ratio_hard)]
        easy_opts = na_options_easy[:(len(dct["opts"]) - len(hard_opts))]
        assert len(hard_opts) + len(easy_opts) == len(dct["opts"])
        dct["adverse_hard_opts"], dct["adverse_easy_opts"] = hard_opts, easy_opts
        data_dict[tag][row["Argument ID"]] = dct
#     break
        

(5393, 4) (5393, 21) (5393, 55) (5393, 78)


5393it [00:00, 14777.21it/s]


(1896, 4) (1896, 21) (1896, 55) (1896, 78)


1896it [00:00, 18297.11it/s]


In [4]:
data = data_dict['training']

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

class ArgumentDataset(Dataset):
    def __init__(self, data, tokenizer, max_length):
        self.tokenizer = tokenizer
        self.input_texts = []
        self.target_texts = []

        for item in data.values():
            # id = list(item.keys())[0]
            id = item['id']
            premise = item['premise']
            stance = "against" if item['stance'] == "against" else "in favor of"
            labels = ', '.join(item['labels'])
            conclusion = item['conclusion']
            input_text = f"Premise: {premise} Stance: {stance} Labels: {labels} Conclusion:"
            target_text = conclusion
            
            self.input_texts.append(input_text)
            self.target_texts.append(target_text)

        # Ensure to handle padding here correctly
        self.inputs = tokenizer(self.input_texts, padding='max_length', truncation=True, max_length=max_length, return_tensors="pt")
        self.targets = tokenizer(self.target_texts, padding='max_length', truncation=True, max_length=max_length, return_tensors="pt")

    def __len__(self):
        return len(self.input_texts)

    def __getitem__(self, idx):
        input_ids = self.inputs['input_ids'][idx]
        attention_mask = self.inputs['attention_mask'][idx]
        target_ids = self.targets['input_ids'][idx]
        # Set padding parts in target_ids to -100 so they are not considered in loss calculation
        target_ids[target_ids == tokenizer.pad_token_id] = -100
        return input_ids, attention_mask, target_ids

# Initialize Dataset and DataLoader
tokenizer.pad_token = tokenizer.eos_token
dataset = ArgumentDataset(data, tokenizer, max_length=512)
loader = DataLoader(dataset, batch_size=2, shuffle=True)

# Check dimensions before training
for input_ids, attention_mask, labels in loader:
    print(f'Input IDs Shape: {input_ids.shape}')
    print(f'Labels Shape: {labels.shape}')
    break  # Break after the first batch to check


Input IDs Shape: torch.Size([2, 512])
Labels Shape: torch.Size([2, 512])


In [5]:
for item in data.values():
    id = list(item.keys())[0]
    print(id)
    break

id


In [6]:
# for i in data.values():
#     print(i['A01002']['premise'])
#     break
# print(len(data.values()))
for item in data.values():
    # id = list(item.keys())[0]
    id = item['id']
    premise = item['premise']
    # print(premise)
    stance = "against" if item['stance'] == "against" else "in favor of"
    labels = ', '.join(item['labels'])
    conclusion = item['conclusion']
    input_text = f"Premise: {premise} Stance: {stance} Labels: {labels} Conclusion:"
    target_text = conclusion
    
    # self.input_texts.append(input_text)
    # self.target_texts.append(target_text)

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "mps")
model = GPT2LMHeadModel.from_pretrained('gpt2').to(device)
model.train()

optimizer = AdamW(model.parameters(), lr=5e-5)
num_epochs = 5

for epoch in range(num_epochs):
    for i, (input_ids, attention_mask, labels) in enumerate(loader):
        input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        if(i%100==0):
            print(f"{i}/{len(loader)}")
    ### Print the loss after each epoch
    print(f'Epoch {epoch+1}/{num_epochs} -> Loss: {loss.item()}')

# Save the fine-tuned model
model.save_pretrained('/Users/aayush/Documents/IIITD/Assignments/NLP/Project/Final_Project/Project_Data/conclusion_generation.pth')





0/2697
100/2697
200/2697
300/2697
400/2697
500/2697
600/2697
700/2697
800/2697


In [None]:
model.eval()
prompt = "Premise: [Your premise] Stance: [in favor of/against] Labels: [Your labels] Conclusion:"
inputs = tokenizer(prompt, return_tensors="pt").to(device)
generated_ids = model.generate(inputs['input_ids'], attention_mask=inputs['attention_mask'], max_length=100)
conclusion = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(conclusion)


In [None]:
import torch
from torch.utils.data import DataLoader
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from nltk.translate.bleu_score import corpus_bleu
import json
from tqdm import tqdm

# Assuming the class ArgumentDataset and model training code have been defined and executed as previously discussed

# Function to generate text from the model
def generate_text(model, tokenizer, device, loader):
    model.eval()
    predictions = []
    actuals = []
    with torch.no_grad():
        for input_ids, attention_mask, labels in tqdm(loader, desc="Generating text"):
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            
            # Generate predicted token ids
            outputs = model.generate(input_ids, attention_mask=attention_mask, max_length=100)
            pred_text = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
            actual_text = [tokenizer.decode(label[label != -100], skip_special_tokens=True) for label in labels.cpu()]
            
            predictions.extend(pred_text)
            actuals.extend([[actual.split()] for actual in actual_text])  # BLEU expects a list of tokens for the references

    return predictions, actuals

# Load validation data
val_data = data_dict['validation']  # Assuming data_dict contains the validation data
val_dataset = ArgumentDataset(val_data, tokenizer, max_length=512)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False)

# Generate predictions and actuals
predictions, actuals = generate_text(model, tokenizer, device, val_loader)

# Calculate BLEU score
bleu_score = corpus_bleu(actuals, [pred.split() for pred in predictions])
print(f"BLEU Score on the Validation Set: {bleu_score:.4f}")
