In [None]:
import numpy as np
import pandas as pd
from datasets import load_dataset
import itertools

# Load every split of the FeedbackQA dataset from the HuggingFace Hub.
dataset = load_dataset("McGill-NLP/feedbackQA")

import os
# An empty CUDA_VISIBLE_DEVICES is set before torch is imported.
# NOTE(review): later cells request 'cuda:0'/'cuda:1' and re-assign this
# variable; changing it after torch has initialised CUDA presumably has no
# effect in the same kernel — confirm the intended device setup.
os.environ["CUDA_VISIBLE_DEVICES"]=""
import torch
# Displays whether torch can see a CUDA device under the setting above.
torch.cuda.is_available()

In [None]:
# Ordinal encoding of the textual ratings (higher is better).
rating_class = {'Excellent':3 , 'Acceptable':2 , 'Could be Improved':1, 'Bad': 0}

def process_df(df):
    """Select one (rating, explanation) feedback pair per row and derive label columns.

    Each cell of ``df['feedback']`` must be a mapping with two parallel
    sequences under the keys ``'rating'`` and ``'explanation'``.

    Selection rule (per row):
      1. the first of the first two feedback entries whose rating is neither
         'Excellent' nor 'Acceptable';
      2. otherwise a uniformly random entry (``np.random.choice``; seed numpy
         beforehand for reproducible sampling).

    The pairs are sampled directly instead of being joined with ``"___"`` and
    split again, so explanations that themselves contain ``"___"`` are kept
    intact, and rows with a single feedback entry no longer raise IndexError.

    Columns added in place (``df`` itself is mutated and returned):
      * ``list_feedback``    - list of ``"<rating>___<explanation>"`` strings
      * ``sampled_feedback`` - ``[rating, explanation]`` of the chosen entry
      * ``rating_class``     - integer label from ``rating_class``
      * ``rating``           - chosen rating text
      * ``explanation``      - chosen explanation text

    Raises:
        ValueError: if a row has no feedback entries at all.
        KeyError: if a chosen rating is not a key of ``rating_class``.
    """
    positive_ratings = ('Excellent', 'Acceptable')

    def _pairs(feedback):
        # Parallel rating/explanation sequences -> list of (rating, explanation).
        return list(zip(feedback['rating'], feedback['explanation']))

    def _sample(feedback):
        pairs = _pairs(feedback)
        if not pairs:
            raise ValueError("process_df: row has no feedback entries to sample from")
        # Only the first two entries are inspected for a critical rating.
        for rating, explanation in pairs[:2]:
            if rating not in positive_ratings:
                return [rating, explanation]
        # No critical feedback among the first two: fall back to a random entry.
        chosen = pairs[np.random.choice(len(pairs))]
        return [chosen[0], chosen[1]]

    df['list_feedback'] = df['feedback'].apply(lambda x: [r + "___" + e for r, e in _pairs(x)])
    df['sampled_feedback'] = df['feedback'].apply(_sample)
    df['rating_class'] = df['sampled_feedback'].apply(lambda x: rating_class[x[0]])
    df['rating'] = df['sampled_feedback'].apply(lambda x: x[0])
    df['explanation'] = df['sampled_feedback'].apply(lambda x: x[1])
    return df

In [None]:
# Convert each HF split to a DataFrame and attach the sampled-feedback columns.
train_df = process_df(pd.DataFrame(dataset['train']))
val_df = process_df(pd.DataFrame(dataset['validation']))
test_df = process_df(pd.DataFrame(dataset['test']))

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader

# Load model from HuggingFace Hub
# NOTE(review): the cache_dir below is an absolute, machine-specific path.
bert_chkpt = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(bert_chkpt,cache_dir='/home/jupyter/Ravi_new/HF_cache')
model = AutoModelForCausalLM.from_pretrained(bert_chkpt,cache_dir='/home/jupyter/Ravi_new/HF_cache')

# Inspect which special tokens this tokenizer defines.
tokenizer.all_special_tokens

# Reuse EOS as the padding token on THIS tokenizer instance only; tokenizers
# created later by other from_pretrained calls do not inherit the assignment.
tokenizer.pad_token = tokenizer.eos_token

train_df.head()

# Peek at a single raw answer.
train_df['answer'].loc[0]

# Scratch check: tokenize a string that has the EOS token text embedded in the middle.
tokenizer('Hello, how are you doing?'+ f" {tokenizer.eos_token} " + "Hemlooooo",add_special_tokens=True,return_tensors='pt', return_length=1)

import nltk
nltk.download('punkt')

from nltk import tokenize as nltk_tokenizer
# Number of sentences NLTK finds in the first training answer.
len(nltk_tokenizer.sent_tokenize(train_df['answer'].loc[0]))

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader

# Load model from HuggingFace Hub
# Only the checkpoint id is (re)defined here; the loads below are left
# commented out because refine() and a later cell load their own copies.
bert_chkpt = "meta-llama/Llama-2-7b-chat-hf"
# tokenizer = AutoTokenizer.from_pretrained(bert_chkpt,cache_dir='/home/jupyter/Ravi_new/HF_cache')
# model = AutoModelForCausalLM.from_pretrained(bert_chkpt,cache_dir='/home/jupyter/Ravi_new/HF_cache')

In [None]:
import tqdm
from torch.utils.data import Dataset

class feedback_QA_dataset(Dataset):
    """Tokenized refinement prompts built from a processed FeedbackQA frame.

    For every row whose ``rating_class`` is not 3, a prompt containing the
    question, the original answer and the sampled feedback explanation is
    tokenized and padded to exactly ``max_length`` tokens.

    Padding is applied on the LEFT. The prompts are fed to a decoder-only
    model's ``generate``, which continues from the last position of each row;
    with right-padding the continuation would start after a run of pad tokens
    instead of directly after the prompt.

    Rows are skipped when ``rating_class == 3`` or when the tokenized prompt is
    longer than ``max_length``; the number of skipped rows is kept in
    ``self.skipped``.

    Each item is a dict of tensors:
      * ``input``          - (max_length,) token ids, left-padded with the EOS id
      * ``attention_mask`` - (max_length,) 0 on padding, tokenizer mask elsewhere
      * ``id``             - scalar, POSITIONAL row number in ``df`` (``iloc`` index)

    Args:
        df: frame with ``rating_class``, ``question``, ``answer`` and
            ``explanation`` columns.
        tokenizer: callable returning ``input_ids`` and ``attention_mask``
            lists, and exposing ``eos_token_id``.
        max_length: fixed sequence length of every item.
    """

    PROMPT = "I will give you a question, an initial answer to the question, and feedback critiquing that answer. Based on the feedback, provide a refined answer. Do NOT generate anything other than the refined answer."

    def __init__(self,df,tokenizer,max_length=2048):
        self.df = df
        self.max_len = max_length
        self.data = []
        self.tokenizer = tokenizer
        self.skipped = 0

        prompt = self.PROMPT
        pad_id = self.tokenizer.eos_token_id

        for i in range(len(self.df)):
            # Fetch the row once instead of re-indexing the frame per column.
            row = self.df.iloc[i]

            if row['rating_class']==3:
                self.skipped += 1
                continue

            question = row['question']
            answer = row['answer']
            feedback = row['explanation']

            tok_input = self.tokenizer(f"{prompt}\nQuestion:{question}\nAnswer:{answer}\n\nFeedback:{feedback}\n\nRefined answer: ",
                                  add_special_tokens=True
                                 )
            input_ids = list(tok_input['input_ids'])
            attention_mask = list(tok_input['attention_mask'])

            if len(input_ids) > self.max_len:
                self.skipped += 1
                continue

            pad_len = self.max_len - len(input_ids)

            # Left-pad so the real prompt ends at the final position.
            self.data.append({
                'input': torch.tensor([pad_id]*pad_len + input_ids),
                'attention_mask': torch.tensor([0]*pad_len + attention_mask),
                'id': torch.tensor(i),
            })

    def __len__(self):
        """Number of rows that survived the filters."""
        return len(self.data)

    def __getitem__(self,idx):
        """Return the pre-built tensor dict at position ``idx``."""
        return self.data[idx]

# feedback_QA_dataset requires the tokenizer as its second positional argument;
# omitting it raises TypeError. The notebook-level `tokenizer` is passed here.
train_dataset = feedback_QA_dataset(train_df,tokenizer)
valid_dataset = feedback_QA_dataset(val_df,tokenizer)
test_dataset = feedback_QA_dataset(test_df,tokenizer)

# shuffle=False keeps batches in frame order.
train_DL = DataLoader(train_dataset,batch_size=3,shuffle=False)
valid_DL = DataLoader(valid_dataset,batch_size=3,shuffle=False)
test_DL = DataLoader(test_dataset,batch_size=1,shuffle=False)

In [None]:

def refine(bert_chkpt,val_df,device='cuda:1',cache_dir='/home/jupyter/Ravi_new/HF_cache',batch_size=3,max_new_tokens=500):
    """Generate a refined answer for every eligible row of ``val_df``.

    Loads the tokenizer and causal LM for ``bert_chkpt``, builds a
    ``feedback_QA_dataset`` over ``val_df`` and samples one continuation per
    prompt. The generated text is written into ``val_df['refined_answer']``
    for the rows the dataset kept; other rows keep their existing value.

    Args:
        bert_chkpt: HuggingFace checkpoint id or path of a causal LM.
        val_df: processed frame (see ``feedback_QA_dataset`` for the columns
            it needs). Mutated in place and returned.
        device: torch device the model and batches are moved to.
        cache_dir: HuggingFace cache directory for the downloads.
        batch_size: prompts per ``generate`` call.
        max_new_tokens: generation budget per prompt.

    Returns:
        ``val_df`` with the ``refined_answer`` column filled in.
    """
    tokenizer = AutoTokenizer.from_pretrained(bert_chkpt,cache_dir=cache_dir)
    model = AutoModelForCausalLM.from_pretrained(bert_chkpt,cache_dir=cache_dir)

    # Only the validation split is used here, so only it is tokenized.
    valid_dataset = feedback_QA_dataset(val_df,tokenizer)
    valid_DL = DataLoader(valid_dataset,batch_size=batch_size,shuffle=False)

    # Create the output column if the caller has not done so already.
    if 'refined_answer' not in val_df.columns:
        val_df['refined_answer'] = ['None']*len(val_df)
    answer_col = val_df.columns.get_loc('refined_answer')

    model = model.to(device)
    model.eval()
    with torch.no_grad():
        for b in valid_DL:
            input_ids = b['input'].to(device)
            out = model.generate(inputs=input_ids,
                                 attention_mask=b['attention_mask'].to(device),
                                 max_new_tokens=max_new_tokens,
                                 num_return_sequences=1,
                                 do_sample=True,
                                 # The dataset pads with EOS, so tell generate to do the same.
                                 pad_token_id=tokenizer.eos_token_id
                                )

            # generate returns prompt + continuation; everything past the prompt
            # length is newly generated. Slicing avoids searching the decoded
            # text for the 'Refined answer: ' marker, which raised IndexError
            # whenever the marker did not survive the decode round-trip.
            new_tokens = out[:, input_ids.shape[1]:]
            refined = tokenizer.batch_decode(new_tokens, skip_special_tokens=True)

            # b['id'] holds positional row numbers, hence iloc. A single
            # indexer call writes into val_df itself, unlike the chained
            # val_df[col].loc[...] form, which may write to a temporary copy.
            val_df.iloc[b['id'].tolist(), answer_col] = refined
    return val_df


In [None]:
import accelerate

In [None]:
# Placeholder column that refine() overwrites for the rows it processes.
val_df['refined_answer'] = ['None']*len(val_df)
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0,1"

# from accelerate import notebook_launcher
# notebook_launcher calls refine(*args); refine takes (bert_chkpt, val_df), so
# passing a third positional argument raised TypeError.
# NOTE(review): refine() pins its own device rather than using an Accelerator,
# and results computed in launched worker processes presumably do not propagate
# back to this kernel's val_df — confirm before relying on this cell.
accelerate.notebook_launcher(refine,(bert_chkpt,val_df),num_processes=2)

In [None]:
# Notebook-level copy of the setup that refine() performs internally, so the
# following cells can experiment with generation interactively.
device = 'cuda:1'#accelerator.device
    
# A fresh tokenizer/model pair replaces any notebook-level ones loaded earlier.
tokenizer = AutoTokenizer.from_pretrained(bert_chkpt,cache_dir='/home/jupyter/Ravi_new/HF_cache')
model = AutoModelForCausalLM.from_pretrained(bert_chkpt,cache_dir='/home/jupyter/Ravi_new/HF_cache')

# Tokenize every split with the tokenizer created above.
train_dataset = feedback_QA_dataset(train_df,tokenizer)
valid_dataset = feedback_QA_dataset(val_df,tokenizer)
test_dataset = feedback_QA_dataset(test_df,tokenizer)

# shuffle=False keeps batches in frame order.
train_DL = DataLoader(train_dataset,batch_size=3,shuffle=False)
valid_DL = DataLoader(valid_dataset,batch_size=3,shuffle=False)
test_DL = DataLoader(test_dataset,batch_size=1,shuffle=False)

In [None]:
# model,valid_DL = accelerator.prepare(model,valid_DL)
model = model.to(device)
model.eval()

# Number of sampled continuations per prompt; also drives the output columns.
num_return_sequences = 2

for i in range(num_return_sequences):
    val_df[f'refined_answer_{i}'] = ['None']*len(val_df)
with torch.no_grad():
    for b in valid_DL:
        out = model.generate(inputs=b['input'].to(device),
                             attention_mask=b['attention_mask'].to(device),
                             max_new_tokens=50,
                             # Use the variable so the column loop below always
                             # matches the number of sequences actually returned.
                             num_return_sequences=num_return_sequences,
                             do_sample=True
                            )

        # Keep only the text after the prompt marker and drop EOS markers.
        l = [a.split('Refined answer: ')[1].replace('</s>','') for a in tokenizer.batch_decode(out)]
        for i in range(num_return_sequences):
            # The i-th stride of `l` is taken as the i-th sample of every prompt
            # in the batch. A single .loc call writes into val_df itself; the
            # chained val_df[col].loc[...] form may write to a temporary copy.
            val_df.loc[b['id'].tolist(), f'refined_answer_{i}'] = l[i::num_return_sequences]
        # Smoke test: only the first batch is generated.
        break

In [None]:
# Inspect the two sampled refinements stored for the row labelled 6.
val_df.loc[6][['refined_answer_0','refined_answer_1']].values

In [None]:
# Scratch check of extended-slice semantics (start at index 2, step 2).
l = [0,1,2,3,4,5]
l[2::2]

In [None]:
# Full validation run: refine eligible rows, then persist the result as CSV.
val_df = refine(bert_chkpt,val_df)
val_df.to_csv('val_refined.csv')

In [None]:
# Displays the whole frame.
val_df

In [None]:
# NOTE(review): torch is already imported at this point; re-assigning
# CUDA_VISIBLE_DEVICES presumably does not change the devices torch sees — confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = "0,1"
torch.cuda.is_available()

In [None]:
# Displays the whole frame.
val_df

In [None]:
# Decode the third sequence of the most recent `out` (left over from an
# earlier generation cell) with EOS markers removed.
print(tokenizer.decode(out[2]).replace('</s>',''))#.replace('<unk>',''))

In [None]:
# Scratch check of what token id 0 decodes to.
tokenizer.decode([0,0,0])

In [None]:
# AutoModel is used below but was never imported before this cell, which
# raised NameError on a fresh kernel; import it alongside the existing import.
from transformers import AutoModel, BartForConditionalGeneration

device = 'cuda:0'

# Bare encoder (no LM head) for the pooling experiment in this cell.
model = AutoModel.from_pretrained(bert_chkpt).to(device)

def mean_pooling(model_output, attention_mask):
    """Mask-weighted mean of token embeddings, L2-normalised per row.

    Args:
        model_output: indexable whose element 0 holds the token embeddings.
        attention_mask: mask broadcast over the embedding dimension; positions
            with 0 do not contribute to the mean.

    Returns:
        Tensor of pooled embeddings with unit L2 norm along dim 1.
    """
    hidden = model_output[0]
    # Give the mask a trailing axis and stretch it to the embeddings' shape.
    weights = attention_mask.unsqueeze(-1).expand(hidden.size()).float()
    masked_total = torch.sum(hidden * weights, 1)
    # Clamp guards against division by zero for fully masked rows.
    token_count = torch.clamp(weights.sum(1), min=1e-9)
    pooled = masked_total / token_count
    return F.normalize(pooled, p=2, dim=1)

# Batch counter used to stop the smoke loop after six batches.
j = 0

with torch.no_grad():
    for b in train_DL:
        # NOTE(review): this reads 'context_w_feedback', 'context_w_feedback_attn'
        # and 'feedback_pool_mask' from the batch, but feedback_QA_dataset items
        # only carry 'input', 'attention_mask' and 'id' — this looks like it
        # expects a different dataset; confirm.
        out = mean_pooling( model(input_ids=b['context_w_feedback'].to(device), attention_mask=b['context_w_feedback_attn'].to(device)) , b['feedback_pool_mask'].to(device))
        print(out.shape)
        print('----------------------------')
        j+=1
        if j>5:
            break

# Drop the notebook-level reference to the encoder.
del model

In [None]:
# Scratch check of Tensor.repeat: tile a (1, 2, 5) tensor twice along dim 0.
t = torch.tensor([[[1,2,3,4,5],[6,7,8,9,0]]])
t.repeat(2,1,1)

In [None]:
# `nn` is not imported anywhere else in the notebook; import it next to its use.
from torch import nn

class classifier(nn.Module):
    """Rating classifier: pretrained encoder -> masked mean pooling -> MLP head.

    Args:
        model_chkpt: checkpoint id/path passed to ``AutoModel.from_pretrained``.
        device: device the encoder and the input batches are moved to.
        inp_dim: width of the pooled embedding fed to the head. ``None``
            (default) takes it from the encoder's ``config.hidden_size`` and
            falls back to 768 when the config does not expose one, so the head
            always matches the encoder that was actually loaded.
        hidden_dims: non-empty sequence of hidden widths for the MLP head.
        num_classes: number of output classes.
        use_norm: L2-normalise the pooled embedding before the head.

    Raises:
        ValueError: if ``hidden_dims`` is missing or empty.
    """

    def __init__(self, model_chkpt, device='cuda:0', inp_dim=None, hidden_dims=None, num_classes=4, use_norm=False):
        super().__init__()

        # Validate before the (expensive) encoder load.
        if not hidden_dims:
            raise ValueError("classifier: hidden_dims must be a non-empty sequence of layer widths")

        self.device = device
        self.bert_model = AutoModel.from_pretrained(model_chkpt).to(device)

        if inp_dim is None:
            encoder_config = getattr(self.bert_model, 'config', None)
            inferred_dim = getattr(encoder_config, 'hidden_size', None)
            inp_dim = inferred_dim if inferred_dim is not None else 768

        self.use_norm = use_norm
        self.inp_layer = nn.Linear(inp_dim,hidden_dims[0])

        # Linear -> Dropout -> ReLU for each consecutive pair of hidden widths.
        hidden_layers = []
        for i in range(len(hidden_dims)-1):
            hidden_layers.append(nn.Linear(hidden_dims[i],hidden_dims[i+1]))
            hidden_layers.append(nn.Dropout(p=0.2))
            hidden_layers.append(nn.ReLU())
        self.layers = nn.Sequential(*hidden_layers)

        self.out_layer = nn.Linear(hidden_dims[-1],num_classes)

    def mean_pooling(self,model_output,attention_mask):
        """Mask-weighted mean over dim 1 of ``model_output[0]`` (not normalised)."""
        token_embeddings = model_output[0] #First element of model_output contains all token embeddings
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        # Clamp guards against division by zero for fully masked rows.
        se = torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
        return se

    def forward(self, b):
        """Classify one batch.

        ``b`` must provide ``context_w_feedback`` (token ids),
        ``context_w_feedback_attn`` (encoder attention mask) and
        ``feedback_pool_mask`` (positions included in the pooled mean).
        When ``rating_class`` is present the cross-entropy loss is returned too.

        Returns:
            dict with ``logits``, ``class_probs`` and, if labels were supplied,
            ``CE_loss``.
        """
        y = self.mean_pooling( self.bert_model(input_ids=b['context_w_feedback'].to(self.device), attention_mask=b['context_w_feedback_attn'].to(self.device)),
                               b['feedback_pool_mask'].to(self.device))
        if self.use_norm:
            y = F.normalize(y,p=2,dim=-1)
        y = self.inp_layer(y)
        y = F.relu(y)
        y = self.layers(y)
        y = self.out_layer(y)

        return_dict = {}

        return_dict['logits'] = y
        return_dict['class_probs'] = F.softmax(y,dim=-1)
        # Labels are optional so the model can also be used for pure inference.
        if 'rating_class' in b:
            return_dict['CE_loss'] = F.cross_entropy(y,b['rating_class'].to(self.device))
        return return_dict
        
        

In [None]:
def train(classifier,train_dl,valid_dl,epochs,optimizer,PATIENCE=20,save_dir=None):
    """Train ``classifier`` with early stopping on the validation loss.

    Each epoch runs one pass over ``train_dl``, optionally checkpoints, then
    calls ``validate``. Training stops early once the validation loss has
    failed to improve ``PATIENCE`` epochs in a row.

    Args:
        classifier: module whose ``forward(batch)`` returns a dict containing
            a scalar ``'CE_loss'`` tensor.
        train_dl: iterable of training batches.
        valid_dl: sized iterable of validation batches (``len`` is used).
        epochs: maximum number of epochs.
        optimizer: optimizer over the classifier's parameters.
        PATIENCE: consecutive non-improving epochs tolerated before stopping.
        save_dir: directory for ``Epoch_<E>_model_chkpt.pth.tar`` and
            ``best_model_chkpt.pth.tar``; created (including parents) when
            missing. ``None`` disables checkpointing.

    Returns:
        ``(train_loss_arr, valid_loss_arr)``: the running mean of the training
        loss after every step (accumulated across ALL epochs, not reset per
        epoch) and the mean validation loss per epoch.
    """
    if save_dir is not None:
        # makedirs also handles nested paths and an already existing directory.
        os.makedirs(save_dir, exist_ok=True)

    loss_acc = 0
    num_batches = 0
    total_steps = 0
    best_valid_loss = np.inf
    patience = PATIENCE

    train_loss_arr,valid_loss_arr = [],[]

    for E in range(epochs):

        # validate() leaves the model in eval mode, so training mode has to be
        # re-enabled at the start of every epoch; otherwise dropout is silently
        # disabled from the second epoch onwards.
        classifier.train()

        for b in train_dl:

            loss = classifier(b)['CE_loss']

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_acc += loss.item()

            num_batches += 1
            total_steps += 1

            train_loss_arr.append(loss_acc/num_batches)

            if total_steps%100==0:
                print("Epoch:",E,"\t","Steps taken:",total_steps,"\tLoss:",loss_acc/num_batches)

        if save_dir is not None:
            torch.save({'model_state':classifier.state_dict(),
                        'optimizer':optimizer.state_dict(),
                        'epoch':E},
                        f"{save_dir}/Epoch_{E}_model_chkpt.pth.tar")

        # validate() returns the SUM of batch losses; comparing sums is
        # equivalent to comparing means because len(valid_dl) is constant.
        valid_loss = validate(classifier,valid_dl)
        valid_loss_arr.append(valid_loss/len(valid_dl))

        if valid_loss<best_valid_loss:
            best_valid_loss = valid_loss
            patience = PATIENCE

            if save_dir is not None:
                torch.save({'model_state':classifier.state_dict(),
                            'optimizer':optimizer.state_dict(),
                            'epoch':E},
                            f"{save_dir}/best_model_chkpt.pth.tar")
        else:
            patience -= 1
            print(f"REDUCING PATIENCE...{patience}")

        if patience<=0:
            print("RUNNING OUT OF PATIENCE... TERMINATING")
            break


    return train_loss_arr,valid_loss_arr
                

In [None]:
def validate(classifier,valid_dl):
    """Evaluate ``classifier`` on ``valid_dl`` without tracking gradients.

    Switches the model to eval mode (and leaves it there), prints the mean
    batch loss, and returns the SUM of the per-batch ``'CE_loss'`` values;
    callers divide by the number of batches when they need the mean.
    """
    classifier.eval()

    batch_losses = []
    with torch.no_grad():
        for batch in valid_dl:
            batch_losses.append(classifier(batch)['CE_loss'].item())

    # Accumulate left to right from an integer zero, one batch at a time.
    valid_loss = 0
    for batch_loss in batch_losses:
        valid_loss += batch_loss

    print("Validation Loss:",valid_loss/len(batch_losses))
    return valid_loss

In [None]:
import os

from transformers import AutoModel

# Upper bound on training epochs (early stopping may end training sooner).
EPOCHS = 50
# When True, a later cell loads a saved checkpoint and freezes the encoder.
FREEZE_BERT = False

device = 'cuda:0'

# MPNet = AutoModel.from_pretrained(bert_chkpt).to(device)
# NOTE(review): inp_dim is left at the classifier default while bert_chkpt
# names the Llama-2 checkpoint; presumably this cell was written for an MPNet
# encoder — confirm the pooled embedding width matches the head.
classifier_model = classifier(bert_chkpt,device=device,hidden_dims=[768,128], num_classes=4, use_norm=False).to(device)

In [None]:
# Optional second stage: start from a previously saved best checkpoint and
# stop gradients for the encoder.
if FREEZE_BERT:
    classifier_model.load_state_dict(torch.load('Rating_sent_MPNET_chkpts_1/best_model_chkpt.pth.tar')['model_state'])
    classifier_model.bert_model.requires_grad_(False)

# The optimizer is given every parameter, including the encoder's.
optimizer = torch.optim.AdamW(classifier_model.parameters(),lr=1e-4)

# Checkpoint directory, created here if it does not exist yet.
save_dir = 'Rating_ctxt_FB_MPNET_chkpts_1'
if not os.path.exists(save_dir):
    os.mkdir(save_dir)

# Train with early stopping (patience of 5 epochs); returns the loss curves.
train_loss,valid_loss = train(classifier_model,
                              train_DL,
                              valid_DL,
                              EPOCHS,
                              optimizer,
                              PATIENCE=5,
                              save_dir=save_dir)

In [None]:
import json

# Persist both loss curves as JSON lists, one file per curve.
for path, losses in (('train_loss.json', train_loss), ('valid_loss.json', valid_loss)):
    with open(path,'w') as f:
        json.dump(losses,f)

In [None]:
# The training curve has one point per step and the validation curve one per
# epoch; pick evenly spaced training points so both have the same length.
n_points = len(valid_loss)
sample_idx = np.round(np.linspace(0, len(train_loss) - 1, n_points)).astype(int)
train_loss_ds = np.array(train_loss)[sample_idx]
loss_df = pd.DataFrame({'train_loss':train_loss_ds , 'valid_loss':valid_loss})

In [None]:
from plotly import express as px
# Plot the downsampled training curve against the per-epoch validation curve.
px.line(loss_df,y=['train_loss','valid_loss'])

In [None]:
# Rebuild the test loader with a larger batch size for evaluation.
test_DL = DataLoader(test_dataset,batch_size=100,shuffle=False)

In [None]:
# Load the best checkpoint written by train() into memory.
chkpt = torch.load('Rating_ctxt_FB_MPNET_chkpts_1/best_model_chkpt.pth.tar')

In [None]:
# Restore the classifier weights from that checkpoint.
classifier_model.load_state_dict(chkpt['model_state'])

In [None]:
i = 0
# Predicted and ground-truth labels accumulated over the test set; later
# cells read both lists.
preds,gt = [],[]
classifier_model.eval()
with torch.no_grad():
    for b in tqdm.tqdm(test_DL,desc='evaluating'):
        out = classifier_model(b)
        # Predicted class = index of the highest class probability.
        pred_labels = out['class_probs'].argmax(dim=-1).cpu().tolist()
        # NOTE(review): reads 'rating_class' from the batch, which
        # feedback_QA_dataset items do not carry — confirm the dataset in use.
        gt_labels = b['rating_class'].tolist()
        preds.extend(pred_labels)
        gt.extend(gt_labels)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay,confusion_matrix
# normalize='all' divides every cell by the total number of samples.
cm = confusion_matrix(gt,preds,normalize='all')
ConfusionMatrixDisplay(cm).plot()

In [None]:
# Number of evaluated test samples.
len(gt)

In [None]:
from sklearn.metrics import precision_score, recall_score, accuracy_score
# Macro averaging: unweighted mean of the per-class scores.
print('Precision: ' , precision_score(gt,preds,average='macro'))
print('Recall: ' , recall_score(gt,preds,average='macro'))
print('Accuracy: ' , accuracy_score(gt,preds))

In [None]:
# Micro averaging: scores computed from counts pooled over all classes.
# NOTE(review): for single-label multiclass data these are expected to equal
# the accuracy, so this cell presumably adds no new information — confirm.
print('Precision: ' , precision_score(gt,preds,average='micro'))
print('Recall: ' , recall_score(gt,preds,average='micro'))
print('Accuracy: ' , accuracy_score(gt,preds))