In [5]:
import os
# Restrict this process to physical GPU 2. The variable is set before
# `import torch` below so that it is already in place when CUDA is first
# initialised; the selected card is then the only CUDA device this process sees.
# NOTE(review): the GPU index is machine-specific — adjust for other hosts.
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM
from tqdm import tqdm
import numpy as np

# Print the name of every CUDA device that is visible to this process.
visible_device_count = torch.cuda.device_count()
for device_index in range(visible_device_count):
    device_properties = torch.cuda.get_device_properties(device_index)
    print(device_properties.name)

# Generic CUDA device handle, reused by later cells when moving inputs to the GPU.
cuda = torch.device('cuda')

# Smoke test: moving a tiny tensor to the GPU fails right here if CUDA is unusable.
b1 = torch.tensor([1., 2.]).to(device=cuda)

NVIDIA RTX A6000


In [7]:
# Checkpoint identifier shared by the model and its tokenizer.
model_name_or_path = 'meta-llama/Llama-2-7b-chat-hf'
# Directory where the downloaded checkpoint files are cached.
# NOTE(review): absolute, user-specific path — adjust when running elsewhere.
hf_cache_dir = '/scratch/henrypapadatos'

print('Loading model...')
# device_map="auto" leaves device placement of the weights to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    cache_dir=hf_cache_dir,
    device_map="auto",
)

print('Loading tokenizer...')
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    cache_dir=hf_cache_dir,
)

Loading model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading tokenizer...


In [24]:
from datasets import load_dataset
import pandas as pd
# Load the "rotten_tomatoes" dataset and materialise each of its three splits
# as a separate pandas DataFrame.
dataset = load_dataset("rotten_tomatoes")
df_train, df_test, df_val = (
    pd.DataFrame(dataset[split_name])
    for split_name in ('train', 'test', 'validation')
)

# Only the validation frame receives a placeholder 'correct' column here; the
# other two frames get theirs in their own evaluation cells.
# NOTE(review): this makes df_val one column wider than the other frames.
df_val['correct'] = np.nan

# Report (rows, columns) for train, test and validation, in that order.
for split_frame in (df_train, df_test, df_val):
    print(split_frame.shape)

# Last expression of the cell: rich preview of the first training rows.
df_train.head()

(8530, 3)
(1066, 3)
(1066, 3)


Unnamed: 0,text,label,correct
0,the rock is destined to be the 21st century's ...,1,0.0
1,"the gorgeously elaborate continuation of "" the...",1,1.0
2,effective but too-tepid biopic,1,0.0
3,if you sometimes like to go to the movies to h...,1,0.0
4,"emerges as something rare , an issue movie tha...",1,1.0


In [13]:
# Prompt skeleton for the two-way sentiment question. `{comment}` is replaced by
# the review text, and the prompt deliberately ends with an opening parenthesis
# so that the very next generated token is expected to be the choice letter.
# NOTE(review): the quote that closes {comment} is a typographic ” while the
# opening one is a plain " — kept exactly as-is because it is part of the prompt.
template = (
    '[INST] Do you think that "{comment}” is positive or negative sentiment?\n'
    '\n'
    'Choices:\n'
    '(A) Positive\n'
    '(B) Negative\n'
    '\n'
    '[/INST] I believe the best answer is: ('
)

# A single example review used to preview the fully rendered prompt.
comment = (
    "the rock is destined to be the 21st century's new \" conan \" "
    "and that he's going to make a splash even greater than "
    "arnold schwarzenegger , jean-claud van damme or steven segal ."
)

# Rendered example prompt (not used by the evaluation cells shown here).
questions = template.format(comment=comment)

In [21]:
# Score the model's zero-shot sentiment answer for every row of the training split.
#
# For each review the prompt is rendered from `template`, exactly one token is
# generated greedily, and that token is read as the choice letter: 'A' means
# positive (label 1), 'B' means negative (label 0). The 'correct' column ends up
# 1.0 when the answer matches the label and 0.0 otherwise. An answer that is
# neither 'A' nor 'B' is reported and counted as incorrect.

# Placeholder column: it stays NaN if the loop below is interrupted, so an
# incomplete run is detectable afterwards.
df_train['correct'] = np.nan

letter_to_label = {'A': 1, 'B': 0}
correct_flags = []

# `total` gives tqdm a length, so the bar can show percentage and ETA.
for index, text, label in tqdm(
    zip(df_train.index, df_train['text'], df_train['label']),
    total=len(df_train),
):
    prompt = template.format(comment=text)
    # Put the prompt on the same device as the model instead of relying on a
    # device handle defined in another cell.
    tokens = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
    number_of_tokens = tokens.shape[1]

    # Greedy decoding of exactly one new token after the prompt.
    generated = model.generate(
        tokens,
        max_new_tokens=1,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=False,
    )

    # Decode only the newly generated token. Taking the last character of the
    # whole decoded sequence would accept any token that merely ends in A or B.
    answer = tokenizer.decode(generated[0, number_of_tokens:]).strip()

    if answer not in letter_to_label:
        print('errore for index: ', index)
        print('prompt', text)

    # Unparseable answers map to None, which never equals a label -> 0.0.
    correct_flags.append(1.0 if letter_to_label.get(answer) == label else 0.0)

# Single column assignment instead of one .loc write per row.
df_train['correct'] = correct_flags

8530it [15:42,  9.05it/s]


In [23]:
# Score the model's zero-shot sentiment answer for every row of the test split.
#
# For each review the prompt is rendered from `template`, exactly one token is
# generated greedily, and that token is read as the choice letter: 'A' means
# positive (label 1), 'B' means negative (label 0). The 'correct' column ends up
# 1.0 when the answer matches the label and 0.0 otherwise. An answer that is
# neither 'A' nor 'B' is reported and counted as incorrect.

# Placeholder column: it stays NaN if the loop below is interrupted, so an
# incomplete run is detectable afterwards.
df_test['correct'] = np.nan

letter_to_label = {'A': 1, 'B': 0}
correct_flags = []

# `total` gives tqdm a length, so the bar can show percentage and ETA.
for index, text, label in tqdm(
    zip(df_test.index, df_test['text'], df_test['label']),
    total=len(df_test),
):
    prompt = template.format(comment=text)
    # Put the prompt on the same device as the model instead of relying on a
    # device handle defined in another cell.
    tokens = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
    number_of_tokens = tokens.shape[1]

    # Greedy decoding of exactly one new token after the prompt.
    generated = model.generate(
        tokens,
        max_new_tokens=1,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=False,
    )

    # Decode only the newly generated token. Taking the last character of the
    # whole decoded sequence would accept any token that merely ends in A or B.
    answer = tokenizer.decode(generated[0, number_of_tokens:]).strip()

    if answer not in letter_to_label:
        print('errore for index: ', index)
        print('prompt', text)

    # Unparseable answers map to None, which never equals a label -> 0.0.
    correct_flags.append(1.0 if letter_to_label.get(answer) == label else 0.0)

# Single column assignment instead of one .loc write per row.
df_test['correct'] = correct_flags

1066it [01:55,  9.19it/s]


In [25]:
# Score the model's zero-shot sentiment answer for every row of the validation split.
#
# For each review the prompt is rendered from `template`, exactly one token is
# generated greedily, and that token is read as the choice letter: 'A' means
# positive (label 1), 'B' means negative (label 0). The 'correct' column ends up
# 1.0 when the answer matches the label and 0.0 otherwise. An answer that is
# neither 'A' nor 'B' is reported and counted as incorrect.

# Placeholder column: it stays NaN if the loop below is interrupted, so an
# incomplete run is detectable afterwards.
df_val['correct'] = np.nan

letter_to_label = {'A': 1, 'B': 0}
correct_flags = []

# `total` gives tqdm a length, so the bar can show percentage and ETA.
for index, text, label in tqdm(
    zip(df_val.index, df_val['text'], df_val['label']),
    total=len(df_val),
):
    prompt = template.format(comment=text)
    # Put the prompt on the same device as the model instead of relying on a
    # device handle defined in another cell.
    tokens = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
    number_of_tokens = tokens.shape[1]

    # Greedy decoding of exactly one new token after the prompt.
    generated = model.generate(
        tokens,
        max_new_tokens=1,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=False,
    )

    # Decode only the newly generated token. Taking the last character of the
    # whole decoded sequence would accept any token that merely ends in A or B.
    answer = tokenizer.decode(generated[0, number_of_tokens:]).strip()

    if answer not in letter_to_label:
        print('errore for index: ', index)
        print('prompt', text)

    # Unparseable answers map to None, which never equals a label -> 0.0.
    correct_flags.append(1.0 if letter_to_label.get(answer) == label else 0.0)

# Single column assignment instead of one .loc write per row.
df_val['correct'] = correct_flags

1066it [01:56,  9.14it/s]


In [26]:
# Summarise the 'correct' column of each split. Every printed value is a
# fraction in [0, 1] (matching rows / total rows), even though the headings
# call it a percentage. Under each heading the order is train, test, validation.
summary_checks = (
    # rows that never received a score
    ("percentage of np.nan in the correct column", lambda column: column.isna()),
    # rows scored as a correct answer
    ("percentage of correct answers", lambda column: column == 1),
    # rows scored as an incorrect answer
    ("percentage of uncorrect answers", lambda column: column == 0),
)

for heading, select_rows in summary_checks:
    print(heading)
    for split_frame in (df_train, df_test, df_val):
        print(select_rows(split_frame['correct']).sum() / len(split_frame))

percentage of np.nan in the correct column
0.0
0.0
0.0
percentage of correct answers
0.6912075029308323
0.6885553470919324
0.6913696060037523
percentage of uncorrect answers
0.30879249706916767
0.31144465290806755
0.30863039399624764


In [27]:
# Save the three scored splits as CSV files under ./datasets.
# The directory is created first because to_csv does not create missing parent
# directories and would fail on a fresh checkout.
os.makedirs('datasets', exist_ok=True)

# The DataFrame index is written too (pandas default), so each file starts with
# an unnamed index column.
df_train.to_csv('datasets/rotten_tomato_llama_2_7b_train.csv')
df_test.to_csv('datasets/rotten_tomato_llama_2_7b_test.csv')
df_val.to_csv('datasets/rotten_tomato_llama_2_7b_val.csv')