In [1]:
import os
from typing import Tuple

import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
import argparse

import pandas as pd
import pickle
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(torch.cuda.is_available())

True


In [3]:
device="cuda:0"
model = AutoModelForCausalLM.from_pretrained('../../models/mistral_7b_instruct/').to(device)
model.eval()
tokenizer = AutoTokenizer.from_pretrained('../../models/mistral_7b_instruct/')

Loading checkpoint shards: 100%|██████████| 3/3 [00:30<00:00, 10.15s/it]


In [4]:
def generate(tokenizer, model, prompt: str) -> Tuple[str, str]:
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model(input_ids)
        logits = outputs.logits
        last_token_logits = logits[0, -1, :]  # Logits for the last token
        masked_logits = last_token_logits.clone()
        top_unconst_token_id = torch.argmax(last_token_logits).item()
        top_unconst_token = tokenizer.decode([top_unconst_token_id])

    return top_unconst_token, top_unconst_token_id

## Prompting Cities

In [5]:
dataset_path = "../datasets/raw/cities_10k/main/main.csv"
df_all = pd.read_csv(dataset_path)

print(len(df_all))
df_all.head()

9984


Unnamed: 0,question,answer,filtered_answer,correct_answer,is_correct
0,In which country is the city of Kostomloty Pie...,-,-,Poland,-
1,In which country is the city of Bognankro loca...,-,-,Côte d'Ivoire,-
2,In which country is the city of Le Raincy loca...,-,-,France,-
3,In which country is the city of Tobol located?,-,-,Kazakhstan,-
4,In which country is the city of Tayabas Ibaba ...,-,-,Philippines,-


In [11]:
torch.cuda.ipc_collect()
torch.cuda.empty_cache()

In [7]:
for index, row in tqdm(df_all[:5].iterrows()):
    #question = "I am going to ask you about your confidence to answer a question. Please respond with only a percentage and end with <\s>. The confidence indicates how likely you think your answer will be true. How confident are you that you can answer correctly 'What is the last digit of pi'? Answer: 10%<\s>. How confident are you that you can answer correctly 'Will a fair coin land on heads or tails'? Answer: 50%<\s>. How confident are you that you can answer correctly 'What is 1+1?'? Answer: 90%<\s>. How confident are you that you can answer correctly '" + row["question"] + "'? Answer: "
    question = "I am going to ask you about your confidence to answer a question. The confidence indicates how likely you think your answer will be true. Please respond with only a percentage and end with <\s>, so your answer should be 'Answer: (percentage)%<\s>'. How confident are you that you can answer correctly '" + row["question"] + "'? Answer: "
    answer = ''
    for i in range(10):
        token_decoded, token_id = generate(tokenizer, model, question)
        if "<\s>" in answer:
            break

        question = question + token_decoded
        answer = answer + token_decoded
    print(question)
    print(answer)
            

1it [00:00,  1.21it/s]

I am going to ask you about your confidence to answer a question. The confidence indicates how likely you think your answer will be true. Please respond with only a percentage and end with <\s>, so your answer should be 'Answer: (percentage)%<\s>'. How confident are you that you can answer correctly 'In which country is the city of Kostomloty Pierwsze located?'? Answer: 0%<\s>
0%<\s>


2it [00:01,  1.21it/s]

I am going to ask you about your confidence to answer a question. The confidence indicates how likely you think your answer will be true. Please respond with only a percentage and end with <\s>, so your answer should be 'Answer: (percentage)%<\s>'. How confident are you that you can answer correctly 'In which country is the city of Bognankro located?'? Answer: 0%<\s>
0%<\s>


3it [00:02,  1.13it/s]

I am going to ask you about your confidence to answer a question. The confidence indicates how likely you think your answer will be true. Please respond with only a percentage and end with <\s>, so your answer should be 'Answer: (percentage)%<\s>'. How confident are you that you can answer correctly 'In which country is the city of Le Raincy located?'? Answer: 95%<\s>
95%<\s>


4it [00:03,  1.10it/s]

I am going to ask you about your confidence to answer a question. The confidence indicates how likely you think your answer will be true. Please respond with only a percentage and end with <\s>, so your answer should be 'Answer: (percentage)%<\s>'. How confident are you that you can answer correctly 'In which country is the city of Tobol located?'? Answer: 95%<\s>
95%<\s>


5it [00:04,  1.11it/s]

I am going to ask you about your confidence to answer a question. The confidence indicates how likely you think your answer will be true. Please respond with only a percentage and end with <\s>, so your answer should be 'Answer: (percentage)%<\s>'. How confident are you that you can answer correctly 'In which country is the city of Tayabas Ibaba located?'? Answer: 95%<\s>
95%<\s>





In [None]:
df.to_csv("../../datasets/cities_expansions/cities_questions_augmented_2_labeled.csv")

## Prompting birth years

In [5]:
dataset_path = "../datasets/custom/birth_years.csv"
df_all = pd.read_csv(dataset_path)

#with open('../../datasets/cities_expansions/countries_dictionary.pkl', 'rb') as f:
#    country_names = pickle.load(f)

print(len(df_all))
df_all.head()

4430


Unnamed: 0,question,answer,filtered_answer,correct_answer,is_correct
0,What year was Hank Aaron born?,-,-,1934,-
1,What year was Sani Abacha born?,-,-,1943,-
2,What year was Claudio Abbado born?,-,-,1933,-
3,What year was Mahmoud Abbas born?,-,-,1935,-
4,What year was Omar Abdel Rahman born?,-,-,1938,-


In [49]:
question1 = "I am going to ask you what year a person was born. End your sentence with <|eot_id|>. Here are some examples of questions that might help you. What year was Barack Obama born? 1961.<|eot_id|> What year was Vladimir Putin born? 1952.<|eot_id|> What year was Xi Jinping born? 1953.<|eot_id|> What year was Hank Aaron born? "
question2 = "I am going to ask you what year a person was born. Answer this question with a year followed by a dot and <|eot_id|>, like this: 'question'? 2024.<|eot_id|>. What year was Hank Aaron born?"
answer = ''

for i in range(1000):
    token_decoded, token_id = generate(tokenizer, model, question)
    print(token_id, token_decoded)

    question = question + token_decoded
    answer = answer + token_decoded

    if "<|eot_id|>" in answer:
        break
print(answer)

128009 <|eot_id|>
<|eot_id|>


In [41]:
df = df_all[1000:1300]

In [58]:
answer_list, questions = [], []
for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    question = "I am going to ask you what year a person was born. Answer this question with a year followed by a dot and <|eot_id|>, like this: 'question'? 2024.<|eot_id|>. " + row['question']
    answer = ""
    
    for i in range(1000):
        token_decoded, token_id = generate(tokenizer, model, question)

        question = question + token_decoded
        answer = answer + token_decoded

        if "<|eot_id|>" in answer:
            break
        
    answer_list.append(answer.strip())
    questions.append(question)

    
df['answer'] =  answer_list
df['filtered_answer'] = df['answer'].str.slice(stop=-11)
df['is_correct'] = df.apply(lambda row : row['filtered_answer'] == str(row['correct_answer']), axis=1) #df['is_correct'] = (df['filtered_answer'] == df['correct_country'])
print('Accuracy: ' + str(df['is_correct'].sum()/len(df)))

100%|██████████| 300/300 [00:18<00:00, 16.62it/s]

Accuracy: 0.0



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['answer'] =  answer_list
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['filtered_answer'] = df['answer'].str.slice(stop=-11)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['is_correct'] = df.apply(lambda row : row['filtered_answer'] == str(row['correct_answer']), axis=1) #df['is_correct'

In [56]:
val = 0
for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    if len(row['answer']) == 15:
        val += 1
print(val/300)

100%|██████████| 300/300 [00:00<00:00, 20029.15it/s]

0.9366666666666666





In [59]:
df

Unnamed: 0,question,answer,filtered_answer,correct_answer,is_correct
1000,What year was James Dimon born?,<|eot_id|>,,1956,False
1001,What year was John D Dingell born?,<|eot_id|>,,1926,False
1002,What year was David N Dinkins born?,<|eot_id|>,,1927,False
1003,What year was Joseph J DioGuardi born?,<|eot_id|>,,1940,False
1004,What year was Celine Dion born?,<|eot_id|>,,1968,False
...,...,...,...,...,...
1295,What year was Jonathan Safran Foer born?,<|eot_id|>,,1977,False
1296,What year was Glenn Foley born?,<|eot_id|>,,1970,False
1297,What year was Mark A Foley born?,<|eot_id|>,,1954,False
1298,What year was Jane Fonda born?,<|eot_id|>,,1937,False


## Prompting football

In [20]:
dataset_path = "../datasets/custom/football_leagues.csv"
df_all = pd.read_csv(dataset_path)

with open('../datasets/custom/football_leagues_dictionary.pkl', 'rb') as f:
    team_name_mappings = pickle.load(f)

print(len(df_all))
df_all.head()

1078


Unnamed: 0,question,answer,filtered_answer,correct_answer,is_correct
0,Which team finished in position 1 in the 2010-...,-,-,Manchester Utd,-
1,Which team finished in position 2 in the 2010-...,-,-,Chelsea,-
2,Which team finished in position 3 in the 2010-...,-,-,Manchester City,-
3,Which team finished in position 4 in the 2010-...,-,-,Arsenal,-
4,Which team finished in position 5 in the 2010-...,-,-,Tottenham,-


In [None]:
question1 = "I am going to ask you a question about football. End your sentence with <|eot_id|>. Here are some examples of questions that might help you. Which team finished in position 10 in the 2010-2011 Premier League? Sunderland.<|eot_id|> Which team finished in position 19 in the 2020-2021 La Liga? Valladolid.<|eot_id|> Which team finished in position 13 in the 2019-2020 Serie A? Udinese.<|eot_id|> "
question2 = "I am going to ask you what year a person was born. Answer this question with a year followed by a dot and <|eot_id|>, like this: 'question'? 2024.<|eot_id|>. What year was Hank Aaron born?"
answer = ''

for i in range(1000):
    token_decoded, token_id = generate(tokenizer, model, question)
    print(token_id, token_decoded)

    question = question + token_decoded
    answer = answer + token_decoded

    if "<|eot_id|>" in answer:
        break
print(answer)

In [13]:
df = df_all

In [14]:
answer_list, questions = [], []
for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    question = "I am going to ask you a question about football. End your sentence with <|eot_id|>. Here are some examples of questions that might help you. Which team finished in position 10 in the 2010-2011 Premier League? Sunderland.<|eot_id|> Which team finished in position 19 in the 2020-2021 La Liga? Valladolid.<|eot_id|> Which team finished in position 13 in the 2019-2020 Serie A? Udinese.<|eot_id|> " + row['question']
    answer = ""
    
    for i in range(1000):
        token_decoded, token_id = generate(tokenizer, model, question)

        question = question + token_decoded
        answer = answer + token_decoded

        if "<|eot_id|>" in answer:
            break
        
    answer_list.append(answer.strip())
    questions.append(question)

    
df['answer'] =  answer_list
df['filtered_answer'] = df['answer'].str.slice(stop=-11)
df['is_correct'] = df.apply(lambda row : row['filtered_answer'] in team_name_mappings[row['correct_answer']], axis=1) #df['is_correct'] = (df['filtered_answer'] == df['correct_country'])
print('Accuracy: ' + str(df['is_correct'].sum()/len(df)))

100%|██████████| 1078/1078 [08:55<00:00,  2.01it/s]

Accuracy: 0.12337662337662338





In [24]:
df['is_correct'] = df.apply(lambda row : row['filtered_answer'] in team_name_mappings[row['correct_answer']], axis=1) #df['is_correct'] = (df['filtered_answer'] == df['correct_country'])
print('Accuracy: ' + str(df['is_correct'].sum()/len(df)))

Accuracy: 0.16233766233766234


In [26]:
df.to_csv("../datasets/custom/football_leagues_labeled.csv", index=False)

## Prompting medals

In [28]:
dataset_path = "../datasets/custom/medals.csv"
df_all = pd.read_csv(dataset_path)

#with open('../datasets/custom/football_leagues_dictionary.pkl', 'rb') as f:
#    team_name_mappings = pickle.load(f)

print(len(df_all))
df_all.head()

11459


Unnamed: 0,question,answer,filtered_answer,correct_answer,is_correct
0,Which country won gold in Tug-Of-War Men's Tug...,-,-,Denmark/Sweden,-
1,Which country won gold in Gymnastics Men's Tea...,-,-,Finland,-
2,Which country won gold in Gymnastics Men's Hor...,-,-,Finland,-
3,Which country won gold in Gymnastics Men's Pom...,-,-,Finland,-
4,Which country won gold in Handball Women's Han...,-,-,Norway,-


In [None]:
df = df_all[]

 53%|█████▎    | 6036/11459 [00:00<00:00, 31312.83it/s]

Which country won gold in Tug-Of-War Men's Tug-Of-War in the 1900 Summer Olympics? Denmark/Sweden
Which country won gold in Gymnastics Men's Team All-Around in the 1948 Summer Olympics? Finland
Which country won gold in Gymnastics Men's Horse Vault in the 1948 Summer Olympics? Finland
Which country won gold in Gymnastics Men's Pommelled Horse in the 1948 Summer Olympics? Finland
Which country won gold in Handball Women's Handball in the 2008 Summer Olympics? Norway
Which country won gold in Sailing Mixed 8 metres in the 1912 Summer Olympics? Taifun
Which country won gold in Cycling Men's Road Race, Team in the 1956 Summer Olympics? France
Which country won gold in Canoeing Men's Kayak Fours, 1,000 metres in the 2008 Summer Olympics? Belarus
Which country won gold in Handball Men's Handball in the 2008 Summer Olympics? France
Which country won gold in Handball Men's Handball in the 2012 Summer Olympics? France
Which country won gold in Football Men's Football in the 2000 Summer Olympics

100%|██████████| 11459/11459 [00:00<00:00, 32173.73it/s]

Spain
Which country won gold in Taekwondo Men's Flyweight in the 2008 Summer Olympics? Mexico
Which country won gold in Athletics Men's 20 kilometres Walk in the 1996 Summer Olympics? Ecuador
Which country won gold in Baseball Men's Baseball in the 1992 Summer Olympics? Cuba
Which country won gold in Boxing Men's Flyweight in the 1948 Summer Olympics? Argentina
Which country won gold in Cycling Men's Madison in the 2008 Summer Olympics? Argentina
Which country won gold in Hockey Women's Hockey in the 1996 Summer Olympics? Australia
Which country won gold in Water Polo Men's Water Polo in the 1968 Summer Olympics? Yugoslavia
Which country won gold in Swimming Men's 1,500 metres Freestyle in the 1992 Summer Olympics? Australia
Which country won gold in Swimming Men's 1,500 metres Freestyle in the 1996 Summer Olympics? Australia
Which country won gold in Basketball Men's Basketball in the 1984 Summer Olympics? United States
Which country won gold in Handball Men's Handball in the 1996 Sum




In [None]:
answer_list, questions = [], []
for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    question = "I am going to ask you a question about the olympics. End your sentence with <|eot_id|>. Here are some examples of questions that might help you. Which country won gold in Gymnastics Women's Team All-Around in the 1928 Summer Olympics? Netherlands.<|eot_id|> Which country won gold in Hockey Women's Hockey in the 2004 Summer Olympics? Germany.<|eot_id|> Which country won gold in Fencing Men's Sabre, Individual in the 1964 Summer Olympics? Hungary.<|eot_id|> " + row['question']
    answer = ""
    
    for i in range(1000):
        token_decoded, token_id = generate(tokenizer, model, question)

        question = question + token_decoded
        answer = answer + token_decoded

        if "<|eot_id|>" in answer:
            break
        
    answer_list.append(answer.strip())
    questions.append(question)

    
df['answer'] =  answer_list
df['filtered_answer'] = df['answer'].str.slice(stop=-11)
df['is_correct'] = df.apply(lambda row : row['filtered_answer'] in team_name_mappings[row['correct_answer']], axis=1) #df['is_correct'] = (df['filtered_answer'] == df['correct_country'])
print('Accuracy: ' + str(df['is_correct'].sum()/len(df)))