In [1]:
import json
import pandas as pd

# Reading JSON data from a file
with open("/kaggle/input/copa-test/copa-test.json", "r") as file:
    data = json.load(file)
    
df = pd.DataFrame(data) 

In [2]:
NUM_ROW = 2

In [3]:
df

Unnamed: 0,id,asks-for,most-plausible-alternative,p,a1,a2
0,501,cause,1,The item was packaged in bubble wrap.,It was fragile.,It was small.
1,502,effect,1,I emptied my pockets.,I retrieved a ticket stub.,I found a weapon.
2,503,effect,2,Termites invaded the house.,The termites disappeared from the house.,The termites ate through the wood in the house.
3,504,effect,1,The travelers reached the border.,The patrol agent checked their passports.,The patrol agent accused them of smuggling.
4,505,cause,1,The office was closed.,It was a holiday.,It was summer.
...,...,...,...,...,...,...
495,996,effect,2,The runner sensed his competitor gaining on him.,He dropped out of the race.,He sped up his pace.
496,997,effect,2,I thought carefully about the problem.,I asked for advice.,I came up with a solution.
497,998,effect,1,The traveler walked on the shaky suspension br...,He felt terrified.,He felt ecstatic.
498,999,effect,2,The man anticipated the team's victory.,He met his friends to watch the game.,He made a bet with his friends.


In [4]:
# Setting 
SUFFIX = "You are a highly intelligent question-answering bot with profound knowledge of causal inference."
COT_PROMPT ='''Begin your response with reasoning or evidence to suport your explanation, then return me the final result marked by '####'.'''
DIRECT_IO_PROMPT = '''Give me the anwser directly'''

In [5]:
def get_prompt(file_path: str): 
    with open(file_path, 'r') as file: 
        prompt = file.read()

    return prompt 

In [6]:
def data_preprocessing(df: pd.DataFrame, flag_direct_io: bool = False, flag_few_shot: bool = False): 
    suffix = SUFFIX    
    base_prompt = '''
     Premise: {premise}
     Question: {question}
     A. {option_a}
     B. {option_b}
     '''

    if flag_direct_io:
        base_prompt += f"{DIRECT_IO_PROMPT} Answer: A or Answer: B. Do not use any other format."
    if not flag_direct_io:
        base_prompt += f"{COT_PROMPT} The answer format is #### A or #### B."
    
    if flag_few_shot:
        if flag_direct_io:
            examples = df.sample(2).apply(lambda row: base_prompt.format(
                premise=row['p'],
                question=f"What was the {row['asks-for']}?",
                option_a=row['a1'],
                option_b=row['a2']
            ) + f"\nAnswer: {chr(65 + row['most-plausible-alternative'] - 1)}", axis=1).str.cat(sep="\n\n")
        else:
            examples = get_prompt("/kaggle/input/prompt-copa/copa.txt")  # Assuming this function exists as in the original code
        base_prompt = f"\nHere are some examples:\n\n{examples}\n MY QUESTION:\n{base_prompt}"
    
    base_prompt = suffix + base_prompt
    
    df['prompt'] = df.apply(lambda row: base_prompt.format(
        premise=row['p'],
        question=f"What was the {row['asks-for']}?",
        option_a=row['a1'],
        option_b=row['a2']
    ), axis=1)
    
    df['label'] = df['most-plausible-alternative'].map({1: 'A', 2: 'B'})
    
    return df[['prompt', 'label']]

In [7]:
# few shot + cot
fewshot_cot_df = data_preprocessing(df, flag_direct_io = False, flag_few_shot = True)
# zero shot + cot 
zeroshot_cot_df =  data_preprocessing(df, flag_direct_io = False, flag_few_shot = False)
# few shot + direct 
fewshot_dir_df = data_preprocessing(df, flag_direct_io= True, flag_few_shot=True)
# zero shot + direct 
zeroshot_dir_df = data_preprocessing(df, flag_direct_io=True, flag_few_shot=False)

In [8]:
fewshot_cot_df = fewshot_cot_df[:NUM_ROW]
zeroshot_cot_df = zeroshot_cot_df[:NUM_ROW]
fewshot_dir_df = fewshot_dir_df[:NUM_ROW]
zeroshot_dir_df = zeroshot_dir_df[:NUM_ROW]

In [9]:
fewshot_cot_df.iloc[0]

prompt    You are a highly intelligent question-answerin...
label                                                     A
Name: 0, dtype: object

In [10]:
len(fewshot_cot_df)

2

In [11]:
test = fewshot_cot_df.iloc[0]['prompt']

In [12]:
test

'You are a highly intelligent question-answering bot with profound knowledge of causal inference.\nHere are some examples:\n\n"You are a highly intelligent question-answering bot with profound knowledge of causal inference.\\n Premise: The man\'s voice sounded hoarse.\\n Question: What was the cause?\\n A. He had a cold.\\n B. He quit smoking.\\n Begin your response with reasoning or evidence to suport your explanation, then return me the final result marked by \'####\'. The answer format is #### A or #### B."\nA hoarse voice is often a symptom of a cold, as it can result from inflammation of the vocal cords caused by infection. Quitting smoking, while beneficial for overall health, generally leads to improved vocal quality over time rather than immediate hoarseness.\n\n#### A\n\n"You are a highly intelligent question-answering bot with profound knowledge of causal inference.\\n Premise: I started a fire in the fireplace.\\n Question: What was the cause?\\n A. I was out of firewood.\\n

# Evaluation using Ollama

In [13]:
# download ollama 

!curl https://ollama.ai/install.sh | sh
!sudo apt install -y neofetch

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 13269    0 13269    0     0  62913      0 --:--:-- --:--:-- --:--:-- 63185
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  caca-utils chafa imagemagick imagemagick-6.q16 jp2a libchafa0 libid3tag0
  libimlib2 libnetpbm10 libpci3 libsixel-bin libsixel1 netpbm pci.ids pciutils
  toilet toilet-fonts 

In [14]:
!neofetch

[?25l[?7l[0m[31m[1m            .-/+oossssoo+/-.
        `:+ssssssssssssssssss+:`
      -+ssssssssssssssssssyyssss+-
    .ossssssssssssssssss[37m[0m[1mdMMMNy[0m[31m[1msssso.
   /sssssssssss[37m[0m[1mhdmmNNmmyNMMMMh[0m[31m[1mssssss/
  +sssssssss[37m[0m[1mhm[0m[31m[1myd[37m[0m[1mMMMMMMMNddddy[0m[31m[1mssssssss+
 /ssssssss[37m[0m[1mhNMMM[0m[31m[1myh[37m[0m[1mhyyyyhmNMMMNh[0m[31m[1mssssssss/
.ssssssss[37m[0m[1mdMMMNh[0m[31m[1mssssssssss[37m[0m[1mhNMMMd[0m[31m[1mssssssss.
+ssss[37m[0m[1mhhhyNMMNy[0m[31m[1mssssssssssss[37m[0m[1myNMMMy[0m[31m[1msssssss+
oss[37m[0m[1myNMMMNyMMh[0m[31m[1mssssssssssssss[37m[0m[1mhmmmh[0m[31m[1mssssssso
oss[37m[0m[1myNMMMNyMMh[0m[31m[1msssssssssssssshmmmh[0m[31m[1mssssssso
+ssss[37m[0m[1mhhhyNMMNy[0m[31m[1mssssssssssss[37m[0m[1myNMMMy[0m[31m[1msssssss+
.ssssssss[37m[0m[1mdMMMNh[0m[31m[1mssssssssss[37m[0m[1mhNMMMd[0m[31m[1mssssssss.
 /ssssssss[37m[0m[1mh

In [15]:
# state up ollama 

import subprocess
import time

command = "nohup ollama serve&"
process = subprocess.Popen(command,
                            shell=True,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
print("Process ID:", process.pid)
time.sleep(5) 

Process ID: 532


In [16]:
!ollama -v

ollama version is 0.5.4


In [17]:
# SETUP MODEL 
LLAMA3_MODEL='llama3'
QWEN2_MODEL='qwen2:7b'
GEMMA_MODEL="gemma:7b"
MISTRAL_MODEL = "mistral"

## Testing with LLAMA3 model

In [18]:
!pip install -q llama-index==0.9.21

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.7/15.7 MB[0m [31m84.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m54.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m454.3/454.3 kB[0m [31m22.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.5/73.5 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m345.0/345.0 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of

In [19]:
!ollama run $LLAMA3_MODEL "Say Hello World!."

[?25lpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest 
pulling 6a0746a1ec1a...   0% ▕                ▏    0 B/4.7 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 6a0746a1ec1a...   0% ▕                ▏    0 B/4.7 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 6a0746a1ec1a...   0% ▕                ▏    0 B/4.7 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 6a0746a1ec1a...   0% ▕                ▏ 3.1 MB/4.7 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 6a0746a1ec1a...   1% ▕                ▏  64 MB/4.7 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 6a0746a1ec1a... 

In [20]:
from llama_index.llms import Ollama

llm = Ollama(model=LLAMA3_MODEL)
# response = llm.complete(test)
# print(response)

In [21]:
from typing import List
import re

def parse_answer(response: str, options: List[str]) -> str:
    if "####" in response:
        response = "".join(response.split("####")[1:])
    if "Answer:" in response:
        response = "".join(response.split("Answer:")[1:])
    if "answer:" in response:
        response = "".join(response.split("answer:")[1:])
    
    for option in options[::-1]:
        pattern = rf'(?<![a-zA-Z])[\n\s]*{re.escape(option)}[.\)]?[\n\s]*(?![a-zA-Z])'
        match = re.search(pattern, response, re.IGNORECASE)
        if match:
            return option
    return ""


In [22]:
def get_result(llm, prompt): 
    response = str(llm.complete(prompt))

    return parse_answer(response, ["A", "B"])

fewshot_cot_df = fewshot_cot_df[:50]
zeroshot_cot_df = zeroshot_cot_df[:50]
fewshot_dir_df = fewshot_dir_df[:50]
zeroshot_dir_df = zeroshot_dir_df[:50]

In [23]:
fewshot_cot_df['Model_predict'] = fewshot_cot_df['prompt'].apply(lambda x: get_result(llm, x))
zeroshot_cot_df['Model_predict'] = zeroshot_cot_df['prompt'].apply(lambda x: get_result(llm, x))
fewshot_dir_df['Model_predict'] = fewshot_dir_df['prompt'].apply(lambda x: get_result(llm, x))
zeroshot_dir_df['Model_predict'] = zeroshot_dir_df['prompt'].apply(lambda x: get_result(llm, x))


In [30]:
fewshot_cot_df

Unnamed: 0,prompt,label,Model_predict
0,You are a highly intelligent question-answerin...,A,A
1,You are a highly intelligent question-answerin...,A,A


# Metrics 

In [31]:
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder

def calculate_f1_acc_metrics(df: pd.DataFrame):
    df = df.dropna(subset=["Model_predict"])
    label_encoder = LabelEncoder()
    df['label'] = label_encoder.fit_transform(df['label'])
    df['Model_predict'] = label_encoder.transform(df['Model_predict'])
    
    # Tính accuracy
    accuracy = accuracy_score(df['label'], df['Model_predict'])
    print(f'Accuracy: {accuracy}')

    f1 = f1_score(df['label'], df['Model_predict'])
    print(f'F1 Score: {f1}')

    return {
        "Acc": accuracy, 
        "F1": f1
    }

In [32]:
calculate_f1_acc_metrics(fewshot_cot_df)

Accuracy: 1.0
F1 Score: 0.0


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'Acc': 1.0, 'F1': 0.0}