In [1]:
# Location of the evaluation set. The loading cell reads it with the Parquet
# reader when the path ends in ".parquet" and as CSV otherwise.
test_set_path = "../dataset/test.csv"

In [2]:
import pandas as pd

# Choose the pandas reader from the file extension: Parquet when the path ends
# in ".parquet", otherwise CSV decoded as ISO-8859-1.
test_df = (
    pd.read_parquet(test_set_path)
    if test_set_path.endswith(".parquet")
    else pd.read_csv(test_set_path, encoding="ISO-8859-1")
)

In [3]:
# Rename the raw columns to the names the rest of this notebook uses:
# "texts" for the input sentence and "category" for the gold sentiment label.
test_df = test_df.rename(columns={"text": "texts", "sentiment": "category"})

In [4]:
# Preview the test set after the column rename.
test_df

Unnamed: 0,textID,texts,category,Time of Tweet,Age of User,Country,Population -2020,Land Area (Kmï¿½),Density (P/Kmï¿½)
0,f87dea47db,Last session of the day http://twitpic.com/67ezh,neutral,morning,0-20,Afghanistan,38928346,652860.0,60
1,96d74cb729,Shanghai is also really exciting (precisely -...,positive,noon,21-30,Albania,2877797,27400.0,105
2,eee518ae67,"Recession hit Veronique Branquinho, she has to...",negative,night,31-45,Algeria,43851044,2381740.0,18
3,01082688c6,happy bday!,positive,morning,46-60,Andorra,77265,470.0,164
4,33987a8ee5,http://twitpic.com/4w75p - I like it!!,positive,noon,60-70,Angola,32866272,1246700.0,26
...,...,...,...,...,...,...,...,...,...
3529,e5f0e6ef4b,"its at 3 am, im very tired but i can`t sleep ...",negative,noon,21-30,Nicaragua,6624554,120340.0,55
3530,416863ce47,All alone in this old house again. Thanks for...,positive,night,31-45,Niger,24206644,1266700.0,19
3531,6332da480c,I know what you mean. My little dog is sinkin...,negative,morning,46-60,Nigeria,206139589,910770.0,226
3532,df1baec676,_sutra what is your next youtube video gonna b...,positive,noon,60-70,North Korea,25778816,120410.0,214


In [5]:
!pip install regex ratelimit

[notice] A new release of pip is available: 23.0.1 -> 24.2
[notice] To update, run: pip install --upgrade pip


In [6]:
from prompt.prompt import get_sentiment_classifier_prompt
from llm_service.llm_caller import LLMCaller
from config import LLMConfig, load_yaml_config
from utils.load_label_maps import load_label_maps
#from utils.evaluate import get_accuracy
from utils.preprocess import *

import time
from tqdm import tqdm

In [7]:
def preprocess_pipelines(text: str) -> str:
    """Clean one input sentence before it is placed into the classifier prompt.

    The only active step is ``remove_url``; the ``remove_special_char`` step
    is currently disabled.
    """
    cleaned = remove_url(text)
    # Disabled step: cleaned = remove_special_char(cleaned)
    return cleaned

In [8]:
# Label lookup loaded from YAML. Later cells index it both with the model's
# answer and with the sentiment names "neutral", "positive" and "negative".
LABEL_MAPS = load_label_maps("../dataset/label_maps.yaml")

In [9]:
# Load the Gemini settings file; the return value is not used here.
# NOTE(review): presumably this populates the settings that LLMConfig() reads
# in the next cell — confirm against the config module.
load_yaml_config("../config/gemini_config.yaml")

In [10]:
# Build the LLM configuration and the caller object that the inference loop
# uses for every classification request.
llm_config = LLMConfig()
llm_service = LLMCaller(llm_config)

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Show which model is configured (the results file name is built from the
# caller's MODEL setting).
llm_config.MODEL

'gemini-1.0-pro'

In [None]:
import os

total_row = len(test_df)
# One row per test sentence: original text, predicted category, seconds spent in the model call.
ans_df = pd.DataFrame(columns=["texts", "category", "process_time"])

RESULTS_DIR = "../results/english_dataset"
results_path = os.path.join(RESULTS_DIR, f"{llm_service.llm_config.MODEL}_preprocess_results.csv")

# exist_ok=True replaces the separate existence check and removes the window
# between "check" and "create" in which another process could create the folder.
os.makedirs(RESULTS_DIR, exist_ok=True)

for row in tqdm(range(total_row)):
    sentence = test_df.loc[row, "texts"]
    preprocessed_sentence = preprocess_pipelines(sentence)
    sentiment_classifier_prompt = get_sentiment_classifier_prompt(message=preprocessed_sentence)

    # Time only the model call. perf_counter() is monotonic, so the measured
    # duration cannot be distorted by system clock adjustments.
    start_process = time.perf_counter()
    ans = llm_service.call(sentiment_classifier_prompt)
    process_time = time.perf_counter() - start_process

    # Answers found in LABEL_MAPS are stored as their mapped value; any other
    # answer is stored unchanged and handled by the post-processing cell.
    try:
        category = LABEL_MAPS[ans]
    except KeyError:
        category = ans

    # The original sentence (not the preprocessed one) is what gets saved.
    ans_df.loc[row, "texts"] = sentence
    ans_df.loc[row, "category"] = category
    ans_df.loc[row, "process_time"] = process_time

    # The file is rewritten after every row, so the rows processed so far are
    # on disk if the run is interrupted.
    ans_df.to_csv(results_path, index=False)

I0000 00:00:1723392216.357332    7469 config.cc:230] gRPC experiments enabled: call_status_override_on_cancellation, event_engine_dns, event_engine_listener, http2_stats_fix, monitoring_experiment, pick_first_new, trace_record_callops, work_serializer_clears_time_cache
I0000 00:00:1723392216.364907    7469 check_gcp_environment.cc:61] BIOS data file does not exist or cannot be opened.
  9%|█████████▉                                                                                                          | 301/3534 [05:10<51:05,  1.05it/s]

In [None]:
# Reload the predictions from the results CSV; the cells below work on this
# reloaded frame rather than on the in-memory one built by the inference loop.
ans_df = pd.read_csv(results_path)

In [None]:
# Normalise the saved model answers into integer label ids.
# Rows are collected in a list and converted to a DataFrame once, instead of
# growing the frame with one .loc assignment per row.
post_process_records = []
for row in range(len(ans_df)):
    category = ans_df.loc[row, "category"]
    if isinstance(category, str):
        # Free-text answer: look for a sentiment name inside it, ignoring
        # letter case so that e.g. "Positive." is recognised as well.
        answer_text = category.lower()
        # Checked in this order; the first name found wins.
        for sentiment in ("neutral", "positive", "negative"):
            if sentiment in answer_text:
                category = LABEL_MAPS[sentiment]
                break
        # NOTE: a rule mapping answers containing "question" to neutral was
        # tried here and is disabled.

    try:
        label_id = int(category)
    except (TypeError, ValueError) as exc:
        # Report which row and which answer could not be converted instead of
        # surfacing the bare int() error.
        raise ValueError(
            f"Row {row}: cannot turn model answer {category!r} into a label id"
        ) from exc

    post_process_records.append(
        {
            "texts": ans_df.loc[row, "texts"],
            "category": label_id,
            "process_time": ans_df.loc[row, "process_time"],
        }
    )

post_process_ans_df = pd.DataFrame(
    post_process_records, columns=["texts", "category", "process_time"]
)

In [None]:
def get_accuracy(test_df: pd.DataFrame, ans_df: pd.DataFrame, label_maps=None) -> float:
    """Return the fraction of rows whose predicted label equals the gold label.

    Rows are paired by position, so the two frames do not need to share an
    index.

    Args:
        test_df: Frame whose "category" column holds the gold label names.
        ans_df: Frame whose "category" column holds the predicted label ids.
        label_maps: Mapping from gold label name to label id. When omitted,
            the notebook-level ``LABEL_MAPS`` is used.

    Returns:
        Number of matching rows divided by the total number of rows.

    Raises:
        ValueError: If the frames differ in length or contain no rows.
        KeyError: If a gold label name is missing from ``label_maps``.
    """
    if label_maps is None:
        label_maps = LABEL_MAPS

    if len(test_df) != len(ans_df):
        raise ValueError("2 dataframes not same length.")

    total_row = len(test_df)
    if total_row == 0:
        # Accuracy is undefined without rows; fail with a clear message
        # instead of a ZeroDivisionError from the final division.
        raise ValueError("Cannot compute accuracy on empty dataframes.")

    gold_names = test_df["category"].tolist()
    predictions = ans_df["category"].tolist()
    correct = sum(
        1
        for gold_name, predict in zip(gold_names, predictions)
        if label_maps[gold_name] == predict
    )
    return correct / total_row

In [None]:
# Score the post-processed predictions against the gold labels of the test set.
acc = get_accuracy(test_df, post_process_ans_df)

In [None]:
# Display the accuracy (correct predictions divided by total rows).
acc

In [None]:
# List every misclassified sentence.
# The predictions hold label ids while test_df holds label names, so the gold
# name is mapped through LABEL_MAPS before comparing (as get_accuracy does).
# Comparing the raw name with the id would report every row as a mismatch.
for row in range(len(post_process_ans_df)):
    pred = post_process_ans_df.loc[row, "category"]
    actual = test_df.loc[row, "category"]
    text = post_process_ans_df.loc[row, "texts"]
    if pred != LABEL_MAPS[actual]:
        print(f"{text}, actual: {actual}, pred: {pred}")