In [1]:
%reload_ext autoreload
%autoreload 2
from src.utils import find_text_parts, split_text
from datasets import load_dataset
import pandas as pd
import numpy as np
import os
from evaluator.gpt_evaluator import GPT4Semantic, GPT4Accuracy

[nltk_data] Downloading package punkt to /home/ubuntu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /home/ubuntu/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/ubuntu/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


# Processing the data

In [153]:
window_size = 5
unit = "day"
output_dir = f"/home/ubuntu/multimodal/Data/climate-GPT4-Evaluation/input_copy/{window_size}{unit}"
filename = "processed.csv"
text_key_name = "weather_forecast"
num_key_name = "temp"
num_pattern = fr"{unit}_\d+_{num_key_name}: '([\d.]+)'"
text_pattern =fr'({unit}_\d+_date:\s*\S+\s+{unit}_\d+_{text_key_name}:.*?)(?=\s{unit}_\d+_date|\Z)'
hf_dataset = f"Howard881010/climate-{window_size}{unit}-mixed"

data_all = load_dataset(hf_dataset)
data = pd.DataFrame(data_all['test'])

output_texts = data['output'].apply(lambda x: find_text_parts(x, num_pattern)).apply(lambda x: split_text(x, text_pattern)).to_list()
pred_texts = data['input'].apply(lambda x: find_text_parts(x, num_pattern)).apply(lambda x: split_text(x, text_pattern)).to_list()
for idx, pred_text in enumerate(pred_texts):
    if len(pred_text) > window_size:
        pred_texts[idx] = pred_text[:window_size]
    while len(pred_text) < window_size:
        pred_texts[idx].append(None)

output_texts = np.reshape(output_texts, -1)
pred_texts = np.reshape(pred_texts, -1)

results = pd.DataFrame({"output_text": output_texts, "pred_text": pred_texts})

os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, filename)
results.to_csv(output_path, index=False)

# GPT4 semantic

In [154]:
gpt4semantic = GPT4Semantic()

results_dir = f"/home/ubuntu/multimodal/Data/climate-GPT4-Evaluation/input_copy/{window_size}{unit}"
results = pd.read_csv(f"/home/ubuntu/multimodal/Data/climate-GPT4-Evaluation/input_copy/{window_size}{unit}/processed.csv")

jsonl_path = os.path.join(results_dir, "batch.jsonl")
output_path = os.path.join(results_dir, "semantic.txt")

batch_object_id = gpt4semantic.create_and_run_batch_job(results, jsonl_path, output_text_column="output_text",
                                 pred_text_column="pred_text")

batch job created with batch_object_id 
 batch_d3Qbd5uPsU3BsvpfsUTtZXQj


In [151]:
outputs = gpt4semantic.check_status_and_parse("batch_Of6VWveHQG09ALh6qrQ5E9vn", output_path)
print(outputs)

[{'score': 4}, {'score': 6}, {'score': 4}, {'score': 7}, {'score': 4}, {'score': 3}, {'score': 6}, {'score': 5}, {'score': 5}, {'score': 6}, {'score': 7}, {'score': 5}, {'score': 4}, {'score': 5}, {'score': 5}, {'score': 5}, {'score': 5}, {'score': 5}, {'score': 5}, {'score': 5}, {'score': 5}, {'score': 3}, {'score': 4}, {'score': 6}, {'score': 3}, {'score': 4}, {'score': 6}, {'score': 5}, {'score': 3}, {'score': 6}, {'score': 3}, {'score': 4}, {'score': 4}, {'score': 4}, {'score': 4}, {'score': 6}, {'score': 5}, {'score': 3}, {'score': 6}, {'score': 4}, {'score': 3}, {'score': 6}, {'score': 5}, {'score': 6}, {'score': 4}, {'score': 4}, {'score': 4}, {'score': 7}, {'score': 3}, {'score': 4}, {'score': 7}, {'score': 4}, {'score': 5}, {'score': 6}, {'score': 3}, {'score': 5}, {'score': 6}, {'score': 4}, {'score': 3}, {'score': 7}, {'score': 3}, {'score': 4}, {'score': 7}, {'score': 7}, {'score': 4}, {'score': 7}, {'score': 6}, {'score': 5}, {'score': 7}, {'score': 7}, {'score': 5}, {'sco

In [152]:
semantic_score, count_none = gpt4semantic.calculate_metrics(outputs)
print(semantic_score, count_none)

4.973756906077348 0


# GPT4 Accuracy

In [146]:
gpt4accuracy = GPT4Accuracy()

results_dir = f"/home/ubuntu/multimodal/Data/climate-GPT4-Evaluation/input_copy/{window_size}{unit}"
results = pd.read_csv(f"/home/ubuntu/multimodal/Data/climate-GPT4-Evaluation/input_copy/{window_size}{unit}/processed.csv")

jsonl_path = os.path.join(results_dir, "batch.jsonl")
output_path = os.path.join(results_dir, "accuracy.txt")

batch_object_id = gpt4accuracy.create_and_run_batch_job(results, jsonl_path, output_text_column="output_text",
                                 pred_text_column="pred_text")

batch job created with batch_object_id 
 batch_VPMo0cJG5B3Dta6M9FEdoAgB


In [149]:
outputs = gpt4accuracy.check_status_and_parse(batch_object_id, output_path)
print(outputs)



In [150]:
precisions, recalls, f1_scores = gpt4accuracy.calculate_metrics(outputs)

print(precisions, recalls, f1_scores)

0.6305992640246784 0.3789844556076067 0.4486163831397433
