In [None]:
import csv
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import openai
from matplotlib.colors import LinearSegmentedColormap

In [None]:
# Translate dataset to 'prompt-completion' format
# Read the source JSON explicitly as UTF-8: the output below is written as UTF-8
# with ensure_ascii=False, so relying on the platform default encoding for the
# input (presumably non-ASCII text as well) would be inconsistent.
with open('./json_file/NQuAD_final_train_7.5k.json', 'r', encoding='utf-8') as file:
    data = json.load(file)
prompt_completion_pairs = []
for item in data:
    # Construct prompt: the context followed by a fixed instruction and the four
    # candidate answers labelled A) to D).
    prompt = f"{item['context']} 请从以下四个选项中选择最能概括文章的句子，请考虑句子中数字的准确性。选项：A) {item['ans0']} B) {item['ans1']} C) {item['ans2']} D) {item['ans3']}"
    # Translate (0, 1, 2, 3) to (A, B, C, D); 65 is the code point of 'A'
    completion = chr(65 + item['ans'])
    # Add converted data to the prompt list
    prompt_completion_pairs.append({
        "prompt": prompt,
        "completion": completion
    })
# Write to jsonl file with UTF-8 encoding and ensure_ascii=False
with open('./json_file/prompt_completion_data.jsonl', 'w', encoding='utf-8') as f:
    for pair in prompt_completion_pairs:
        # Ensure that JSON dumps does not escape non-ASCII characters
        f.write(json.dumps(pair, ensure_ascii=False) + '\n')

In [None]:
# Use the OpenAI CLI to adjust the dataset format (run in a terminal).
# <LOCAL_FILE> is a placeholder for the path of the file to prepare
# (presumably the prompt_completion_data.jsonl written by the previous cell — confirm).
openai tools fine_tunes.prepare_data -f <LOCAL_FILE>

In [None]:
# Upload the JSON file to the OpenAI server (run in a terminal).
# <FILE_PATH> is a placeholder for the local file to upload.
# NOTE(review): the original comment says this step yields a "model ID", but the
# fine-tuning cell below passes a "file-..." value as training_file, so this
# command presumably returns a file ID — confirm.
# NOTE(review): "-p purpose" looks like a placeholder for the actual purpose
# value — confirm against the CLI help before running.
openai api files.create -f <FILE_PATH> -p purpose

In [None]:
# Fine-tune GPT model
# Read the API key from the OPENAI_API_KEY environment variable instead of
# hard-coding a secret in the notebook (notebooks are routinely shared and
# committed). A missing variable raises KeyError here, before any request is made.
openai.api_key = os.environ["OPENAI_API_KEY"]
# Start a fine-tuning job for the base model "babbage-002" on the previously
# uploaded training file; as the last expression of the cell, the returned job
# object is displayed as the cell output.
openai.FineTuningJob.create(training_file="file-qmcjU4tOYcmzjoHk7bfWlxX3", model="babbage-002")

In [None]:
# Evaluation
def get_answer(prompt, model="ft:babbage-002:personal::9KZIZQyg", temperature=0):
    """Ask the fine-tuned completion model for its answer to ``prompt``.

    Args:
        prompt: Full prompt text sent to the model.
        model: Identifier of the completion model to query. Defaults to the
            fine-tuned model this notebook evaluates.
        temperature: Sampling temperature. Defaults to 0 so that repeated
            evaluation runs pick the most likely token instead of sampling,
            which keeps the reported accuracy reproducible.

    Returns:
        The text of the first choice with surrounding whitespace stripped.
        Only one token is requested (``max_tokens=1``), so this is a single
        token such as an option letter.
    """
    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=1,
        temperature=temperature,
    )
    return response.choices[0].text.strip()

# Upper bound on how many test prompts are sent to the model in one run.
MAX_EVAL_SAMPLES = 1000

correct_count = 0
total_count = 0
pred_list = []
true_list = []
# Read the test set explicitly as UTF-8 (presumably produced the same way as the
# UTF-8 training JSONL) instead of relying on the platform default encoding.
with open('./json_file/prompt_completion_data_test.jsonl', 'r', encoding='utf-8') as file:
    for line in file:
        if total_count >= MAX_EVAL_SAMPLES:
            break
        # Tolerate blank lines (e.g. a trailing newline) instead of failing in json.loads.
        if not line.strip():
            continue
        record = json.loads(line)
        prompt = record['prompt']
        true_response = record['completion']
        pred = get_answer(prompt)
        true_list.append(true_response)
        pred_list.append(pred)
        if pred == true_response:
            print(f"{total_count}: true")
            correct_count += 1
        else:
            print(f"{total_count}: false")
        total_count += 1

# Guard against an empty test file: accuracy is undefined without samples.
if total_count:
    accuracy = correct_count / total_count
    print(f"Accuracy: {accuracy:.2%}")
else:
    accuracy = float('nan')
    print("No test samples were evaluated; accuracy is undefined.")

# Persist the (true, predicted) pairs for the error-analysis step.
with open('results.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['True Response', 'Predicted Response'])
    for true, pred in zip(true_list, pred_list):
        writer.writerow([true, pred])

In [None]:
# Error analysis
def convert_to_float(value):
    """Convert an answer option to a float, ignoring thousands separators.

    Args:
        value: A numeric string such as ``"1,234.5"``, or a value that is
            already an ``int``/``float`` (JSON may store numbers unquoted).

    Returns:
        The parsed float, or ``None`` when the value cannot be parsed; in that
        case a message naming the offending value is printed.
    """
    # Numbers loaded from JSON have no .replace(); pass them through directly.
    if isinstance(value, (int, float)):
        return float(value)
    try:
        return float(str(value).replace(',', ''))
    except ValueError:
        print(f"Error converting '{value}' to float.")
        return None


def _read_label(row, *names):
    """Return the value of the first column in ``names`` that ``row`` contains."""
    for name in names:
        if name in row:
            return row[name]
    raise KeyError(f"results file has none of the columns {names}")


def calculate_value_diff(results_path='results.csv', options_path='answer_options.json'):
    """Collect absolute differences between predicted and correct option values.

    Rows of the results CSV are paired positionally with the entries of the
    answer-options JSON (a sequence indexed by row, each entry indexed 0-3 for
    options A-D). Rows beyond the number of option entries are ignored.

    Args:
        results_path: CSV with one row per evaluated sample. Both the
            ``true``/``pred`` and the ``True Response``/``Predicted Response``
            header spellings are accepted.
        options_path: JSON file holding the numeric answer options per sample.

    Returns:
        A list of ``abs(predicted - correct)`` for every row whose two values
        differ. A prediction that is not one of A-D is treated as the value 0.
        Rows where an option cannot be parsed as a number are skipped
        (``convert_to_float`` already reports them).

    Raises:
        KeyError: If the CSV lacks the expected columns or the true label is
            not one of A-D.
    """
    with open(options_path, 'r', encoding='utf-8') as file:
        answer_options = json.load(file)

    index_map = {'A': 0, 'B': 1, 'C': 2, 'D': 3}

    differences = []
    with open(results_path, 'r', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # zip stops at the shorter input, i.e. rows without options are ignored.
        for row, options in zip(reader, answer_options):
            true_label = _read_label(row, 'true', 'True Response')
            pred_label = _read_label(row, 'pred', 'Predicted Response')

            correct_index = index_map[true_label]
            if pred_label in index_map:
                predicted_value = convert_to_float(options[index_map[pred_label]])
            else:
                # The model answered with something other than A-D.
                predicted_value = 0
            correct_value = convert_to_float(options[correct_index])

            # An unparseable option yields None; skip it rather than crash in abs().
            if predicted_value is None or correct_value is None:
                continue

            if correct_value != predicted_value:
                differences.append(abs(predicted_value - correct_value))

    return differences


def draw_graphic(differences, bins=None):
    """Plot a histogram of value differences, shading taller bars darker green.

    Args:
        differences: Sequence of non-negative numeric differences.
        bins: Optional bin edges passed to ``plt.hist``. Defaults to the
            integer edges 0..9; values outside the outermost edges are not
            counted in the histogram.
    """
    if bins is None:
        bins = np.arange(0, 10, 1)

    n, bins, patches = plt.hist(differences, bins=bins, edgecolor=None)

    # Light-to-dark green ramp, discretised to one colour level per bar.
    colors = ["#e5f5e0", "#c7e9c0", "#a1d99b", "#74c476", "#41ab5d", "#238b45", "#006d2c", "#00441b"]
    cmap = LinearSegmentedColormap.from_list("custom_green", colors, N=len(patches))

    max_height = max(n)
    min_height = min(n)
    height_range = max_height - min_height
    for patch, height in zip(patches, n):
        # When every bar has the same height (including an empty input) there is
        # no range to normalise over; use the lightest shade instead of dividing
        # by zero and feeding NaN to the colormap.
        if height_range > 0:
            frac = (height - min_height) / height_range
        else:
            frac = 0.0
        patch.set_facecolor(cmap(frac))

    plt.title('Histogram of Numerical Data')
    plt.xlabel('Value Diff')
    plt.ylabel('Frequency')

    plt.show()

# Compute the value differences from results.csv / answer_options.json and
# plot them as a histogram.
differences = calculate_value_diff()
draw_graphic(differences)