In [None]:
import pandas as pd
import openai
from mcts_main import grid_search
from sequence_generator import make_possible_terms
import numpy as np
import math 

In [None]:
# Print how many candidate terms make_possible_terms returns when called with True
# (presumably use_interaction=True, the keyword evaluate_gpt3 passes — TODO confirm).
print(len(make_possible_terms(True)))

In [None]:
# Notebook configuration: both values are interpolated into the
# data/{train_or_test}/{nterms}/ paths used by the load/convert cells below.
# NOTE(review): several later cells use hard-coded 'data/train/...' and '2' paths
# instead of these variables — confirm the values match before running them.
nterms = 3  # used as both the sub-directory name and the file-name prefix
train_or_test = 'test'  # selects which data/<split>/ directory is read and written

In [None]:
# Load the raw two-column file. Each row is split on the literal text '],';
# a multi-character separator is handled as a regex, hence the python engine.
df = pd.read_csv(
    f'data/{train_or_test}/{nterms}/{nterms}_int.csv',
    sep='],',
    engine='python',
    names=['prompt', 'completion'],
)

In [None]:
# Drop the first character of every prompt (presumably the leading '[' left over
# from splitting on '],' — confirm against the CSV).
# Not idempotent: re-running this cell strips one more character each time.
df['prompt'] = df['prompt'].str[1:]

In [None]:
# Drop the first and last character of every completion, then append the ' <EOS>'
# marker (the same string evaluate_gpt3 passes as the stop sequence).
# Not idempotent: re-running this cell trims again and appends a second marker.
df['completion'] = df['completion'].str[1:-1] + ' <EOS>'

In [None]:
# Write the cleaned prompt/completion pairs as JSON Lines (one record per line)
# next to the source CSV.
df.to_json(
    f"data/{train_or_test}/{nterms}/{nterms}_int.jsonl",
    lines=True,
    orient='records',
)

In [None]:
# Hold out 160 randomly chosen rows as the validation split and remember their
# index labels so they can be excluded from the training split.
# random_state pins the draw so Restart & Run All reproduces the same split.
df_valid = df.sample(160, random_state=0)
valid_ids = set(df_valid.index)

In [None]:
# Training split = every row position that was not held out for validation.
# len(df) replaces a fixed 800 so rows are never silently dropped (larger file)
# and .iloc never raises IndexError (smaller file).
# Positions are compared against index labels, which relies on df still having
# the default 0..len(df)-1 index it gets from read_csv.
train_ids = [i for i in range(len(df)) if i not in valid_ids]
df_train = df.iloc[train_ids]

In [None]:
# Save the held-out validation rows as JSON Lines under data/train/
# (this path is fixed to 'train' regardless of train_or_test).
df_valid.to_json(
    f"data/train/{nterms}/{nterms}_int_v.jsonl",
    lines=True,
    orient='records',
)

In [None]:
# Run the OpenAI CLI data-preparation tool on data/train/2/2.jsonl.
# NOTE(review): the path is hard-coded to nterms=2 and to the file without the
# '_int' suffix, independent of the `nterms` variable — confirm this is the intended input.
!openai tools fine_tunes.prepare_data -f data/train/2/2.jsonl -q

In [None]:
# Launch a fine-tune of the `ada` base model on the prepared nterms=2 interaction files.
# Both files are given relative to the notebook directory; the training path used to
# start with '/', which pointed at the filesystem root instead of the local data/ folder.
!openai api fine_tunes.create -t "data/train/2/2_int_prepared_train.jsonl" -v "data/train/2/2_int_prepared_valid.jsonl" -m ada

In [None]:
#################### no interaction terms ####################
# nterms=2, ada, 4 epochs, ada:ft-personal-2022-11-30-08-42-32
# nterms=2, babbage, 4 epochs, babbage:ft-personal-2022-11-30-08-32-24
# nterms=2, curie, 4 epochs, curie:ft-personal-2022-11-30-08-22-34
# nterms=2, davinci, 4 epochs, davinci:ft-personal-2022-11-29-06-40-46

# nterms=3, ada, 4 epochs, ada:ft-personal-2022-11-30-09-12-47
# nterms=3, babbage, 4 epochs, babbage:ft-personal-2022-11-30-09-02-00
# nterms=3, curie, 4 epochs, curie:ft-personal-2022-11-30-09-15-04
# nterms=3, davinci, 4 epochs, davinci:ft-personal-2022-11-30-08-52-38

#################### with interaction terms ##################
# nterms=2, ada, 4 epochs, ada:ft-personal-2022-12-19-07-44-09
# nterms=2, babbage, 4 epochs, babbage:ft-personal-2022-12-19-07-53-32
# nterms=2, curie, 4 epochs, curie:ft-personal-2022-12-19-08-00-02
# nterms=2, davinci, 4 epochs, davinci:ft-personal-2022-12-19-08-09-25

# nterms=3, ada, 4 epochs, ada:ft-personal-2022-12-19-08-28-41
# nterms=3, babbage, 4 epochs, babbage:ft-personal-2022-12-19-08-30-35
# nterms=3, curie, 4 epochs, curie:ft-personal-2022-12-19-08-40-23
# nterms=3, davinci, 4 epochs, davinci:ft-personal-2022-12-19-08-46-42

In [None]:
def _parse_term_mask(text, n_terms):
    """Convert a completion such as 'True, False' into a boolean mask of length n_terms."""
    mask = np.array(text.strip().split(', ')) == 'True'
    if len(mask) != n_terms:
        # A malformed completion (too few / too many entries) would otherwise make
        # the boolean indexing below raise IndexError and abort the whole run.
        # Extra entries are ignored; missing entries count as False.
        fixed = np.zeros(n_terms, dtype=bool)
        keep = min(len(mask), n_terms)
        fixed[:keep] = mask[:keep]
        mask = fixed
    return mask


def evaluate_gpt3(ft_model=None, nterms=2, interaction=False, batch_size=20):
    """Score a fine-tuned completion model on the prepared test prompts.

    Reads the prompts from ``data/test/{nterms}/{nterms}[_int]_prepared.jsonl``,
    sends them to ``openai.Completion.create`` in batches, interprets every
    completion as a comma-separated True/False mask over the terms returned by
    ``make_possible_terms``, and passes the selected terms together with the
    prompt's integer sequence to ``grid_search``. The value returned by
    ``grid_search`` is treated as an RMSE. Prints the mean RMSE and the
    percentage of prompts whose RMSE is exactly 0.

    Args:
        ft_model: Identifier of the fine-tuned model to query (required).
        nterms: Selects the test sub-directory and file-name prefix.
        interaction: If True, use the ``_int`` test file and request interaction
            terms from ``make_possible_terms``.
        batch_size: Number of prompts sent per API request (default 20, the
            value that was previously hard-coded).

    Raises:
        ValueError: If ``ft_model`` is missing, ``batch_size`` is smaller than 1,
            or the test file contains no prompts.
    """
    if not ft_model:
        raise ValueError('Must provide a fine-tuned model')
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    test_dir = 'data/test/'
    f_int = '_int' if interaction else ''
    test_file = test_dir + f'{nterms}/{nterms}{f_int}_prepared.jsonl'
    test_data = pd.read_json(test_file, orient='records', lines=True)['prompt'].tolist()
    n = len(test_data)
    if n == 0:
        # Without this guard the percentage below divides by zero.
        raise ValueError(f'No test prompts found in {test_file}')
    terms = np.array(make_possible_terms(use_interaction=interaction))
    rmses = []
    correct_cnt = 0
    for start in range(0, n, batch_size):
        # Slicing past the end is safe, so the last (shorter) batch needs no special case.
        batch = test_data[start:start + batch_size]
        # temperature=0 and the ' <EOS>' stop sequence are passed unchanged to the API.
        preds = openai.Completion.create(model=ft_model, prompt=batch, stop=[' <EOS>'], max_tokens=50, temperature=0)['choices']
        for prompt, obj in zip(batch, preds):
            pred_mask = _parse_term_mask(obj['text'], len(terms))
            pred_terms = terms[pred_mask]
            # The last 3 characters of a prepared prompt are dropped before parsing
            # (presumably the separator appended during data preparation — confirm).
            seq_list = np.array(prompt[:-3].split(', ')).astype(int)
            rmse = grid_search(seq_list, pred_terms)
            rmses.append(rmse)
            if rmse == 0:
                correct_cnt += 1
    print('Mean RMSE on test data:', np.mean(rmses))
    print('Percentage of examples solved perfectly:', f'{100*correct_cnt/n}%')

In [None]:
# Fine-tuned model to evaluate; this ID is the one listed in the table above for
# nterms=3, davinci, with interaction terms.
model = 'davinci:ft-personal-2022-12-19-08-46-42'

In [None]:
# Evaluate the selected model on the nterms=3 test set that includes interaction terms.
evaluate_gpt3(ft_model=model, nterms=3, interaction=True)