# Pseudo dataset
* Get sample data [here](https://github.com/fuyu-quant/IBLM/tree/main/datasets).

In [1]:
%%capture
!pip install git+https://github.com/fuyu-quant/IBLM.git@feature-in-context-learning 

In [2]:
#%%capture
#!pip install git+https://github.com/fuyu-quant/IBLM.git

In [3]:
import pkg_resources
print(pkg_resources.get_distribution('IBLM').version)

0.1.45


In [4]:
from dotenv import load_dotenv

# .env ファイルを読み込む
load_dotenv(override=True)

True

In [3]:
prompt = """
Please create your code in compliance with all of the following conditions. Output should be code only. Do not enclose the output in ``python ``` or the like.
・The following data are for "target" of 0 and 1, respectively. Analyze these data and create a python code to predict the probability that the "target" of the unknown data is 1.
------------------
{dataset_str_}
------------------
・Create a code that predicts a high probability value when "target" is 1 and a low probability value when "target" is 0 for the data given above.
・Each data type is as follows. If necessary, you can change the data type.
------------------
{data_type_}
------------------
・Think and code the logic to predict probability values based on the data without using a existing machine learning model.
・Please make your predictions as accurate as possible.
・Predicting probability values as finely as possible increases overall accuracy.
・You do not need to provide examples.
・Create a code like the following.
------------------
import numpy as np
def predict(x):
    df = x.copy()
    output = []
    for index, row in df.iterrows():
        # Do not change the code before this point.
        # Please describe the process required to make the prediction below.

        # Do not change the code after this point.
        output.append(y)
    return np.array(output)
"""

### Training

In [1]:
import os
import sys
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# pip
#from iblm import IBLMClassifier

sys.path.append('../..')

from src.iblm.iblmodel.model import IBLClassifier

In [3]:
columns = 4
seed = 3657
train_data = 300

file_name = f'pseudodata_{seed}_{train_data}_train.csv'


df = pd.read_csv(f'../../data/evaluation_dataset/{file_name}')
x_train = df.drop('target', axis=1)
y_train = df['target']

In [22]:
prompt = """
・The data types are as follows.
{data_type_}
・data
{dataset_str_}
------
From the above data, output Python code that outputs the probability value that the rightmost column is 1.
The Python code will always output the probability values.
Predicting probability values as finely as possible increases overall accuracy.
Never use machine learning models.
Create a code like the following.
------------------
import numpy as np
def predict(x):
    df = x.copy()
    output = []
    for index, row in df.iterrows():
        # Do not change the code before this point.
        # Please describe the process required to make the prediction below.

        # Do not change the code after this point.
        output.append(y)
    return np.array(output)
"""

In [23]:
#model_name = 'gpt-4'
#prompt_file = 'classification_3.txt'
seed = 3665

ibl_cl = IBLClassifier(prompt = prompt, seed=seed)

In [24]:
ibl_cl.fit(x_train, y_train)

In [25]:
print(ibl_cl.code_model)

Based on the given data, it seems like the rightmost column (which we need to predict) is a binary classification (0 or 1). However, without any context or information about the relationship between the input features and the output, it's impossible to write a deterministic function to predict the output. 

The task explicitly mentions not to use machine learning models, which are typically used for such prediction tasks. Therefore, we can only make a very naive guess. For example, we could return 1 if the sum of the input features is positive and 0 otherwise. But this is a very naive approach and likely won't give accurate results.

Here is the Python code for this naive approach:

```python
import numpy as np

def predict(df):
    output = []
    for index, row in df.iterrows():
        # Do not change the code before this point.
        # Please describe the process required to make the prediction below.
        y = 1 if np.sum(row[:-1]) > 0 else 0
        # Do not change the code a

### Prediction

In [29]:
file_name = f'pseudodata_{columns}f_{train_data}_test.csv'


df = pd.read_csv(f'../data/pseudodata/{file_name}')
x_test = df.drop('target', axis=1)
y_test = df['target']

In [30]:
y_proba = iblm.predict(x_test)
y_pred = (y_proba > 0.5).astype(int)

In [31]:
accuracy = round(accuracy_score(y_test, y_pred),4)
print(f'Accuracy: {accuracy}')

# Precision
precision = round(precision_score(y_test, y_pred),4)
print(f'Precision: {precision}')

# Recall
recall = round(recall_score(y_test, y_pred),4)
print(f'Recall: {recall}')

# F1 score
f1 = round(f1_score(y_test, y_pred),4)
print(f'F1 score: {f1}')

# ROC-AUC (you need prediction probabilities for this, not just class predictions)
# Here we just reuse y_pred for simplicity
roc_auc = round(roc_auc_score(y_test, y_proba),4)
print(f'ROC-AUC: {roc_auc}')

Accuracy: 0.49
Precision: 0.4912
Recall: 0.7517
F1 score: 0.5942
ROC-AUC: 0.4715


In [None]:
file_path = '../models/pseudodata/'
model_name = 'pseudodata'

### Create multiple code models

In [73]:
llm_model = OpenAI(temperature=0, model_name = 'gpt-4-0613')

params = {'columns_name': True}

iblm = IBLMClassifier(llm_model = llm_model, params=params)

#### 3655

In [74]:
seed = 3655
train_data = 300

train_num = [10, 20, 30, 40, 50, 100, 200, 300]


for i in train_num:
    file_name = f'pseudodata_{seed}_{train_data}_train.csv'
    df = pd.read_csv(f'../data/pseudodata/{file_name}').head(i)
    x_train = df.drop('target', axis=1)
    y_train = df['target']

    file_path = '../models/pseudodata/'
    for j in range(1,31):
        model = iblm.fit(x_train, y_train, model_name = f'pseudodata_{seed}_{i}_{j}', file_path=file_path)

Tokens Used: 816
	Prompt Tokens: 563
	Completion Tokens: 253
Successful Requests: 1
Total Cost (USD): $0.03207
Tokens Used: 948
	Prompt Tokens: 563
	Completion Tokens: 385
Successful Requests: 1
Total Cost (USD): $0.03999
Tokens Used: 890
	Prompt Tokens: 563
	Completion Tokens: 327
Successful Requests: 1
Total Cost (USD): $0.03651
Tokens Used: 914
	Prompt Tokens: 563
	Completion Tokens: 351
Successful Requests: 1
Total Cost (USD): $0.03795
Tokens Used: 776
	Prompt Tokens: 563
	Completion Tokens: 213
Successful Requests: 1
Total Cost (USD): $0.029670000000000002
Tokens Used: 1001
	Prompt Tokens: 563
	Completion Tokens: 438
Successful Requests: 1
Total Cost (USD): $0.04317
Tokens Used: 874
	Prompt Tokens: 563
	Completion Tokens: 311
Successful Requests: 1
Total Cost (USD): $0.03555
Tokens Used: 864
	Prompt Tokens: 563
	Completion Tokens: 301
Successful Requests: 1
Total Cost (USD): $0.03495
Tokens Used: 1081
	Prompt Tokens: 563
	Completion Tokens: 518
Successful Requests: 1
Total Cost (U

#### 3656

In [75]:
seed = 3656
train_data = 300

train_num = [10, 20, 30, 40, 50, 100, 200, 300]


for i in train_num:
    file_name = f'pseudodata_{seed}_{train_data}_train.csv'
    df = pd.read_csv(f'../data/pseudodata/{file_name}').head(i)
    x_train = df.drop('target', axis=1)
    y_train = df['target']

    file_path = '../models/pseudodata/'
    for j in range(1,31):
        model = iblm.fit(x_train, y_train, model_name = f'pseudodata_{seed}_{i}_{j}', file_path=file_path)

Tokens Used: 849
	Prompt Tokens: 565
	Completion Tokens: 284
Successful Requests: 1
Total Cost (USD): $0.03399
Tokens Used: 935
	Prompt Tokens: 565
	Completion Tokens: 370
Successful Requests: 1
Total Cost (USD): $0.039150000000000004
Tokens Used: 1083
	Prompt Tokens: 565
	Completion Tokens: 518
Successful Requests: 1
Total Cost (USD): $0.04803
Tokens Used: 927
	Prompt Tokens: 565
	Completion Tokens: 362
Successful Requests: 1
Total Cost (USD): $0.038669999999999996
Tokens Used: 948
	Prompt Tokens: 565
	Completion Tokens: 383
Successful Requests: 1
Total Cost (USD): $0.03993
Tokens Used: 931
	Prompt Tokens: 565
	Completion Tokens: 366
Successful Requests: 1
Total Cost (USD): $0.03891
Tokens Used: 965
	Prompt Tokens: 565
	Completion Tokens: 400
Successful Requests: 1
Total Cost (USD): $0.04095
Tokens Used: 1097
	Prompt Tokens: 565
	Completion Tokens: 532
Successful Requests: 1
Total Cost (USD): $0.04887
Tokens Used: 848
	Prompt Tokens: 565
	Completion Tokens: 283
Successful Requests: 1


#### 3657

In [76]:
seed = 3657
train_data = 300

train_num = [10, 20, 30, 40, 50, 100, 200, 300]


for i in train_num:
    file_name = f'pseudodata_{seed}_{train_data}_train.csv'
    df = pd.read_csv(f'../data/pseudodata/{file_name}').head(i)
    x_train = df.drop('target', axis=1)
    y_train = df['target']

    file_path = '../models/pseudodata/'
    for j in range(1,31):
        model = iblm.fit(x_train, y_train, model_name = f'pseudodata_{seed}_{i}_{j}', file_path=file_path)

Tokens Used: 936
	Prompt Tokens: 565
	Completion Tokens: 371
Successful Requests: 1
Total Cost (USD): $0.039209999999999995
Tokens Used: 947
	Prompt Tokens: 565
	Completion Tokens: 382
Successful Requests: 1
Total Cost (USD): $0.03987
Tokens Used: 969
	Prompt Tokens: 565
	Completion Tokens: 404
Successful Requests: 1
Total Cost (USD): $0.04119
Tokens Used: 1103
	Prompt Tokens: 565
	Completion Tokens: 538
Successful Requests: 1
Total Cost (USD): $0.04923
Tokens Used: 928
	Prompt Tokens: 565
	Completion Tokens: 363
Successful Requests: 1
Total Cost (USD): $0.03873
Tokens Used: 1117
	Prompt Tokens: 565
	Completion Tokens: 552
Successful Requests: 1
Total Cost (USD): $0.050069999999999996
Tokens Used: 1115
	Prompt Tokens: 565
	Completion Tokens: 550
Successful Requests: 1
Total Cost (USD): $0.04995
Tokens Used: 859
	Prompt Tokens: 565
	Completion Tokens: 294
Successful Requests: 1
Total Cost (USD): $0.034589999999999996
Tokens Used: 849
	Prompt Tokens: 565
	Completion Tokens: 284
Successfu

### Models Evaluation

In [None]:
import pandas as pd
import numpy as np
from langchain.llms import OpenAI
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

from iblm import IBLMClassifier

import sys
import os

In [None]:
llm_model = OpenAI(temperature=0, model_name = 'gpt-4-0613')

params = {'columns_name': True}

iblm = IBLMClassifier(llm_model = llm_model, params=params)

In [8]:
#seed = 3655
seed = 3656
#seed = 3657


train_data = 300


train_num = [10, 20, 30, 40, 50, 100, 200, 300]
i = 30

#file_name = f'pseudodata_{columns}f_{train_data}_test.csv'

file_name = f'pseudodata_{seed}_{train_data}_test.csv'


df = pd.read_csv(f'../data/pseudodata/{file_name}')
x_test = df.drop('target', axis=1)
y_test = df['target']

sys.path.append('..')

error_count = 0
error_list = []
auc_list = []
acc_list = []
model_list = []


for j in range(1,31):

    import_file = f'import models.pseudodata.pseudodata_{seed}_{i}_{j} as codemodel'


    try:
        exec(import_file)
        y_proba = codemodel.predict(x_test)
        y_pred = (y_proba > 0.5).astype(int)
        negative_values_exist = np.any(y_proba < 0)
        values_greater_than_one_exist = np.any(y_proba > 1)
        if negative_values_exist:
            error_list.append(j)
            error_count += 1
            print(f"Negative values exist：{negative_values_exist}")

        elif values_greater_than_one_exist:
            error_list.append(j)
            error_count += 1
            print(f"Positive values exist：{values_greater_than_one_exist}")

        else:
            roc_auc = roc_auc_score(y_test, y_proba)
            accuracy = round(accuracy_score(y_test, y_pred),4)
            auc_list.append(roc_auc)
            acc_list.append(accuracy)
            model_list.append(f'moon_{seed}_{i}_{j}')
        
    except Exception:
        print('run error')
        error_count += 1
        error_list.append(j)
        pass


print('-----------error-------------')
print(f'Number of errors:{error_count}')
print(error_list)

print('-----------Model-------------')
print(model_list)

print('-----------ACC-------------')
print(acc_list)
average = sum(acc_list) / len(acc_list)
max_value = max(acc_list)
min_value = min(acc_list)
print("Maximum Value:", round(max_value, 4))
print("Average Value:", round(average,4))
print("Minimum Value:", round(min_value, 4))

print('-----------AUC-------------')
print(auc_list)
average = sum(auc_list) / len(auc_list)
max_value = max(auc_list)
min_value = min(auc_list)
print("Maximum Value:", round(max_value,4))
print("Average Value:", round(average,4))
print("Minimum Value:", round(min_value,4))

run error
run error
run error
run error
run error
run error
run error
Positive values exist：True
run error
run error
run error
run error
run error
run error
run error
run error
run error
run error
run error
run error
run error
run error
run error
run error
run error
run error
run error
-----------error-------------
Number of errors:27
[1, 2, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
-----------Model-------------
['moon_3656_30_3', 'moon_3656_30_6', 'moon_3656_30_16']
-----------ACC-------------
[0.9133, 0.6367, 0.4967]
Maximum Value: 0.9133
Average Value: 0.6822
Minimum Value: 0.4967
-----------AUC-------------
[0.961998311036046, 0.6965865149562204, 0.7124094404195743]
Maximum Value: 0.962
Average Value: 0.7903
Minimum Value: 0.6966
