# Circle

In [1]:
%%capture
!pip install git+https://github.com/fuyu-quant/IBLM.git@feature-in-context-learning 

In [2]:
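#%%capture
#!pip install git+https://github.com/fuyu-quant/IBLM.git
# (Alternative install from the repository's default branch; uncomment the two lines above to use it.)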

In [3]:
# Print the installed IBLM version.
# importlib.metadata is the standard-library replacement for the deprecated
# pkg_resources API and needs no setuptools at runtime.
import importlib.metadata

print(importlib.metadata.version('IBLM'))

0.1.35


In [4]:
import importlib
import os
import sys

import numpy as np
import pandas as pd
from langchain.llms import OpenAI
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

from iblm import IBLMClassifier

In [23]:
# Read the circle training CSV selected by `n` and split it into features and label.
n = 400

df = pd.read_csv(f'../data/circle/circle_{n}_train.csv')
# 'Target' is the label column; every other column is kept as a feature.
y_train = df['Target']
x_train = df.drop(columns='Target')
print(len(x_train))

400


In [24]:
# Build the IBLM classifier from the model name and the options dict passed as `params`.
llm_model_name = 'gpt-4'
params = dict(columns_name=True)

iblm = IBLMClassifier(params=params, llm_model_name=llm_model_name)

### Training

In [25]:
# Fit on the training split; `model_name` and `file_path` are handed to fit() unchanged.
file_path = '../models/circle/'

print(f'Number of data:{len(x_train)}')
model = iblm.fit(
    x_train,
    y_train,
    file_path=file_path,
    model_name='circle',
)

Number of data:400
> Start of model creating.
Tokens Used: 5452
	Prompt Tokens: 5294
	Completion Tokens: 158
Successful Requests: 1
Total Cost (USD): $0.16829999999999998


In [26]:
# Print the value returned by fit(): the source code of the generated model.
print(model)

import numpy as np

def predict(x):
    df = x.copy()
    output = []
    for index, row in df.iterrows():
        # Do not change the code before this point.
        # Please describe the process required to make the prediction below.

        # Calculate the distance from the origin (0, 0)
        distance = np.sqrt(row['Feature_1']**2 + row['Feature_2']**2)

        # Normalize the distance to a range between 0 and 1
        normalized_distance = distance / np.sqrt(2)

        # Calculate the probability based on the normalized distance
        y = 1 - normalized_distance

        # Do not change the code after this point.
        output.append(y)
    return np.array(output)


### Prediction

In [27]:
# Read the test CSV for the same `n` and split it into features and label.
df = pd.read_csv(f'../data/circle/circle_{n}_test.csv')
y_test = df['Target']
x_test = df.drop(columns='Target')

In [28]:
# Score the test rows, then turn the scores into 0/1 labels with a 0.5 cut-off.
y_proba = iblm.predict(x_test)
y_pred = np.greater(y_proba, 0.5).astype(int)

In [29]:
# Display the raw scores returned by predict() so their range can be inspected.
y_proba

array([0.45845545, 0.36472644, 0.24786969, 0.38140482, 0.43457052,
       0.45559941, 0.29641987, 0.42951862, 0.38656011, 0.25170861,
       0.45314307, 0.31911051, 0.30695383, 0.28075282, 0.27984134,
       0.19924723, 0.3810004 , 0.35277631, 0.28005382, 0.37615306,
       0.23181187, 0.46433919, 0.44188308, 0.46252023, 0.27641863,
       0.35496318, 0.44244955, 0.43808542, 0.31397631, 0.3707425 ,
       0.45442828, 0.2596538 , 0.43932674, 0.40340759, 0.42368672,
       0.39059168, 0.46833845, 0.29973933, 0.31418188, 0.49349975,
       0.26904857, 0.42665848, 0.41612244, 0.40336862, 0.29744004,
       0.37063445, 0.2982504 , 0.27697683, 0.40198662, 0.47408556,
       0.29696373, 0.25756179, 0.45545891, 0.28885831, 0.33266013,
       0.31439188, 0.29229632, 0.48428593, 0.36060302, 0.32714006,
       0.25840543, 0.32133955, 0.24304789, 0.25046948, 0.46687244,
       0.37594511, 0.42379301, 0.41457024, 0.50502525, 0.28329364,
       0.42787676, 0.44596345, 0.2497667 , 0.45158   , 0.39599

In [30]:
# Threshold-dependent metrics, all computed from the hard 0/1 labels in y_pred.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# ROC-AUC is computed from the continuous scores in y_proba, not from y_pred.
roc_auc = roc_auc_score(y_test, y_proba)

# NOTE(review): the recorded run shows ROC-AUC of about 0.995 but recall of
# about 0.03, which suggests the 0.5 cut-off used to build y_pred does not suit
# the scale of the generated scores - confirm before relying on the
# threshold-dependent numbers.
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 score: {f1}')
print(f'ROC-AUC: {roc_auc}')

Accuracy: 0.5166666666666667
Precision: 1.0
Recall: 0.03333333333333333
F1 score: 0.06451612903225806
ROC-AUC: 0.9953333333333334


### Create multiple code models

In [12]:
# Fit 30 code models on the n = 600 training CSV; each fit() call gets its own
# model_name (circle_1 ... circle_30) and the same file_path.
n = 600
df = pd.read_csv(f'../data/circle/circle_{n}_train.csv')
y_train = df['Target']
x_train = df.drop(columns='Target')

file_path = '../models/circle/'
for i in range(1, 31):
    model = iblm.fit(
        x_train,
        y_train,
        file_path=file_path,
        model_name=f'circle_{i}',
    )

> Start of model creating.
Tokens Used: 4756
	Prompt Tokens: 4582
	Completion Tokens: 174
Successful Requests: 1
Total Cost (USD): $0.1479
> Start of model creating.
Tokens Used: 4730
	Prompt Tokens: 4582
	Completion Tokens: 148
Successful Requests: 1
Total Cost (USD): $0.14634
> Start of model creating.
Tokens Used: 4756
	Prompt Tokens: 4582
	Completion Tokens: 174
Successful Requests: 1
Total Cost (USD): $0.1479
> Start of model creating.
Tokens Used: 4758
	Prompt Tokens: 4582
	Completion Tokens: 176
Successful Requests: 1
Total Cost (USD): $0.14801999999999998
> Start of model creating.
Tokens Used: 4755
	Prompt Tokens: 4582
	Completion Tokens: 173
Successful Requests: 1
Total Cost (USD): $0.14784
> Start of model creating.
Tokens Used: 4754
	Prompt Tokens: 4582
	Completion Tokens: 172
Successful Requests: 1
Total Cost (USD): $0.14778
> Start of model creating.
Tokens Used: 4756
	Prompt Tokens: 4582
	Completion Tokens: 174
Successful Requests: 1
Total Cost (USD): $0.1479
> Start of 

### Prediction with the multiple code models

In [14]:
# Evaluate the 30 generated code models (models.circle.circle_1 ... circle_30)
# on the n = 600 test CSV and summarise their ROC-AUC values.
n = 600

df = pd.read_csv(f'../data/circle/circle_{n}_test.csv')
x_test = df.drop('Target', axis=1)
y_test = df['Target']

# The generated models are imported as `models.circle.circle_<i>`, so the
# parent directory must be importable. Guarded so that re-running the cell does
# not keep appending duplicate entries.
if '..' not in sys.path:
    sys.path.append('..')

error_count = 0   # models that raised or produced scores outside [0, 1]
error_list = []   # indices of those models
auc_list = []     # ROC-AUC of every model whose scores were all within [0, 1]


for i in range(1,31):

    try:
        # importlib.import_module replaces exec() of a generated import
        # statement. The import lives inside the try block so that a model
        # file that is missing or does not parse is counted as an error
        # instead of aborting the whole evaluation.
        codemodel = importlib.import_module(f'models.circle.circle_{i}')

        y_proba = codemodel.predict(x_test)
        # Not used below; still assigned so the notebook-level y_pred is
        # updated exactly as before.
        y_pred = (y_proba > 0.5).astype(int)
        negative_values_exist = np.any(y_proba < 0)
        values_greater_than_one_exist = np.any(y_proba > 1)
        if negative_values_exist:
            error_list.append(i)
            error_count += 1
            print(f"Negative values exist：{negative_values_exist}")

        elif values_greater_than_one_exist:
            error_list.append(i)
            error_count += 1
            # The original message said "Positive values exist", which does
            # not describe the condition being reported (scores above 1).
            print(f"Values greater than 1 exist：{values_greater_than_one_exist}")

        else:
            roc_auc = roc_auc_score(y_test, y_proba)
            auc_list.append(roc_auc)

    except Exception as exc:
        # Generated code can fail in arbitrary ways: record the failure and
        # continue with the next model, but report what went wrong.
        print(f'run error (model {i}): {exc!r}')
        error_count += 1
        error_list.append(i)

print(error_count)
print(error_list)
print(auc_list)

# NOTE(review): the recorded run contains one model with AUC of about 0.0009
# (= 1 - 0.9991), i.e. apparently inverted scores, which pulls the average
# down - confirm whether such models should be counted as errors.
if auc_list:
    average = sum(auc_list) / len(auc_list)
    print("Average Value:", average)
    max_value = max(auc_list)
    min_value = min(auc_list)
    print("Maximum Value:", max_value)
    print("Minimum Value:", min_value)
else:
    # Every model failed: there is nothing to average, and max()/min() would raise.
    print("No model produced valid scores; AUC summary skipped.")

0
[]
[0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.0008888888888888887, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111, 0.9991111111111111]
Average Value: 0.9658370370370374
Maximum Value: 0.9991111111111111
Minimum Value: 0.0008888888888888887
