# Circle

In [1]:
%%capture
!pip install iblm --upgrade

In [1]:
%%capture
!pip install git+https://github.com/fuyu-quant/IBLM.git 

In [2]:
# Print the installed IBLM version. importlib.metadata is the standard-library
# replacement for the deprecated pkg_resources API and returns the same
# version string for an installed distribution.
from importlib.metadata import version

print(version('IBLM'))

0.1.1


In [2]:
import pandas as pd
from langchain.llms import OpenAI
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

from iblm import IBLMClassifier


import os
#os.environ["OPENAI_API_KEY"] = "OPENAI_API_KEY"

In [36]:
n = 600

# Read the training CSV whose file name is keyed by n, then separate the
# 'Target' label column from the remaining feature columns.
df = pd.read_csv(f'../data/circle/circle_{n}_train.csv')
y_train = df['Target']
x_train = df.drop(columns='Target')
print(len(x_train))

300


In [38]:
# LLM name and options dict passed to the IBLM classifier constructor.
llm_model_name = 'gpt-4'
params = dict(columns_name=True)

iblm = IBLMClassifier(params=params, llm_model_name=llm_model_name)

### Training

In [39]:
file_path = '../models/circle/'

# Report the training-set size, then fit; the value returned by fit() is kept
# in `model` and printed in the next cell.
print(f'Number of data:{len(x_train)}')
model = iblm.fit(
    x_train,
    y_train,
    model_name='circle',
    file_path=file_path,
)

Number of data:300
> Start of model creating.
Tokens Used: 7160
	Prompt Tokens: 6988
	Completion Tokens: 172
Successful Requests: 1
Total Cost (USD): $0.21996


In [40]:
# Show the value returned by iblm.fit: the Python source of the generated
# predict() function.
print(model)

import numpy as np
def predict(x):
    df = x.copy()
    output = []
    for index, row in df.iterrows():
        # Do not change the code before this point.
        # Please describe the process required to make the prediction below.

        feature_1 = row['Feature_1']
        feature_2 = row['Feature_2']

        # Calculate the distance from the origin (0, 0)
        distance = np.sqrt(feature_1**2 + feature_2**2)

        # Normalize the distance to range [0, 1]
        normalized_distance = distance / np.sqrt(2)

        # Calculate the probability of target being 1
        y = 1 - normalized_distance

        # Do not change the code after this point.
        output.append(y)
    return np.array(output)


### Prediction

In [41]:
# Read the test CSV for the same n and split the 'Target' labels from the features.
df = pd.read_csv(f'../data/circle/circle_{n}_test.csv')
y_test = df['Target']
x_test = df.drop(columns='Target')

In [42]:
# Score the test rows with the fitted classifier, then label a row 1 when its
# score is strictly greater than 0.5 and 0 otherwise.
# NOTE(review): the recorded run reports ROC-AUC ~0.999 but accuracy 0.51 and
# recall 0.02, which suggests the fixed 0.5 cut-off is too high for the score
# range this generated model produces — confirm and consider tuning it.
y_proba = iblm.predict(x_test)
y_pred = (y_proba > 0.5).astype(int)

In [43]:
# Accuracy (computed on the thresholded labels in y_pred)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Precision
precision = precision_score(y_test, y_pred)
print(f'Precision: {precision}')

# Recall
recall = recall_score(y_test, y_pred)
print(f'Recall: {recall}')

# F1 score
f1 = f1_score(y_test, y_pred)
print(f'F1 score: {f1}')

# ROC-AUC is computed from the continuous scores in y_proba, not from the
# thresholded y_pred labels, so it does not depend on the classification cut-off.
roc_auc = roc_auc_score(y_test, y_proba)
print(f'ROC-AUC: {roc_auc}')

Accuracy: 0.51
Precision: 1.0
Recall: 0.02
F1 score: 0.0392156862745098
ROC-AUC: 0.9991111111111111


### Create multiple code models

In [44]:
n = 600
df = pd.read_csv(f'../data/circle/circle_{n}_train.csv')
y_train = df['Target']
x_train = df.drop(columns='Target')

file_path = '../models/circle/'
# Fit 30 times under the names circle_1 ... circle_30. Every fit() call issues
# its own LLM request, and `model` is rebound on each pass, so only the result
# of the last call remains in that variable afterwards.
for i in range(1, 31):
    model = iblm.fit(
        x_train,
        y_train,
        model_name=f'circle_{i}',
        file_path=file_path,
    )

> Start of model creating.
Tokens Used: 7260
	Prompt Tokens: 6988
	Completion Tokens: 272
Successful Requests: 1
Total Cost (USD): $0.22596
> Start of model creating.
Tokens Used: 7193
	Prompt Tokens: 6988
	Completion Tokens: 205
Successful Requests: 1
Total Cost (USD): $0.22194
> Start of model creating.
Tokens Used: 7166
	Prompt Tokens: 6988
	Completion Tokens: 178
Successful Requests: 1
Total Cost (USD): $0.22032
> Start of model creating.
Tokens Used: 7186
	Prompt Tokens: 6988
	Completion Tokens: 198
Successful Requests: 1
Total Cost (USD): $0.22152
> Start of model creating.
Tokens Used: 7160
	Prompt Tokens: 6988
	Completion Tokens: 172
Successful Requests: 1
Total Cost (USD): $0.21996
> Start of model creating.
Tokens Used: 7190
	Prompt Tokens: 6988
	Completion Tokens: 202
Successful Requests: 1
Total Cost (USD): $0.22175999999999998
> Start of model creating.
Tokens Used: 7162
	Prompt Tokens: 6988
	Completion Tokens: 174
Successful Requests: 1
Total Cost (USD): $0.22008
> Start 