# Moon

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/fuyu-quant/IBLM/blob/main/examples/iblmodel/iblmodel_moon.ipynb)

In [2]:
%%capture
!pip install iblm --upgrade

In [7]:
import numpy as np
import pandas as pd
import string
from langchain.llms import OpenAI
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.datasets import make_moons

from iblm import IBLMClassifier


import os
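# Uncomment and replace the placeholder with your own key if OPENAI_API_KEY
# is not already set in the environment. Do not commit a real key.
#os.environ["OPENAI_API_KEY"] = "OPENAI_API_KEY"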

### Preparing data sets

In [10]:
# Generate the two-moons toy data set. `seed` drives both the data generation
# and the row sampling below, so the train/test split is reproducible.
sample = 600
seed = 3657

X, y = make_moons(n_samples=sample, noise=0.05, random_state=seed)
df = pd.DataFrame(data=X, columns=['Feature_1', 'Feature_2']).round(3)
df['target'] = y

# Class-balanced training set: num_train rows in total, half from each class.
num_train = 300
sample_num = num_train // 2

df_1 = df[df['target'] == 1].sample(n=sample_num, random_state=seed)
df_0 = df[df['target'] == 0].sample(n=sample_num, random_state=seed)

# Interleave the sampled rows as class 1, class 0, class 1, class 0, ...
# column_stack pairs the i-th label of each class and ravel() flattens the
# pairs row by row. Selecting all rows at once with .loc replaces growing the
# frame with pd.concat inside a loop, and keeps the original index labels.
train_index = np.column_stack([df_1.index, df_0.index]).ravel()
df_train = df.loc[train_index].astype({'target': int})

# Every row that was not sampled for training is held out for evaluation.
df_test = df.drop(df_train.index)
assert len(df_train) + len(df_test) == len(df), "train/test must partition df"

df_train.head()

Unnamed: 0,Feature_1,Feature_2,target
35,1.131,-0.562,1
22,0.926,0.096,0
453,1.314,-0.443,1
464,0.99,0.242,0
221,2.05,0.283,1


In [13]:
# Separate the labels from the feature columns of the training frame.
y_train = df_train['target']
x_train = df_train.drop(columns=['target'])

### Training

In [14]:
# LLM used to write the prediction code: a pinned GPT-4 snapshot queried at
# temperature 0 to minimise sampling randomness.
llm_model = OpenAI(
    model_name='gpt-4-0613',
    temperature=0,
)

# Options passed to IBLMClassifier.
# NOTE(review): 'columns_name' presumably makes IBLM use the DataFrame column
# names in the prompt / generated code -- confirm against the iblm docs.
params = dict(columns_name=True)

iblm = IBLMClassifier(params=params, llm_model=llm_model)

In [15]:
# Alternative (disabled): the same fit call with an explicit model_name and
# file_path. NOTE(review): presumably this saves the generated code under
# file_path -- confirm against the iblm docs before enabling.
#file_path = '/content/'

#model = iblm.fit(x_train, y_train, model_name = 'pseudodata', file_path=file_path)
# Ask the LLM to produce prediction code from the training data. The returned
# value is kept in `model`; the recorded output below reports one request
# together with its token usage and cost.
model = iblm.fit(x_train, y_train)

Tokens Used: 4190
	Prompt Tokens: 4032
	Completion Tokens: 158
Successful Requests: 1
Total Cost (USD): $0.13044


In [16]:
# Show what fit() returned: the Python source of the generated predict()
# function that the classifier will use for inference.
print(model)

import numpy as np

def predict(x):
    df = x.copy()
    output = []
    for index, row in df.iterrows():
        # Do not change the code before this point.
        # Please describe the process required to make the prediction below.
        # Here we are using a simple linear regression model for prediction.
        # The coefficients of the model are assumed based on the data.
        y = 0.5 * row['Feature_1'] - 0.3 * row['Feature_2'] + 0.2
        y = 1 / (1 + np.exp(-y))  # Apply sigmoid function to get probability
        # Do not change the code after this point.
        output.append(y)
    return np.array(output)


### Prediction

In [17]:
# Separate the labels from the feature columns of the held-out frame.
y_test = df_test['target']
x_test = df_test.drop(columns=['target'])

In [18]:
# Scores for the held-out rows; they are used as class-1 probabilities below.
y_proba = iblm.predict(x_test)

# Hard labels: a score strictly above 0.5 becomes 1, anything else becomes 0.
above_threshold = y_proba > 0.5
y_pred = above_threshold.astype(int)

In [19]:
# Label-based metrics are computed from the thresholded predictions (y_pred).
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# ROC-AUC is a ranking metric, so it is computed from the predicted
# probabilities (y_proba), not from the hard class labels.
roc_auc = roc_auc_score(y_test, y_proba)

# Report every metric on its own line, in a fixed order.
for label, value in (
    ('Accuracy', accuracy),
    ('Precision', precision),
    ('Recall', recall),
    ('F1 score', f1),
    ('ROC-AUC', roc_auc),
):
    print(f'{label}: {value}')

Accuracy: 0.8133333333333334
Precision: 0.7281553398058253
Recall: 1.0
F1 score: 0.8426966292134831
ROC-AUC: 0.9132444444444444
