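# Moon

This notebook fits an `IBLModel` to scikit-learn's two-moons toy dataset: an LLM is asked to write a rule-based `predict` function from a balanced training sample, and the generated code is then evaluated on the held-out rows.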

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/fuyu-quant/IBLM/blob/main/examples/iblmodel/iblmodel_moon.ipynb)

In [1]:
%%capture
!pip install iblm --upgrade

In [2]:
import numpy as np
import pandas as pd
import string
from langchain.llms import OpenAI
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.datasets import make_moons

from iblm import IBLModel


import os
#os.environ["OPENAI_API_KEY"] = "OPENAI_API_KEY"

### Preparing data sets

In [3]:
seed = 3657
num_train = 300
sample_num = num_train // 2  # rows drawn from each class

# Generate 1000 two-moons points and keep the features rounded to 3 decimals.
X, y = make_moons(n_samples=1000, noise=0.05, random_state=seed)
df = pd.DataFrame(data=X, columns=['Feature_1', 'Feature_2']).round(3)
df['target'] = y

# Balanced training sample: the same number of rows from each class.
df_1 = df[df['target'] == 1].sample(n=sample_num, random_state=seed)
df_0 = df[df['target'] == 0].sample(n=sample_num, random_state=seed)

# Interleave the two samples row by row (class 1, class 0, class 1, ...).
# A single label-based lookup replaces growing a frame with pd.concat inside
# a loop, which re-copies the accumulated frame on every iteration and
# temporarily turns 'target' into a float column.
train_index = np.column_stack([df_1.index, df_0.index]).ravel()
df_train = df.loc[train_index].astype({'target': int})

# Every row that was not sampled for training is held out for evaluation.
df_test = df.drop(df_train.index)

# Sanity checks: balanced training set, and train/test partition the data.
assert len(df_train) == 2 * sample_num
assert len(df_train) + len(df_test) == len(df)

df_train.head()

Unnamed: 0,Feature_1,Feature_2,target
555,0.259,-0.251,1
595,-0.822,0.584,0
82,0.102,0.025,1
117,-0.386,0.881,0
434,1.941,0.563,1


In [4]:
# Separate the training frame into its label column and its feature columns.
y_train = df_train['target']
x_train = df_train.drop(columns=['target'])

### Training

In [5]:
# LLM used to generate the model code; temperature 0 is requested so the
# completion is as deterministic as the API allows.
llm_model = OpenAI(model_name='gpt-4-0613', temperature=0)

# Options passed to IBLModel.
# NOTE(review): 'columns_name' presumably makes the prompt list the column
# names (they do appear in the printed prompt) - confirm against the IBLM docs.
params = dict(
    columns_name=True,
    objective='classification',
)

iblm = IBLModel(params=params, llm_model=llm_model)

In [6]:
# Alternative call that also passes a model name and an output location:
#   file_path = '/content/'
#   model = iblm.fit(x_train, y_train, model_name = 'pseudodata', file_path=file_path)
# NOTE(review): presumably this saves the generated code under file_path -
# confirm against the IBLM documentation before relying on it.

# Fit on the balanced training sample. The prompt sent to the LLM is shown in
# this cell's output; the return value is printed in a later cell.
model = iblm.fit(x_train, y_train)

Please create your code in compliance with all of the following conditions. 
・Analyze the data given below and write python code to predict the probability that the "target" of the unknown data is 1.
・Never use machine learning algorithms.
・Only Python functions should be output.
・Please come up with a logic that allows you to predict probability values as closely as possible.
・For the data given below, create a code that predicts a high probability value when "target" is likely to be 1 and a low probability value when "target" is unlikely to be 1.
・The 'target' value cannot be used for forecasting.
・Analyze the data in as much detail as possible.
・Each data type is float64, float64, int64.
・The column names, in order, are as follows Feature_1, Feature_2, target.
・Create a code like the following.
------------
import numpy as np
def predict(x):
    df = x.copy()
    
    output = []
    for index, row in df.iterrows():
        # Do not change the code before this point.
        # Pleas

In [10]:
# Show the Python source of the model that was generated during fitting.
# print() is used on purpose so newlines render as a readable listing.
print(model)

import numpy as np
import pandas as pd

def predict(x):
    df = x.copy()
    
    output = []
    for index, row in df.iterrows():
        if row['Feature_1'] > 0 and row['Feature_2'] < 0:
            y = 0.9
        else:
            y = 0.1
        output.append(y)
    return np.array(output)

data = [
    [0.259,-0.251,1.0],
    [-0.822,0.584,0.0],
    [0.102,0.025,1.0],
]
df = pd.DataFrame(data, columns=['Feature_1', 'Feature_2', 'target'])

df = df.drop(columns='target')

probabilities = predict(df)


### Prediction

In [11]:
# Separate the held-out frame into its label column and its feature columns.
y_test = df_test['target']
x_test = df_test.drop(columns=['target'])

In [12]:
# Predicted probability of class 1 for every held-out row.
y_proba = iblm.predict(x_test)

# A row is labelled 1 only when its probability is strictly above 0.5.
is_positive = y_proba > 0.5
y_pred = is_positive.astype(int)

In [13]:
# Metrics computed from the hard 0/1 predictions.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# ROC-AUC is a ranking metric, so it is computed from the predicted
# probabilities (y_proba), not from the thresholded labels.
# When a predictor emits only two distinct probability values, ROC-AUC equals
# the mean of the true-positive and true-negative rates, so on a
# class-balanced test set it can coincide with accuracy.
roc_auc = roc_auc_score(y_test, y_proba)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 score: {f1}')
print(f'ROC-AUC: {roc_auc}')

Accuracy: 0.8357142857142857
Precision: 0.9916317991631799
Recall: 0.6771428571428572
F1 score: 0.8047538200339559
ROC-AUC: 0.8357142857142857
