In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from scipy.io.arff import loadarff 
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from odt import ODTTransformer, ODTManager, PipeConfig

In [2]:
raw_data = loadarff('dataset_13_breast-cancer.arff')
df = pd.DataFrame(raw_data[0])
for c in list(df.columns):
    df[c] = df[c].str.decode('utf-8')

In [3]:
df['Class'] = df['Class'].map({'recurrence-events': 1, 'no-recurrence-events': 0})

In [4]:
x, y = df[['age', 'menopause', 'tumor-size', 'inv-nodes', 'node-caps', 'deg-malig',
       'breast', 'breast-quad', 'irradiat']], df['Class']

In [5]:
pipe = Pipeline(
    steps=[
        ("odt_transformer", ODTTransformer())
    ]
)

In [6]:
transformed_x = pipe.fit_transform(x)

In [7]:
x_train, x_test, y_train, y_test = train_test_split(transformed_x, y, test_size=0.15) 

In [8]:
lgb_train = lgb.Dataset(x_train, y_train)
lgb_eval = lgb.Dataset(x_test, y_test, reference=lgb_train)

In [9]:
params = {
    'task': 'train', 
    'boosting': 'gbdt',
    'objective': 'binary',
    'num_leaves': 10,
    'learnnig_rage': 0.05,
    'metric': {'l2','l1'},
    'verbose': -1
}

model = lgb.train(params,
                 train_set=lgb_train,
                 valid_sets=lgb_eval,
                 early_stopping_rounds=30)

[1]	valid_0's l2: 0.248077	valid_0's l1: 0.448401
Training until validation scores don't improve for 30 rounds
[2]	valid_0's l2: 0.243341	valid_0's l1: 0.443861
[3]	valid_0's l2: 0.242902	valid_0's l1: 0.441093
[4]	valid_0's l2: 0.241352	valid_0's l1: 0.437176
[5]	valid_0's l2: 0.239036	valid_0's l1: 0.433042
[6]	valid_0's l2: 0.237979	valid_0's l1: 0.430772
[7]	valid_0's l2: 0.239283	valid_0's l1: 0.429943
[8]	valid_0's l2: 0.239732	valid_0's l1: 0.428889
[9]	valid_0's l2: 0.24084	valid_0's l1: 0.428464
[10]	valid_0's l2: 0.2421	valid_0's l1: 0.427171
[11]	valid_0's l2: 0.243359	valid_0's l1: 0.426844
[12]	valid_0's l2: 0.243381	valid_0's l1: 0.424934
[13]	valid_0's l2: 0.244205	valid_0's l1: 0.425054
[14]	valid_0's l2: 0.245456	valid_0's l1: 0.424685
[15]	valid_0's l2: 0.24428	valid_0's l1: 0.422685
[16]	valid_0's l2: 0.243333	valid_0's l1: 0.420997
[17]	valid_0's l2: 0.243303	valid_0's l1: 0.420863
[18]	valid_0's l2: 0.243538	valid_0's l1: 0.41954
[19]	valid_0's l2: 0.243614	valid_0



In [10]:
model.save_model("example_lgbm_model.txt")

<lightgbm.basic.Booster at 0x7fa83595bc10>

In [11]:
odt_transformer = pipe.steps[0][1]

In [12]:
odt_transformer.config

PipeConfig(features=[Feature(name='age', datatype=<DataType.Object: 'Object'>, nullable=True), Feature(name='breast', datatype=<DataType.Object: 'Object'>, nullable=True), Feature(name='breast-quad', datatype=<DataType.Object: 'Object'>, nullable=True), Feature(name='deg-malig', datatype=<DataType.Object: 'Object'>, nullable=True), Feature(name='inv-nodes', datatype=<DataType.Object: 'Object'>, nullable=True), Feature(name='irradiat', datatype=<DataType.Object: 'Object'>, nullable=True), Feature(name='menopause', datatype=<DataType.Object: 'Object'>, nullable=True), Feature(name='node-caps', datatype=<DataType.Object: 'Object'>, nullable=True), Feature(name='tumor-size', datatype=<DataType.Object: 'Object'>, nullable=True)])

In [13]:
# TODO: fix update config (json doesn't work)

In [14]:
manager = ODTManager('http://localhost:8080')

In [15]:
manager.update_model(model)

Updating model succeeded!


In [16]:
manager.update_config(odt_transformer.config)

Updating config succeeded!


In [17]:
import json
input_str = '{"age":"40-49","menopause":"premeno","tumor-size":"15-19","inv-nodes":"0-2","node-caps":"yes","deg-malig":"3","breast":"right","breast-quad":"left_up","irradiat":"no"}'

manager.get_prediction(
    data=json.loads(input_str)
)

{'prediction': [0.5050860155731521]}