In [45]:
import json

import requests
from plotnine import *

from modeling import get_order, Runner

In [46]:
from sklearn.datasets import load_wine
# Load the scikit-learn wine dataset as a single DataFrame; `.frame` carries
# the feature columns together with the `target` column used below.
wine = load_wine(as_frame=True).frame
# Binary label: 1 where the original class is 1, otherwise 0.
wine["target2"] = (wine["target"] == 1).astype(int)
# Expose the DataFrame index as an explicit `row_id` column.
wine["row_id"] = wine.index

In [47]:
# Feature columns passed through unchanged (the "asis" group). Both
# configurations below use the same twelve wine measurements.
_ASIS_FEATURES = (
    "malic_acid", "ash", "alcalinity_of_ash", "magnesium",
    "total_phenols", "flavanoids", "nonflavanoid_phenols", "proanthocyanins",
    "color_intensity", "hue", "od280/od315_of_diluted_wines", "proline",
)


def _build_config(target, objective, eval_metric, metrics):
    """Assemble one run configuration dict.

    Only the target column, the xgboost objective / eval metric and the list
    of evaluation metrics differ between the two configurations; everything
    else (2-fold KFold, xgboost parameter lists, feature set) is shared.
    Every call returns freshly built dicts and lists, so the resulting
    configurations never share mutable state.
    """
    return {
        "task": "classification",
        "validation": {
            "splitter": {"id": "KFold", "params": {"n_splits": 2}},
            # NOTE(review): the key is spelled "evalulation" (sic). It is kept
            # as-is because other code looks the metrics up under this exact key.
            "evalulation": {"metrics": list(metrics)},
        },
        "model": {
            "id": "xgboost",
            "params": {
                "objective": objective,
                "eval_metric": eval_metric,
                # NOTE(review): list-valued parameters are presumably expanded
                # into a parameter grid by the `modeling` package; confirm.
                "num_boost_round": [500, 1000],
                "eta": 0.3,
                "max_depth": [4, 6, 8],
            },
        },
        "data": {
            "target": target,
            "features": {"asis": list(_ASIS_FEATURES)},
        },
    }


# Regression setup: predict `alcohol`, scored with MSE and MAE.
# NOTE(review): "task" is "classification" here as well, although the
# objective and metrics are regression ones. This looks like a copy-paste
# slip; confirm which task names `modeling` accepts before changing it.
reg_config = _build_config(
    target="alcohol",
    objective="reg:squarederror",
    eval_metric="rmse",
    metrics=["sklearn:mean_squared_error", "sklearn:mean_absolute_error"],
)

# Binary classification setup: predict `target2`, scored with precision.
# ("sklearn:average_precision_score" was previously listed but is disabled.)
class_config = _build_config(
    target="target2",
    objective="reg:logistic",
    eval_metric="error",
    metrics=["sklearn:precision_score"],
)

In [73]:
# Fetch the root endpoint of the locally running service and show the raw
# response body.
r = requests.get('http://0.0.0.0:5050/')
r.text

'{"test":0}\n'

In [76]:
# POST the regression config together with the wine data to `/train/job1`.
# `wine.to_dict()` turns the DataFrame into a nested dict that `requests`
# serialises as the JSON body.
request = 'http://0.0.0.0:5050/train/job1'
r = requests.post(
    request,
    json={"config": reg_config, "df": wine.to_dict()},
)
r.text

'test1'

In [71]:
# Scratch check: joining a dict's keys into a comma-separated string.
# Iterating a dict yields its keys, so no intermediate list is needed.
t = {'key': 1}
', '.join(t)

'key'

In [77]:
# Query the `/jobs` endpoint of the local service and show the raw body.
r = requests.get(url='http://0.0.0.0:5050/jobs')
r.text

'test1'

In [None]:
# Fetch `/train/test1` from the local service and decode the response body
# as JSON.
request = 'http://0.0.0.0:5050/train/test1'
r = requests.get(request)
r.json()

In [18]:
config = class_config
order = get_order(df = wine, config = config)

runner = Runner(workers = 4)
runner.run(order = order)

process started
adding result
adding result
adding result
adding result
adding result
adding result


Process ForkProcess-3:


In [None]:
# Debug aid: look at the exception recorded on the first future of the run.
# NOTE(review): `_futures` is a private attribute of Runner; this presumably
# holds future objects exposing `.exception()`. Confirm, and drop this cell
# once the failure is understood.
first_future = runner._futures[0]
first_future.exception()

In [None]:
# Pull the 'eval' table out of the run results and preview its first rows.
run_results = order.get_results()
evaldf = run_results['eval']
evaldf.head()

In [14]:
# Show the 'predictions' table from the run results.
predictions_df = order.get_results()['predictions']
predictions_df

Unnamed: 0,row_id,predictions,truth,num_boost_round,objective,eval_metric,eta,max_depth,task_id,param_id,split_id
0,89,0.990234,1,1000,reg:logistic,error,0.3,4,3,0,1
1,90,0.979258,1,1000,reg:logistic,error,0.3,4,3,0,1
2,91,0.987462,1,1000,reg:logistic,error,0.3,4,3,0,1
3,92,0.990234,1,1000,reg:logistic,error,0.3,4,3,0,1
4,93,0.962167,1,1000,reg:logistic,error,0.3,4,3,0,1
...,...,...,...,...,...,...,...,...,...,...,...
84,173,0.295776,0,500,reg:logistic,error,0.3,8,5,2,1
85,174,0.295776,0,500,reg:logistic,error,0.3,8,5,2,1
86,175,0.036695,0,500,reg:logistic,error,0.3,8,5,2,1
87,176,0.036695,0,500,reg:logistic,error,0.3,8,5,2,1


In [None]:
def plot_cv_plot(df, metric_name):
    """Scatter one validation metric against ``param_id``, coloured by split.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``param_id`` and ``split_id`` plus a column
        named by ``metric_name``. The input frame is not modified.
    metric_name : str
        Name of the column mapped to the y axis; also used as the plot title.

    Returns
    -------
    The assembled plotnine ``ggplot`` object; nothing is drawn here.
    """
    # Cast split_id to str so it is treated as a discrete colour rather than
    # a continuous one. `assign` swaps in a new column on a copy, which avoids
    # writing strings in place into a possibly-integer column through `.loc`
    # (deprecated upcasting in recent pandas versions).
    plot_df = df.assign(split_id=df["split_id"].astype(str))
    return (
        ggplot(plot_df, aes(x="param_id", y=metric_name, color="split_id"))
        + geom_point(size=3)
        + theme_minimal()
        + labs(title=metric_name)
    )

In [None]:
# Draw one cross-validation plot per metric listed in the active config.
# The config key really is spelled 'evalulation'.
metric_names = config['validation']['evalulation']['metrics']
for metric in metric_names:
    cv_plot = plot_cv_plot(df=evaldf, metric_name=metric)
    print(cv_plot)