# Model manager

This is a model manager that will manage machine learning models for the micro framework
- Data preparation
- Model training
- Model storage

## Data preparation

In [208]:
import pandas as pd
'''
Read data into a Pandas dataframe 
'''

df = pd.read_csv("data/dataset.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,price_per_area,rooms,fee,living_space,supplemental_area,price,asked_price,land_area,longitude,latitude,year,month,day,address,floor,typeSummary
0,0,20000.0,2.0,3833.0,50.0,,1000000.0,995000.0,,59.623836,16.570182,2020,6,11,Haga parkgata 9B,0.0,Bostadsrättslägenhet
1,1,96212.0,3.0,3944.0,66.0,,6350000.0,5790000.0,,59.319173,18.038294,2020,6,12,Heleneborgsgatan 5C,0.0,Bostadsrättslägenhet
2,2,20541.0,3.0,4948.0,92.5,,1900000.0,1950000.0,,63.181727,14.639372,2020,6,15,Rådhusgatan 28 A,0.0,Bostadsrättslägenhet
3,3,34568.0,3.0,4663.0,81.0,,2800000.0,2995000.0,,59.439847,18.073676,2020,6,10,"Kometvägen 35, vån 3",3.0,Bostadsrättslägenhet
4,4,56538.0,2.0,2904.0,52.0,,2940000.0,2495000.0,,59.295118,18.106403,2020,6,10,Ulricehamnsvägen 4,0.0,Bostadsrättslägenhet


In [209]:
df.describe()

Unnamed: 0.1,Unnamed: 0,price_per_area,rooms,fee,living_space,supplemental_area,price,asked_price,land_area,longitude,latitude,year,month,day,floor
count,2446.0,2390.0,2375.0,1433.0,2392.0,710.0,2446.0,2430.0,1000.0,2446.0,2446.0,2446.0,2446.0,2446.0,2446.0
mean,1222.5,36985.372385,3.403368,3728.35799,87.042559,41.252113,2864402.0,2703596.0,4757.62,58.889279,15.90827,2020.0,6.0,11.810303,0.386263
std,706.243702,25616.539904,1.720102,1322.843515,47.306871,39.878895,2184230.0,2091508.0,40386.77,1.86817,2.524556,0.0,0.0,1.67893,1.845948
min,0.0,982.0,1.0,0.0,0.0,0.0,55000.0,75000.0,103.0,55.346335,11.176771,2020.0,6.0,9.0,0.0
25%,611.25,19672.25,2.0,2795.0,55.0,9.0,1495000.0,1375000.0,651.5,57.731481,13.224915,2020.0,6.0,10.0,0.0
50%,1222.5,30909.0,3.0,3655.0,77.75,30.0,2420000.0,2250000.0,1092.5,59.280953,16.582154,2020.0,6.0,11.0,0.0
75%,1833.75,47673.75,4.0,4580.0,110.0,65.0,3700000.0,3495000.0,1836.25,59.435408,18.004513,2020.0,6.0,13.0,0.0
max,2445.0,418750.0,20.0,9321.0,560.0,335.0,25300000.0,23000000.0,1007140.0,67.854406,24.136828,2020.0,6.0,16.0,56.0


In [212]:
# df.typeSummary.value_counts()

In [337]:
'''
Split into features and target
'''

X = df[['rooms', 'fee', 'living_space', 'supplemental_area', 'land_area', 'longitude', 'latitude', 'year', 'month', 'day', 'floor']].to_numpy()
Y = df.price.to_numpy()

## Model training 

In [338]:
# from sklearn.base import BaseEstimator, TransformerMixin
# from sklearn.preprocessing import LabelEncoder

# class CustomTransformer(BaseEstimator, TransformerMixin):
#     def __init__(self):
#         self.le = LabelEncoder()
        
#     def fit(self, X: list, y=None):
#         self.le.fit(X.typeSummary.fillna(-1))
#         return self

#     def transform(self, X):
#         X["typeSummary"] = self.le.transform(X.typeSummary.fillna(-1))
#         return X

In [339]:
from xgboost import XGBRegressor
from sklearn.pipeline import Pipeline

# clf = Pipeline([
#     ('CustomTransformer', CustomTransformer()),
#     ('XGBRegressor', XGBRegressor())
# ])

clf = XGBRegressor()
clf.fit(X, Y)

XGBRegressor(base_score=0.5, booster=None, colsample_bylevel=1,
       colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
       importance_type='gain', interaction_constraints=None,
       learning_rate=0.300000012, max_delta_step=0, max_depth=6,
       min_child_weight=1, missing=nan, monotone_constraints=None,
       n_estimators=100, n_jobs=0, num_parallel_tree=1,
       objective='reg:squarederror', random_state=0, reg_alpha=0,
       reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method=None,
       validate_parameters=False, verbosity=None)

In [340]:
# X.info()

## Model storage 

In [341]:
import time 
import pickle

model_name = "sample-Hemnet"
full_name = "models/model-{}-{}.model".format(model_name, time.time())

with open(full_name, "wb") as f:
    pickle.dump(clf, f)
    
print(full_name)

models/model-sample-Hemnet-1592321010.42141.model


## Test model 

In [342]:
with open(full_name, "rb") as f:
    deserialized_model = pickle.load(f)

In [343]:
sample = X[2:3]
res = deserialized_model.predict(sample)[0]
print("Input:", sample)
print("Result:", res)

Input: [[3.00000000e+00 4.94800000e+03 9.25000000e+01            nan
             nan 6.31817274e+01 1.46393723e+01 2.02000000e+03
  6.00000000e+00 1.50000000e+01 0.00000000e+00]]
Result: 1905049.8


In [351]:
sample.tolist()

[[3.0,
  4948.0,
  92.5,
  nan,
  nan,
  63.18172743316941,
  14.639372293351753,
  2020.0,
  6.0,
  15.0,
  0.0]]

In [347]:
import json


raw = {'rooms': '6', 'fee': '2100', 'living_space': '263', 'floor': '9', 'supplemental_area': '227', 'land_area': '123', 'longitude': 0, 'latitude': 0, 'year': '2020', 'month': '06', 'day': '16'}
series = pd.Series(raw, raw.keys())
series = series.astype("float")
deserialized_model.predict(series)

array([4541071.], dtype=float32)

In [348]:
series.to_frame().transpose()

Unnamed: 0,rooms,fee,living_space,floor,supplemental_area,land_area,longitude,latitude,year,month,day
0,6.0,2100.0,263.0,9.0,227.0,123.0,0.0,0.0,2020.0,6.0,16.0


Unnamed: 0,0
rooms,6
fee,2100
living_space,263
floor,9
supplemental_area,227
land_area,123
longitude,0
latitude,0
year,2020
month,6


In [77]:
X

Unnamed: 0,rooms,fee,living_space,supplemental_area,land_area,longitude,latitude,year,month,day,floor,typeSummary
0,2.0,3833.0,50.0,,,59.623836,16.570182,2020,6,11,0.0,Bostadsrättslägenhet
1,3.0,3944.0,66.0,,,59.319173,18.038294,2020,6,12,0.0,Bostadsrättslägenhet
2,3.0,4948.0,92.5,,,63.181727,14.639372,2020,6,15,0.0,Bostadsrättslägenhet
3,3.0,4663.0,81.0,,,59.439847,18.073676,2020,6,10,3.0,Bostadsrättslägenhet
4,2.0,2904.0,52.0,,,59.295118,18.106403,2020,6,10,0.0,Bostadsrättslägenhet
...,...,...,...,...,...,...,...,...,...,...,...,...
2441,2.0,2063.0,58.0,,,59.318745,18.035443,2020,6,13,1.0,Bostadsrättslägenhet
2442,6.0,,143.0,33.0,1378.0,57.180309,14.018999,2020,6,11,0.0,Villa
2443,5.0,,169.0,10.0,1428.0,59.654982,17.559955,2020,6,12,0.0,Villa
2444,4.0,4900.0,95.2,,,57.782538,14.266611,2020,6,12,0.0,Bostadsrättslägenhet
