# Loading model

**ATTENTION:**

Notebook language: **Python**

## Loading extracted model data

In [1]:
import pickle5 as pickle

In [2]:
# Read the previously extracted model data from disk.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
with open('./model/model-imported.pickle', 'rb') as fp:
    model = pickle.load(fp) 

In [3]:
# Show which fields the loaded model provides.
model.keys()

dict_keys(['num.trees', 'child.nodeIDs', 'split.varIDs', 'split.values', 'is.ordered', 'class.values', 'terminal.class.counts', 'independent.variable.names', 'treetype'])

## Converting attributes

In [4]:
# Rename the dotted keys of the loaded model to snake_case names.
# A rename is skipped when it has already been applied, so re-running this cell
# does not raise KeyError. A key that is missing under both its old and its new
# name is still reported as a KeyError for the original (dotted) name.
for old_key, new_key in (
    ('num.trees', 'num_trees'),
    ('child.nodeIDs', 'child_node_ids'),
    ('split.varIDs', 'split_var_ids'),
    ('split.values', 'split_values'),
    ('is.ordered', 'is_ordered'),
    ('class.values', 'class_values'),
    ('terminal.class.counts', 'terminal_class_counts'),
):
    if old_key in model:
        model[new_key] = model.pop(old_key)
    elif new_key not in model:
        raise KeyError(old_key)

In [5]:
# Store the tree count as a built-in int.
model.update(num_trees=int(model['num_trees']))

In [6]:
def everything_as_int(x):
    if isinstance(x, int): return x
    elif isinstance(x, list): return [everything_as_int(y) for y in x]
    else: return int(x)

In [7]:
# Replace the child node ids with their integer-converted form.
model.update(child_node_ids=everything_as_int(model["child_node_ids"]))

## Loading data

In [8]:
import numpy as np
import pandas as pd

In [9]:
# The first CSV column carries the row labels, so it becomes the index.
df = pd.read_csv(
    filepath_or_buffer='./data/data_preprocessed.csv',
    index_col=0,
)
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,status,minute,h_a,situation,shotType,lastAction,distanceToGoal,angleToGoal
1,0,7,2,3,2,2,12.554569,10.860495
2,1,13,2,3,4,39,8.497323,44.427378
3,0,24,2,3,2,24,23.388803,17.205847
4,0,24,2,3,4,29,25.298204,16.339054
5,0,30,2,2,1,1,7.967234,44.485873


In [10]:
# Target vector: the `status` column; feature matrix: every other column.
y = df['status']
X = df.drop(columns='status')

## Loading `skranger` - `Python` interface to `ranger`

In [11]:
from scripts.RandomForestModified import RangerForestClassifierModified

In [12]:
# Create the classifier with as many estimators as the loaded model has trees.
# NOTE(review): the meaning of `enable_tree_details` is defined in
# scripts.RandomForestModified and is not visible here.
rf = RangerForestClassifierModified(n_estimators = model['num_trees'], enable_tree_details = False)

`fit` only attaches the parameters of the already-trained model; the model is not trained here

In [13]:
# The third argument hands over the loaded forest together with its tree count.
# NOTE(review): how `fit` uses this dictionary is defined in
# scripts.RandomForestModified and is not visible here.
rf.fit(X, y, {'forest': model, 'num_trees': model['num_trees']})

## Checking results

In [14]:
# `.loc` slices by index label and includes both endpoints (labels 1 through 11).
X.loc[1:11]

Unnamed: 0,minute,h_a,situation,shotType,lastAction,distanceToGoal,angleToGoal
1,7,2,3,2,2,12.554569,10.860495
2,13,2,3,4,39,8.497323,44.427378
3,24,2,3,2,24,23.388803,17.205847
4,24,2,3,4,29,25.298204,16.339054
5,30,2,2,1,1,7.967234,44.485873
6,42,2,3,4,27,26.241467,15.824642
7,47,2,1,4,33,20.834178,19.888362
8,55,2,2,1,11,10.767052,10.198737
9,66,2,1,4,33,29.060308,13.068224
10,88,2,2,4,24,10.237765,35.869253


In [15]:
# Predicted class for the same label range (1 through 11) shown above.
rf.predict(X.loc[1:11])

array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [16]:
# Per-class probabilities for the same rows; one row of output per input row.
rf.predict_proba(X.loc[1:11])

array([[0.83205017, 0.16794983],
       [0.08896921, 0.91103079],
       [0.91217144, 0.08782856],
       [0.92099141, 0.07900859],
       [0.63573259, 0.36426741],
       [0.7195911 , 0.2804089 ],
       [0.69306182, 0.30693818],
       [0.83613391, 0.16386609],
       [0.76523042, 0.23476958],
       [0.7634721 , 0.2365279 ],
       [0.72639206, 0.27360794]])

In [21]:
# True `status` values for the same rows, for comparison with the predictions.
np.array(y.loc[1:11])

array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Save model

In [17]:
# Serialize the classifier object to disk so it can be reloaded later.
with open('./model/model.pickle', 'wb') as fp:
    pickle.dump(rf, fp) 

## Calculate metrics on the whole dataset

In [18]:
from scripts.calculate_metrics import calculate_metrics

In [19]:
# Evaluate the classifier on the full feature matrix and target.
# NOTE(review): if this data was also used to train the original model, these
# scores are optimistic. Confirm against the training setup.
calculate_metrics(rf, X, y)

Unnamed: 0,accuracy,balanced_accuracy,f1,precision,recall,roc_auc
0,0.904929,0.917865,0.677727,0.531704,0.934322,0.97584
