# Modelling

In [19]:
import warnings

from sklearn.linear_model import LinearRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestRegressor
from sklearn.dummy import DummyRegressor

from xgboost import XGBRegressor

import functions as f
import params as p

### Settings

In [20]:
warnings.simplefilter(action='ignore', category=FutureWarning)

### Import Data

In [21]:
# Load the three dataset splits (train, test, val) through the project helper.
# NOTE(review): part='03' presumably selects the output of an earlier pipeline
# stage — confirm against functions.load_split_datasets.
train, test, val = f.load_split_datasets(part='03')

In [22]:
# Break every split into an (X, y) pair with the project helper, processing the
# splits in train, test, val order (presumably X = features, y = target).
(X_train, y_train), (X_test, y_test), (X_val, y_val) = (
    f.split_data_X_y(split) for split in (train, test, val)
)

In [23]:
# Bundle each split as keyword arguments so that it can be unpacked with ** into
# calls such as estimator.fit(**train_data) or f.evaluate_model(model, **test_data).
train_data = dict(X=X_train, y=y_train)
test_data = dict(X=X_test, y=y_test)
val_data = dict(X=X_val, y=y_val)

### Dummy - baseline

In [24]:
# Baseline: a DummyRegressor with default settings, fitted on the training split.
# With scikit-learn's default strategy ("mean") it ignores X and always predicts
# the mean of the training target.
dummy = DummyRegressor().fit(**train_data)

In [25]:
# Score the baseline on the test split; the output reports rmse, mae and r2.
# NOTE(review): every candidate model in this section is compared on the test
# split while val_data is not used here — confirm the test split is not also
# the final hold-out, otherwise model selection leaks into the final estimate.
f.evaluate_model(dummy, **test_data)

rmse : 639.3735506978311
mae : 518.3287428683848
r2 : -0.00012107339438527376


Since this model serves only as a baseline, we will not tune it any further.

### Random Forest

In [26]:
# Random forest with default hyperparameters, fitted on the training split.
# random_state is taken from params so the fit is repeatable.
rf = RandomForestRegressor(random_state=p.RANDOM_STATE).fit(**train_data)

In [27]:
# Score the untuned random forest on the test split (rmse, mae, r2) for
# comparison against the dummy baseline.
f.evaluate_model(rf, **test_data)

rmse : 218.1043116667645
mae : 136.07476331360948
r2 : 0.883621770083004


### Linear Regression

In [28]:
# Ordinary least-squares linear regression with default settings, fitted on the
# training split.
lr = LinearRegression().fit(**train_data)

In [29]:
# Score the linear regression on the test split (rmse, mae, r2).
f.evaluate_model(lr, **test_data)

rmse : 428.15200157505717
mae : 320.21941759822073
r2 : 0.5515240046747174


Since `LinearRegression()` has no hyperparameters worth tuning, we will use these figures to represent it in our comparisons.

### Naive Bayes

In [30]:
# NOTE(review): GaussianNB is a classifier, not a regressor. fit() treats every
# distinct value of y as a separate class label, and predict() can only return
# target values that occurred in the training split. Its regression metrics are
# therefore not comparable on equal terms with the regressors in this notebook;
# consider replacing it with a regression estimator (for example
# sklearn.linear_model.BayesianRidge) before the tuning phase.
nb = GaussianNB().fit(**train_data)

In [31]:
# Score the Gaussian naive Bayes model on the test split (rmse, mae, r2).
# NOTE(review): nb is a classifier, so its predictions are class labels drawn
# from the training targets — interpret these regression metrics with care.
f.evaluate_model(nb, **test_data)

rmse : 528.3442356654862
mae : 362.60059171597635
r2 : 0.3170684256007753


### XGB

In [32]:
# Gradient-boosted trees (XGBoost) with default hyperparameters, fitted on the
# training split; the seed comes from params so the fit is repeatable.
xgb = XGBRegressor(random_state=p.RANDOM_STATE).fit(**train_data)

In [33]:
# Score the untuned XGBoost model on the test split (rmse, mae, r2).
f.evaluate_model(xgb, **test_data)

rmse : 191.26158706184336
mae : 122.09586748265656
r2 : 0.9105050034929465


We will progress the `RandomForestRegressor()`, `GaussianNB()` and `XGBRegressor()` models into the tuning phase of this project.