# Modelling

In [63]:
import joblib
import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression, Perceptron
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestRegressor
from sklearn.dummy import DummyRegressor

from sklearn.metrics import mean_squared_error

import params as p

### Functions

In [68]:
def evaluate_model(model, y_test=None, X_test=None):
    """Print the root-mean-squared error of ``model`` on a held-out set.

    Args:
        model: Fitted estimator exposing ``predict(X)``.
        y_test: True target values. When omitted, the notebook-level
            ``y_test`` is looked up at call time.
        X_test: Features passed to ``model.predict``. When omitted, the
            notebook-level ``X_test`` is looked up at call time.

    Raises:
        KeyError: If an argument is omitted and no global of that name
            exists yet.
    """
    # The defaults used to be spelled ``y_test=y_test, X_test=X_test``.
    # Python evaluates default expressions once, when the ``def`` statement
    # runs, so the function kept scoring against whichever split existed at
    # that moment and could not be defined before the split cell had run.
    # Resolving the globals here keeps ``evaluate_model(model)`` working
    # while always using the current split.
    if y_test is None:
        y_test = globals()['y_test']
    if X_test is None:
        X_test = globals()['X_test']

    y_pred = model.predict(X_test)

    # RMSE is the square root of the mean squared error.
    print(f'RMSE: {(mean_squared_error(y_true=y_test, y_pred=y_pred))**0.5}')

In [69]:
def save_model(model, model_name: str):
    """Dump ``model`` with joblib to ``model_<model_name>.pkl``.

    Args:
        model: Object to serialise.
        model_name: Short label embedded in the output file name.
    """
    # The destination is built by plain string concatenation, so
    # ``p.OUTPUTS_PATH`` is used as a literal prefix of the file name
    # (presumably it ends with a path separator; verify in params).
    file_name = f'model_{model_name}.pkl'
    destination = p.OUTPUTS_PATH + file_name

    joblib.dump(model, destination)

In [70]:
def save_preds(model, model_name: str, X_test=None):
    """Write ``model``'s predictions to ``preds_<model_name>.csv``.

    Args:
        model: Fitted estimator exposing ``predict(X)``.
        model_name: Short label embedded in the output file name.
        X_test: Features to predict on. When omitted, the notebook-level
            ``X_test`` is looked up at call time.

    Raises:
        KeyError: If ``X_test`` is omitted and no global ``X_test`` exists.
    """
    # The default used to be ``X_test=X_test``, which Python evaluates once
    # when the ``def`` runs: predictions were then saved for a stale split
    # if the data was re-split afterwards. Look the global up lazily instead.
    if X_test is None:
        X_test = globals()['X_test']

    y_pred = pd.DataFrame(model.predict(X_test))

    # ``p.OUTPUTS_PATH`` is used as a literal prefix of the file name.
    # ``to_csv`` runs with pandas defaults, so the index is written too.
    y_pred.to_csv(p.OUTPUTS_PATH + f'preds_{model_name}.csv')

In [71]:
def save_model_and_preds(model, model_name: str, X_test=None):
    """Persist ``model`` and its predictions under the same label.

    Calls ``save_model`` and then ``save_preds``.

    Args:
        model: Fitted estimator exposing ``predict(X)``.
        model_name: Short label embedded in both output file names.
        X_test: Features to predict on. When omitted, the notebook-level
            ``X_test`` is looked up at call time.

    Raises:
        KeyError: If ``X_test`` is omitted and no global ``X_test`` exists.
    """
    # The default used to be ``X_test=X_test``, which is evaluated once at
    # definition time and therefore goes stale when the data is re-split.
    # Resolve it here, before anything is written, and hand the concrete
    # data to ``save_preds``.
    if X_test is None:
        X_test = globals()['X_test']

    save_model(model, model_name)

    save_preds(model, model_name, X_test)

### Dummy - baseline

In [72]:
# Baseline model: DummyRegressor ignores the features; with its default
# strategy ("mean") it always predicts the mean of y_train.
dummy = DummyRegressor().fit(X_train, y_train)

In [73]:
# Baseline RMSE, the reference point for the models fitted below.
evaluate_model(dummy)
# Uncomment to write the fitted model and its predictions to disk.
# save_model_and_preds(dummy, 'dummy')

RMSE: 616.9586879284441


### Random Forest

In [74]:
# Random forest with scikit-learn's default hyperparameters.
# NOTE(review): no random_state is passed, so the fitted forest and its
# RMSE will differ slightly from run to run; consider fixing a seed.
rf = RandomForestRegressor()
rf.fit(X_train, y_train)

RandomForestRegressor()

In [75]:
# Print the random forest's RMSE using evaluate_model's default test data.
evaluate_model(rf)
# Uncomment to write the fitted model and its predictions to disk.
# save_model_and_preds(rf, 'rf')

RMSE: 1.4698444091547413


### Linear Regression

In [76]:
# Linear regression with scikit-learn's default settings.
lr = LinearRegression()
lr.fit(X_train, y_train)

LinearRegression()

In [77]:
# NOTE(review): the recorded run reported an RMSE of about 3e-13, i.e. the
# target is reproduced almost exactly by a linear combination of the
# features. Check the feature set for target leakage before trusting the
# scores in this notebook.
evaluate_model(lr)
# Uncomment to write the fitted model and its predictions to disk.
# save_model_and_preds(lr, 'lr')

RMSE: 2.9631939518339384e-13


### Naive Bayes

In [78]:
# NOTE(review): GaussianNB is a classifier. It treats every distinct value
# in y_train as a class label, so it can only predict values seen during
# training. Confirm this is intended for a target that is scored with RMSE.
nb = GaussianNB()
nb.fit(X_train, y_train)

GaussianNB()

In [79]:
# The RMSE here is computed on the class labels GaussianNB predicts.
evaluate_model(nb)
# Uncomment to write the fitted model and its predictions to disk.
# save_model_and_preds(nb, 'nb')

RMSE: 3.49668647471027
