# Machine Learning Assessment

## 1. Initialisation

In [1]:
%%writefile tester.py

from time import process_time as timer

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

import init
import prep
import model
import train
import main
from importlib import reload

def calls_te():
    """Smoke-test the calls parser exposed by ``init.init_calls``.

    Reloads ``init`` so that edits made to the module are picked up, builds
    the calls table, prints its summary and first row, and reports the
    elapsed time measured with ``timer`` (``time.process_time``, i.e. CPU
    time rather than wall-clock time).

    The result of ``init.init_calls()`` is presumably a pandas DataFrame
    (it is used through ``.info()`` and ``.head()``) -- confirm in ``init``.
    """
    reload(init)

    print('Testing calls parsing from given database')

    started = timer()
    calls_df = init.init_calls()

    # DataFrame.info() writes the summary itself and returns None, so wrapping
    # it in print() would add a stray "None" line to the output.
    calls_df.info()
    print(calls_df.head(1))
    # Optional debugging aids:
    # print(calls_df.tail(2))
    # print(calls_df.iloc[calls_df.index.duplicated()])

    print('Testing calls parsing from given database successful in ' + str(timer() - started) + ' s')


Overwriting tester.py


In [2]:
%%writefile -a tester.py

def weather_te():
    """Smoke-test the weather parser exposed by ``init.init_weather``.

    Reloads ``init``, builds the weather table, prints its summary together
    with its first and last row, and reports the elapsed time measured with
    ``timer`` (``time.process_time``, i.e. CPU time).

    The result of ``init.init_weather()`` is presumably a pandas DataFrame
    (it is used through ``.info()``, ``.head()`` and ``.tail()``) -- confirm
    in ``init``.
    """
    reload(init)

    print('Testing weather parsing from given database')

    started = timer()
    wtr_df = init.init_weather()

    # DataFrame.info() prints on its own and returns None; do not print() it,
    # otherwise a spurious "None" line shows up in the output.
    wtr_df.info()
    print(wtr_df.head(1))
    print(wtr_df.tail(1))
    # Optional debugging aid:
    # print(wtr_df.iloc[wtr_df.index.duplicated(keep=False)])

    print('Testing weather parsing from given database successful in ' + str(timer() - started) + ' s')


Appending to tester.py


In [3]:
%%writefile -a tester.py

def init_te():
    """Smoke-test the combined data set returned by ``init.init``.

    Reloads ``init``, unpacks the ``(x, y)`` pair it returns, prints the
    summary and first row of ``x`` and the first row of ``y``, and reports
    the elapsed time measured with ``timer`` (``time.process_time``).

    ``x`` is presumably the feature frame and ``y`` the target -- confirm
    in ``init``.
    """
    reload(init)

    print('Testing database initialisation from given databases')

    started = timer()
    x, y = init.init()

    # DataFrame.info() prints on its own and returns None; wrapping it in
    # print() would emit an extra "None" line.
    x.info()
    print(x.head(1))
    print(y.head(1))
    # Optional debugging aid:
    # print(x.iloc[x.index.duplicated(keep=False)])

    print('Testing database initialisation from given databases successful in ' + str(timer() - started) + ' s')


Appending to tester.py


## 2. Preprocessing

In [4]:
%%writefile -a tester.py

def fwe_te():
    """Inspect every split produced by ``prep.fwd_splitter()``.

    Reloads ``init`` and ``prep``, loads ``(x, y)`` through ``init.init()``
    and, for each ``(train, test)`` index pair yielded by
    ``prep.fwd_splitter().split(x)``, prints the summary of the training
    rows, the first two test targets and the total number of targets.
    """
    reload(init)
    reload(prep)

    x, y = init.init()
    for tr_ind, te_ind in prep.fwd_splitter().split(x):
        # info() prints directly and returns None; passing it to print()
        # would put a literal "None" in front of the other values.
        x.iloc[tr_ind].info()
        print(y.iloc[te_ind].head(2), len(y))


Appending to tester.py


In [5]:
%%writefile -a tester.py

def nai_te():
    """Inspect the three index sets returned by ``prep.nai_splitter()``.

    Reloads ``init`` and ``prep``, loads ``(x, y)`` through ``init.init()``
    and prints the raw index sets, the summary of the training rows and the
    first two targets of each evaluation set.

    The three return values are presumably train / evaluation-1 /
    evaluation-2 positional indices -- confirm in ``prep``.
    """
    reload(init)
    reload(prep)

    x, y = init.init()
    tr_ind, e1_ind, e2_ind = prep.nai_splitter()

    print(tr_ind, e1_ind, e2_ind)
    # info() prints directly and returns None; keep it out of print() so no
    # stray "None" is emitted.
    x.iloc[tr_ind].info()
    print(y.iloc[e1_ind].head(2), y.iloc[e2_ind].head(2))


Appending to tester.py


## 3. Modelling

In [6]:
%%writefile -a tester.py

def printer(x, y, te_ind, mod):
    """Evaluate a fitted model on one test slice and plot its predictions.

    Parameters
    ----------
    x, y
        Objects supporting ``.iloc`` positional indexing. ``y`` must be
        joinable to a DataFrame, and the joined frame is resampled by week,
        so the index is presumably datetime-like -- confirm in ``init``.
    te_ind
        Positional indices of the test rows; the first and last entries are
        used in the printed messages and in the plot title.
    mod
        Fitted estimator exposing ``predict`` and ``score``.

    Prints ``mod.score`` on the test slice and the root mean squared error
    of the predictions, then shows a plot of weekly sums of prediction and
    target.
    """
    print('Testing Gradient Boosting Regressor with .iloc[' + str(te_ind[0]) + ', ' + str(te_ind[-1]) + ']')

    # Slice once and reuse instead of re-indexing for every call below.
    x_te = x.iloc[te_ind]
    y_te = y.iloc[te_ind]

    y_pd = mod.predict(x_te)

    # For scikit-learn regressors score() is the coefficient of determination
    # (R^2) -- presumably the case for this model; verify against `model`.
    print('Score: ' + str(mod.score(x_te, y_te)))
    # sqrt(MSE) is the root mean squared error; it was previously mislabelled
    # as 'R^2'.
    print('RMSE: ' + str(np.sqrt(mean_squared_error(y_te, y_pd))))

    predictions = pd.DataFrame(data=y_pd,
                               index=y_te.index,
                               columns=['prediction'])
    predictions.join(y_te).resample('W').sum().plot()
    plt.title('Gradient Boosting Regressor with .iloc[' + str(te_ind[0]) + ':' + str(te_ind[-1]) + ']')
    plt.show()

Appending to tester.py


In [7]:
%%writefile -a tester.py

def model_fwd_te(n_spi=2):
    """Fit and evaluate the model on every split of ``prep.fwd_splitter``.

    Parameters
    ----------
    n_spi : int, default 2
        Forwarded unchanged to ``prep.fwd_splitter(n_spi=...)``; presumably
        the number of splits -- confirm in ``prep``.

    For each ``(train, test)`` index pair the pipeline returned by
    ``model.model_ppl()`` is refitted on the training rows (timed with
    ``timer``) and then reported on the test rows through ``printer``.
    """
    reload(init)
    reload(prep)
    reload(model)
    # `printer` lives in this module; the former `pred_te` name was never
    # imported here, so reload(pred_te) / pred_te.printer raised NameError.

    x, y = init.init()

    mod = model.model_ppl()

    print('Model parameters: ' + str(mod.get_params()))

    for tr_ind, te_ind in prep.fwd_splitter(n_spi=n_spi).split(x):
        x_tr, y_tr = x.iloc[tr_ind], y.iloc[tr_ind]

        print('Training Gradient Boosting Regressor with .iloc[' + str(tr_ind[0]) + ':' + str(tr_ind[-1]) + ']')
        started = timer()
        mod.fit(x_tr, y_tr)
        print('Training Gradient Boosting Regressor successful in ' + str(timer() - started) + ' s')

        printer(x, y, te_ind, mod)

    
def model_nai_te():
    """Fit the model on the ``prep.nai_splitter`` training set and evaluate it.

    Reloads ``init``, ``prep`` and ``model``, fits the pipeline returned by
    ``model.model_ppl()`` on the training indices (timed with ``timer``) and
    reports it on both evaluation index sets through ``printer``.
    """
    reload(init)
    reload(prep)
    reload(model)
    # `printer` lives in this module; the former `pred_te` name was never
    # imported here, so reload(pred_te) / pred_te.printer raised NameError.

    # Load the data once; it was previously loaded twice in a row.
    x, y = init.init()

    mod = model.model_ppl()

    print('Model parameters: ' + str(mod.get_params()))

    tr_ind, e1_ind, e2_ind = prep.nai_splitter()

    print('Training Gradient Boosting Regressor with .iloc[' + str(tr_ind[0]) + ':' + str(tr_ind[-1]) + ']')
    started = timer()
    mod.fit(x.iloc[tr_ind], y.iloc[tr_ind])
    print('Training Gradient Boosting Regressor successful in ' + str(timer() - started) + ' s')

    for ev_ind in (e1_ind, e2_ind):
        printer(x, y, ev_ind, mod)

Appending to tester.py


## 4. Training

### 4.2. Unit tester

In [8]:
%%writefile -a tester.py

def train_te():
    """Evaluate the estimator returned by ``train.train()``.

    Reloads ``init``, ``prep`` and ``train``, loads ``(x, y)`` through
    ``init.init()``, obtains the estimator from ``train.train()`` and reports
    it on both evaluation index sets of ``prep.nai_splitter()`` through
    ``printer``.
    """
    reload(init)
    reload(prep)
    reload(train)
    # `printer` lives in this module; the former `pred_te` name was never
    # imported here, so reload(pred_te) / pred_te.printer raised NameError.

    x, y = init.init()

    # The training indices are not needed here, only the evaluation sets.
    _, e1_ind, e2_ind = prep.nai_splitter()

    reg = train.train()

    for ev_ind in (e1_ind, e2_ind):
        printer(x, y, ev_ind, reg)


Appending to tester.py


## 5. Main

In [9]:


def data_gen():
    """Placeholder: takes no arguments, does nothing and returns ``None``."""
    return None

def main_te():
    """Placeholder: takes no arguments, does nothing and returns ``None``."""
    return None