In [None]:
# std
import os
import sys
import inspect
import time
import pathlib
from math import sqrt
from math import log2
# packages
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
%matplotlib inline

# packages
from matplotlib.colors import ListedColormap

## sklearn
from sklearn.preprocessing import StandardScaler,PowerTransformer,MinMaxScaler,QuantileTransformer,normalize
from sklearn.model_selection import train_test_split, learning_curve, ShuffleSplit
from sklearn.feature_selection import VarianceThreshold, SelectKBest
from sklearn.model_selection import KFold
from sklearn.decomposition import PCA
from sklearn.feature_selection import chi2

from sklearn.metrics import r2_score
from sklearn.metrics import mean_poisson_deviance
from sklearn.metrics import mean_gamma_deviance
from sklearn.metrics import median_absolute_error

from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor


# Put the parent directory of the notebook on sys.path so the project-local
# imports that follow can be resolved.
# `inspect.getfile(inspect.currentframe())` is not reliable inside a notebook:
# the "file" of a cell is a kernel-generated name (recent ipykernel versions
# place it in a temporary directory), so the parent derived from it can point
# outside the project. The working directory is used instead.
# NOTE(review): assumes the kernel's working directory is this notebook's
# folder (Jupyter's default) -- confirm if the notebook is launched differently.
currentdir = os.path.abspath(os.getcwd())
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

from common.DataParser import parse_metro
from common.model_trainer_reg import *
from common.regression_plotfunctions import *


from GD.LinearRegression import LinearRegression
from KNN.KNNRegressor import KNNRegressor

import metro_preprocessing


# Train and Test

In [None]:
# Parse the metro data and run the project's preprocessing on it; the two
# returned objects (X, Y) are what every model cell below hands to ModelTrainer.
# For a quicker experiment, subsample before preprocessing:
#     df_raw = df_raw.sample(30000)
df_raw = parse_metro()
X, Y = metro_preprocessing.preprocessing(df_raw, transform=True)

In [None]:
# Show the raw data for a quick visual check before it is deleted in the next cell.
df_raw

In [None]:
# X and Y have already been derived, so the raw data is no longer needed.
del df_raw

# Demonstrate that the name is really gone: looking it up fails and the
# resulting error message is printed instead of aborting the cell.
try:
    df_raw
except Exception as err:
    print(err)

## SGD-Regression

In [None]:
# --- scikit-learn SGDRegressor: model class, output folder and parameter grid ---
MODEL = SGDRegressor
NAME = "SGD"
PATH = f"out/{NAME}/"
params = {
    "alpha": [0.1, 0.01, 0.001, 0.0001],
    "max_iter": [1000, 10000, 100000],
}
# Evaluation settings; the model cells further down reuse these two values.
n_splits = 2
test_size = 0.3

modeltrainer = ModelTrainer(MODEL, params, X, Y, thread_cnt=1)

# 1) single train/test split
modeltrainer.TTSplit(test_size=test_size)
modeltrainer.train()
results = modeltrainer.retResults(PATH + "sklearn_TTS_SGD.csv")
display(results)

# 2) shuffle-split cross-validation
# NOTE(review): no explicit train() call follows CV_shuffle_split -- presumably
# it trains internally; confirm in common.model_trainer_reg.
modeltrainer.CV_shuffle_split(k=n_splits, test_size=test_size, random_state=42)
results = modeltrainer.retResults(PATH + "sklearn_CV_SGD.csv")
display(results)

## My SGD-Regression

In [None]:
# Project's own LinearRegression (imported from GD.LinearRegression), run
# through the same two evaluation steps as the scikit-learn SGD cell.
MODEL = LinearRegression
# Set the output folder here as the other model cells do, instead of relying on
# NAME/PATH left over from whichever cell ran last; otherwise running this cell
# after e.g. the KNN cell would write the SGD result files into "out/KNN/".
NAME = "SGD"
PATH = "out/"+NAME+"/"
params = {"alpha" : [0.0001], "max_iter": [1000]}


# n_splits and test_size are defined in the scikit-learn SGD cell above.
modeltrainer = ModelTrainer(MODEL, params, X, Y, thread_cnt=1)
########### train with TrainTestSplit  ###################
modeltrainer.TTSplit(test_size = test_size)
modeltrainer.train()
results = modeltrainer.retResults(PATH + "my_TTS_SGD.csv")
display(results)
############ shuffle_Cross validation  ###################
modeltrainer.CV_shuffle_split(k = n_splits, test_size = test_size, random_state = 42)
results = modeltrainer.retResults(PATH + "my_CV_SGD.csv")
display(results)

## KNN-Regression

In [None]:
# --- scikit-learn KNeighborsRegressor: model class, parameter grid, output folder ---
MODEL = KNeighborsRegressor
params = {"weights": ["uniform"]}
NAME = "KNN"
PATH = f"out/{NAME}/"

# n_splits and test_size are defined in the scikit-learn SGD cell above.
modeltrainer = ModelTrainer(MODEL, params, X, Y, thread_cnt=1)

# 1) single train/test split
modeltrainer.TTSplit(test_size=test_size)
modeltrainer.train()
results = modeltrainer.retResults(PATH + "sklearn_TTS_KNN.csv")
display(results)

# 2) shuffle-split cross-validation
modeltrainer.CV_shuffle_split(k=n_splits, test_size=test_size, random_state=42)
results = modeltrainer.retResults(PATH + "sklearn_CV_KNN.csv")
display(results)

## My KNN-Regression

In [None]:
# --- project's own KNNRegressor (from KNN.KNNRegressor): model class, grid, output folder ---
MODEL = KNNRegressor
params = {
    "n_neighbors": [5],
    "p": [2],
    "chunk_size": [20, 100],
}
NAME = "KNN"
PATH = f"out/{NAME}/"

# n_splits and test_size are defined in the scikit-learn SGD cell above.
modeltrainer = ModelTrainer(MODEL, params, X, Y, thread_cnt=1)

# 1) single train/test split
modeltrainer.TTSplit(test_size=test_size)
modeltrainer.train()
results = modeltrainer.retResults(PATH + "my_TTS_KNN.csv")
display(results)

# 2) shuffle-split cross-validation
modeltrainer.CV_shuffle_split(k=n_splits, test_size=test_size, random_state=42)
results = modeltrainer.retResults(PATH + "my_CV_KNN.csv")
display(results)

## RF-Regression

In [None]:
# --- scikit-learn RandomForestRegressor: model class, parameter grid, output folder ---
MODEL = RandomForestRegressor
params = {"n_estimators": [100]}
NAME = "RF"
PATH = f"out/{NAME}/"

# n_splits and test_size are defined in the scikit-learn SGD cell above.
modeltrainer = ModelTrainer(MODEL, params, X, Y, thread_cnt=1)

# 1) single train/test split
modeltrainer.TTSplit(test_size=test_size)
modeltrainer.train()
results = modeltrainer.retResults(PATH + "sklearn_TTS_RF.csv")
display(results)

# 2) shuffle-split cross-validation
modeltrainer.CV_shuffle_split(k=n_splits, test_size=test_size, random_state=42)
results = modeltrainer.retResults(PATH + "sklearn_CV_RF.csv")
display(results)

## DT-Regression

In [None]:
import sklearn  # local import: only needed for the version check below

# --- scikit-learn DecisionTreeRegressor ---
MODEL = DecisionTreeRegressor
# The squared-error split criterion was spelled "mse" before scikit-learn 1.0;
# it was renamed to "squared_error" in 1.0 and the old spelling was removed in
# 1.2. Pick whichever spelling the installed version accepts so the cell runs
# on both old and current releases.
_sk_major_minor = tuple(int(part) for part in sklearn.__version__.split(".")[:2])
params = {"criterion": ["squared_error" if _sk_major_minor >= (1, 0) else "mse"]}
NAME = "DT"
PATH = "out/"+NAME+"/"



# n_splits and test_size are defined in the scikit-learn SGD cell above.
modeltrainer = ModelTrainer(MODEL, params, X, Y, thread_cnt=1)
########### train with TrainTestSplit  ###################
modeltrainer.TTSplit(test_size = test_size)
modeltrainer.train()
results = modeltrainer.retResults(PATH + "sklearn_TTS_DT.csv")
display(results)
############ shuffle_Cross validation  ###################
modeltrainer.CV_shuffle_split(k = n_splits, test_size = test_size, random_state = 42)
results = modeltrainer.retResults(PATH + "sklearn_CV_DT.csv")
display(results)