In [1]:
import os
import time

#data manipulation
import numpy as np
import pandas as pd

#learning methods
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression

#Splitting the data
from sklearn.model_selection import train_test_split

import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

#Evaluation methods
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score


from sklearn.model_selection import cross_val_score

In [2]:
def feed_data(reg, data_path="final.csv", target="Gb2t_avg", test_size=0.2, random_state=None):
    """Fit ``reg`` on a train split of the dataset and score it on the held-out split.

    Parameters
    ----------
    reg : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``.
    data_path : str, optional
        CSV file to load. Defaults to ``"final.csv"``.
    target : str, optional
        Name of the column to predict; every other column is used as a feature.
        Defaults to ``"Gb2t_avg"``.
    test_size : float, optional
        Value forwarded to ``train_test_split`` as ``test_size``. Defaults to ``0.2``.
    random_state : int or None, optional
        Value forwarded to ``train_test_split`` as ``random_state``. Pass an
        integer to make the split repeatable; the default ``None`` keeps the
        original unseeded behaviour.

    Returns
    -------
    tuple
        ``(mse, mae, r2)`` computed on the held-out split.
    """
    data = pd.read_csv(data_path)
    y = data[target]
    X = data.drop(columns=target)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    reg = reg.fit(X_train, y_train)
    predictions = reg.predict(X_test)

    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)

    return mse, mae, r2

def experiment_report(file_address='log_fold.txt'):
    """Benchmark a fixed set of regressors and write their scores to a log file.

    Each regressor is passed once to ``feed_data``; its class name, MSE, MAE,
    R2 and the elapsed time in seconds are appended to the log as one block.

    Parameters
    ----------
    file_address : str, optional
        Path of the log file. An existing file is overwritten.
        Defaults to ``'log_fold.txt'``.
    """
    regressors = [
        ExtraTreesRegressor(),
        RandomForestRegressor(),
        GradientBoostingRegressor(),
        AdaBoostRegressor(),
        KNeighborsRegressor(),
        DecisionTreeRegressor(),
        BaggingRegressor(ExtraTreesRegressor()),
        LinearRegression(),
        MLPRegressor(),
    ]

    # Write mode truncates an existing file, so no explicit removal is needed;
    # the context manager closes (and flushes) the log even if a regressor fails.
    with open(file_address, "w") as f:
        for reg in regressors:
            # perf_counter is monotonic, so the interval cannot be skewed by
            # wall-clock adjustments during a long fit.
            start = time.perf_counter()
            mse, mae, r2 = feed_data(reg)
            time_taken = time.perf_counter() - start

            f.write("Regressor Name: {}\n".format(reg.__class__.__name__))
            f.write("MSE: {}\n".format(mse))
            f.write("MAE: {}\n".format(mae))
            f.write("R2: {}\n".format(r2))
            f.write("Time: {}\n".format(time_taken))
            f.write("##############################\n")

In [3]:
# Run the benchmark; the scores are written to log_fold.txt rather than displayed here.
experiment_report()



In [None]:
## DATA VISUALIZATION

In [None]:
# Load the dataset and separate the target column from the feature columns.
data = pd.read_csv("final.csv")
X = data.drop(columns='Gb2t_avg')
y = data['Gb2t_avg']

# Hold out 20% of the rows for evaluation (no random_state is passed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Fit a default BaggingRegressor and predict the held-out rows.
reg = BaggingRegressor().fit(X_train, y_train)
predictions = reg.predict(X_test)

In [None]:
# `fmri` is not defined in any cell of this notebook, so the plot below raised
# a NameError. NOTE(review): this assumes the seaborn example dataset of the
# same name was intended (load_dataset may download it on first use) — confirm.
fmri = sns.load_dataset("fmri")

# One line per subject and event for the frontal region, without aggregation.
ax = sns.lineplot(
    x="timepoint", y="signal", hue="event",
    units="subject", estimator=None, lw=1,
    data=fmri.query("region == 'frontal'"),
)

In [None]:
# Inspect the dimensions of the feature frame.
np.shape(X)

In [None]:
# 20% of 201865 — presumably 201865 is the row count shown above and this is
# the expected size of the test_size=0.2 hold-out; TODO confirm.
20/100*201865

In [None]:
# One x-axis position per held-out prediction. Derived from `predictions`
# instead of the hard-coded 40373 so it stays correct if the dataset changes.
number_of_datapoint = list(range(len(predictions)))

In [None]:
# Plot every prediction against its position in the test set.
a4_dims = (20, 10)
fig, ax = plt.subplots(figsize=a4_dims)
# x and y are passed as vectors, so no `data=` frame is needed; the full
# DataFrame has a different length than these vectors and must not be passed.
ax = sns.lineplot(x=number_of_datapoint, y=predictions, estimator=None, lw=1, ax=ax)

In [None]:
# Same prediction plot as before on a wider (30 x 10) canvas.
a4_dims = (30, 10)
fig, ax = plt.subplots(figsize=a4_dims)
# x and y are passed as vectors, so no `data=` frame is needed; the full
# DataFrame has a different length than these vectors and must not be passed.
ax = sns.lineplot(x=number_of_datapoint, y=predictions, estimator=None, lw=1, ax=ax)

In [None]:
# Empty placeholder frame; the lines that would fill it with actual and
# predicted values are still commented out below.
results = pd.DataFrame()
# preds = pd.DataFrame(data=predictions)
#results = pd.concat([y_test], axis=1)
#results['predictions'] = pd.DataFrame(data=prediction)
#results.head(20)

In [None]:
# Inspect the size of the held-out target values.
np.shape(y_test)

In [None]:
# Inspect the size of the prediction array (to compare with y_test).
np.shape(predictions)

In [None]:
# Restrict the comparison to the first 3000 held-out samples.
number_of_datapoint = list(range(3000))
actual_val = y_test[:3000]
pred_val = predictions[:3000]

In [None]:
# Re-check the size of the full prediction array.
np.shape(predictions)

In [None]:
# Re-check the size of the full held-out target vector.
np.shape(y_test)

In [None]:
# One x position per held-out sample, derived from the data rather than the
# hard-coded 40373 so the plot keeps working if the dataset size changes.
number_of_datapoint = list(range(len(predictions)))

# Overlay predicted (blue) and actual (green) target values.
fig = plt.figure(figsize=(16,10), dpi=200)
plt.plot(number_of_datapoint, predictions, marker=".", color="blue", label="predicted")
plt.plot(number_of_datapoint, y_test, marker=".", color="green", label="actual")
plt.legend()
plt.show()

In [None]:
#error = abs(actual_val-pred_val)
#error = abs(y_test-predictions)

In [None]:
# `error` is not assigned by any earlier cell (the only earlier assignments
# are commented out), so compute the absolute error over the whole test set.
error = abs(y_test - predictions)

a4_dims = (16, 10)
fig, ax = plt.subplots(figsize=a4_dims)
# Reference lines at the 2.5 and 4 error thresholds.
plt.axhline(y=2.5, color="Yellow")
plt.axhline(y=4, color="Red")
# x and y are passed as vectors, so no `data=` frame is needed; the full
# DataFrame has a different length than these vectors and must not be passed.
ax = sns.lineplot(x=number_of_datapoint, y=error, estimator=None, lw=1, ax=ax)

In [None]:
# GOOD ONE IS BAGGING REGRESSOR

In [None]:
# Reload the dataset, draw a new 80/20 split and refit a default
# BaggingRegressor. This overwrites data, X, y, the split variables, reg and
# predictions from the earlier cells; no random_state is passed to the split.
data = pd.read_csv("final.csv")
y = data['Gb2t_avg']
X = data.drop('Gb2t_avg', axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
reg = BaggingRegressor()
reg = reg.fit(X_train, y_train)
predictions = reg.predict(X_test)

In [None]:
# Absolute error for the first 3000 held-out samples.
pred_val = predictions[:3000]
actual_val = y_test[:3000]
# Sized from the slice so x and y always have the same length.
number_of_datapoint = list(range(len(pred_val)))
# Compare the matching slices: subtracting the 3000-element `pred_val` from
# the full `y_test` fails because the lengths differ.
error = abs(actual_val - pred_val)

a4_dims = (16, 10)
fig, ax = plt.subplots(figsize=a4_dims)

# Reference lines at the 2.5 and 4 error thresholds.
plt.axhline(y=2.5, color="Yellow")
plt.axhline(y=4, color="Red")

# x and y are passed as vectors, so no `data=` frame is needed; the full
# DataFrame has a different length than these vectors and must not be passed.
ax = sns.lineplot(x=number_of_datapoint, y=error, estimator=None, lw=1, ax=ax)

In [None]:
# Positions (taken from number_of_datapoint) whose absolute error is at or
# above the 2.5 threshold.
output_lower = [
    number_of_datapoint[position]
    for position, err in enumerate(error)
    if err >= 2.5
]

In [None]:
# Display the positions that reached the 2.5 threshold.
output_lower

In [None]:
# Positions (taken from number_of_datapoint) whose absolute error is at or
# above the 4 threshold.
output_upper = [
    number_of_datapoint[position]
    for position, err in enumerate(error)
    if err >= 4
]

In [None]:
# Display the positions that reached the 4 threshold.
output_upper

In [None]:
# Absolute error for held-out samples 400 to 2499.
pred_val = predictions[400:2500]
actual_val = y_test[400:2500]
# Sized from the slice so x and y always have the same length.
number_of_datapoint = list(range(len(pred_val)))
error = abs(actual_val - pred_val)

a4_dims = (16, 10)
fig, ax = plt.subplots(figsize=a4_dims)

# Reference lines at the 2.5 and 4 error thresholds.
plt.axhline(y=2.5, color="Yellow")
plt.axhline(y=4, color="Red")

# x and y are passed as vectors, so no `data=` frame is needed; the full
# DataFrame has a different length than these vectors and must not be passed.
ax = sns.lineplot(x=number_of_datapoint, y=error, estimator=None, lw=1, ax=ax)

In [None]:
# One x position per held-out sample (replaces the hard-coded 40373).
number_of_datapoint = list(range(len(predictions)))

# Overlay predicted (blue) and actual (green) target values.
fig = plt.figure(figsize=(16,10), dpi=200)
plt.plot(number_of_datapoint, predictions, marker=".", color="blue", label="predicted")
plt.plot(number_of_datapoint, y_test, marker=".", color="green", label="actual")
plt.legend()
plt.show()


# Recompute the error over the whole test set: the `error` left over from the
# preceding cell only covers a 2100-sample window and would not match the
# full-length x axis used here.
error = abs(y_test - predictions)

a4_dims = (16, 10)
fig, ax = plt.subplots(figsize=a4_dims)
# Reference lines at the 2.5 and 4 error thresholds.
plt.axhline(y=2.5, color="Yellow")
plt.axhline(y=4, color="Red")
# x and y are passed as vectors, so no `data=` frame is needed; the full
# DataFrame has a different length than these vectors and must not be passed.
ax = sns.lineplot(x=number_of_datapoint, y=error, estimator=None, lw=1, ax=ax)

In [None]:
# Absolute error for held-out samples 15000 to 26999.
pred_val = predictions[15000:27000]
actual_val = y_test[15000:27000]
# Sized from the slice so x and y always have the same length.
number_of_datapoint = list(range(len(pred_val)))
error = abs(actual_val - pred_val)

a4_dims = (16, 10)
fig, ax = plt.subplots(figsize=a4_dims)

# Reference lines at the 2.5 and 4 error thresholds.
plt.axhline(y=2.5, color="Yellow")
plt.axhline(y=4, color="Red")

# x and y are passed as vectors, so no `data=` frame is needed; the full
# DataFrame has a different length than these vectors and must not be passed.
ax = sns.lineplot(x=number_of_datapoint, y=error, estimator=None, lw=1, ax=ax)

In [None]:
# Collect every error value strictly above 2.5 and print its position within
# the current window. (The list keeps the name `output_upper` because the
# following cell displays it.)
output_upper = []

for position, err in enumerate(error):
    if err > 2.5:
        output_upper.append(err)
        print(position)

In [None]:
# Display the error values that exceeded 2.5.
output_upper

In [None]:
# With the best regressor (bagging), at time 12047 we get approximately 2.6.
# Now we try the upper limit.

# Collect every error value strictly above 4 and print its position within
# the current window.
output_upper = []

for position, err in enumerate(error):
    if err > 4:
        output_upper.append(err)
        print(position)

In [None]:
# Display the error values that exceeded 4.
output_upper

In [None]:
# At time 19678 we get the first upper-bound exceedance, at around 4.8