In [138]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor 
from sklearn.tree import DecisionTreeClassifier 
from math import sqrt
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import warnings
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV
from sklearn.svm import LinearSVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import StackingClassifier
from imblearn.over_sampling import RandomOverSampler
from sklearn.ensemble import BaggingClassifier, BaggingRegressor, GradientBoostingClassifier, GradientBoostingRegressor, StackingClassifier
warnings.filterwarnings('ignore')
from catboost import CatBoostRegressor, CatBoostClassifier

In [139]:
data_regress= pd.read_csv("../data/trip_duration_task_m.csv")
data_regress.drop(["Unnamed: 0"], axis=1, inplace=True)
data_regress

Unnamed: 0,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,trip_duration
0,-73.953918,40.778873,-73.963875,40.771164,400
1,-73.988312,40.731743,-73.994751,40.694931,1100
2,-73.997314,40.721458,-73.948029,40.774918,1635
3,-73.961670,40.759720,-73.956779,40.780628,1141
4,-74.017120,40.708469,-73.988182,40.740631,848
...,...,...,...,...,...
199489,-73.978088,40.751461,-73.964417,40.764450,565
199490,-73.988548,40.721390,-73.998604,40.693054,800
199491,-73.987770,40.732391,-73.971451,40.760799,472
199492,-73.870796,40.773720,-73.988571,40.721371,2163


In [140]:
y = data_regress['trip_duration']
X = data_regress.drop(['trip_duration'], axis=1)

In [141]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3)

In [142]:
def DecisionTreeRegressorOptimal(x_train, y_train, x_test, y_test):
    parameters={"splitter":["best","random"],
            "max_depth" : [1,3,5,7,9,11,12],
           "min_samples_leaf":[1,2,3,4,5,6,7,8,9,10],
           "min_weight_fraction_leaf":[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
           "max_features":["auto","log2","sqrt",None],
           "max_leaf_nodes":[None,10,20,30,40,50,60,70,80,90] }
    search = RandomizedSearchCV(DecisionTreeRegressor(),param_distributions=parameters, scoring="neg_mean_squared_error", )
    
    optimalDecitionTreeReg = search.fit(x_train,y_train)
    y_pred = optimalDecitionTreeReg.predict(x_test)
    print(search.best_params_)
    print(f'MAE: {mean_absolute_error(y_test, y_pred)}')
    print(f'MSE: {mean_squared_error(y_test, y_pred)}')
    print(f'RMSE: {sqrt(mean_squared_error(y_test, y_pred))}')
    print(f'MAPE: {(mean_absolute_percentage_error(y_test, y_pred))}')
    print(f'R^2: {r2_score(y_test, y_pred)}')

In [143]:
DecisionTreeRegressorOptimal(X_train,y_train, X_test, y_test)

{'splitter': 'best', 'min_weight_fraction_leaf': 0.3, 'min_samples_leaf': 9, 'max_leaf_nodes': 40, 'max_features': 'log2', 'max_depth': 5}
MAE: 611.0402474361597
MSE: 9960689.147997325
RMSE: 3156.0559481728656
MAPE: 1.522529856707061
R^2: 0.0018434478874982396


In [144]:
data_class = pd.read_csv("../data/csgo_task_m.csv")
data_class.drop(["Unnamed: 0"], axis=1, inplace=True)
data_class.head()

Unnamed: 0,time_left,ct_score,t_score,bomb_planted,ct_health,t_health,ct_money,t_money,ct_players_alive,t_players_alive,map_de_cache,map_de_dust2,map_de_inferno,map_de_mirage,map_de_nuke,map_de_overpass,map_de_train,map_de_vertigo
0,175.0,0.0,0.0,1,500.0,500.0,4000.0,4000.0,5.0,5.0,0,1,0,0,0,0,0,0
1,156.03,0.0,0.0,1,500.0,500.0,600.0,650.0,5.0,5.0,0,1,0,0,0,0,0,0
2,96.03,0.0,0.0,1,391.0,400.0,750.0,500.0,4.0,4.0,0,1,0,0,0,0,0,0
3,76.03,0.0,0.0,1,391.0,400.0,750.0,500.0,4.0,4.0,0,1,0,0,0,0,0,0
4,174.97,1.0,0.0,1,500.0,500.0,18350.0,10750.0,5.0,5.0,0,1,0,0,0,0,0,0


In [145]:
X = data_class.values
y = data_class["bomb_planted"].values

In [146]:
overSampler = RandomOverSampler(random_state=42)
X_over_sample, y_over_sample = overSampler.fit_resample(X,y)

In [147]:
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X_over_sample, y_over_sample, test_size=0.2, random_state=4)

In [148]:
def DecisionTreeClassifierOptimal(x_train, y_train, x_test, y_test):
    parameters={"splitter":["best","random"],
            "max_depth" : [1,3,5,7,9,11,12],
           "min_samples_leaf":[1,2,3,4,5,6,7,8,9,10],
           "min_weight_fraction_leaf":[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
           "max_features":["auto","log2","sqrt",None],
           "max_leaf_nodes":[None,10,20,30,40,50,60,70,80,90] }
    search = RandomizedSearchCV(DecisionTreeClassifier(),param_distributions=parameters, scoring="neg_mean_squared_error", )
    optimalDecitionTreeClass = search.fit(x_train,y_train)
    predictions = optimalDecitionTreeClass.predict(x_test)
    print(search.best_params_)
    print(accuracy_score(y_test, predictions))
    print(confusion_matrix(y_test, predictions))
    print(precision_score(y_test, predictions))
    print(recall_score(y_test, predictions))
    print(f1_score(y_test, predictions))
    

In [149]:
DecisionTreeClassifierOptimal(X_train_c,y_train_c, X_test_c, y_test_c)

{'splitter': 'best', 'min_weight_fraction_leaf': 0.1, 'min_samples_leaf': 8, 'max_leaf_nodes': 20, 'max_features': None, 'max_depth': 11}
1.0
[[19769     0]
 [    0 19937]]
1.0
1.0
1.0


In [150]:
bagging_classifier = BaggingClassifier(n_estimators=10, random_state=0).fit(X_train_c, y_train_c)
predictions = bagging_classifier.predict(X_test_c)
print(accuracy_score(y_test_c, predictions))
print(confusion_matrix(y_test_c, predictions))
print(precision_score(y_test_c, predictions))
print(recall_score(y_test_c, predictions))
print(f1_score(y_test_c, predictions))

1.0
[[19769     0]
 [    0 19937]]
1.0
1.0
1.0


In [151]:
bagging_regressor = BaggingRegressor(n_estimators=20).fit(X_train, y_train)
y_pred = bagging_regressor.predict(X_test)
print(f'MAE: {mean_absolute_error(y_test, y_pred)}')
print(f'MSE: {mean_squared_error(y_test, y_pred)}')
print(f'RMSE: {sqrt(mean_squared_error(y_test, y_pred))}')
print(f'MAPE: {(mean_absolute_percentage_error(y_test, y_pred))}')
print(f'R^2: {r2_score(y_test, y_pred)}')


MAE: 486.61376530103587
MSE: 19244654.562435627
RMSE: 4386.872982254629
MAPE: 0.7925884738138038
R^2: -0.9284988979401116


In [152]:
gradientBoostClassifier = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,max_depth=1, random_state=0).fit(X_train_c, y_train_c)
predictions = gradientBoostClassifier.predict(X_test_c)
print(accuracy_score(y_test_c, predictions))
print(confusion_matrix(y_test_c, predictions))
print(precision_score(y_test_c, predictions))
print(recall_score(y_test_c, predictions))
print(f1_score(y_test_c, predictions))

1.0
[[19769     0]
 [    0 19937]]
1.0
1.0
1.0


In [153]:
gradientrBoostingReg = GradientBoostingRegressor(n_estimators=100, learning_rate=0.2,max_depth=1, random_state=0)
gradientrBoostingReg.fit(X_train, y_train)
y_pred = gradientrBoostingReg.predict(X_test)
print(f'MAE: {mean_absolute_error(y_test, y_pred)}')
print(f'MSE: {mean_squared_error(y_test, y_pred)}')
print(f'RMSE: {sqrt(mean_squared_error(y_test, y_pred))}')
print(f'MAPE: {(mean_absolute_percentage_error(y_test, y_pred))}')
print(f'R^2: {r2_score(y_test, y_pred)}')

MAE: 565.6798665200671
MSE: 9903778.566768583
RMSE: 3147.026940902887
MAPE: 1.6071423103774127
R^2: 0.0075464337646774515


In [154]:
estimators = [
    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
    ('svr', make_pipeline(StandardScaler(),
                          LinearSVC(random_state=42)))
]
stackingClassifier = StackingClassifier(
    estimators=estimators, final_estimator=LogisticRegression()
)
stackingClassifier.fit(X_train_c, y_train_c)
predictions = stackingClassifier.predict(X_test_c)
print(accuracy_score(y_test_c, predictions))
print(confusion_matrix(y_test_c, predictions))
print(precision_score(y_test_c, predictions))
print(recall_score(y_test_c, predictions))
print(f1_score(y_test_c, predictions))

1.0
[[19769     0]
 [    0 19937]]
1.0
1.0
1.0


In [155]:
estimators = [
    ('lr', RidgeCV()),
    ('svr', LinearSVR(random_state=42))
]
stackinReg = StackingRegressor(
    estimators=estimators,
    final_estimator=RandomForestRegressor(n_estimators=10,
                                          random_state=42)
)

stackinReg.fit(X_train, y_train)
y_pred = stackinReg.predict(X_test)
print(f'MAE: {mean_absolute_error(y_test, y_pred)}')
print(f'MSE: {mean_squared_error(y_test, y_pred)}')
print(f'RMSE: {sqrt(mean_squared_error(y_test, y_pred))}')
print(f'MAPE: {(mean_absolute_percentage_error(y_test, y_pred))}')
print(f'R^2: {r2_score(y_test, y_pred)}')

MAE: 645.6040067837957
MSE: 11477764.401316937
RMSE: 3387.8849451120586
MAPE: 1.3981827464983414
R^2: -0.15018203766368576


In [156]:
catReg = CatBoostRegressor(iterations=2,
                          learning_rate=0.2,
                          depth=2)
catReg.fit(X_train, y_train)
y_pred  = catReg.predict(X_test)
print(f'MAE: {mean_absolute_error(y_test, y_pred)}')
print(f'MSE: {mean_squared_error(y_test, y_pred)}')
print(f'RMSE: {sqrt(mean_squared_error(y_test, y_pred))}')
print(f'MAPE: {(mean_absolute_percentage_error(y_test, y_pred))}')
print(f'R^2: {r2_score(y_test, y_pred)}')

0:	learn: 5711.7899693	total: 8.14ms	remaining: 8.14ms
1:	learn: 5707.1827330	total: 24.5ms	remaining: 0us
MAE: 579.1867068145372
MSE: 9887997.901174039
RMSE: 3144.51870739769
MAPE: 1.489062644752037
R^2: 0.009127807756561968


In [157]:
catClass = CatBoostClassifier(iterations=2,learning_rate=0.2,depth=2)
catClass.fit(X_train_c, y_train_c)
predictions = catClass.predict(X_test_c)
print(accuracy_score(y_test_c, predictions))
print(confusion_matrix(y_test_c, predictions))
print(precision_score(y_test_c, predictions))
print(recall_score(y_test_c, predictions))
print(f1_score(y_test_c, predictions))


0:	learn: 0.5130111	total: 7.83ms	remaining: 7.83ms
1:	learn: 0.3921029	total: 17.6ms	remaining: 0us
1.0
[[19769     0]
 [    0 19937]]
1.0
1.0
1.0


In [161]:
def CatBoostClassifierOptimal(x_train, y_train, x_test, y_test):
    parameters={
            "depth" : [1,3],
           "learning_rate": np.arange(0, 1, 0.5,) }
    search = RandomizedSearchCV(CatBoostClassifier(),param_distributions=parameters )
    optimalDecitionTreeClass = search.fit(x_train,y_train)
    predictions = optimalDecitionTreeClass.predict(x_test)
    print(search.best_params_)
    print(accuracy_score(y_test, predictions))
    print(confusion_matrix(y_test, predictions))
    print(precision_score(y_test, predictions))
    print(recall_score(y_test, predictions))
    print(f1_score(y_test, predictions))

In [162]:
CatBoostClassifierOptimal(X_train_c, y_train_c, X_test_c, y_test_c)

0:	learn: 0.0058758	total: 7.63ms	remaining: 7.62s
1:	learn: 0.0003122	total: 14.8ms	remaining: 7.39s
2:	learn: 0.0000707	total: 21.7ms	remaining: 7.22s
3:	learn: 0.0000614	total: 27ms	remaining: 6.73s
4:	learn: 0.0000397	total: 32.6ms	remaining: 6.49s
5:	learn: 0.0000315	total: 37.9ms	remaining: 6.28s
6:	learn: 0.0000315	total: 42.8ms	remaining: 6.08s
7:	learn: 0.0000315	total: 47.6ms	remaining: 5.9s
8:	learn: 0.0000315	total: 52.5ms	remaining: 5.78s
9:	learn: 0.0000315	total: 57.4ms	remaining: 5.68s
10:	learn: 0.0000315	total: 62.4ms	remaining: 5.61s
11:	learn: 0.0000315	total: 67.6ms	remaining: 5.57s
12:	learn: 0.0000315	total: 72.6ms	remaining: 5.51s
13:	learn: 0.0000315	total: 77.4ms	remaining: 5.45s
14:	learn: 0.0000315	total: 82.1ms	remaining: 5.39s
15:	learn: 0.0000315	total: 86.8ms	remaining: 5.33s
16:	learn: 0.0000315	total: 91.9ms	remaining: 5.31s
17:	learn: 0.0000315	total: 97ms	remaining: 5.29s
18:	learn: 0.0000315	total: 102ms	remaining: 5.27s
19:	learn: 0.0000315	total: 

In [163]:
def CatBoostRegressorOptima(x_train, y_train, x_test, y_test):
    parameters={
            "depth" : [1,3],
           "learning_rate": np.arange(0, 1, 0.5,) }
    search = RandomizedSearchCV(CatBoostRegressor(),param_distributions=parameters )
    
    optimalDecitionTreeReg = search.fit(x_train,y_train)
    y_pred = optimalDecitionTreeReg.predict(x_test)
    print(search.best_params_)
    print(f'MAE: {mean_absolute_error(y_test, y_pred)}')
    print(f'MSE: {mean_squared_error(y_test, y_pred)}')
    print(f'RMSE: {sqrt(mean_squared_error(y_test, y_pred))}')
    print(f'MAPE: {(mean_absolute_percentage_error(y_test, y_pred))}')
    print(f'R^2: {r2_score(y_test, y_pred)}')

In [164]:
CatBoostRegressorOptima(X_train,y_train, X_test, y_test)

0:	learn: 6241.3849220	total: 2.51ms	remaining: 2.51s
1:	learn: 6237.5772037	total: 4.69ms	remaining: 2.34s
2:	learn: 6234.2577011	total: 7.03ms	remaining: 2.34s
3:	learn: 6232.9151777	total: 9.13ms	remaining: 2.27s
4:	learn: 6232.1304165	total: 11.3ms	remaining: 2.25s
5:	learn: 6231.2855075	total: 13.6ms	remaining: 2.24s
6:	learn: 6231.0503324	total: 16.9ms	remaining: 2.39s
7:	learn: 6230.7419636	total: 19.4ms	remaining: 2.41s
8:	learn: 6230.6648684	total: 21.6ms	remaining: 2.38s
9:	learn: 6230.5842252	total: 24.3ms	remaining: 2.4s
10:	learn: 6230.5244812	total: 26.5ms	remaining: 2.39s
11:	learn: 6230.4663353	total: 28.7ms	remaining: 2.36s
12:	learn: 6230.3870906	total: 31.1ms	remaining: 2.36s
13:	learn: 6230.2003108	total: 33.8ms	remaining: 2.38s
14:	learn: 6230.0262646	total: 36.3ms	remaining: 2.38s
15:	learn: 6230.0123127	total: 39.2ms	remaining: 2.41s
16:	learn: 6229.9053014	total: 42.1ms	remaining: 2.43s
17:	learn: 6229.7562291	total: 44.5ms	remaining: 2.43s
18:	learn: 6229.71789