In [66]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor 
from sklearn.tree import DecisionTreeClassifier 
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import warnings
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
warnings.filterwarnings('ignore')

In [67]:
data_regress= pd.read_csv("../data/trip_duration_task_m.csv")
data_regress.drop(["Unnamed: 0"], axis=1, inplace=True)
data_regress

Unnamed: 0,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,trip_duration
0,-73.953918,40.778873,-73.963875,40.771164,400
1,-73.988312,40.731743,-73.994751,40.694931,1100
2,-73.997314,40.721458,-73.948029,40.774918,1635
3,-73.961670,40.759720,-73.956779,40.780628,1141
4,-74.017120,40.708469,-73.988182,40.740631,848
...,...,...,...,...,...
199489,-73.978088,40.751461,-73.964417,40.764450,565
199490,-73.988548,40.721390,-73.998604,40.693054,800
199491,-73.987770,40.732391,-73.971451,40.760799,472
199492,-73.870796,40.773720,-73.988571,40.721371,2163


In [68]:
y = data_regress['trip_duration']
X = data_regress.drop(['trip_duration'], axis=1)

In [69]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3)

In [70]:
def DecisionTreeRegressorOptimal(x_train, y_train, x_test, y_test):
    parameters={"splitter":["best","random"],
            "max_depth" : [1,3,5,7,9,11,12],
           "min_samples_leaf":[1,2,3,4,5,6,7,8,9,10],
           "min_weight_fraction_leaf":[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
           "max_features":["auto","log2","sqrt",None],
           "max_leaf_nodes":[None,10,20,30,40,50,60,70,80,90] }
    search = RandomizedSearchCV(DecisionTreeRegressor(),param_distributions=parameters, scoring="neg_mean_squared_error", )
    
    optimalDecitionTreeReg = search.fit(x_train,y_train)
    y_pred = optimalDecitionTreeReg.predict(x_test)
    print(search.best_params_)
    print(f'MAE: {mean_absolute_error(y_test, y_pred)}')
    print(f'MSE: {mean_squared_error(y_test, y_pred)}')
    print(f'RMSE: {sqrt(mean_squared_error(y_test, y_pred))}')
    print(f'MAPE: {(mean_absolute_percentage_error(y_test, y_pred))}')
    print(f'R^2: {r2_score(y_test, y_pred)}')

In [71]:
DecisionTreeRegressorOptimal(X_train,y_train, X_test, y_test)

{'splitter': 'best', 'min_weight_fraction_leaf': 0.3, 'min_samples_leaf': 7, 'max_leaf_nodes': 40, 'max_features': 'log2', 'max_depth': 1}
MAE: 619.5816312316038
MSE: 9971403.960967967
RMSE: 3157.7529923931615
MAPE: 1.554404604086877
R^2: 0.0007697208980904646


In [72]:
data_class = pd.read_csv("../data/csgo_task_m.csv")
data_class.drop(["Unnamed: 0"], axis=1, inplace=True)
data_class.head()

Unnamed: 0,time_left,ct_score,t_score,bomb_planted,ct_health,t_health,ct_money,t_money,ct_players_alive,t_players_alive,map_de_cache,map_de_dust2,map_de_inferno,map_de_mirage,map_de_nuke,map_de_overpass,map_de_train,map_de_vertigo
0,175.0,0.0,0.0,1,500.0,500.0,4000.0,4000.0,5.0,5.0,0,1,0,0,0,0,0,0
1,156.03,0.0,0.0,1,500.0,500.0,600.0,650.0,5.0,5.0,0,1,0,0,0,0,0,0
2,96.03,0.0,0.0,1,391.0,400.0,750.0,500.0,4.0,4.0,0,1,0,0,0,0,0,0
3,76.03,0.0,0.0,1,391.0,400.0,750.0,500.0,4.0,4.0,0,1,0,0,0,0,0,0
4,174.97,1.0,0.0,1,500.0,500.0,18350.0,10750.0,5.0,5.0,0,1,0,0,0,0,0,0


In [73]:
X = data_class.values
y = data_class["bomb_planted"].values

In [74]:
overSampler = RandomOverSampler(random_state=42)
X_over_sample, y_over_sample = overSampler.fit_resample(X,y)

In [75]:
X_train, X_test, y_train, y_test = train_test_split(X_over_sample, y_over_sample, test_size=0.2, random_state=4)

In [76]:
def DecisionTreeClassifierOptimal(x_train, y_train, x_test, y_test):
    parameters={"splitter":["best","random"],
            "max_depth" : [1,3,5,7,9,11,12],
           "min_samples_leaf":[1,2,3,4,5,6,7,8,9,10],
           "min_weight_fraction_leaf":[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
           "max_features":["auto","log2","sqrt",None],
           "max_leaf_nodes":[None,10,20,30,40,50,60,70,80,90] }
    search = RandomizedSearchCV(DecisionTreeClassifier(),param_distributions=parameters, scoring="neg_mean_squared_error", )
    optimalDecitionTreeClass = search.fit(x_train,y_train)
    predictions = optimalDecitionTreeClass.predict(x_test)
    print(search.best_params_)
    print(accuracy_score(y_test, predictions))
    print(confusion_matrix(y_test, predictions))
    print(precision_score(y_test, predictions))
    print(recall_score(y_test, predictions))
    print(f1_score(y_test, predictions))
    

In [77]:
DecisionTreeClassifierOptimal(X_train,y_train, X_test, y_test)

{'splitter': 'best', 'min_weight_fraction_leaf': 0.1, 'min_samples_leaf': 5, 'max_leaf_nodes': None, 'max_features': None, 'max_depth': 1}
1.0
[[19769     0]
 [    0 19937]]
1.0
1.0
1.0
