In [162]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import pickle
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from rfpimp import permutation_importances
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [164]:
# In order to get reproducible results

# Seed value (can actually be different for each attribution step)
seed_value= 0

# 1. Set `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED']=str(seed_value)

# 2. Set `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)

# 3. Set `numpy` pseudo-random generator at a fixed value
import numpy as np
np.random.seed(seed_value)

In [165]:
def load_data():
    
    def dummie_and_drop(df, name):
        # Creates a dummy variable, concatenates it and finally drops the original categorical variable.
        # In order not to have redundant variables, one of the dummy variables is dropped too
        dummies = pd.get_dummies(df[name]).rename(columns = lambda x: name + '_' + str(x))
        dummies = dummies.drop(dummies.columns[-1], axis = 1)
        df = pd.concat([df, dummies], axis = 1)
        df.drop(columns = [name], inplace=True, axis=1)

        return df
    
    def convert_to_categorical(df, categorical_variables, categories, need_pickup = True):
        """ 
        The dataframe's selected variables are converted to categorical, and each variable's categories are also specified.
        It is also specified if the "pickup community area" has to be converted into categorical or no. If it is not 
        converted into categorical it is because it's not going to be used in the model.            
        """
        
        if need_pickup:
            begin = 0
        else:
            df.drop(columns = ['pickup_community_area'], inplace = True, axis = 1)
            begin = 1
        
        for i in range(begin, len(categorical_variables)):
            df[categorical_variables[i]] = df[categorical_variables[i]].astype('category').cat.set_categories(categories[i])
        return df
    
    
    def load(name, need_pickup = False, drop_correlated = False):
    
        # This parameter has to be set to True if the "pickup_community_area" variable is needed in the model
        

        # Load needed dataset and choose the useful columns
        df = pd.read_csv(name) #'dataset_train.csv')

        x = df[['pickup_community_area' ,'temperature', 'relative_humidity', 'wind_direction', 'wind_speed', 'precipitation_cat', 
                'sky_level', 'daytype', 'Day Name', 'Month', 'Hour', 'Fare Last Month', 'Trips Last Hour',
                'Trips Last Week (Same Hour)', 'Trips 2 Weeks Ago (Same Hour)', 'Year']]
#        float32=['temperature','relative_humidity','wind_direction','wind_speed','Fare Last Month', 'Trips Last Hour',
#                'Trips Last Week (Same Hour)', 'Trips 2 Weeks Ago (Same Hour)']
#        x= x[float32]=x[float32].astype('float32')
        # Convert the categorical variables
        categorical_variables = ['pickup_community_area', 'daytype', 'sky_level', 'Day Name', 'Month','Hour', 'Year']
        categories = [[*(range(1,78))], ['U', 'W', 'A'], ['OVC', 'BKN', 'SCT', 'FEW', 'CLR', 'VV '], 
                      ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'], 
                      [*(range(1,13))], [*(range(0, 24))], [2017, 2018, 2019]]

        
        
        x = convert_to_categorical(x, categorical_variables, categories, need_pickup = need_pickup)

        float32=['temperature','relative_humidity','wind_direction','wind_speed','Fare Last Month', 'Trips Last Hour',
                'Trips Last Week (Same Hour)', 'Trips 2 Weeks Ago (Same Hour)']
        
        x[float32]=x[float32].astype('float32')
        # Make dummy variables with the categorical ones
        if need_pickup:
            begin = 0
        else:
            begin = 1
        for i in range(begin, len(categorical_variables)):
            x = dummie_and_drop(x, name = categorical_variables[i])

        y = df['Trips'].to_numpy()

        if need_pickup == False:
            # If we don't need the pickup, it means this is Neural Network case. Therefore we have to modify Y, in order
            # to have "n_areas" outputs per input (because there are "n_areas" regressions per input)
            n_areas = 77
            y = np.reshape(y, [-1, n_areas]) # If 
        
        if drop_correlated:
            x.drop(columns = ['Trips Last Week (Same Hour)'], inplace = True, axis = 1)
            x.drop(columns = ['Trips 2 Weeks Ago (Same Hour)'], inplace = True, axis = 1)

#        x = x.to_numpy()
        
        return (x,y)   
    
# ------------------------------------- MAIN PROGRAM ------------------------

    need_pickup = True 
    drop_correlated = False
    
    
    name_train = 'dataset_train.csv'
#    name_test = 'dataset_test.csv'
    x, y = load(name_train, need_pickup, drop_correlated)
#    x_test, y_test = load(name_test, need_pickup, drop_correlated)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15,shuffle=True)
    
    return (x_train, x_test, y_train, y_test)

In [166]:
x_train, x_test, y_train, y_test=load_data()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [167]:
#No. of traning rows and col 
x_train.shape

(1584937, 134)

### Traning the data using random forest regressor and getting standard hyperparameters

In [168]:
rf1 = RandomForestRegressor(n_estimators = 128,bootstrap=True,min_samples_leaf=8,oob_score=True,n_jobs=-1,max_features=0.5,verbose=4)

In [169]:
rf1=rf1.fit(x_train, y_train)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


building tree 1 of 128
building tree 2 of 128
building tree 3 of 128
building tree 4 of 128
building tree 5 of 128
building tree 6 of 128
building tree 7 of 128
building tree 8 of 128building tree 9 of 128
building tree 10 of 128

building tree 11 of 128
building tree 12 of 128
building tree 13 of 128
building tree 14 of 128
building tree 15 of 128
building tree 16 of 128
building tree 17 of 128
building tree 18 of 128
building tree 19 of 128building tree 20 of 128

building tree 21 of 128
building tree 22 of 128
building tree 23 of 128
building tree 24 of 128
building tree 25 of 128
building tree 26 of 128
building tree 27 of 128
building tree 28 of 128
building tree 29 of 128
building tree 30 of 128
building tree 31 of 128
building tree 32 of 128
building tree 33 of 128
building tree 34 of 128
building tree 35 of 128
building tree 36 of 128
building tree 37 of 128
building tree 38 of 128
building tree 39 of 128
building tree 40 of 128
building tree 41 of 128
building tree 42 of 128
b

[Parallel(n_jobs=-1)]: Done  58 tasks      | elapsed:  4.0min


building tree 78 of 128
building tree 79 of 128
building tree 80 of 128
building tree 81 of 128
building tree 82 of 128
building tree 83 of 128
building tree 84 of 128
building tree 85 of 128
building tree 86 of 128
building tree 87 of 128
building tree 88 of 128
building tree 89 of 128
building tree 90 of 128
building tree 91 of 128
building tree 92 of 128
building tree 93 of 128
building tree 94 of 128
building tree 95 of 128
building tree 96 of 128
building tree 97 of 128
building tree 98 of 128
building tree 99 of 128
building tree 100 of 128
building tree 101 of 128
building tree 102 of 128
building tree 103 of 128
building tree 104 of 128
building tree 105 of 128
building tree 106 of 128
building tree 107 of 128
building tree 108 of 128
building tree 109 of 128
building tree 110 of 128
building tree 111 of 128
building tree 112 of 128
building tree 113 of 128
building tree 114 of 128
building tree 115 of 128
building tree 116 of 128
building tree 117 of 128
building tree 118 of 1

[Parallel(n_jobs=-1)]: Done 122 out of 128 | elapsed:  8.3min remaining:   24.4s
[Parallel(n_jobs=-1)]: Done 128 out of 128 | elapsed:  8.4min finished


In [170]:
def rmse(predictions, targets): 

    return np.sqrt(((predictions - targets) ** 2).mean())

In [171]:
def metric(rf):
    
    print("Traning Score")
    print(rf.score(x_train,y_train))
    print("Test Score")
    print(rf.score(x_test,y_test))
    print("MAE Train")
    train=np.rint(rf.predict(x_train))
    test=np.rint(rf.predict(x_test))
    print(mean_absolute_error(train, y_train))
    print("MAE Test")
    print(mean_absolute_error(test, y_test))

    print("MSE Train")
    print(mean_squared_error(train, y_train))
    print("MSE Test")
    print(mean_squared_error(test, y_test))
    print("RMSE Train")
    print(rmse(train,y_train))
    print("RMSE Test")
    print(rmse(test,y_test))

In [172]:
metric(rf1)

Traning Score


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   36.3s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:  1.5min remaining:    4.2s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:  1.5min finished


0.9728923616048942
Test Score


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:    7.0s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:   15.5s remaining:    0.7s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:   15.6s finished


0.9575393705894802
MAE Train


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   34.2s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:  1.4min remaining:    4.1s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:  1.4min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:    6.0s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:   15.2s remaining:    0.7s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:   15.2s finished


0.756935449169273
MAE Test
0.9653265163839182
MSE Train
7.25883047717354
MSE Test
11.441259228802803
RMSE Train
2.69422168300486
RMSE Test
3.3824930493354755


In [173]:
#Printing the Important features
def feature_Imp(rf):
    importances = rf1.feature_importances_
    std = np.std([tree.feature_importances_ for tree in rf.estimators_],
                 axis=0)
    indices = np.argsort(importances)[::-1]

    # Print the feature ranking
    print("Feature ranking:")

    for f in range(x_train.shape[1]):
        print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))
    
    print("Top 10 Feature")
    print(x_train.columns[indices[:10]])
    l=len(indices)
    print("Least Important 10 Feature")
    print(x_train.columns[indices[(l-10):]])

In [174]:
feature_Imp(rf1)

Feature ranking:
1. feature 6 (0.528896)
2. feature 7 (0.265902)
3. feature 8 (0.155177)
4. feature 5 (0.018457)
5. feature 16 (0.006476)
6. feature 40 (0.003958)
7. feature 1 (0.002112)
8. feature 0 (0.001901)
9. feature 116 (0.001743)
10. feature 2 (0.001474)
11. feature 36 (0.001401)
12. feature 117 (0.001308)
13. feature 3 (0.001251)
14. feature 120 (0.000841)
15. feature 86 (0.000738)
16. feature 85 (0.000655)
17. feature 132 (0.000526)
18. feature 113 (0.000395)
19. feature 114 (0.000392)
20. feature 125 (0.000390)
21. feature 129 (0.000388)
22. feature 126 (0.000374)
23. feature 84 (0.000341)
24. feature 110 (0.000300)
25. feature 97 (0.000292)
26. feature 133 (0.000249)
27. feature 90 (0.000249)
28. feature 14 (0.000206)
29. feature 128 (0.000199)
30. feature 92 (0.000183)
31. feature 115 (0.000165)
32. feature 127 (0.000161)
33. feature 131 (0.000158)
34. feature 109 (0.000154)
35. feature 41 (0.000132)
36. feature 96 (0.000129)
37. feature 100 (0.000125)
38. feature 95 (0.000

### Traning the data after removing the correlated col

In [177]:
def load_data():
    
    def dummie_and_drop(df, name):
        # Creates a dummy variable, concatenates it and finally drops the original categorical variable.
        # In order not to have redundant variables, one of the dummy variables is dropped too
        dummies = pd.get_dummies(df[name]).rename(columns = lambda x: name + '_' + str(x))
        dummies = dummies.drop(dummies.columns[-1], axis = 1)
        df = pd.concat([df, dummies], axis = 1)
        df.drop(columns = [name], inplace=True, axis=1)

        return df
    
    def convert_to_categorical(df, categorical_variables, categories, need_pickup = True):
        """ 
        The dataframe's selected variables are converted to categorical, and each variable's categories are also specified.
        It is also specified if the "pickup community area" has to be converted into categorical or no. If it is not 
        converted into categorical it is because it's not going to be used in the model.            
        """
        
        if need_pickup:
            begin = 0
        else:
            df.drop(columns = ['pickup_community_area'], inplace = True, axis = 1)
            begin = 1
        
        for i in range(begin, len(categorical_variables)):
            df[categorical_variables[i]] = df[categorical_variables[i]].astype('category').cat.set_categories(categories[i])
        return df
    
    
    def load(name, need_pickup = False, drop_correlated = False):
    
        # This parameter has to be set to True if the "pickup_community_area" variable is needed in the model
        

        # Load needed dataset and choose the useful columns
        df = pd.read_csv(name) #'dataset_train.csv')

        x = df[['pickup_community_area' ,'temperature', 'relative_humidity', 'wind_direction', 'wind_speed', 'precipitation_cat', 
                'sky_level', 'daytype', 'Day Name', 'Month', 'Hour', 'Fare Last Month', 'Trips Last Hour',
                'Trips Last Week (Same Hour)', 'Trips 2 Weeks Ago (Same Hour)', 'Year']]
#        float32=['temperature','relative_humidity','wind_direction','wind_speed','Fare Last Month', 'Trips Last Hour',
#                'Trips Last Week (Same Hour)', 'Trips 2 Weeks Ago (Same Hour)']
#        x= x[float32]=x[float32].astype('float32')
        # Convert the categorical variables
        categorical_variables = ['pickup_community_area', 'daytype', 'sky_level', 'Day Name', 'Month','Hour', 'Year']
        categories = [[*(range(1,78))], ['U', 'W', 'A'], ['OVC', 'BKN', 'SCT', 'FEW', 'CLR', 'VV '], 
                      ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'], 
                      [*(range(1,13))], [*(range(0, 24))], [2017, 2018, 2019]]

        
        
        x = convert_to_categorical(x, categorical_variables, categories, need_pickup = need_pickup)

        float32=['temperature','relative_humidity','wind_direction','wind_speed','Fare Last Month', 'Trips Last Hour',
                'Trips Last Week (Same Hour)', 'Trips 2 Weeks Ago (Same Hour)']
        
        x[float32]=x[float32].astype('float32')
        # Make dummy variables with the categorical ones
        if need_pickup:
            begin = 0
        else:
            begin = 1
        for i in range(begin, len(categorical_variables)):
            x = dummie_and_drop(x, name = categorical_variables[i])

        y = df['Trips'].to_numpy()

        if need_pickup == False:
            # If we don't need the pickup, it means this is Neural Network case. Therefore we have to modify Y, in order
            # to have "n_areas" outputs per input (because there are "n_areas" regressions per input)
            n_areas = 77
            y = np.reshape(y, [-1, n_areas]) # If 
        
        if drop_correlated:
            x.drop(columns = ['Trips Last Week (Same Hour)'], inplace = True, axis = 1)
            x.drop(columns = ['Trips 2 Weeks Ago (Same Hour)'], inplace = True, axis = 1)

#        x = x.to_numpy()
        
        return (x,y)   
    
# ------------------------------------- MAIN PROGRAM ------------------------

    need_pickup = True 
    drop_correlated = True
    
    
    name_train = 'dataset_train.csv'
#    name_test = 'dataset_test.csv'
    x, y = load(name_train, need_pickup, drop_correlated)
#    x_test, y_test = load(name_test, need_pickup, drop_correlated)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15,shuffle=True)
    
    return (x_train, x_test, y_train, y_test)

In [178]:
x_train, x_test, y_train, y_test=load_data()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [1]:
#Setting the standard hyperparameters

In [179]:
rf2 = RandomForestRegressor(n_estimators = 128,bootstrap=True,min_samples_leaf=8,oob_score=True,n_jobs=-1,max_features=0.5,verbose=4)

rf2=rf2.fit(x_train, y_train)

rf2.score(x_train,y_train)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


building tree 1 of 128
building tree 2 of 128
building tree 3 of 128
building tree 4 of 128
building tree 5 of 128
building tree 6 of 128
building tree 7 of 128
building tree 8 of 128
building tree 9 of 128
building tree 10 of 128
building tree 11 of 128
building tree 12 of 128
building tree 13 of 128
building tree 14 of 128
building tree 15 of 128
building tree 16 of 128
building tree 17 of 128
building tree 18 of 128
building tree 19 of 128
building tree 20 of 128
building tree 21 of 128
building tree 22 of 128
building tree 23 of 128
building tree 24 of 128
building tree 25 of 128
building tree 26 of 128
building tree 27 of 128
building tree 28 of 128
building tree 29 of 128
building tree 30 of 128
building tree 31 of 128
building tree 32 of 128
building tree 33 of 128
building tree 34 of 128
building tree 35 of 128
building tree 36 of 128
building tree 37 of 128
building tree 38 of 128
building tree 39 of 128
building tree 40 of 128
building tree 41 of 128
building tree 42 of 128
b

[Parallel(n_jobs=-1)]: Done  58 tasks      | elapsed:  4.3min


building tree 78 of 128
building tree 79 of 128
building tree 80 of 128
building tree 81 of 128
building tree 82 of 128
building tree 83 of 128
building tree 84 of 128
building tree 85 of 128
building tree 86 of 128
building tree 87 of 128
building tree 88 of 128
building tree 89 of 128
building tree 90 of 128
building tree 91 of 128
building tree 92 of 128
building tree 93 of 128
building tree 94 of 128
building tree 95 of 128
building tree 96 of 128
building tree 97 of 128
building tree 98 of 128
building tree 99 of 128
building tree 100 of 128
building tree 101 of 128
building tree 102 of 128
building tree 103 of 128
building tree 104 of 128
building tree 105 of 128
building tree 106 of 128
building tree 107 of 128
building tree 108 of 128
building tree 109 of 128
building tree 110 of 128
building tree 111 of 128
building tree 112 of 128
building tree 113 of 128
building tree 114 of 128
building tree 115 of 128
building tree 116 of 128
building tree 117 of 128
building tree 118 of 1

[Parallel(n_jobs=-1)]: Done 122 out of 128 | elapsed:  8.7min remaining:   25.6s
[Parallel(n_jobs=-1)]: Done 128 out of 128 | elapsed:  8.9min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   36.1s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:  1.5min remaining:    4.4s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:  1.5min finished


0.9726030551218093

In [180]:
metric(rf2)

Traning Score


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   36.8s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:  1.5min remaining:    4.4s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:  1.5min finished


0.9726030551218093
Test Score


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:    6.5s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:   16.0s remaining:    0.7s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:   16.0s finished


0.9582400738887416
MAE Train


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   36.2s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:  1.5min remaining:    4.4s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:  1.5min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:    6.7s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:   16.0s remaining:    0.7s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:   16.1s finished


0.7648682565931643
MAE Test
0.9419725057652085
MSE Train
7.365646710247789
MSE Test
10.999520906701942
RMSE Train
2.7139724962216896
RMSE Test
3.316552563536713


## We can clearly see that the metric got worse after dropping the correlated
## Therefore we will not drop them in future

In [181]:
def load_data():
    
    def dummie_and_drop(df, name):
        # Creates a dummy variable, concatenates it and finally drops the original categorical variable.
        # In order not to have redundant variables, one of the dummy variables is dropped too
        dummies = pd.get_dummies(df[name]).rename(columns = lambda x: name + '_' + str(x))
        dummies = dummies.drop(dummies.columns[-1], axis = 1)
        df = pd.concat([df, dummies], axis = 1)
        df.drop(columns = [name], inplace=True, axis=1)

        return df
    
    def convert_to_categorical(df, categorical_variables, categories, need_pickup = True):
        """ 
        The dataframe's selected variables are converted to categorical, and each variable's categories are also specified.
        It is also specified if the "pickup community area" has to be converted into categorical or no. If it is not 
        converted into categorical it is because it's not going to be used in the model.            
        """
        
        if need_pickup:
            begin = 0
        else:
            df.drop(columns = ['pickup_community_area'], inplace = True, axis = 1)
            begin = 1
        
        for i in range(begin, len(categorical_variables)):
            df[categorical_variables[i]] = df[categorical_variables[i]].astype('category').cat.set_categories(categories[i])
        return df
    
    
    def load(name, need_pickup = False, drop_correlated = False):
    
        # This parameter has to be set to True if the "pickup_community_area" variable is needed in the model
        

        # Load needed dataset and choose the useful columns
        df = pd.read_csv(name) #'dataset_train.csv')

        x = df[['pickup_community_area' ,'temperature', 'relative_humidity', 'wind_direction', 'wind_speed', 'precipitation_cat', 
                'sky_level', 'daytype', 'Day Name', 'Month', 'Hour', 'Fare Last Month', 'Trips Last Hour',
                'Trips Last Week (Same Hour)', 'Trips 2 Weeks Ago (Same Hour)', 'Year']]
#        float32=['temperature','relative_humidity','wind_direction','wind_speed','Fare Last Month', 'Trips Last Hour',
#                'Trips Last Week (Same Hour)', 'Trips 2 Weeks Ago (Same Hour)']
#        x= x[float32]=x[float32].astype('float32')
        # Convert the categorical variables
        categorical_variables = ['pickup_community_area', 'daytype', 'sky_level', 'Day Name', 'Month','Hour', 'Year']
        categories = [[*(range(1,78))], ['U', 'W', 'A'], ['OVC', 'BKN', 'SCT', 'FEW', 'CLR', 'VV '], 
                      ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'], 
                      [*(range(1,13))], [*(range(0, 24))], [2017, 2018, 2019]]

        
        
        x = convert_to_categorical(x, categorical_variables, categories, need_pickup = need_pickup)

        float32=['temperature','relative_humidity','wind_direction','wind_speed','Fare Last Month', 'Trips Last Hour',
                'Trips Last Week (Same Hour)', 'Trips 2 Weeks Ago (Same Hour)']
        
        x[float32]=x[float32].astype('float32')
        # Make dummy variables with the categorical ones
        if need_pickup:
            begin = 0
        else:
            begin = 1
        for i in range(begin, len(categorical_variables)):
            x = dummie_and_drop(x, name = categorical_variables[i])

        y = df['Trips'].to_numpy()

        if need_pickup == False:
            # If we don't need the pickup, it means this is Neural Network case. Therefore we have to modify Y, in order
            # to have "n_areas" outputs per input (because there are "n_areas" regressions per input)
            n_areas = 77
            y = np.reshape(y, [-1, n_areas]) # If 
        
        if drop_correlated:
            x.drop(columns = ['Trips Last Week (Same Hour)'], inplace = True, axis = 1)
            x.drop(columns = ['Trips 2 Weeks Ago (Same Hour)'], inplace = True, axis = 1)

#        x = x.to_numpy()
        
        return (x,y)   
    
# ------------------------------------- MAIN PROGRAM ------------------------

    need_pickup = True 
    drop_correlated = False
    
    
    name_train = 'dataset_train.csv'
#    name_test = 'dataset_test.csv'
    x, y = load(name_train, need_pickup, drop_correlated)
#    x_test, y_test = load(name_test, need_pickup, drop_correlated)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15,shuffle=True)
    
    return (x_train, x_test, y_train, y_test)

In [182]:
x_train, x_test, y_train, y_test=load_data()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


# Setting diffrent hyper-parameters to get the best metric.

In [183]:
rf3 = RandomForestRegressor(n_estimators = 256,bootstrap=True,min_samples_leaf=64,oob_score=True,n_jobs=-1,
                           max_features='sqrt',verbose=4,min_samples_split=64 )

rf3=rf3.fit(x_train, y_train)


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


building tree 1 of 256
building tree 2 of 256
building tree 3 of 256
building tree 4 of 256building tree 5 of 256
building tree 6 of 256

building tree 7 of 256
building tree 8 of 256
building tree 9 of 256building tree 10 of 256building tree 11 of 256
building tree 12 of 256

building tree 13 of 256

building tree 14 of 256
building tree 15 of 256
building tree 16 of 256
building tree 17 of 256
building tree 18 of 256building tree 19 of 256
building tree 20 of 256

building tree 21 of 256
building tree 22 of 256
building tree 23 of 256
building tree 24 of 256
building tree 25 of 256
building tree 26 of 256
building tree 27 of 256
building tree 28 of 256
building tree 29 of 256
building tree 30 of 256
building tree 31 of 256
building tree 32 of 256
building tree 33 of 256
building tree 34 of 256
building tree 35 of 256
building tree 36 of 256
building tree 37 of 256
building tree 38 of 256
building tree 39 of 256
building tree 40 of 256
building tree 41 of 256
building tree 42 of 256
b

[Parallel(n_jobs=-1)]: Done  58 tasks      | elapsed:  1.3min


building tree 79 of 256
building tree 80 of 256
building tree 81 of 256
building tree 82 of 256
building tree 83 of 256
building tree 84 of 256
building tree 85 of 256
building tree 86 of 256
building tree 87 of 256
building tree 88 of 256
building tree 89 of 256
building tree 90 of 256
building tree 91 of 256
building tree 92 of 256
building tree 93 of 256
building tree 94 of 256
building tree 95 of 256
building tree 96 of 256
building tree 97 of 256
building tree 98 of 256
building tree 99 of 256
building tree 100 of 256
building tree 101 of 256
building tree 102 of 256
building tree 103 of 256
building tree 104 of 256
building tree 105 of 256
building tree 106 of 256
building tree 107 of 256
building tree 108 of 256
building tree 109 of 256
building tree 110 of 256
building tree 111 of 256
building tree 112 of 256
building tree 113 of 256
building tree 114 of 256
building tree 115 of 256
building tree 116 of 256
building tree 117 of 256
building tree 118 of 256
building tree 119 of 

[Parallel(n_jobs=-1)]: Done 181 tasks      | elapsed:  4.1min


building tree 201 of 256
building tree 202 of 256
building tree 203 of 256
building tree 204 of 256
building tree 205 of 256
building tree 206 of 256
building tree 207 of 256
building tree 208 of 256
building tree 209 of 256
building tree 210 of 256
building tree 211 of 256
building tree 212 of 256
building tree 213 of 256
building tree 214 of 256
building tree 215 of 256
building tree 216 of 256
building tree 217 of 256
building tree 218 of 256
building tree 219 of 256
building tree 220 of 256
building tree 221 of 256
building tree 222 of 256
building tree 223 of 256
building tree 224 of 256
building tree 225 of 256
building tree 226 of 256
building tree 227 of 256
building tree 228 of 256
building tree 229 of 256
building tree 230 of 256
building tree 231 of 256
building tree 232 of 256
building tree 233 of 256
building tree 234 of 256
building tree 235 of 256
building tree 236 of 256
building tree 237 of 256
building tree 238 of 256
building tree 239 of 256
building tree 240 of 256


[Parallel(n_jobs=-1)]: Done 256 out of 256 | elapsed:  5.5min finished


In [184]:
metric(rf3)

Traning Score


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   26.0s
[Parallel(n_jobs=20)]: Done 181 tasks      | elapsed:  1.7min
[Parallel(n_jobs=20)]: Done 256 out of 256 | elapsed:  2.3min finished


0.9386046293730707
Test Score


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:    4.4s
[Parallel(n_jobs=20)]: Done 181 tasks      | elapsed:   17.6s
[Parallel(n_jobs=20)]: Done 256 out of 256 | elapsed:   24.3s finished


0.937499777107107
MAE Train


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   24.9s
[Parallel(n_jobs=20)]: Done 181 tasks      | elapsed:  1.7min
[Parallel(n_jobs=20)]: Done 256 out of 256 | elapsed:  2.3min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:    5.3s
[Parallel(n_jobs=20)]: Done 181 tasks      | elapsed:   17.6s


1.0629368864503763
MAE Test
1.0656965623268202
MSE Train
16.46358435698075
MSE Test
16.405455943080856
RMSE Train
4.0575342705861095
RMSE Test
4.050364914804696


[Parallel(n_jobs=20)]: Done 256 out of 256 | elapsed:   24.3s finished


In [185]:
rf4 = RandomForestRegressor(n_estimators = 256,bootstrap=True,min_samples_leaf=64,oob_score=True,n_jobs=-1,
                           max_features=0.4,verbose=4,min_samples_split=64 )
rf4=rf4.fit(x_train, y_train)


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


building tree 1 of 256building tree 2 of 256
building tree 3 of 256
building tree 4 of 256
building tree 5 of 256
building tree 6 of 256
building tree 7 of 256building tree 8 of 256


building tree 9 of 256
building tree 10 of 256
building tree 11 of 256
building tree 12 of 256
building tree 13 of 256
building tree 14 of 256building tree 15 of 256

building tree 16 of 256
building tree 17 of 256building tree 18 of 256
building tree 19 of 256
building tree 20 of 256

building tree 21 of 256
building tree 22 of 256
building tree 23 of 256
building tree 24 of 256
building tree 25 of 256
building tree 26 of 256
building tree 27 of 256
building tree 28 of 256
building tree 29 of 256
building tree 30 of 256
building tree 31 of 256
building tree 32 of 256
building tree 33 of 256
building tree 34 of 256
building tree 35 of 256
building tree 36 of 256
building tree 37 of 256
building tree 38 of 256
building tree 39 of 256
building tree 40 of 256
building tree 41 of 256
building tree 42 of 256
b

[Parallel(n_jobs=-1)]: Done  58 tasks      | elapsed:  3.2min


building tree 79 of 256
building tree 80 of 256
building tree 81 of 256
building tree 82 of 256
building tree 83 of 256
building tree 84 of 256
building tree 85 of 256
building tree 86 of 256
building tree 87 of 256
building tree 88 of 256
building tree 89 of 256
building tree 90 of 256
building tree 91 of 256
building tree 92 of 256
building tree 93 of 256
building tree 94 of 256
building tree 95 of 256
building tree 96 of 256
building tree 97 of 256
building tree 98 of 256
building tree 99 of 256
building tree 100 of 256
building tree 101 of 256
building tree 102 of 256
building tree 103 of 256
building tree 104 of 256
building tree 105 of 256
building tree 106 of 256
building tree 107 of 256
building tree 108 of 256
building tree 109 of 256
building tree 110 of 256
building tree 111 of 256
building tree 112 of 256
building tree 113 of 256
building tree 114 of 256
building tree 115 of 256
building tree 116 of 256
building tree 117 of 256
building tree 118 of 256
building tree 119 of 

[Parallel(n_jobs=-1)]: Done 181 tasks      | elapsed:  9.6min


building tree 201 of 256
building tree 202 of 256
building tree 203 of 256
building tree 204 of 256
building tree 205 of 256
building tree 206 of 256
building tree 207 of 256
building tree 208 of 256
building tree 209 of 256
building tree 210 of 256
building tree 211 of 256
building tree 212 of 256
building tree 213 of 256
building tree 214 of 256
building tree 215 of 256
building tree 216 of 256
building tree 217 of 256
building tree 218 of 256
building tree 219 of 256
building tree 220 of 256
building tree 221 of 256
building tree 222 of 256
building tree 223 of 256
building tree 224 of 256
building tree 225 of 256
building tree 226 of 256
building tree 227 of 256
building tree 228 of 256
building tree 229 of 256
building tree 230 of 256
building tree 231 of 256
building tree 232 of 256
building tree 233 of 256
building tree 234 of 256
building tree 235 of 256
building tree 236 of 256
building tree 237 of 256
building tree 238 of 256
building tree 239 of 256
building tree 240 of 256


[Parallel(n_jobs=-1)]: Done 256 out of 256 | elapsed: 12.7min finished


# Best metric in the hyper-parameters

In [186]:
metric(rf4)

Traning Score


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   21.9s
[Parallel(n_jobs=20)]: Done 181 tasks      | elapsed:  1.4min
[Parallel(n_jobs=20)]: Done 256 out of 256 | elapsed:  1.9min finished


0.9536129966789673
Test Score


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:    4.8s
[Parallel(n_jobs=20)]: Done 181 tasks      | elapsed:   15.0s
[Parallel(n_jobs=20)]: Done 256 out of 256 | elapsed:   20.2s finished


0.9516296617820463
MAE Train


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   22.1s
[Parallel(n_jobs=20)]: Done 181 tasks      | elapsed:  1.4min
[Parallel(n_jobs=20)]: Done 256 out of 256 | elapsed:  1.9min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:    4.3s
[Parallel(n_jobs=20)]: Done 181 tasks      | elapsed:   14.6s


0.979572058700125
MAE Test
0.9962280341085825
MSE Train
12.453671029195482
MSE Test
12.706984393714581
RMSE Train
3.528975917910957
RMSE Test
3.5646857356174584


[Parallel(n_jobs=20)]: Done 256 out of 256 | elapsed:   20.4s finished


In [195]:
feature_Imp(rf4)


Feature ranking:
1. feature 6 (0.528896)
2. feature 7 (0.265902)
3. feature 8 (0.155177)
4. feature 5 (0.018457)
5. feature 16 (0.006476)
6. feature 40 (0.003958)
7. feature 1 (0.002112)
8. feature 0 (0.001901)
9. feature 116 (0.001743)
10. feature 2 (0.001474)
11. feature 36 (0.001401)
12. feature 117 (0.001308)
13. feature 3 (0.001251)
14. feature 120 (0.000841)
15. feature 86 (0.000738)
16. feature 85 (0.000655)
17. feature 132 (0.000526)
18. feature 113 (0.000395)
19. feature 114 (0.000392)
20. feature 125 (0.000390)
21. feature 129 (0.000388)
22. feature 126 (0.000374)
23. feature 84 (0.000341)
24. feature 110 (0.000300)
25. feature 97 (0.000292)
26. feature 133 (0.000249)
27. feature 90 (0.000249)
28. feature 14 (0.000206)
29. feature 128 (0.000199)
30. feature 92 (0.000183)
31. feature 115 (0.000165)
32. feature 127 (0.000161)
33. feature 131 (0.000158)
34. feature 109 (0.000154)
35. feature 41 (0.000132)
36. feature 96 (0.000129)
37. feature 100 (0.000125)
38. feature 95 (0.000

### Saving the model

In [193]:
with open('rf4.pickle', 'wb') as f:
    pickle.dump(rf4, f)



In [187]:
rf5 = RandomForestRegressor(n_estimators =128 ,bootstrap=True,min_samples_leaf=32,oob_score=True,n_jobs=-1,max_features=0.6,verbose=4)

rf5=rf5.fit(x_train, y_train)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


building tree 1 of 128
building tree 2 of 128
building tree 3 of 128
building tree 4 of 128
building tree 5 of 128
building tree 6 of 128
building tree 7 of 128
building tree 8 of 128
building tree 9 of 128
building tree 10 of 128
building tree 11 of 128
building tree 12 of 128
building tree 13 of 128building tree 14 of 128
building tree 15 of 128
building tree 16 of 128
building tree 17 of 128
building tree 18 of 128building tree 19 of 128building tree 20 of 128



building tree 21 of 128
building tree 22 of 128
building tree 23 of 128
building tree 24 of 128
building tree 25 of 128
building tree 26 of 128
building tree 27 of 128
building tree 28 of 128
building tree 29 of 128
building tree 30 of 128
building tree 31 of 128
building tree 32 of 128
building tree 33 of 128
building tree 34 of 128
building tree 35 of 128
building tree 36 of 128
building tree 37 of 128
building tree 38 of 128
building tree 39 of 128
building tree 40 of 128
building tree 41 of 128
building tree 42 of 128
b

[Parallel(n_jobs=-1)]: Done  58 tasks      | elapsed:  4.3min


building tree 78 of 128
building tree 79 of 128
building tree 80 of 128
building tree 81 of 128
building tree 82 of 128
building tree 83 of 128
building tree 84 of 128
building tree 85 of 128
building tree 86 of 128
building tree 87 of 128
building tree 88 of 128
building tree 89 of 128
building tree 90 of 128
building tree 91 of 128
building tree 92 of 128
building tree 93 of 128
building tree 94 of 128
building tree 95 of 128
building tree 96 of 128
building tree 97 of 128
building tree 98 of 128
building tree 99 of 128
building tree 100 of 128
building tree 101 of 128
building tree 102 of 128
building tree 103 of 128
building tree 104 of 128
building tree 105 of 128
building tree 106 of 128
building tree 107 of 128
building tree 108 of 128
building tree 109 of 128
building tree 110 of 128
building tree 111 of 128
building tree 112 of 128
building tree 113 of 128
building tree 114 of 128
building tree 115 of 128
building tree 116 of 128
building tree 117 of 128
building tree 118 of 1

[Parallel(n_jobs=-1)]: Done 122 out of 128 | elapsed:  9.0min remaining:   26.4s
[Parallel(n_jobs=-1)]: Done 128 out of 128 | elapsed:  9.1min finished


In [188]:
metric(rf5)

Traning Score


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   24.6s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:   59.7s remaining:    2.8s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:  1.0min finished


0.9590743083041773
Test Score


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:    4.5s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:   10.2s remaining:    0.4s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:   10.5s finished


0.9541365129824101
MAE Train


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   24.4s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:  1.0min remaining:    2.9s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:  1.0min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:    4.7s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:   10.4s remaining:    0.4s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:   10.6s finished


0.9292325183903208
MAE Test
0.9811151432810741
MSE Train
10.988216566336707
MSE Test
12.052803947156724
RMSE Train
3.314847894902073
RMSE Test
3.471714842431147


In [189]:
rf6 = RandomForestRegressor(n_estimators =128 ,bootstrap=True,min_samples_leaf=32,oob_score=True,n_jobs=-1,max_features='sqrt',verbose=4)

rf6=rf6.fit(x_train, y_train)


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


building tree 1 of 128
building tree 2 of 128building tree 3 of 128
building tree 4 of 128
building tree 5 of 128
building tree 6 of 128
building tree 7 of 128

building tree 8 of 128
building tree 9 of 128building tree 10 of 128

building tree 11 of 128
building tree 12 of 128
building tree 13 of 128building tree 14 of 128

building tree 15 of 128building tree 16 of 128
building tree 17 of 128

building tree 18 of 128
building tree 19 of 128
building tree 20 of 128
building tree 21 of 128
building tree 22 of 128
building tree 23 of 128
building tree 24 of 128
building tree 25 of 128
building tree 26 of 128
building tree 27 of 128
building tree 28 of 128
building tree 29 of 128
building tree 30 of 128
building tree 31 of 128
building tree 32 of 128
building tree 33 of 128
building tree 34 of 128
building tree 35 of 128
building tree 36 of 128
building tree 37 of 128
building tree 38 of 128
building tree 39 of 128
building tree 40 of 128
building tree 41 of 128
building tree 42 of 128
b

[Parallel(n_jobs=-1)]: Done  58 tasks      | elapsed:  1.4min


building tree 78 of 128
building tree 79 of 128
building tree 80 of 128
building tree 81 of 128
building tree 82 of 128
building tree 83 of 128
building tree 84 of 128
building tree 85 of 128
building tree 86 of 128
building tree 87 of 128
building tree 88 of 128
building tree 89 of 128
building tree 90 of 128
building tree 91 of 128
building tree 92 of 128
building tree 93 of 128
building tree 94 of 128
building tree 95 of 128
building tree 96 of 128
building tree 97 of 128
building tree 98 of 128
building tree 99 of 128
building tree 100 of 128
building tree 101 of 128
building tree 102 of 128
building tree 103 of 128
building tree 104 of 128
building tree 105 of 128
building tree 106 of 128
building tree 107 of 128
building tree 108 of 128
building tree 109 of 128
building tree 110 of 128
building tree 111 of 128
building tree 112 of 128
building tree 113 of 128
building tree 114 of 128
building tree 115 of 128
building tree 116 of 128
building tree 117 of 128
building tree 118 of 1

[Parallel(n_jobs=-1)]: Done 122 out of 128 | elapsed:  2.8min remaining:    8.3s
[Parallel(n_jobs=-1)]: Done 128 out of 128 | elapsed:  2.9min finished


In [190]:
metric(rf6)

Traning Score


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   28.4s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:  1.2min remaining:    3.4s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:  1.2min finished


0.9456311784690812
Test Score


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:    5.2s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:   12.1s remaining:    0.5s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:   12.4s finished


0.9433632198185233
MAE Train


[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:   28.7s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:  1.2min remaining:    3.4s
[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:  1.2min finished
[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:    5.4s
[Parallel(n_jobs=20)]: Done 122 out of 128 | elapsed:   12.3s remaining:    0.5s


1.0165104354305565
MAE Test
1.031301953914085
MSE Train
14.58442133662095
MSE Test
14.875303455549796
RMSE Train
3.8189555295422006
RMSE Test
3.8568514951382036


[Parallel(n_jobs=20)]: Done 128 out of 128 | elapsed:   12.5s finished


# After changing hyper-parameters many times we found out that these are best for our dataset

n_estimators = 256,bootstrap=True,min_samples_leaf=64,oob_score=True,n_jobs=-1,
                           max_features=0.4,verbose=4,min_samples_split=64 
                           