In [2]:
import numpy as np
import pandas as pd
from sklearn import ensemble 
from sklearn import metrics
from sklearn import model_selection

if __name__ == "__main__":
    # Randomized hyper-parameter search for a random-forest classifier that
    # predicts the "price_range" column of train.csv from all other columns.

    # One fixed seed shared by the forest and the search, so the sampled
    # candidates, the CV scores and the reported best parameters are the
    # same on every Restart & Run All.
    random_seed = 42

    df = pd.read_csv("train.csv")

    # Features: every column except the target. Target: "price_range".
    X = df.drop("price_range", axis=1).values
    print(X)
    y = df.price_range.values
    print(y)

    # Define the model.
    # n_jobs=-1 => the forest itself uses all cores.
    classifier = ensemble.RandomForestClassifier(
        n_jobs=-1,
        random_state=random_seed,
    )

    # Search space the randomized search samples from:
    #   n_estimators: 100, 200, ..., 1400
    #   max_depth:    1, 2, ..., 30
    #   criterion:    "gini" or "entropy"
    param_grid = {
        "n_estimators": np.arange(100, 1500, 100),
        "max_depth": np.arange(1, 31),
        "criterion": ["gini", "entropy"],
    }

    # Initialize the randomized search.
    # estimator is the model defined above.
    # param_distributions is the search space; n_iter=20 candidates are
    # sampled from it instead of trying every combination.
    # Accuracy is the selection metric.
    # cv=5 means 5-fold CV; because the estimator is a classifier,
    # scikit-learn uses stratified folds for an integer cv.
    # n_jobs=1 keeps the search itself sequential, since each forest
    # already runs on all cores.
    model = model_selection.RandomizedSearchCV(
        estimator=classifier,
        param_distributions=param_grid,
        n_iter=20,
        scoring="accuracy",
        verbose=10,
        n_jobs=1,
        cv=5,
        random_state=random_seed,
    )

    # Fit the search and report the best mean CV score.
    model.fit(X, y)

    print(f"Best score : {model.best_score_}")

    print("Best parameters set : ")
    # best_params_ holds exactly the sampled setting that won the search.
    best_parameters = model.best_params_

    for param_name in sorted(param_grid.keys()):
        print(f"\t{param_name}: {best_parameters[param_name]}")

[[8.420e+02 0.000e+00 2.200e+00 ... 0.000e+00 0.000e+00 1.000e+00]
 [1.021e+03 1.000e+00 5.000e-01 ... 1.000e+00 1.000e+00 0.000e+00]
 [5.630e+02 1.000e+00 5.000e-01 ... 1.000e+00 1.000e+00 0.000e+00]
 ...
 [1.911e+03 0.000e+00 9.000e-01 ... 1.000e+00 1.000e+00 0.000e+00]
 [1.512e+03 0.000e+00 9.000e-01 ... 1.000e+00 1.000e+00 1.000e+00]
 [5.100e+02 1.000e+00 2.000e+00 ... 1.000e+00 1.000e+00 1.000e+00]]
[1 2 2 ... 3 0 3]
Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5; 1/20] START criterion=gini, max_depth=13, n_estimators=100.............
[CV 1/5; 1/20] END criterion=gini, max_depth=13, n_estimators=100;, score=0.882 total time=   1.0s
[CV 2/5; 1/20] START criterion=gini, max_depth=13, n_estimators=100.............
[CV 2/5; 1/20] END criterion=gini, max_depth=13, n_estimators=100;, score=0.882 total time=   0.1s
[CV 3/5; 1/20] START criterion=gini, max_depth=13, n_estimators=100.............
[CV 3/5; 1/20] END criterion=gini, max_depth=13, n_estimators=100;, sco

[CV 4/5; 9/20] END criterion=gini, max_depth=15, n_estimators=900;, score=0.882 total time=   0.6s
[CV 5/5; 9/20] START criterion=gini, max_depth=15, n_estimators=900.............
[CV 5/5; 9/20] END criterion=gini, max_depth=15, n_estimators=900;, score=0.870 total time=   0.6s
[CV 1/5; 10/20] START criterion=entropy, max_depth=12, n_estimators=500.........
[CV 1/5; 10/20] END criterion=entropy, max_depth=12, n_estimators=500;, score=0.880 total time=   0.4s
[CV 2/5; 10/20] START criterion=entropy, max_depth=12, n_estimators=500.........
[CV 2/5; 10/20] END criterion=entropy, max_depth=12, n_estimators=500;, score=0.895 total time=   0.4s
[CV 3/5; 10/20] START criterion=entropy, max_depth=12, n_estimators=500.........
[CV 3/5; 10/20] END criterion=entropy, max_depth=12, n_estimators=500;, score=0.915 total time=   0.4s
[CV 4/5; 10/20] START criterion=entropy, max_depth=12, n_estimators=500.........
[CV 4/5; 10/20] END criterion=entropy, max_depth=12, n_estimators=500;, score=0.873 tota

[CV 4/5; 18/20] END criterion=gini, max_depth=28, n_estimators=1100;, score=0.877 total time=   0.8s
[CV 5/5; 18/20] START criterion=gini, max_depth=28, n_estimators=1100...........
[CV 5/5; 18/20] END criterion=gini, max_depth=28, n_estimators=1100;, score=0.863 total time=   0.7s
[CV 1/5; 19/20] START criterion=entropy, max_depth=9, n_estimators=500..........
[CV 1/5; 19/20] END criterion=entropy, max_depth=9, n_estimators=500;, score=0.882 total time=   0.4s
[CV 2/5; 19/20] START criterion=entropy, max_depth=9, n_estimators=500..........
[CV 2/5; 19/20] END criterion=entropy, max_depth=9, n_estimators=500;, score=0.885 total time=   0.4s
[CV 3/5; 19/20] START criterion=entropy, max_depth=9, n_estimators=500..........
[CV 3/5; 19/20] END criterion=entropy, max_depth=9, n_estimators=500;, score=0.905 total time=   0.4s
[CV 4/5; 19/20] START criterion=entropy, max_depth=9, n_estimators=500..........
[CV 4/5; 19/20] END criterion=entropy, max_depth=9, n_estimators=500;, score=0.870 tota