In [2]:
import pandas as pd
dataset = pd.read_csv("PreAnxiety.csv")

In [3]:
dataset.head(3)

Unnamed: 0,1. Age,2. Gender,3. University,4. Department,5. Academic Year,6. Current CGPA,7. Did you receive a waiver or scholarship at your university?,Anxiety Label
0,18-22,Female,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,Second Year or Equivalent,2.50 - 2.99,No,More Anxious
1,18-22,Male,"Independent University, Bangladesh (IUB)",Engineering - CS / CSE / CSC / Similar to CS,Third Year or Equivalent,3.00 - 3.39,No,More Anxious
2,18-22,Male,American International University Bangladesh (...,Engineering - CS / CSE / CSC / Similar to CS,Third Year or Equivalent,3.00 - 3.39,No,Less Anxious


In [4]:
x = dataset.iloc[:,0:7]
y = dataset.iloc[:,-1]

In [5]:
y.value_counts()

More Anxious    1869
Less Anxious     159
Name: Anxiety Label, dtype: int64

<!-- Our Dataset seems to be imbalance, Lets fix this imbalance using SMOTE -->

## Dataset is highly imbalanced, let's balance it using SMOTE

In [6]:
x_dum = pd.get_dummies(x,drop_first=True, dtype=int)
y_dum = pd.get_dummies(y,drop_first=True,dtype=int)

In [7]:
from imblearn.over_sampling import SMOTE

In [8]:
sm = SMOTE(random_state=42)

In [9]:
x_resampled, y_resampled = sm.fit_resample(x_dum,y_dum)

In [76]:
len(x_resampled.columns)

41

In [10]:
y_resampled.value_counts()

More Anxious
0               1869
1               1869
dtype: int64

# Now we got the balanced dataset

## Feature Selection:

In [11]:
from sklearn.ensemble import RandomForestClassifier

In [12]:
re = RandomForestClassifier(n_estimators=100, criterion='gini')

In [13]:
re.fit(x_resampled,y_resampled)

  re.fit(x_resampled,y_resampled)


In [14]:
pd.DataFrame(index=x_resampled.columns,data=re.feature_importances_).sort_values(by=0,ascending=False)

Unnamed: 0,0
3. University_Islamic University of Technology (IUT),0.08639
2. Gender_Male,0.076057
"3. University_Independent University, Bangladesh (IUB)",0.066875
6. Current CGPA_3.00 - 3.39,0.05069
4. Department_Engineering - CS / CSE / CSC / Similar to CS,0.050533
5. Academic Year_Third Year or Equivalent,0.049523
1. Age_23-26,0.048203
6. Current CGPA_3.40 - 3.79,0.046785
7. Did you receive a waiver or scholarship at your university?_Yes,0.044332
5. Academic Year_Second Year or Equivalent,0.04303


## Model Training

## RandomForest

In [15]:
from sklearn.model_selection import GridSearchCV

In [16]:
param_grid = {
    "n_estimators":[200,100],
    'max_features':['sqrt', 'log2', None],
    "criterion":["gini", "entropy", "log_loss"]
}

grid = GridSearchCV(RandomForestClassifier(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)

In [17]:
grid.fit(x_resampled,y_resampled)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


  self.best_estimator_.fit(X, y, **fit_params)


In [18]:
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1.135107,0.059173,0.083199,0.022701,gini,sqrt,200,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.775401,0.836898,0.859626,0.851406,0.862115,0.837089,0.032077,12
1,0.547024,0.01919,0.038907,0.002723,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.779412,0.832888,0.863636,0.848728,0.86747,0.838427,0.031933,10
2,1.088434,0.019657,0.059912,0.005927,gini,log2,200,"{'criterion': 'gini', 'max_features': 'log2', ...",0.783422,0.838235,0.862299,0.851406,0.862115,0.839496,0.029391,6
3,0.520666,0.012461,0.031846,0.003454,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",0.778075,0.838235,0.864973,0.84739,0.868809,0.839496,0.032695,5
4,3.312926,0.273445,0.100311,0.002996,gini,,200,"{'criterion': 'gini', 'max_features': None, 'n...",0.772727,0.838235,0.860963,0.84739,0.863454,0.836554,0.033207,13
5,1.862971,0.085498,0.055252,0.008446,gini,,100,"{'criterion': 'gini', 'max_features': None, 'n...",0.77139,0.835561,0.858289,0.843373,0.85676,0.833075,0.031981,18
6,2.050658,0.069908,0.115655,0.006534,entropy,sqrt,200,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.783422,0.836898,0.871658,0.844712,0.870147,0.841368,0.032045,2
7,1.070672,0.057205,0.065884,0.010257,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.782086,0.835561,0.860963,0.846051,0.871486,0.839229,0.031107,8
8,2.020852,0.013691,0.10774,0.009824,entropy,log2,200,"{'criterion': 'entropy', 'max_features': 'log2...",0.783422,0.836898,0.863636,0.850067,0.864793,0.839763,0.02995,4
9,1.021487,0.033011,0.063641,0.000327,entropy,log2,100,"{'criterion': 'entropy', 'max_features': 'log2...",0.774064,0.840909,0.86631,0.850067,0.86747,0.839764,0.03434,3


In [19]:
grid.best_params_

{'criterion': 'log_loss', 'max_features': 'log2', 'n_estimators': 100}

In [20]:
best_model = grid.best_estimator_

## Logistic Regression

In [21]:
from sklearn.linear_model import LogisticRegression

In [22]:
param_grid = {
    "solver":["lbfgs","liblinear","newton-cg","newton-cholesky"]
}

grid = GridSearchCV(LogisticRegression(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)

In [23]:
grid.fit(x_resampled,y_resampled)

Fitting 5 folds for each of 4 candidates, totalling 20 fits


  y = column_or_1d(y, warn=True)


In [24]:
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_solver,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.195436,0.066002,0.012435,0.003028,lbfgs,{'solver': 'lbfgs'},0.683155,0.78877,0.786096,0.776439,0.789826,0.764857,0.041124,1
1,0.065655,0.022198,0.014521,0.004157,liblinear,{'solver': 'liblinear'},0.685829,0.787433,0.784759,0.780455,0.78581,0.764857,0.039582,1
2,0.108718,0.01371,0.013251,0.006589,newton-cg,{'solver': 'newton-cg'},0.683155,0.78877,0.786096,0.776439,0.789826,0.764857,0.041124,1
3,0.08749,0.050604,0.017151,0.003227,newton-cholesky,{'solver': 'newton-cholesky'},0.683155,0.78877,0.786096,0.776439,0.789826,0.764857,0.041124,1


## Naive Bayes

In [25]:
from sklearn.naive_bayes import ComplementNB

param_grid = {}

grid = GridSearchCV(ComplementNB(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)
grid.fit(x_resampled,y_resampled)
pd.DataFrame(grid.cv_results_)

Fitting 5 folds for each of 1 candidates, totalling 5 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.053273,0.022658,0.013898,0.002994,{},0.644385,0.72861,0.709893,0.733601,0.709505,0.705199,0.031918,1


## Best model appeared with Random Forest

## confusion matrix and classification report for the Best Model

In [26]:
from sklearn.model_selection import train_test_split

In [27]:
x_train, x_test, y_train,y_test = train_test_split(x_resampled,y_resampled,test_size=0.3,random_state=0)

In [28]:
y_pred = best_model.predict(x_test)

In [29]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,y_pred)

array([[540,  14],
       [131, 437]], dtype=int64)

In [30]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.80      0.97      0.88       554
           1       0.97      0.77      0.86       568

    accuracy                           0.87      1122
   macro avg       0.89      0.87      0.87      1122
weighted avg       0.89      0.87      0.87      1122



In [31]:
import pickle

In [32]:
with open("AnxietyModel.sav","wb") as file:
    pickle.dump(best_model,file)

In [35]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

In [130]:
selectBest = SelectKBest(score_func=chi2, k=25)

In [131]:
selectkbest = selectBest.fit(x_resampled,y_resampled).transform(x_resampled)

In [132]:
selectkbest

array([[0, 0, 0, ..., 1, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       ...,
       [0, 0, 1, ..., 0, 1, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0]])

In [133]:
param_grid = {
    "n_estimators":[200,100],
    'max_features':['sqrt', 'log2', None],
    "criterion":["gini", "entropy", "log_loss"]
}

grid = GridSearchCV(RandomForestClassifier(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)
grid.fit(selectkbest,y_resampled)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


  self.best_estimator_.fit(X, y, **fit_params)


In [134]:
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.816964,0.039841,0.056628,0.020288,gini,sqrt,200,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.745989,0.775401,0.807487,0.793842,0.807229,0.78599,0.023182,7
1,0.425451,0.01031,0.038512,0.028979,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.745989,0.778075,0.807487,0.78581,0.803213,0.784115,0.021927,16
2,0.835899,0.009796,0.065008,0.027003,gini,log2,200,"{'criterion': 'gini', 'max_features': 'log2', ...",0.748663,0.778075,0.808824,0.789826,0.804552,0.785988,0.021617,8
3,0.460895,0.014357,0.030679,0.010322,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",0.743316,0.774064,0.807487,0.796519,0.80589,0.785455,0.024211,11
4,1.379325,0.066194,0.054289,0.008047,gini,,200,"{'criterion': 'gini', 'max_features': None, 'n...",0.747326,0.774064,0.800802,0.797858,0.801874,0.784385,0.021151,13
5,0.722035,0.028257,0.028278,0.004359,gini,,100,"{'criterion': 'gini', 'max_features': None, 'n...",0.75,0.772727,0.802139,0.795181,0.800535,0.784117,0.020052,15
6,0.835085,0.031649,0.047474,0.003701,entropy,sqrt,200,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.745989,0.776738,0.807487,0.797858,0.807229,0.78706,0.023384,4
7,0.393185,0.014691,0.023784,0.001509,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.748663,0.774064,0.807487,0.792503,0.80589,0.785721,0.022071,9
8,0.775413,0.031301,0.069724,0.050508,entropy,log2,200,"{'criterion': 'entropy', 'max_features': 'log2...",0.745989,0.778075,0.807487,0.796519,0.80589,0.786792,0.022928,5
9,0.584091,0.24529,0.033812,0.012778,entropy,log2,100,"{'criterion': 'entropy', 'max_features': 'log2...",0.743316,0.772727,0.807487,0.796519,0.807229,0.785456,0.024575,10


In [174]:
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC


log_model = LogisticRegression(solver='lbfgs')
RF = RandomForestClassifier(n_estimators = 100, criterion = 'entropy', random_state = 0)
# NB = GaussianNB()
DT= DecisionTreeClassifier(criterion = 'gini', max_features='sqrt',splitter='best',random_state = 0)
svc_model = SVC(kernel = 'linear', random_state = 0)
#knn = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
rfemodellist=[log_model,svc_model,RF,DT] 
log_rfe_feature=[]
for i in   rfemodellist:
    print(i)
    log_rfe = RFE(estimator=i,n_features_to_select=15)
    log_fit = log_rfe.fit(x_resampled, y_resampled)
    log_rfe_feature.append(log_fit.transform(x_resampled))

LogisticRegression()


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


SVC(kernel='linear', random_state=0)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


RandomForestClassifier(criterion='entropy', random_state=0)


  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:, features], y, **fit_params)
  estimator.fit(X[:,

DecisionTreeClassifier(max_features='sqrt', random_state=0)


## Using Logistic Regression Selected feature from RFE

In [175]:
param_grid = {
    "n_estimators":[200,100],
    'max_features':['sqrt', 'log2', None],
    "criterion":["gini", "entropy", "log_loss"]
}

grid = GridSearchCV(RandomForestClassifier(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)
grid.fit(log_rfe_feature[0],y_resampled)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


  self.best_estimator_.fit(X, y, **fit_params)


In [176]:
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.661235,0.016613,0.037097,0.013544,gini,sqrt,200,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
1,0.333638,0.033177,0.020821,0.005697,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
2,0.519009,0.027147,0.041264,0.008242,gini,log2,200,"{'criterion': 'gini', 'max_features': 'log2', ...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
3,0.256942,0.011773,0.022113,0.007468,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
4,0.652002,0.021093,0.038131,0.006434,gini,,200,"{'criterion': 'gini', 'max_features': None, 'n...",0.643048,0.696524,0.68984,0.685408,0.702811,0.683526,0.021084,15
5,0.321858,0.004158,0.019984,0.006018,gini,,100,"{'criterion': 'gini', 'max_features': None, 'n...",0.643048,0.696524,0.68984,0.685408,0.702811,0.683526,0.021084,15
6,0.523011,0.016741,0.032586,0.002437,entropy,sqrt,200,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
7,0.263804,0.01107,0.022008,0.006897,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
8,0.564411,0.012698,0.035605,0.000328,entropy,log2,200,"{'criterion': 'entropy', 'max_features': 'log2...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1
9,0.28593,0.000833,0.022214,0.002352,entropy,log2,100,"{'criterion': 'entropy', 'max_features': 'log2...",0.643048,0.696524,0.68984,0.686747,0.702811,0.683794,0.021115,1


## Using SVC Selected features from RFE

In [177]:
param_grid = {
    "n_estimators":[200,100],
    'max_features':['sqrt', 'log2', None],
    "criterion":["gini", "entropy", "log_loss"]
}

grid = GridSearchCV(RandomForestClassifier(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)
grid.fit(log_rfe_feature[1],y_resampled)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


  self.best_estimator_.fit(X, y, **fit_params)


In [178]:
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.707247,0.019747,0.035835,0.002447,gini,sqrt,200,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
1,0.380804,0.017014,0.026915,0.005032,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
2,0.629991,0.028972,0.042914,0.006939,gini,log2,200,"{'criterion': 'gini', 'max_features': 'log2', ...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
3,0.307209,0.017488,0.019995,0.001035,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
4,0.712982,0.009698,0.037488,0.006151,gini,,200,"{'criterion': 'gini', 'max_features': None, 'n...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
5,0.391617,0.054853,0.022658,0.003386,gini,,100,"{'criterion': 'gini', 'max_features': None, 'n...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
6,0.697159,0.062995,0.040706,0.00417,entropy,sqrt,200,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
7,0.312314,0.033566,0.018971,0.001438,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
8,0.574136,0.020641,0.031354,0.003173,entropy,log2,200,"{'criterion': 'entropy', 'max_features': 'log2...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1
9,0.27171,0.011581,0.0187,0.005116,entropy,log2,100,"{'criterion': 'entropy', 'max_features': 'log2...",0.663102,0.708556,0.720588,0.716198,0.713521,0.704393,0.021011,1


## Using Random Forest Selected features from RFE

In [179]:
param_grid = {
    "n_estimators":[200,100],
    'max_features':['sqrt', 'log2', None],
    "criterion":["gini", "entropy", "log_loss"]
}

grid = GridSearchCV(RandomForestClassifier(),param_grid,scoring="accuracy",verbose=3,refit=True,n_jobs=-1)
grid.fit(log_rfe_feature[2],y_resampled)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


  self.best_estimator_.fit(X, y, **fit_params)


In [180]:
pd.DataFrame(grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.832069,0.023931,0.048046,0.005815,gini,sqrt,200,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.747326,0.792781,0.82754,0.800535,0.8166,0.796956,0.027621,9
1,0.450415,0.03003,0.038693,0.008199,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.751337,0.794118,0.82754,0.800535,0.817938,0.798294,0.026339,2
2,0.786933,0.035822,0.050384,0.005516,gini,log2,200,"{'criterion': 'gini', 'max_features': 'log2', ...",0.752674,0.800802,0.824866,0.800535,0.813922,0.79856,0.024668,1
3,0.352975,0.009803,0.025863,0.002359,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",0.751337,0.796791,0.826203,0.803213,0.812584,0.798026,0.025354,3
4,1.011869,0.062077,0.049339,0.008031,gini,,200,"{'criterion': 'gini', 'max_features': None, 'n...",0.744652,0.791444,0.824866,0.796519,0.812584,0.794013,0.027372,17
5,0.568136,0.038987,0.027242,0.003341,gini,,100,"{'criterion': 'gini', 'max_features': None, 'n...",0.745989,0.795455,0.826203,0.796519,0.813922,0.795618,0.027329,14
6,0.795055,0.054007,0.042789,0.008848,entropy,sqrt,200,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.751337,0.795455,0.82754,0.801874,0.811245,0.79749,0.025478,7
7,0.342764,0.004134,0.022303,0.007367,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.75,0.792781,0.826203,0.801874,0.815261,0.797224,0.026217,8
8,0.673698,0.01325,0.048792,0.00176,entropy,log2,200,"{'criterion': 'entropy', 'max_features': 'log2...",0.752674,0.796791,0.819519,0.800535,0.812584,0.796421,0.023348,12
9,0.344291,0.008313,0.023057,0.004776,entropy,log2,100,"{'criterion': 'entropy', 'max_features': 'log2...",0.748663,0.798128,0.826203,0.799197,0.8166,0.797758,0.026746,4
