In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import OneHotEncoder
import numpy as np
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE


# Load the dataset
df = pd.read_csv('../train.csv')

# Create a mapping from metaphorID to the actual metaphor words
metaphor_mapping = {
    0: 'road', 1: 'candle', 2: 'light', 3: 'spice', 4: 'ride', 5: 'train', 6: 'boat'
}
df['metaphor_word'] = df['metaphorID'].map(metaphor_mapping)

# Convert the 'label_boolean' column to integers (True to 1, False to 0)
df['label_boolean'] = df['label_boolean'].astype(int)

# Splitting the dataset into training and testing sets first
X_train_raw, X_test_raw, y_train, y_test = train_test_split(df[['text', 'metaphor_word']], df['label_boolean'], test_size=0.2, random_state=42, stratify=df['label_boolean'])

# Feature extraction using TF-IDF for the text (fit on training data only)
tfidf_vectorizer = TfidfVectorizer(max_features=1000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train_raw['text']).toarray()
X_test_tfidf = tfidf_vectorizer.transform(X_test_raw['text']).toarray()

# One-hot encode the 'metaphor_word' column
onehot_encoder = OneHotEncoder(sparse=False)
X_train_metaphor = onehot_encoder.fit_transform(X_train_raw[['metaphor_word']])
X_test_metaphor = onehot_encoder.transform(X_test_raw[['metaphor_word']])

# Combine TF-IDF features with one-hot encoded metaphor_word features
X_train = np.hstack((X_train_tfidf, X_train_metaphor))
X_test = np.hstack((X_test_tfidf, X_test_metaphor))

smote = SMOTE()
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [2]:
from sklearn.model_selection import RandomizedSearchCV

# Random Forest parameter grid
rf_param_grid = {
    'n_estimators': [100, 200, 300, 400, 500],
    'max_features': ['auto', 'sqrt'],
    'max_depth': [10, 20, 30, 40, 50, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Random Forest RandomizedSearchCV
rf_random_search = RandomizedSearchCV(estimator=RandomForestClassifier(random_state=42),
                                      param_distributions=rf_param_grid, n_iter=100, cv=3, 
                                      verbose=2, random_state=42, n_jobs=-1)
rf_random_search.fit(X_train_resampled, y_train_resampled)
best_rf_model = rf_random_search.best_estimator_

Fitting 3 folds for each of 100 candidates, totalling 300 fits
[CV] END bootstrap=False, max_depth=30, max_features=auto, min_samples_leaf=1, min_samples_split=10, n_estimators=200; total time=   4.5s
[CV] END bootstrap=False, max_depth=30, max_features=auto, min_samples_leaf=1, min_samples_split=10, n_estimators=200; total time=   4.7s
[CV] END bootstrap=True, max_depth=50, max_features=sqrt, min_samples_leaf=2, min_samples_split=2, n_estimators=400; total time=   5.8s
[CV] END bootstrap=True, max_depth=50, max_features=sqrt, min_samples_leaf=2, min_samples_split=2, n_estimators=400; total time=   6.1s
[CV] END bootstrap=True, max_depth=50, max_features=sqrt, min_samples_leaf=2, min_samples_split=2, n_estimators=400; total time=   6.1s
[CV] END bootstrap=False, max_depth=10, max_features=auto, min_samples_leaf=1, min_samples_split=5, n_estimators=400; total time=   6.7s
[CV] END bootstrap=False, max_depth=10, max_features=auto, min_samples_leaf=1, min_samples_split=5, n_estimators=400

In [3]:
# AdaBoost parameter grid
ab_param_grid = {
    'n_estimators': [50, 100, 200, 300, 400],
    'learning_rate': [0.01, 0.05, 0.1, 0.3, 1]
}

# AdaBoost RandomizedSearchCV
ab_random_search = RandomizedSearchCV(estimator=AdaBoostClassifier(random_state=42),
                                      param_distributions=ab_param_grid, n_iter=100, cv=3, 
                                      verbose=2, random_state=42, n_jobs=-1)
ab_random_search.fit(X_train_resampled, y_train_resampled)
best_ab_model = ab_random_search.best_estimator_




Fitting 3 folds for each of 25 candidates, totalling 75 fits
[CV] END ................learning_rate=0.01, n_estimators=50; total time=   4.0s
[CV] END ................learning_rate=0.01, n_estimators=50; total time=   4.1s
[CV] END ................learning_rate=0.01, n_estimators=50; total time=   5.0s
[CV] END ...............learning_rate=0.01, n_estimators=100; total time=   8.4s
[CV] END ...............learning_rate=0.01, n_estimators=100; total time=   8.6s
[CV] END ...............learning_rate=0.01, n_estimators=100; total time=   9.6s
[CV] END ...............learning_rate=0.01, n_estimators=200; total time=  17.1s
[CV] END ...............learning_rate=0.01, n_estimators=200; total time=  19.4s
[CV] END ...............learning_rate=0.01, n_estimators=200; total time=  16.9s
[CV] END ................learning_rate=0.05, n_estimators=50; total time=   4.7s
[CV] END ................learning_rate=0.05, n_estimators=50; total time=   4.1s
[CV] END ................learning_rate=0.05, n_e

In [4]:
# Gradient Boosting parameter grid
gb_param_grid = {
    'n_estimators': [100, 200, 300, 400, 500],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [3, 5, 7, 9],
    'min_samples_split': [2, 4, 6],
    'min_samples_leaf': [1, 2, 4]
}

# Gradient Boosting RandomizedSearchCV
gb_random_search = RandomizedSearchCV(estimator=GradientBoostingClassifier(random_state=42),
                                      param_distributions=gb_param_grid, n_iter=100, cv=3, 
                                      verbose=2, random_state=42, n_jobs=-1)
gb_random_search.fit(X_train_resampled, y_train_resampled)
best_gb_model = gb_random_search.best_estimator_


Fitting 3 folds for each of 100 candidates, totalling 300 fits
[CV] END learning_rate=0.05, max_depth=7, min_samples_leaf=2, min_samples_split=4, n_estimators=100; total time=  32.7s
[CV] END learning_rate=0.05, max_depth=7, min_samples_leaf=2, min_samples_split=4, n_estimators=100; total time=  33.7s
[CV] END learning_rate=0.05, max_depth=7, min_samples_leaf=2, min_samples_split=4, n_estimators=100; total time=  39.8s
[CV] END learning_rate=0.05, max_depth=9, min_samples_leaf=2, min_samples_split=6, n_estimators=100; total time=  40.7s
[CV] END learning_rate=0.05, max_depth=9, min_samples_leaf=2, min_samples_split=6, n_estimators=100; total time=  42.0s
[CV] END learning_rate=0.05, max_depth=9, min_samples_leaf=2, min_samples_split=6, n_estimators=100; total time=  48.2s
[CV] END learning_rate=0.05, max_depth=3, min_samples_leaf=2, min_samples_split=2, n_estimators=400; total time= 1.1min
[CV] END learning_rate=0.05, max_depth=3, min_samples_leaf=2, min_samples_split=2, n_estimators=4

In [5]:
# Extra Trees parameter grid
et_param_grid = {
    'n_estimators': [100, 200, 300, 400, 500],
    'max_features': ['auto', 'sqrt'],
    'max_depth': [10, 20, 30, 40, 50, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Extra Trees RandomizedSearchCV
et_random_search = RandomizedSearchCV(estimator=ExtraTreesClassifier(random_state=42),
                                      param_distributions=et_param_grid, n_iter=100, cv=3, 
                                      verbose=2, random_state=42, n_jobs=-1)
et_random_search.fit(X_train_resampled, y_train_resampled)
best_et_model = et_random_search.best_estimator_


Fitting 3 folds for each of 100 candidates, totalling 300 fits
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=200; total time=   4.1s
[CV] END max_depth=None, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=200; total time=   4.5s
[CV] END max_depth=10, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=400; total time=   5.2s
[CV] END max_depth=10, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=400; total time=   5.3s
[CV] END max_depth=10, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=400; total time=   5.2s
[CV] END max_depth=10, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=200; total time=   1.8s
[CV] END max_depth=10, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=200; total time=   2.0s
[CV] END max_depth=10, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_

In [6]:
# XGBoost parameter grid
xgb_param_grid = {
    'n_estimators': [100, 200, 300, 400, 500],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [3, 5, 7, 9],
    'min_child_weight': [1, 2, 3, 4],
    'subsample': [0.6, 0.7, 0.8, 0.9],
    'colsample_bytree': [0.6, 0.7, 0.8, 0.9]
}

# XGBoost RandomizedSearchCV
xgb_random_search = RandomizedSearchCV(estimator=XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss'),
                                       param_distributions=xgb_param_grid, n_iter=100, cv=3, 
                                       verbose=2, random_state=42, n_jobs=-1)
xgb_random_search.fit(X_train_resampled, y_train_resampled)
best_xgb_model = xgb_random_search.best_estimator_




Fitting 3 folds for each of 100 candidates, totalling 300 fits


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=300, subsample=0.8; total time=  20.5s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=300, subsample=0.8; total time=  22.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=5, min_child_weight=3, n_estimators=300, subsample=0.7; total time=  35.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=7, min_child_weight=3, n_estimators=300, subsample=0.8; total time=  37.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=5, min_child_weight=3, n_estimators=300, subsample=0.7; total time=  37.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=5, min_child_weight=3, n_estimators=300, subsample=0.7; total time=  38.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=300, subsample=0.8; total time=  21.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=7, min_child_weight=2, n_estimators=300, subsample=0.7; total time=  21.5s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=7, min_child_weight=3, n_estimators=300, subsample=0.8; total time=  44.9s
[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=7, min_child_weight=3, n_estimators=300, subsample=0.8; total time=  45.0s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=5, min_child_weight=2, n_estimators=100, subsample=0.9; total time=  14.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=5, min_child_weight=2, n_estimators=100, subsample=0.9; total time=  15.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=5, min_child_weight=2, n_estimators=100, subsample=0.9; total time=  14.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=7, min_child_weight=2, n_estimators=300, subsample=0.7; total time=  33.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=7, min_child_weight=2, n_estimators=300, subsample=0.7; total time=  32.5s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=9, min_child_weight=2, n_estimators=100, subsample=0.8; total time=  29.5s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=9, min_child_weight=2, n_estimators=100, subsample=0.8; total time=  29.5s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=9, min_child_weight=2, n_estimators=100, subsample=0.8; total time=  30.9s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=500, subsample=0.7; total time=  38.5s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=500, subsample=0.7; total time=  47.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=500, subsample=0.7; total time=  47.5s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, min_child_weight=2, n_estimators=400, subsample=0.9; total time=  40.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, min_child_weight=3, n_estimators=200, subsample=0.8; total time=  35.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, min_child_weight=3, n_estimators=200, subsample=0.8; total time=  36.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, min_child_weight=3, n_estimators=200, subsample=0.8; total time=  34.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, min_child_weight=2, n_estimators=400, subsample=0.9; total time= 1.0min


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, min_child_weight=2, n_estimators=400, subsample=0.9; total time= 1.0min


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=5, min_child_weight=3, n_estimators=300, subsample=0.9; total time=  43.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=5, min_child_weight=3, n_estimators=300, subsample=0.9; total time=  47.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, min_child_weight=4, n_estimators=200, subsample=0.8; total time=  23.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=5, min_child_weight=3, n_estimators=300, subsample=0.9; total time=  46.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, min_child_weight=4, n_estimators=200, subsample=0.8; total time=  22.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=5, min_child_weight=3, n_estimators=100, subsample=0.6; total time=  13.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, min_child_weight=4, n_estimators=200, subsample=0.8; total time=  23.0s
[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=5, min_child_weight=3, n_estimators=100, subsample=0.6; total time=  12.5s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=5, min_child_weight=3, n_estimators=100, subsample=0.6; total time=  12.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=500, subsample=0.8; total time= 1.2min


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=200, subsample=0.9; total time=  18.5s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=500, subsample=0.8; total time= 1.3min


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=200, subsample=0.9; total time=  19.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=300, subsample=0.8; total time=  39.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=200, subsample=0.9; total time=  19.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=500, subsample=0.8; total time= 1.3min


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=7, min_child_weight=3, n_estimators=200, subsample=0.9; total time=  21.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=300, subsample=0.8; total time=  45.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=300, subsample=0.8; total time=  45.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=7, min_child_weight=3, n_estimators=200, subsample=0.9; total time=  31.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=7, min_child_weight=3, n_estimators=200, subsample=0.9; total time=  32.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=4, n_estimators=200, subsample=0.7; total time=  13.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=4, n_estimators=200, subsample=0.7; total time=  14.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=4, n_estimators=200, subsample=0.7; total time=  14.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=9, min_child_weight=3, n_estimators=200, subsample=0.7; total time=  19.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=9, min_child_weight=3, n_estimators=200, subsample=0.7; total time=  27.9s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=9, min_child_weight=3, n_estimators=200, subsample=0.7; total time=  27.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=3, min_child_weight=1, n_estimators=400, subsample=0.8; total time=  34.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=3, min_child_weight=1, n_estimators=400, subsample=0.8; total time=  34.5s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=3, min_child_weight=1, n_estimators=400, subsample=0.8; total time=  34.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=5, min_child_weight=1, n_estimators=500, subsample=0.7; total time=  36.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=5, min_child_weight=1, n_estimators=500, subsample=0.7; total time=  53.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=7, min_child_weight=4, n_estimators=500, subsample=0.8; total time= 1.8min


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=7, min_child_weight=4, n_estimators=500, subsample=0.8; total time= 1.8min


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=5, min_child_weight=1, n_estimators=500, subsample=0.7; total time=  52.9s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=7, min_child_weight=4, n_estimators=500, subsample=0.8; total time= 1.8min


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=9, min_child_weight=4, n_estimators=500, subsample=0.8; total time=  56.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.05, max_depth=7, min_child_weight=1, n_estimators=100, subsample=0.8; total time=  23.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.05, max_depth=7, min_child_weight=1, n_estimators=100, subsample=0.8; total time=  23.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.05, max_depth=7, min_child_weight=1, n_estimators=100, subsample=0.8; total time=  22.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=7, min_child_weight=4, n_estimators=300, subsample=0.9; total time=  40.9s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=9, min_child_weight=4, n_estimators=500, subsample=0.8; total time= 1.4min
[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=7, min_child_weight=4, n_estimators=300, subsample=0.9; total time=  51.3s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=5, min_child_weight=3, n_estimators=100, subsample=0.9; total time=  11.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=7, min_child_weight=4, n_estimators=300, subsample=0.9; total time=  50.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=9, min_child_weight=4, n_estimators=500, subsample=0.8; total time= 1.3min


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=5, min_child_weight=3, n_estimators=100, subsample=0.9; total time=  11.9s
[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=5, min_child_weight=3, n_estimators=100, subsample=0.9; total time=  11.9s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=5, min_child_weight=3, n_estimators=200, subsample=0.6; total time=  17.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=7, min_child_weight=1, n_estimators=200, subsample=0.8; total time=  42.3s
[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=7, min_child_weight=1, n_estimators=200, subsample=0.8; total time=  43.9s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=5, min_child_weight=3, n_estimators=200, subsample=0.6; total time=  19.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=7, min_child_weight=1, n_estimators=200, subsample=0.8; total time=  42.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=5, min_child_weight=3, n_estimators=200, subsample=0.6; total time=  19.5s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=5, min_child_weight=4, n_estimators=200, subsample=0.7; total time=  25.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=5, min_child_weight=4, n_estimators=200, subsample=0.7; total time=  25.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=5, min_child_weight=4, n_estimators=200, subsample=0.7; total time=  24.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=200, subsample=0.9; total time=  15.9s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=200, subsample=0.9; total time=  16.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=200, subsample=0.9; total time=  16.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.1, max_depth=9, min_child_weight=4, n_estimators=200, subsample=0.6; total time=  20.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.1, max_depth=9, min_child_weight=4, n_estimators=200, subsample=0.6; total time=  29.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=9, min_child_weight=4, n_estimators=400, subsample=0.6; total time= 1.3min


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.1, max_depth=9, min_child_weight=4, n_estimators=200, subsample=0.6; total time=  28.9s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=9, min_child_weight=4, n_estimators=400, subsample=0.6; total time= 1.4min


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=9, min_child_weight=3, n_estimators=500, subsample=0.8; total time= 1.6min


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=9, min_child_weight=4, n_estimators=400, subsample=0.6; total time= 1.4min


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=9, min_child_weight=3, n_estimators=500, subsample=0.8; total time= 1.8min


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=9, min_child_weight=3, n_estimators=500, subsample=0.8; total time= 1.8min


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=7, min_child_weight=2, n_estimators=100, subsample=0.6; total time=  15.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=7, min_child_weight=2, n_estimators=100, subsample=0.6; total time=  16.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=9, min_child_weight=2, n_estimators=500, subsample=0.9; total time=  58.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=7, min_child_weight=2, n_estimators=100, subsample=0.6; total time=  16.5s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=3, min_child_weight=2, n_estimators=500, subsample=0.8; total time=  43.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=9, min_child_weight=2, n_estimators=500, subsample=0.9; total time= 1.5min


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=9, min_child_weight=2, n_estimators=500, subsample=0.9; total time= 1.5min


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=3, min_child_weight=2, n_estimators=500, subsample=0.8; total time=  55.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=3, min_child_weight=2, n_estimators=500, subsample=0.8; total time=  56.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=7, min_child_weight=2, n_estimators=500, subsample=0.6; total time= 1.8min


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=7, min_child_weight=2, n_estimators=500, subsample=0.6; total time= 1.8min


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, min_child_weight=4, n_estimators=400, subsample=0.6; total time=  40.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=7, min_child_weight=2, n_estimators=500, subsample=0.6; total time= 1.8min


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=5, min_child_weight=3, n_estimators=400, subsample=0.8; total time=  31.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, min_child_weight=4, n_estimators=400, subsample=0.6; total time=  53.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, min_child_weight=4, n_estimators=400, subsample=0.6; total time=  53.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=4, n_estimators=500, subsample=0.8; total time=  29.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=5, min_child_weight=3, n_estimators=400, subsample=0.8; total time=  45.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=4, n_estimators=500, subsample=0.8; total time=  35.1s
[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=4, n_estimators=500, subsample=0.8; total time=  35.0s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=5, min_child_weight=3, n_estimators=400, subsample=0.8; total time=  44.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=9, min_child_weight=3, n_estimators=200, subsample=0.8; total time=  33.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=5, min_child_weight=4, n_estimators=500, subsample=0.8; total time=  36.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=9, min_child_weight=3, n_estimators=200, subsample=0.8; total time=  46.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=9, min_child_weight=3, n_estimators=200, subsample=0.8; total time=  45.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=5, min_child_weight=4, n_estimators=500, subsample=0.8; total time=  52.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=5, min_child_weight=4, n_estimators=500, subsample=0.8; total time=  52.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=5, min_child_weight=2, n_estimators=500, subsample=0.7; total time=  54.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=5, min_child_weight=2, n_estimators=500, subsample=0.7; total time= 1.1min


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, min_child_weight=2, n_estimators=100, subsample=0.9; total time=  16.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, min_child_weight=2, n_estimators=100, subsample=0.9; total time=  16.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.01, max_depth=3, min_child_weight=4, n_estimators=500, subsample=0.9; total time=  40.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=5, min_child_weight=2, n_estimators=100, subsample=0.9; total time=  16.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.01, max_depth=3, min_child_weight=4, n_estimators=500, subsample=0.9; total time=  40.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.01, max_depth=3, min_child_weight=4, n_estimators=500, subsample=0.9; total time=  39.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=5, min_child_weight=2, n_estimators=500, subsample=0.7; total time= 1.1min


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=7, min_child_weight=2, n_estimators=200, subsample=0.6; total time=  17.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=7, min_child_weight=2, n_estimators=200, subsample=0.6; total time=  23.7s
[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=7, min_child_weight=2, n_estimators=200, subsample=0.6; total time=  23.3s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=5, min_child_weight=1, n_estimators=300, subsample=0.9; total time=  30.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=5, min_child_weight=2, n_estimators=200, subsample=0.9; total time=  33.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=5, min_child_weight=1, n_estimators=100, subsample=0.7; total time=  11.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=5, min_child_weight=1, n_estimators=100, subsample=0.7; total time=  12.4s
[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=5, min_child_weight=1, n_estimators=100, subsample=0.7; total time=  12.3s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=5, min_child_weight=2, n_estimators=200, subsample=0.9; total time=  34.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=5, min_child_weight=1, n_estimators=300, subsample=0.9; total time=  41.6s
[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=5, min_child_weight=1, n_estimators=300, subsample=0.9; total time=  42.3s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=5, min_child_weight=2, n_estimators=200, subsample=0.9; total time=  33.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=9, min_child_weight=2, n_estimators=400, subsample=0.6; total time=  30.5s
[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=7, min_child_weight=1, n_estimators=300, subsample=0.8; total time=  34.9s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=7, min_child_weight=1, n_estimators=300, subsample=0.8; total time=  47.1s
[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=9, min_child_weight=2, n_estimators=300, subsample=0.7; total time=  46.3s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=7, min_child_weight=1, n_estimators=300, subsample=0.8; total time=  46.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=9, min_child_weight=2, n_estimators=400, subsample=0.6; total time=  47.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=9, min_child_weight=2, n_estimators=300, subsample=0.7; total time=  59.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=9, min_child_weight=2, n_estimators=300, subsample=0.7; total time= 1.0min


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, min_child_weight=2, n_estimators=200, subsample=0.7; total time=  23.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=9, min_child_weight=4, n_estimators=500, subsample=0.6; total time=  41.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=9, min_child_weight=2, n_estimators=400, subsample=0.6; total time=  46.9s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, min_child_weight=2, n_estimators=200, subsample=0.7; total time=  24.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, min_child_weight=2, n_estimators=200, subsample=0.7; total time=  24.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=3, min_child_weight=2, n_estimators=100, subsample=0.9; total time=  15.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.01, max_depth=3, min_child_weight=2, n_estimators=400, subsample=0.7; total time=  35.5s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=3, min_child_weight=2, n_estimators=100, subsample=0.9; total time=  14.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=3, min_child_weight=2, n_estimators=100, subsample=0.9; total time=  14.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.01, max_depth=3, min_child_weight=2, n_estimators=400, subsample=0.7; total time=  34.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.01, max_depth=3, min_child_weight=2, n_estimators=400, subsample=0.7; total time=  34.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=9, min_child_weight=4, n_estimators=500, subsample=0.6; total time= 1.1min


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=9, min_child_weight=4, n_estimators=500, subsample=0.6; total time= 1.1min


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, min_child_weight=3, n_estimators=400, subsample=0.8; total time=  39.9s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, min_child_weight=2, n_estimators=300, subsample=0.7; total time=  35.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, min_child_weight=2, n_estimators=300, subsample=0.7; total time=  35.7s
[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, min_child_weight=3, n_estimators=400, subsample=0.8; total time=  43.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=3, min_child_weight=3, n_estimators=400, subsample=0.8; total time=  42.9s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=3, min_child_weight=2, n_estimators=300, subsample=0.7; total time=  36.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.01, max_depth=3, min_child_weight=2, n_estimators=100, subsample=0.9; total time=  11.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.01, max_depth=3, min_child_weight=2, n_estimators=100, subsample=0.9; total time=  10.4s
[CV] END colsample_bytree=0.6, learning_rate=0.01, max_depth=3, min_child_weight=2, n_estimators=100, subsample=0.9; total time=  10.2s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=9, min_child_weight=2, n_estimators=200, subsample=0.7; total time=  55.9s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=9, min_child_weight=2, n_estimators=200, subsample=0.7; total time=  55.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.01, max_depth=9, min_child_weight=2, n_estimators=200, subsample=0.7; total time=  54.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=3, min_child_weight=1, n_estimators=400, subsample=0.7; total time=  39.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=3, min_child_weight=1, n_estimators=400, subsample=0.7; total time=  39.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=9, min_child_weight=4, n_estimators=400, subsample=0.6; total time=  24.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=3, min_child_weight=1, n_estimators=400, subsample=0.7; total time=  38.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=9, min_child_weight=3, n_estimators=300, subsample=0.7; total time= 1.3min


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=9, min_child_weight=4, n_estimators=400, subsample=0.6; total time=  36.7s
[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=9, min_child_weight=3, n_estimators=300, subsample=0.7; total time= 1.3min


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=9, min_child_weight=4, n_estimators=400, subsample=0.6; total time=  36.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=9, min_child_weight=3, n_estimators=300, subsample=0.7; total time= 1.3min


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=3, min_child_weight=1, n_estimators=200, subsample=0.6; total time=  16.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=3, min_child_weight=1, n_estimators=200, subsample=0.6; total time=  16.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=400, subsample=0.9; total time=  30.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.05, max_depth=5, min_child_weight=4, n_estimators=500, subsample=0.6; total time=  59.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=3, min_child_weight=1, n_estimators=200, subsample=0.6; total time=  16.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=400, subsample=0.9; total time=  36.0s
[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=400, subsample=0.9; total time=  36.1s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.05, max_depth=5, min_child_weight=4, n_estimators=500, subsample=0.6; total time= 1.2min


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.05, max_depth=5, min_child_weight=4, n_estimators=500, subsample=0.6; total time= 1.2min


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=7, min_child_weight=1, n_estimators=200, subsample=0.7; total time=  24.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=5, min_child_weight=4, n_estimators=300, subsample=0.9; total time=  39.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=5, min_child_weight=4, n_estimators=300, subsample=0.9; total time=  42.2s
[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=5, min_child_weight=4, n_estimators=300, subsample=0.9; total time=  42.4s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=7, min_child_weight=1, n_estimators=200, subsample=0.7; total time=  31.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=7, min_child_weight=1, n_estimators=200, subsample=0.7; total time=  31.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, min_child_weight=2, n_estimators=500, subsample=0.9; total time= 1.6min


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, min_child_weight=2, n_estimators=500, subsample=0.9; total time= 1.6min


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=5, min_child_weight=2, n_estimators=500, subsample=0.9; total time= 1.6min


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, min_child_weight=3, n_estimators=500, subsample=0.8; total time=  44.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=9, min_child_weight=2, n_estimators=500, subsample=0.9; total time= 1.0min


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=1, n_estimators=300, subsample=0.7; total time=  20.5s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=1, n_estimators=300, subsample=0.7; total time=  21.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=1, n_estimators=300, subsample=0.7; total time=  21.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=9, min_child_weight=3, n_estimators=200, subsample=0.8; total time=  23.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, min_child_weight=3, n_estimators=500, subsample=0.8; total time= 1.1min


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=9, min_child_weight=2, n_estimators=500, subsample=0.9; total time= 1.4min


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=9, min_child_weight=2, n_estimators=500, subsample=0.9; total time= 1.5min


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=9, min_child_weight=3, n_estimators=200, subsample=0.8; total time=  33.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=9, min_child_weight=3, n_estimators=200, subsample=0.8; total time=  32.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=5, min_child_weight=4, n_estimators=400, subsample=0.7; total time=  32.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.2, max_depth=5, min_child_weight=3, n_estimators=500, subsample=0.8; total time= 1.1min


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=5, min_child_weight=4, n_estimators=400, subsample=0.7; total time=  46.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=7, min_child_weight=4, n_estimators=500, subsample=0.7; total time=  36.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=5, min_child_weight=4, n_estimators=400, subsample=0.7; total time=  45.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=9, min_child_weight=4, n_estimators=500, subsample=0.6; total time=  56.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=7, min_child_weight=2, n_estimators=100, subsample=0.8; total time=  21.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=7, min_child_weight=2, n_estimators=100, subsample=0.8; total time=  21.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=7, min_child_weight=2, n_estimators=100, subsample=0.8; total time=  21.9s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=7, min_child_weight=4, n_estimators=500, subsample=0.7; total time=  54.9s
[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=5, min_child_weight=3, n_estimators=200, subsample=0.6; total time=  14.9s
[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=7, min_child_weight=4, n_estimators=500, subsample=0.7; total time=  53.9s


  if is_sparse(data):
  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=5, min_child_weight=3, n_estimators=200, subsample=0.6; total time=  19.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=9, min_child_weight=4, n_estimators=500, subsample=0.6; total time= 1.3min


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=9, min_child_weight=3, n_estimators=100, subsample=0.9; total time=  15.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.05, max_depth=9, min_child_weight=4, n_estimators=500, subsample=0.6; total time= 1.3min
[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=5, min_child_weight=3, n_estimators=200, subsample=0.6; total time=  19.4s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=9, min_child_weight=3, n_estimators=100, subsample=0.9; total time=  22.6s
[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=9, min_child_weight=3, n_estimators=100, subsample=0.9; total time=  22.5s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.1, max_depth=5, min_child_weight=3, n_estimators=200, subsample=0.7; total time=  23.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.1, max_depth=9, min_child_weight=1, n_estimators=300, subsample=0.6; total time=  35.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.1, max_depth=5, min_child_weight=3, n_estimators=200, subsample=0.7; total time=  25.9s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.1, max_depth=5, min_child_weight=3, n_estimators=200, subsample=0.7; total time=  26.1s
[CV] END colsample_bytree=0.7, learning_rate=0.1, max_depth=9, min_child_weight=1, n_estimators=300, subsample=0.6; total time=  49.5s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.1, max_depth=9, min_child_weight=1, n_estimators=300, subsample=0.6; total time=  49.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=9, min_child_weight=1, n_estimators=500, subsample=0.8; total time=  47.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=7, min_child_weight=3, n_estimators=300, subsample=0.8; total time= 1.2min


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=7, min_child_weight=3, n_estimators=300, subsample=0.8; total time= 1.2min


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.01, max_depth=7, min_child_weight=3, n_estimators=300, subsample=0.8; total time= 1.2min


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=9, min_child_weight=3, n_estimators=200, subsample=0.6; total time=  18.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=5, min_child_weight=3, n_estimators=400, subsample=0.7; total time=  57.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=5, min_child_weight=3, n_estimators=400, subsample=0.7; total time=  56.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=9, min_child_weight=1, n_estimators=500, subsample=0.8; total time= 1.3min


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.2, max_depth=9, min_child_weight=1, n_estimators=500, subsample=0.8; total time= 1.3min


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=9, min_child_weight=3, n_estimators=200, subsample=0.6; total time=  25.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=3, min_child_weight=3, n_estimators=100, subsample=0.7; total time=   9.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=3, min_child_weight=3, n_estimators=100, subsample=0.7; total time=   9.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=3, min_child_weight=3, n_estimators=100, subsample=0.7; total time=  11.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=9, min_child_weight=3, n_estimators=200, subsample=0.6; total time=  27.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.01, max_depth=5, min_child_weight=3, n_estimators=400, subsample=0.7; total time=  58.9s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, min_child_weight=4, n_estimators=300, subsample=0.7; total time=  40.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=5, min_child_weight=4, n_estimators=300, subsample=0.9; total time=  36.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, min_child_weight=4, n_estimators=300, subsample=0.7; total time=  49.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, min_child_weight=4, n_estimators=300, subsample=0.7; total time=  49.9s
[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=5, min_child_weight=4, n_estimators=300, subsample=0.9; total time=  38.2s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=5, min_child_weight=4, n_estimators=300, subsample=0.9; total time=  36.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=300, subsample=0.8; total time=  35.3s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=300, subsample=0.8; total time=  38.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=200, subsample=0.6; total time=  24.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=5, min_child_weight=1, n_estimators=300, subsample=0.8; total time=  41.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=200, subsample=0.6; total time=  27.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.1, max_depth=5, min_child_weight=1, n_estimators=200, subsample=0.6; total time=  27.0s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=7, min_child_weight=2, n_estimators=100, subsample=0.7; total time=  18.1s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=7, min_child_weight=1, n_estimators=400, subsample=0.6; total time=  54.9s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=7, min_child_weight=2, n_estimators=100, subsample=0.7; total time=  18.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.05, max_depth=7, min_child_weight=2, n_estimators=100, subsample=0.7; total time=  19.5s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=7, min_child_weight=1, n_estimators=400, subsample=0.6; total time= 1.3min


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=7, min_child_weight=1, n_estimators=400, subsample=0.6; total time= 1.3min


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=7, min_child_weight=1, n_estimators=500, subsample=0.8; total time= 1.2min


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, min_child_weight=3, n_estimators=400, subsample=0.8; total time=  53.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=5, min_child_weight=2, n_estimators=300, subsample=0.9; total time=  28.9s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.1, max_depth=3, min_child_weight=2, n_estimators=100, subsample=0.8; total time=  10.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.1, max_depth=3, min_child_weight=2, n_estimators=100, subsample=0.8; total time=  10.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=5, min_child_weight=2, n_estimators=300, subsample=0.9; total time=  43.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, min_child_weight=3, n_estimators=400, subsample=0.8; total time= 1.2min


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.1, max_depth=3, min_child_weight=2, n_estimators=100, subsample=0.8; total time=  10.2s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=7, min_child_weight=1, n_estimators=500, subsample=0.8; total time= 1.7min


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.2, max_depth=5, min_child_weight=2, n_estimators=300, subsample=0.9; total time=  42.8s


  if is_sparse(data):


[CV] END colsample_bytree=0.8, learning_rate=0.1, max_depth=7, min_child_weight=1, n_estimators=500, subsample=0.8; total time= 1.6min


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=100, subsample=0.7; total time=   7.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=5, min_child_weight=3, n_estimators=400, subsample=0.8; total time= 1.1min


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=100, subsample=0.7; total time=   7.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.2, max_depth=3, min_child_weight=3, n_estimators=100, subsample=0.7; total time=   7.7s


  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.01, max_depth=3, min_child_weight=4, n_estimators=200, subsample=0.7; total time=  15.4s
[CV] END colsample_bytree=0.6, learning_rate=0.01, max_depth=3, min_child_weight=4, n_estimators=200, subsample=0.7; total time=  15.8s


  if is_sparse(data):
  if is_sparse(data):


[CV] END colsample_bytree=0.6, learning_rate=0.01, max_depth=3, min_child_weight=4, n_estimators=200, subsample=0.7; total time=  15.6s


  if is_sparse(data):


[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=9, min_child_weight=4, n_estimators=500, subsample=0.7; total time=  54.4s


  if is_sparse(data):


[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=9, min_child_weight=1, n_estimators=500, subsample=0.8; total time= 1.2min
[CV] END colsample_bytree=0.9, learning_rate=0.05, max_depth=7, min_child_weight=4, n_estimators=200, subsample=0.6; total time=  34.2s
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=9, min_child_weight=4, n_estimators=500, subsample=0.7; total time= 1.4min
[CV] END colsample_bytree=0.9, learning_rate=0.1, max_depth=9, min_child_weight=4, n_estimators=500, subsample=0.7; total time= 1.3min
[CV] END colsample_bytree=0.9, learning_rate=0.05, max_depth=7, min_child_weight=4, n_estimators=200, subsample=0.6; total time=  36.4s
[CV] END colsample_bytree=0.9, learning_rate=0.05, max_depth=7, min_child_weight=4, n_estimators=200, subsample=0.6; total time=  34.8s
[CV] END colsample_bytree=0.7, learning_rate=0.05, max_depth=9, min_child_weight=1, n_estimators=500, subsample=0.8; total time= 1.6min
[CV] END colsample_bytree=0.7, learning_rate=0.05,

  if is_sparse(data):


In [7]:
!pip install joblib



In [8]:
import joblib

# Save the best models
joblib.dump(best_rf_model, 'best_rf_model.pkl')
joblib.dump(best_ab_model, 'best_ab_model.pkl')
joblib.dump(best_gb_model, 'best_gb_model.pkl')
joblib.dump(best_et_model, 'best_et_model.pkl')
joblib.dump(best_xgb_model, 'best_xgb_model.pkl')


['best_xgb_model.pkl']

In [9]:
# Load the models
best_rf_model = joblib.load('best_rf_model.pkl')
best_ab_model = joblib.load('best_ab_model.pkl')
best_gb_model = joblib.load('best_gb_model.pkl')
best_et_model = joblib.load('best_et_model.pkl')
best_xgb_model = joblib.load('best_xgb_model.pkl')

# Evaluate each model
for model, name in [(best_rf_model, "Random Forest"), (best_ab_model, "AdaBoost"), 
                    (best_gb_model, "Gradient Boosting"), (best_et_model, "Extra Trees"), 
                    (best_xgb_model, "XGBoost")]:
    y_pred = model.predict(X_test)
    print(f"Model: {name}")
    print(classification_report(y_test, y_pred))
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}\n")


Model: Random Forest
              precision    recall  f1-score   support

           0       0.72      0.33      0.45        88
           1       0.82      0.96      0.89       286

    accuracy                           0.81       374
   macro avg       0.77      0.65      0.67       374
weighted avg       0.80      0.81      0.78       374

Accuracy: 0.8128342245989305

Model: AdaBoost
              precision    recall  f1-score   support

           0       0.67      0.66      0.66        88
           1       0.90      0.90      0.90       286

    accuracy                           0.84       374
   macro avg       0.78      0.78      0.78       374
weighted avg       0.84      0.84      0.84       374

Accuracy: 0.8422459893048129

Model: Gradient Boosting
              precision    recall  f1-score   support

           0       0.72      0.58      0.64        88
           1       0.88      0.93      0.90       286

    accuracy                           0.85       374
   mac

In [10]:
# Initialize an array to store the sum of predicted probabilities
prob_sum = np.zeros((len(y_test), 2))  # Adjust this if you have more than two classes

# Evaluate each model and accumulate probabilities
for model, name in [(best_rf_model, "Random Forest"), (best_ab_model, "AdaBoost"), 
                    (best_gb_model, "Gradient Boosting"), (best_et_model, "Extra Trees"), 
                    (best_xgb_model, "XGBoost")]:
    y_pred = model.predict(X_test)
    probs = model.predict_proba(X_test)
    prob_sum += probs

    print(f"Model: {name}")
    print(classification_report(y_test, y_pred))
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}\n")

# Compute the average probabilities
avg_prob = prob_sum / 5  # Dividing by the number of models

# Determine final predictions based on the highest average probability
final_predictions = np.argmax(avg_prob, axis=1)

# Evaluate the final ensemble model
print("Ensemble Model Performance:")
print(classification_report(y_test, final_predictions))
print(f"Accuracy: {accuracy_score(y_test, final_predictions)}\n")

Model: Random Forest
              precision    recall  f1-score   support

           0       0.72      0.33      0.45        88
           1       0.82      0.96      0.89       286

    accuracy                           0.81       374
   macro avg       0.77      0.65      0.67       374
weighted avg       0.80      0.81      0.78       374

Accuracy: 0.8128342245989305

Model: AdaBoost
              precision    recall  f1-score   support

           0       0.67      0.66      0.66        88
           1       0.90      0.90      0.90       286

    accuracy                           0.84       374
   macro avg       0.78      0.78      0.78       374
weighted avg       0.84      0.84      0.84       374

Accuracy: 0.8422459893048129

Model: Gradient Boosting
              precision    recall  f1-score   support

           0       0.72      0.58      0.64        88
           1       0.88      0.93      0.90       286

    accuracy                           0.85       374
   mac

In [11]:
# Print the best parameters for each model
model_names = ["Random Forest", "AdaBoost", "Gradient Boosting", "Extra Trees", "XGBoost"]
models = [best_rf_model, best_ab_model, best_gb_model, best_et_model, best_xgb_model]

for model, name in zip(models, model_names):
    print(f"{name} Best Parameters:")
    print(model.get_params())
    print("\n")

Random Forest Best Parameters:
{'bootstrap': False, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 50, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}


AdaBoost Best Parameters:
{'algorithm': 'SAMME.R', 'base_estimator': None, 'learning_rate': 0.3, 'n_estimators': 300, 'random_state': 42}


Gradient Boosting Best Parameters:
{'ccp_alpha': 0.0, 'criterion': 'friedman_mse', 'init': None, 'learning_rate': 0.1, 'loss': 'deviance', 'max_depth': 7, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 400, 'n_iter_no_change': None, 'random_state': 42, 'subsample': 1.0, 'tol': 0.0001, 'validation_fraction': 0.1