Code from: https://www.analyticsvidhya.com/blog/2018/06/comprehensive-guide-for-ensemble-models/?utm_source=feedburner&utm_medium=email&utm_campaign=Feed%3A+AnalyticsVidhya+%28Analytics+Vidhya%29 (with minor changes)

## Simple Ensemble Techniques
### Max Voting Ensemble

In [60]:
from sklearn.datasets import load_iris
import numpy as np

# Load dataset and define train and test
iris = load_iris()
x = iris.data
y = iris.target

test_ratio = 0.3
instances = x.shape[0]
test_instances = int(instances * test_ratio)

index_arr = np.arange(instances)
np.random.shuffle(index_arr)

random_test_inst = index_arr[:test_instances]
random_train_inst = index_arr[test_instances:]

x_train = x[random_train_inst]
y_train = y[random_train_inst]
x_test = x[random_test_inst]
y_test = y[random_test_inst]

In [61]:
from sklearn import tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from scipy.stats import mode

# Train models and perform prediction
model1 = tree.DecisionTreeClassifier()
model2 = KNeighborsClassifier()
model3 = LogisticRegression()

model1.fit(x_train,y_train)
model2.fit(x_train,y_train)
model3.fit(x_train,y_train)

pred1=model1.predict(x_test)
pred2=model2.predict(x_test)
pred3=model3.predict(x_test)

# Max Voting Ensemble
final_pred = np.array([])
for i in range(0,len(x_test)):
    final_pred = np.append(final_pred, mode([pred1[i], pred2[i], pred3[i]])[0])

In [62]:
from sklearn.metrics import accuracy_score

accuracy_ens = accuracy_score(y_test, final_pred)
print(accuracy_ens)

accuracy_model1 = accuracy_score(y_test, pred1)
print(accuracy_model1)

accuracy_model2 = accuracy_score(y_test, pred2)
print(accuracy_model2)

accuracy_model3 = accuracy_score(y_test, pred3)
print(accuracy_model3)

0.9555555555555556
0.9333333333333333
0.9333333333333333
0.9777777777777777


#### Using 'VotingClassifier' module

In [63]:
from sklearn.ensemble import VotingClassifier
model1 = LogisticRegression(random_state=1)
model2 = tree.DecisionTreeClassifier(random_state=1)
model = VotingClassifier(estimators=[('lr', model1), ('dt', model2)], voting='hard')
model.fit(x_train,y_train)
model.score(x_test,y_test)

  if diff:


0.9333333333333333

### Averaging

In [65]:
model1 = tree.DecisionTreeClassifier()
model2 = KNeighborsClassifier()
model3= LogisticRegression()

model1.fit(x_train,y_train)
model2.fit(x_train,y_train)
model3.fit(x_train,y_train)

pred1=model1.predict_proba(x_test)
pred2=model2.predict_proba(x_test)
pred3=model3.predict_proba(x_test)

finalpred=(pred1+pred2+pred3)/3

### Weighted Average

In [66]:
model1 = tree.DecisionTreeClassifier()
model2 = KNeighborsClassifier()
model3= LogisticRegression()

model1.fit(x_train,y_train)
model2.fit(x_train,y_train)
model3.fit(x_train,y_train)

pred1=model1.predict_proba(x_test)
pred2=model2.predict_proba(x_test)
pred3=model3.predict_proba(x_test)

finalpred=(pred1*0.3+pred2*0.3+pred3*0.4)

## Advanced Ensemble Techniques
### Stacking

In [106]:
from sklearn.model_selection import StratifiedKFold

def Stacking(model, train, y, test, n_fold):
   folds = StratifiedKFold(n_splits=n_fold, random_state=1)
   test_pred = np.empty((0, 1), float)
   train_pred = np.empty((0, 1), float)

   for train_indices, val_indices in folds.split(train, y):
      x_train, x_val = train[train_indices], train[val_indices]
      y_train, y_val = y[train_indices], y[val_indices]

      model.fit(X=x_train, y=y_train)
      train_pred = np.append(train_pred, model.predict(x_val))
   
   test_pred = np.append(test_pred, model.predict(test))
   return test_pred.reshape(-1,1), train_pred

In [109]:
import pandas as pd
model1 = tree.DecisionTreeClassifier(random_state=1)

test_pred1, train_pred1 = Stacking(model=model1, n_fold=10, train=x_train, test=x_test, y=y_train)

train_pred1 = pd.DataFrame(train_pred1)
test_pred1 = pd.DataFrame(test_pred1)

In [111]:
model2 = KNeighborsClassifier()

test_pred2 ,train_pred2=Stacking(model=model2,n_fold=10,train=x_train,test=x_test,y=y_train)

train_pred2=pd.DataFrame(train_pred2)
test_pred2=pd.DataFrame(test_pred2)

In [112]:
# Logistic regression model on the predictions of DT and KNN
df = pd.concat([train_pred1, train_pred2], axis=1)
df_test = pd.concat([test_pred1, test_pred2], axis=1)

model = LogisticRegression(random_state=1)
model.fit(df,y_train)
model.score(df_test, y_test)

0.6222222222222222

### Blending

In [116]:
val_ratio = 0.3
instances = x_train.shape[0]
val_instances = int(instances * val_ratio)

index_arr = np.arange(instances)
np.random.shuffle(index_arr)

random_val_inst = index_arr[:val_instances]
random_train_inst = index_arr[val_instances:]

x_train_new = x_train[random_train_inst]
y_train_new = y_train[random_train_inst]
x_val = x_train[random_val_inst]
y_val = y_train[random_val_inst]
x_train = x_train_new
y_train = y_train_new

(74, 4) (74,) (31, 4) (31,) (45, 4) (45,)


In [117]:
model1 = tree.DecisionTreeClassifier()
model1.fit(x_train, y_train)
val_pred1=model1.predict(x_val)
test_pred1=model1.predict(x_test)
val_pred1=pd.DataFrame(val_pred1)
test_pred1=pd.DataFrame(test_pred1)

model2 = KNeighborsClassifier()
model2.fit(x_train,y_train)
val_pred2=model2.predict(x_val)
test_pred2=model2.predict(x_test)
val_pred2=pd.DataFrame(val_pred2)
test_pred2=pd.DataFrame(test_pred2)

In [129]:
x_val = pd.DataFrame(x_val)
x_test = pd.DataFrame(x_test)
df_val=pd.concat([x_val, val_pred1,val_pred2],axis=1)
df_test=pd.concat([x_test, test_pred1,test_pred2],axis=1)

model = LogisticRegression()
model.fit(df_val,y_val)
model.score(df_test,y_test)

(45, 6) (185,)


### Bagging

### Boosting

--------------------------------------------------------------------------------------------------------------------

In [1]:
#importing important packages
import pandas as pd
import numpy as np

#reading the dataset
df=pd.read_csv("data/train_u6lujuX_CVtuZ9i.csv")

#filling missing values
df['Gender'].fillna('Male', inplace=True)
df['Married'].fillna('No', inplace=True)
df['Dependents'].fillna('0', inplace=True)
df['Self_Employed'].fillna('No', inplace=True)
df['LoanAmount'].fillna(0, inplace=True)
df['Loan_Amount_Term'].fillna(0, inplace=True)
df['Credit_History'].fillna(0, inplace=True)

df.drop('Loan_ID', axis=1, inplace=True)

In [2]:
#split dataset into train and test
from sklearn.model_selection import train_test_split

train, test = train_test_split(df, test_size=0.3, random_state=0)

x_train=train.drop('Loan_Status',axis=1)
y_train=train['Loan_Status']

x_test=test.drop('Loan_Status',axis=1)
y_test=test['Loan_Status']

#create dummies
x_train=pd.get_dummies(x_train)
x_test=pd.get_dummies(x_test)

y_train = np.where(y_train=='Y', 1, 0)
y_test = np.where(y_test=='Y', 1, 0)

print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(429, 20) (429,) (185, 20) (185,)


#### Bagging meta-estimator

In [33]:
from sklearn.ensemble import BaggingClassifier
model = BaggingClassifier(tree.DecisionTreeClassifier(random_state=1))
model.fit(x_train, y_train)
model.score(x_test, y_test)

0.7513513513513513

In [36]:
from sklearn.ensemble import BaggingRegressor
model = BaggingRegressor(tree.DecisionTreeClassifier(random_state=1))
model.fit(x_train, y_train)
model.score(x_test, y_test)

0.04062042727538784

#### Random Forest

In [37]:
from sklearn.ensemble import RandomForestClassifier
model= RandomForestClassifier(random_state=1)
model.fit(x_train, y_train)
model.score(x_test,y_test)

0.7297297297297297

In [38]:
for i, j in sorted(zip(x_train.columns, model.feature_importances_)):
    print(i, j)

ApplicantIncome 0.20619915810055023
CoapplicantIncome 0.14023302896973128
Credit_History 0.14424378147061429
Dependents_0 0.022812759081104066
Dependents_1 0.011017420432025693
Dependents_2 0.015626281908906588
Dependents_3+ 0.015899769762191936
Education_Graduate 0.016446809179378307
Education_Not Graduate 0.020918235446052865
Gender_Female 0.013506339083008958
Gender_Male 0.009643762358149385
LoanAmount 0.17681775377669445
Loan_Amount_Term 0.039840002006671565
Married_No 0.0170681424311086
Married_Yes 0.028655495091033933
Property_Area_Rural 0.03288496917994647
Property_Area_Semiurban 0.022161855006493036
Property_Area_Urban 0.026975470377103638
Self_Employed_No 0.021349307176709527
Self_Employed_Yes 0.017699659162525166


In [39]:
from sklearn.ensemble import RandomForestRegressor
model= RandomForestRegressor()
model.fit(x_train, y_train)
model.score(x_test,y_test)

0.06308896693005561

#### AdaBoost

In [7]:
from sklearn.ensemble import AdaBoostClassifier
model = AdaBoostClassifier(random_state=1)
model.fit(x_train, y_train)
model.score(x_test,y_test)

0.7513513513513513

In [8]:
from sklearn.ensemble import AdaBoostRegressor
model = AdaBoostRegressor()
model.fit(x_train, y_train)
model.score(x_test,y_test)

0.15284203485259995

#### Gradient Boosting

In [18]:
from sklearn.ensemble import GradientBoostingClassifier
model= GradientBoostingClassifier(learning_rate=0.01,random_state=1)
model.fit(x_train, y_train)
model.score(x_test,y_test)

0.7837837837837838

In [19]:
from sklearn.ensemble import GradientBoostingRegressor
model= GradientBoostingRegressor()
model.fit(x_train, y_train)
model.score(x_test,y_test)

0.14115065271014282

#### XGBoost

In [7]:
import xgboost as xgb
model=xgb.XGBClassifier(random_state=1,learning_rate=0.01)
model.fit(x_train, y_train)
model.score(x_test,y_test)

  if diff:


0.7891891891891892

In [8]:
import xgboost as xgb
model=xgb.XGBRegressor()
model.fit(x_train, y_train)
model.score(x_test,y_test)

0.10637732935087396

#### Light GBM

In [11]:
import lightgbm as lgb
train_data=lgb.Dataset(x_train,label=y_train)
#define parameters
params = {'learning_rate':0.001}
model= lgb.train(params, train_data, 100) 
y_pred=model.predict(x_test)
for i in range(0,185):
    if y_pred[i]>=0.5: 
       y_pred[i]=1
    else: 
       y_pred[i]=0

In [13]:
import lightgbm as lgb
train_data=lgb.Dataset(x_train,label=y_train)
params = {'learning_rate':0.001}
model= lgb.train(params, train_data, 100)
from sklearn.metrics import mean_squared_error
rmse=mean_squared_error(y_pred,y_test)**0.5
print(rmse)

0.5250482603301106


#### CatBoost

In [22]:
x_train=train.drop('Loan_Status',axis=1)
x_test=test.drop('Loan_Status',axis=1)

In [23]:
x_train.head()

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
351,Male,No,0,Graduate,No,8750,4167.0,308.0,360.0,1.0,Rural
593,Male,Yes,0,Graduate,No,3859,3300.0,142.0,180.0,1.0,Rural
320,Male,Yes,0,Graduate,No,2400,2167.0,115.0,360.0,1.0,Semiurban
186,Male,Yes,1,Graduate,Yes,2178,0.0,66.0,300.0,0.0,Rural
557,Male,Yes,3+,Graduate,Yes,10139,0.0,260.0,360.0,1.0,Semiurban


In [26]:
categorical_features_indices = np.where(x_train.dtypes != np.float)[0]
print(categorical_features_indices)

[ 0  1  2  3  4  5 10]


In [27]:
from catboost import CatBoostClassifier
model=CatBoostClassifier()
categorical_features_indices = np.where(x_train.dtypes != np.float)[0]
model.fit(x_train,y_train,cat_features=categorical_features_indices, eval_set=(x_test, y_test))
model.score(x_test,y_test)

0:	learn: 0.6837884	test: 0.6825501	best: 0.6825501 (0)	total: 81.5ms	remaining: 1m 21s
1:	learn: 0.6733362	test: 0.6697444	best: 0.6697444 (1)	total: 139ms	remaining: 1m 9s
2:	learn: 0.6644604	test: 0.6604474	best: 0.6604474 (2)	total: 162ms	remaining: 53.7s
3:	learn: 0.6547385	test: 0.6502114	best: 0.6502114 (3)	total: 196ms	remaining: 48.9s
4:	learn: 0.6465910	test: 0.6412028	best: 0.6412028 (4)	total: 224ms	remaining: 44.5s
5:	learn: 0.6398984	test: 0.6335822	best: 0.6335822 (5)	total: 240ms	remaining: 39.7s
6:	learn: 0.6334073	test: 0.6264964	best: 0.6264964 (6)	total: 254ms	remaining: 36s
7:	learn: 0.6271836	test: 0.6196943	best: 0.6196943 (7)	total: 273ms	remaining: 33.9s
8:	learn: 0.6216131	test: 0.6133325	best: 0.6133325 (8)	total: 304ms	remaining: 33.5s
9:	learn: 0.6164434	test: 0.6073516	best: 0.6073516 (9)	total: 320ms	remaining: 31.7s
10:	learn: 0.6096109	test: 0.6008197	best: 0.6008197 (10)	total: 349ms	remaining: 31.3s
11:	learn: 0.6038913	test: 0.5937371	best: 0.5937371

97:	learn: 0.4780884	test: 0.4959712	best: 0.4959712 (97)	total: 3.19s	remaining: 29.3s
98:	learn: 0.4770673	test: 0.4957639	best: 0.4957639 (98)	total: 3.23s	remaining: 29.4s
99:	learn: 0.4765022	test: 0.4959144	best: 0.4957639 (98)	total: 3.26s	remaining: 29.4s
100:	learn: 0.4761468	test: 0.4955838	best: 0.4955838 (100)	total: 3.28s	remaining: 29.2s
101:	learn: 0.4755722	test: 0.4955084	best: 0.4955084 (101)	total: 3.29s	remaining: 29s
102:	learn: 0.4742997	test: 0.4946791	best: 0.4946791 (102)	total: 3.31s	remaining: 28.9s
103:	learn: 0.4735963	test: 0.4945574	best: 0.4945574 (103)	total: 3.34s	remaining: 28.8s
104:	learn: 0.4708967	test: 0.4939792	best: 0.4939792 (104)	total: 3.38s	remaining: 28.8s
105:	learn: 0.4706998	test: 0.4939713	best: 0.4939713 (105)	total: 3.4s	remaining: 28.6s
106:	learn: 0.4693903	test: 0.4935094	best: 0.4935094 (106)	total: 3.44s	remaining: 28.7s
107:	learn: 0.4682609	test: 0.4933583	best: 0.4933583 (107)	total: 3.46s	remaining: 28.6s
108:	learn: 0.46818

190:	learn: 0.4133501	test: 0.4942593	best: 0.4901148 (124)	total: 5.53s	remaining: 23.4s
191:	learn: 0.4132848	test: 0.4941890	best: 0.4901148 (124)	total: 5.55s	remaining: 23.4s
192:	learn: 0.4115833	test: 0.4934211	best: 0.4901148 (124)	total: 5.62s	remaining: 23.5s
193:	learn: 0.4110633	test: 0.4931792	best: 0.4901148 (124)	total: 5.68s	remaining: 23.6s
194:	learn: 0.4106996	test: 0.4936117	best: 0.4901148 (124)	total: 5.74s	remaining: 23.7s
195:	learn: 0.4097725	test: 0.4926622	best: 0.4901148 (124)	total: 5.81s	remaining: 23.8s
196:	learn: 0.4091379	test: 0.4925683	best: 0.4901148 (124)	total: 5.84s	remaining: 23.8s
197:	learn: 0.4083970	test: 0.4927771	best: 0.4901148 (124)	total: 5.87s	remaining: 23.8s
198:	learn: 0.4069303	test: 0.4923612	best: 0.4901148 (124)	total: 5.89s	remaining: 23.7s
199:	learn: 0.4057510	test: 0.4925029	best: 0.4901148 (124)	total: 5.92s	remaining: 23.7s
200:	learn: 0.4053815	test: 0.4926785	best: 0.4901148 (124)	total: 5.96s	remaining: 23.7s
201:	learn

290:	learn: 0.3354897	test: 0.4983228	best: 0.4901148 (124)	total: 11.2s	remaining: 27.3s
291:	learn: 0.3344211	test: 0.4989958	best: 0.4901148 (124)	total: 11.2s	remaining: 27.3s
292:	learn: 0.3336485	test: 0.4992694	best: 0.4901148 (124)	total: 11.3s	remaining: 27.2s
293:	learn: 0.3333526	test: 0.4992909	best: 0.4901148 (124)	total: 11.3s	remaining: 27.1s
294:	learn: 0.3328564	test: 0.4995171	best: 0.4901148 (124)	total: 11.3s	remaining: 27.1s
295:	learn: 0.3321523	test: 0.4998136	best: 0.4901148 (124)	total: 11.3s	remaining: 27s
296:	learn: 0.3313303	test: 0.4996895	best: 0.4901148 (124)	total: 11.4s	remaining: 26.9s
297:	learn: 0.3307184	test: 0.4997103	best: 0.4901148 (124)	total: 11.4s	remaining: 26.8s
298:	learn: 0.3302710	test: 0.5001509	best: 0.4901148 (124)	total: 11.4s	remaining: 26.8s
299:	learn: 0.3299720	test: 0.5003613	best: 0.4901148 (124)	total: 11.4s	remaining: 26.7s
300:	learn: 0.3291567	test: 0.5002202	best: 0.4901148 (124)	total: 11.5s	remaining: 26.6s
301:	learn: 

389:	learn: 0.2851814	test: 0.5083960	best: 0.4901148 (124)	total: 13.7s	remaining: 21.4s
390:	learn: 0.2844979	test: 0.5084580	best: 0.4901148 (124)	total: 13.7s	remaining: 21.3s
391:	learn: 0.2841554	test: 0.5084696	best: 0.4901148 (124)	total: 13.7s	remaining: 21.3s
392:	learn: 0.2837937	test: 0.5082015	best: 0.4901148 (124)	total: 13.7s	remaining: 21.2s
393:	learn: 0.2833553	test: 0.5084295	best: 0.4901148 (124)	total: 13.8s	remaining: 21.2s
394:	learn: 0.2830648	test: 0.5084236	best: 0.4901148 (124)	total: 13.8s	remaining: 21.1s
395:	learn: 0.2828065	test: 0.5086146	best: 0.4901148 (124)	total: 13.8s	remaining: 21.1s
396:	learn: 0.2820609	test: 0.5087530	best: 0.4901148 (124)	total: 13.8s	remaining: 21s
397:	learn: 0.2817541	test: 0.5085562	best: 0.4901148 (124)	total: 13.8s	remaining: 20.9s
398:	learn: 0.2812997	test: 0.5088784	best: 0.4901148 (124)	total: 13.9s	remaining: 20.9s
399:	learn: 0.2811469	test: 0.5088443	best: 0.4901148 (124)	total: 13.9s	remaining: 20.9s
400:	learn: 

482:	learn: 0.2447462	test: 0.5209339	best: 0.4901148 (124)	total: 17.2s	remaining: 18.4s
483:	learn: 0.2443204	test: 0.5214303	best: 0.4901148 (124)	total: 17.3s	remaining: 18.4s
484:	learn: 0.2439188	test: 0.5217494	best: 0.4901148 (124)	total: 17.3s	remaining: 18.3s
485:	learn: 0.2435574	test: 0.5217401	best: 0.4901148 (124)	total: 17.3s	remaining: 18.3s
486:	learn: 0.2431057	test: 0.5213968	best: 0.4901148 (124)	total: 17.3s	remaining: 18.2s
487:	learn: 0.2428526	test: 0.5210034	best: 0.4901148 (124)	total: 17.3s	remaining: 18.2s
488:	learn: 0.2426630	test: 0.5208987	best: 0.4901148 (124)	total: 17.4s	remaining: 18.1s
489:	learn: 0.2425569	test: 0.5211285	best: 0.4901148 (124)	total: 17.4s	remaining: 18.1s
490:	learn: 0.2419327	test: 0.5213192	best: 0.4901148 (124)	total: 17.4s	remaining: 18s
491:	learn: 0.2417182	test: 0.5214405	best: 0.4901148 (124)	total: 17.4s	remaining: 18s
492:	learn: 0.2409790	test: 0.5217509	best: 0.4901148 (124)	total: 17.5s	remaining: 18s
493:	learn: 0.24

579:	learn: 0.2110340	test: 0.5306735	best: 0.4901148 (124)	total: 19.7s	remaining: 14.3s
580:	learn: 0.2109179	test: 0.5304280	best: 0.4901148 (124)	total: 19.7s	remaining: 14.2s
581:	learn: 0.2105376	test: 0.5306356	best: 0.4901148 (124)	total: 19.7s	remaining: 14.2s
582:	learn: 0.2101243	test: 0.5304952	best: 0.4901148 (124)	total: 19.8s	remaining: 14.1s
583:	learn: 0.2095914	test: 0.5305404	best: 0.4901148 (124)	total: 19.8s	remaining: 14.1s
584:	learn: 0.2095461	test: 0.5304932	best: 0.4901148 (124)	total: 19.8s	remaining: 14.1s
585:	learn: 0.2091778	test: 0.5303654	best: 0.4901148 (124)	total: 19.9s	remaining: 14s
586:	learn: 0.2090765	test: 0.5303419	best: 0.4901148 (124)	total: 19.9s	remaining: 14s
587:	learn: 0.2087508	test: 0.5306583	best: 0.4901148 (124)	total: 20s	remaining: 14s
588:	learn: 0.2085799	test: 0.5307115	best: 0.4901148 (124)	total: 20.1s	remaining: 14s
589:	learn: 0.2083547	test: 0.5309414	best: 0.4901148 (124)	total: 20.1s	remaining: 14s
590:	learn: 0.2081382	

673:	learn: 0.1843831	test: 0.5404312	best: 0.4901148 (124)	total: 23.4s	remaining: 11.3s
674:	learn: 0.1841087	test: 0.5396397	best: 0.4901148 (124)	total: 23.4s	remaining: 11.3s
675:	learn: 0.1838477	test: 0.5399854	best: 0.4901148 (124)	total: 23.5s	remaining: 11.2s
676:	learn: 0.1836555	test: 0.5399991	best: 0.4901148 (124)	total: 23.5s	remaining: 11.2s
677:	learn: 0.1833910	test: 0.5403428	best: 0.4901148 (124)	total: 23.6s	remaining: 11.2s
678:	learn: 0.1832718	test: 0.5401000	best: 0.4901148 (124)	total: 23.7s	remaining: 11.2s
679:	learn: 0.1831492	test: 0.5402768	best: 0.4901148 (124)	total: 23.8s	remaining: 11.2s
680:	learn: 0.1828324	test: 0.5401703	best: 0.4901148 (124)	total: 23.9s	remaining: 11.2s
681:	learn: 0.1824694	test: 0.5400754	best: 0.4901148 (124)	total: 24s	remaining: 11.2s
682:	learn: 0.1823230	test: 0.5398517	best: 0.4901148 (124)	total: 24.1s	remaining: 11.2s
683:	learn: 0.1819215	test: 0.5398145	best: 0.4901148 (124)	total: 24.2s	remaining: 11.2s
684:	learn: 

772:	learn: 0.1613962	test: 0.5477879	best: 0.4901148 (124)	total: 26.9s	remaining: 7.91s
773:	learn: 0.1610858	test: 0.5479235	best: 0.4901148 (124)	total: 27s	remaining: 7.87s
774:	learn: 0.1609363	test: 0.5477023	best: 0.4901148 (124)	total: 27s	remaining: 7.83s
775:	learn: 0.1605468	test: 0.5481229	best: 0.4901148 (124)	total: 27s	remaining: 7.8s
776:	learn: 0.1601799	test: 0.5483797	best: 0.4901148 (124)	total: 27s	remaining: 7.76s
777:	learn: 0.1600658	test: 0.5485861	best: 0.4901148 (124)	total: 27.1s	remaining: 7.72s
778:	learn: 0.1599554	test: 0.5484704	best: 0.4901148 (124)	total: 27.1s	remaining: 7.68s
779:	learn: 0.1598564	test: 0.5486404	best: 0.4901148 (124)	total: 27.1s	remaining: 7.64s
780:	learn: 0.1595437	test: 0.5484999	best: 0.4901148 (124)	total: 27.1s	remaining: 7.61s
781:	learn: 0.1593101	test: 0.5483455	best: 0.4901148 (124)	total: 27.2s	remaining: 7.57s
782:	learn: 0.1591210	test: 0.5483283	best: 0.4901148 (124)	total: 27.2s	remaining: 7.53s
783:	learn: 0.15885

871:	learn: 0.1441965	test: 0.5519489	best: 0.4901148 (124)	total: 29.3s	remaining: 4.3s
872:	learn: 0.1440886	test: 0.5519998	best: 0.4901148 (124)	total: 29.4s	remaining: 4.27s
873:	learn: 0.1440466	test: 0.5520153	best: 0.4901148 (124)	total: 29.4s	remaining: 4.24s
874:	learn: 0.1438656	test: 0.5517199	best: 0.4901148 (124)	total: 29.4s	remaining: 4.2s
875:	learn: 0.1438132	test: 0.5517063	best: 0.4901148 (124)	total: 29.4s	remaining: 4.17s
876:	learn: 0.1436158	test: 0.5517998	best: 0.4901148 (124)	total: 29.4s	remaining: 4.13s
877:	learn: 0.1434269	test: 0.5515874	best: 0.4901148 (124)	total: 29.5s	remaining: 4.09s
878:	learn: 0.1431937	test: 0.5519380	best: 0.4901148 (124)	total: 29.5s	remaining: 4.06s
879:	learn: 0.1430231	test: 0.5521143	best: 0.4901148 (124)	total: 29.5s	remaining: 4.03s
880:	learn: 0.1428787	test: 0.5521241	best: 0.4901148 (124)	total: 29.5s	remaining: 3.99s
881:	learn: 0.1427691	test: 0.5520732	best: 0.4901148 (124)	total: 29.6s	remaining: 3.96s
882:	learn: 

967:	learn: 0.1282636	test: 0.5604622	best: 0.4901148 (124)	total: 31.7s	remaining: 1.05s
968:	learn: 0.1281354	test: 0.5603648	best: 0.4901148 (124)	total: 31.7s	remaining: 1.01s
969:	learn: 0.1279950	test: 0.5608726	best: 0.4901148 (124)	total: 31.8s	remaining: 983ms
970:	learn: 0.1278942	test: 0.5609685	best: 0.4901148 (124)	total: 31.8s	remaining: 950ms
971:	learn: 0.1278038	test: 0.5606926	best: 0.4901148 (124)	total: 31.8s	remaining: 917ms
972:	learn: 0.1277147	test: 0.5607246	best: 0.4901148 (124)	total: 31.9s	remaining: 885ms
973:	learn: 0.1275713	test: 0.5609766	best: 0.4901148 (124)	total: 32s	remaining: 854ms
974:	learn: 0.1273000	test: 0.5610740	best: 0.4901148 (124)	total: 32.1s	remaining: 823ms
975:	learn: 0.1272243	test: 0.5611721	best: 0.4901148 (124)	total: 32.1s	remaining: 790ms
976:	learn: 0.1270517	test: 0.5615232	best: 0.4901148 (124)	total: 32.2s	remaining: 757ms
977:	learn: 0.1268886	test: 0.5614117	best: 0.4901148 (124)	total: 32.2s	remaining: 724ms
978:	learn: 

0.7891891891891892

In [28]:
from catboost import CatBoostRegressor
model=CatBoostRegressor()
categorical_features_indices = np.where(df.dtypes != np.float)[0]
model.fit(x_train,y_train,cat_features=([ 0,  1, 2, 3, 4, 10]),eval_set=(x_test, y_test))
model.score(x_test,y_test)

0:	learn: 0.8030489	test: 0.8341960	best: 0.8341960 (0)	total: 15.4ms	remaining: 15.3s
1:	learn: 0.7868970	test: 0.8177468	best: 0.8177468 (1)	total: 62.7ms	remaining: 31.3s
2:	learn: 0.7717693	test: 0.8019591	best: 0.8019591 (2)	total: 71.1ms	remaining: 23.6s
3:	learn: 0.7576954	test: 0.7871626	best: 0.7871626 (3)	total: 94.9ms	remaining: 23.6s
4:	learn: 0.7430431	test: 0.7715504	best: 0.7715504 (4)	total: 126ms	remaining: 25.2s
5:	learn: 0.7293368	test: 0.7575065	best: 0.7575065 (5)	total: 172ms	remaining: 28.5s
6:	learn: 0.7152969	test: 0.7427121	best: 0.7427121 (6)	total: 195ms	remaining: 27.7s
7:	learn: 0.7029930	test: 0.7297685	best: 0.7297685 (7)	total: 209ms	remaining: 26s
8:	learn: 0.6896986	test: 0.7155354	best: 0.7155354 (8)	total: 217ms	remaining: 23.9s
9:	learn: 0.6782278	test: 0.7033718	best: 0.7033718 (9)	total: 221ms	remaining: 21.9s
10:	learn: 0.6672501	test: 0.6916940	best: 0.6916940 (10)	total: 228ms	remaining: 20.5s
11:	learn: 0.6567240	test: 0.6805296	best: 0.68052

97:	learn: 0.4148852	test: 0.4115061	best: 0.4115061 (97)	total: 1.55s	remaining: 14.3s
98:	learn: 0.4146042	test: 0.4110898	best: 0.4110898 (98)	total: 1.56s	remaining: 14.2s
99:	learn: 0.4143554	test: 0.4108184	best: 0.4108184 (99)	total: 1.57s	remaining: 14.2s
100:	learn: 0.4142707	test: 0.4106367	best: 0.4106367 (100)	total: 1.58s	remaining: 14.1s
101:	learn: 0.4141911	test: 0.4104628	best: 0.4104628 (101)	total: 1.59s	remaining: 14s
102:	learn: 0.4137849	test: 0.4101670	best: 0.4101670 (102)	total: 1.62s	remaining: 14.1s
103:	learn: 0.4131554	test: 0.4097579	best: 0.4097579 (103)	total: 1.64s	remaining: 14.1s
104:	learn: 0.4130890	test: 0.4096076	best: 0.4096076 (104)	total: 1.65s	remaining: 14s
105:	learn: 0.4128390	test: 0.4093263	best: 0.4093263 (105)	total: 1.66s	remaining: 14s
106:	learn: 0.4126071	test: 0.4090632	best: 0.4090632 (106)	total: 1.67s	remaining: 14s
107:	learn: 0.4125512	test: 0.4089291	best: 0.4089291 (107)	total: 1.68s	remaining: 13.9s
108:	learn: 0.4120286	te

195:	learn: 0.3907655	test: 0.4037998	best: 0.4034811 (194)	total: 3.19s	remaining: 13.1s
196:	learn: 0.3904442	test: 0.4038725	best: 0.4034811 (194)	total: 3.22s	remaining: 13.1s
197:	learn: 0.3900906	test: 0.4038574	best: 0.4034811 (194)	total: 3.24s	remaining: 13.1s
198:	learn: 0.3898585	test: 0.4037899	best: 0.4034811 (194)	total: 3.26s	remaining: 13.1s
199:	learn: 0.3894281	test: 0.4039347	best: 0.4034811 (194)	total: 3.29s	remaining: 13.1s
200:	learn: 0.3891560	test: 0.4039601	best: 0.4034811 (194)	total: 3.31s	remaining: 13.1s
201:	learn: 0.3889926	test: 0.4039398	best: 0.4034811 (194)	total: 3.32s	remaining: 13.1s
202:	learn: 0.3887654	test: 0.4039124	best: 0.4034811 (194)	total: 3.34s	remaining: 13.1s
203:	learn: 0.3886684	test: 0.4039476	best: 0.4034811 (194)	total: 3.36s	remaining: 13.1s
204:	learn: 0.3884299	test: 0.4039187	best: 0.4034811 (194)	total: 3.38s	remaining: 13.1s
205:	learn: 0.3877578	test: 0.4040098	best: 0.4034811 (194)	total: 3.42s	remaining: 13.2s
206:	learn

294:	learn: 0.3558534	test: 0.4037230	best: 0.4033697 (226)	total: 5.33s	remaining: 12.7s
295:	learn: 0.3551362	test: 0.4037797	best: 0.4033697 (226)	total: 5.36s	remaining: 12.8s
296:	learn: 0.3543011	test: 0.4038167	best: 0.4033697 (226)	total: 5.38s	remaining: 12.7s
297:	learn: 0.3540875	test: 0.4039932	best: 0.4033697 (226)	total: 5.41s	remaining: 12.7s
298:	learn: 0.3539552	test: 0.4038413	best: 0.4033697 (226)	total: 5.42s	remaining: 12.7s
299:	learn: 0.3536746	test: 0.4037909	best: 0.4033697 (226)	total: 5.45s	remaining: 12.7s
300:	learn: 0.3532092	test: 0.4036999	best: 0.4033697 (226)	total: 5.47s	remaining: 12.7s
301:	learn: 0.3528182	test: 0.4039250	best: 0.4033697 (226)	total: 5.48s	remaining: 12.7s
302:	learn: 0.3526173	test: 0.4039340	best: 0.4033697 (226)	total: 5.5s	remaining: 12.7s
303:	learn: 0.3518945	test: 0.4040424	best: 0.4033697 (226)	total: 5.52s	remaining: 12.6s
304:	learn: 0.3514305	test: 0.4040637	best: 0.4033697 (226)	total: 5.55s	remaining: 12.7s
305:	learn:

392:	learn: 0.3282755	test: 0.4056909	best: 0.4033697 (226)	total: 7.45s	remaining: 11.5s
393:	learn: 0.3278650	test: 0.4056251	best: 0.4033697 (226)	total: 7.48s	remaining: 11.5s
394:	learn: 0.3275272	test: 0.4056792	best: 0.4033697 (226)	total: 7.52s	remaining: 11.5s
395:	learn: 0.3271734	test: 0.4055429	best: 0.4033697 (226)	total: 7.55s	remaining: 11.5s
396:	learn: 0.3269920	test: 0.4056492	best: 0.4033697 (226)	total: 7.59s	remaining: 11.5s
397:	learn: 0.3264709	test: 0.4057737	best: 0.4033697 (226)	total: 7.63s	remaining: 11.5s
398:	learn: 0.3263694	test: 0.4058512	best: 0.4033697 (226)	total: 7.67s	remaining: 11.6s
399:	learn: 0.3260954	test: 0.4058079	best: 0.4033697 (226)	total: 7.71s	remaining: 11.6s
400:	learn: 0.3253269	test: 0.4058850	best: 0.4033697 (226)	total: 7.73s	remaining: 11.5s
401:	learn: 0.3251989	test: 0.4058427	best: 0.4033697 (226)	total: 7.75s	remaining: 11.5s
402:	learn: 0.3249633	test: 0.4057887	best: 0.4033697 (226)	total: 7.77s	remaining: 11.5s
403:	learn

487:	learn: 0.3054029	test: 0.4062911	best: 0.4033697 (226)	total: 9.6s	remaining: 10.1s
488:	learn: 0.3048926	test: 0.4063639	best: 0.4033697 (226)	total: 9.62s	remaining: 10.1s
489:	learn: 0.3046211	test: 0.4062215	best: 0.4033697 (226)	total: 9.64s	remaining: 10s
490:	learn: 0.3043981	test: 0.4063145	best: 0.4033697 (226)	total: 9.66s	remaining: 10s
491:	learn: 0.3041314	test: 0.4062480	best: 0.4033697 (226)	total: 9.68s	remaining: 10s
492:	learn: 0.3040717	test: 0.4062934	best: 0.4033697 (226)	total: 9.7s	remaining: 9.98s
493:	learn: 0.3037529	test: 0.4062721	best: 0.4033697 (226)	total: 9.72s	remaining: 9.96s
494:	learn: 0.3036824	test: 0.4063202	best: 0.4033697 (226)	total: 9.74s	remaining: 9.94s
495:	learn: 0.3034561	test: 0.4062251	best: 0.4033697 (226)	total: 9.76s	remaining: 9.92s
496:	learn: 0.3031746	test: 0.4063297	best: 0.4033697 (226)	total: 9.78s	remaining: 9.89s
497:	learn: 0.3030200	test: 0.4063976	best: 0.4033697 (226)	total: 9.8s	remaining: 9.88s
498:	learn: 0.30288

587:	learn: 0.2852745	test: 0.4079764	best: 0.4033697 (226)	total: 12.3s	remaining: 8.63s
588:	learn: 0.2849701	test: 0.4081812	best: 0.4033697 (226)	total: 12.3s	remaining: 8.62s
589:	learn: 0.2848473	test: 0.4081755	best: 0.4033697 (226)	total: 12.4s	remaining: 8.59s
590:	learn: 0.2846863	test: 0.4081855	best: 0.4033697 (226)	total: 12.4s	remaining: 8.57s
591:	learn: 0.2845345	test: 0.4081481	best: 0.4033697 (226)	total: 12.4s	remaining: 8.55s
592:	learn: 0.2844898	test: 0.4081926	best: 0.4033697 (226)	total: 12.4s	remaining: 8.53s
593:	learn: 0.2843974	test: 0.4082787	best: 0.4033697 (226)	total: 12.5s	remaining: 8.51s
594:	learn: 0.2843035	test: 0.4082808	best: 0.4033697 (226)	total: 12.5s	remaining: 8.49s
595:	learn: 0.2842212	test: 0.4082090	best: 0.4033697 (226)	total: 12.5s	remaining: 8.47s
596:	learn: 0.2840371	test: 0.4082241	best: 0.4033697 (226)	total: 12.5s	remaining: 8.45s
597:	learn: 0.2837705	test: 0.4082885	best: 0.4033697 (226)	total: 12.5s	remaining: 8.43s
598:	learn

685:	learn: 0.2690983	test: 0.4090214	best: 0.4033697 (226)	total: 14.5s	remaining: 6.62s
686:	learn: 0.2690478	test: 0.4090737	best: 0.4033697 (226)	total: 14.5s	remaining: 6.6s
687:	learn: 0.2688451	test: 0.4091959	best: 0.4033697 (226)	total: 14.5s	remaining: 6.58s
688:	learn: 0.2686473	test: 0.4091916	best: 0.4033697 (226)	total: 14.5s	remaining: 6.56s
689:	learn: 0.2685549	test: 0.4091324	best: 0.4033697 (226)	total: 14.6s	remaining: 6.54s
690:	learn: 0.2685281	test: 0.4091136	best: 0.4033697 (226)	total: 14.6s	remaining: 6.52s
691:	learn: 0.2684184	test: 0.4090966	best: 0.4033697 (226)	total: 14.6s	remaining: 6.5s
692:	learn: 0.2683812	test: 0.4091242	best: 0.4033697 (226)	total: 14.6s	remaining: 6.47s
693:	learn: 0.2682629	test: 0.4091358	best: 0.4033697 (226)	total: 14.6s	remaining: 6.45s
694:	learn: 0.2681587	test: 0.4091480	best: 0.4033697 (226)	total: 14.7s	remaining: 6.43s
695:	learn: 0.2680850	test: 0.4092272	best: 0.4033697 (226)	total: 14.7s	remaining: 6.41s
696:	learn: 

785:	learn: 0.2551065	test: 0.4099620	best: 0.4033697 (226)	total: 16.6s	remaining: 4.51s
786:	learn: 0.2549966	test: 0.4100420	best: 0.4033697 (226)	total: 16.6s	remaining: 4.49s
787:	learn: 0.2547930	test: 0.4100305	best: 0.4033697 (226)	total: 16.6s	remaining: 4.47s
788:	learn: 0.2546828	test: 0.4100332	best: 0.4033697 (226)	total: 16.6s	remaining: 4.45s
789:	learn: 0.2544489	test: 0.4099914	best: 0.4033697 (226)	total: 16.6s	remaining: 4.43s
790:	learn: 0.2543264	test: 0.4099944	best: 0.4033697 (226)	total: 16.7s	remaining: 4.4s
791:	learn: 0.2542214	test: 0.4099920	best: 0.4033697 (226)	total: 16.7s	remaining: 4.38s
792:	learn: 0.2541680	test: 0.4099912	best: 0.4033697 (226)	total: 16.7s	remaining: 4.36s
793:	learn: 0.2540195	test: 0.4100318	best: 0.4033697 (226)	total: 16.7s	remaining: 4.34s
794:	learn: 0.2538088	test: 0.4101633	best: 0.4033697 (226)	total: 16.7s	remaining: 4.32s
795:	learn: 0.2537552	test: 0.4101594	best: 0.4033697 (226)	total: 16.8s	remaining: 4.3s
796:	learn: 

885:	learn: 0.2412678	test: 0.4115211	best: 0.4033697 (226)	total: 21.7s	remaining: 2.79s
886:	learn: 0.2411487	test: 0.4114816	best: 0.4033697 (226)	total: 21.7s	remaining: 2.77s
887:	learn: 0.2410348	test: 0.4115187	best: 0.4033697 (226)	total: 21.8s	remaining: 2.75s
888:	learn: 0.2409638	test: 0.4114973	best: 0.4033697 (226)	total: 21.8s	remaining: 2.72s
889:	learn: 0.2408429	test: 0.4114697	best: 0.4033697 (226)	total: 21.8s	remaining: 2.7s
890:	learn: 0.2408105	test: 0.4114881	best: 0.4033697 (226)	total: 21.8s	remaining: 2.67s
891:	learn: 0.2407554	test: 0.4114704	best: 0.4033697 (226)	total: 21.9s	remaining: 2.65s
892:	learn: 0.2406052	test: 0.4114385	best: 0.4033697 (226)	total: 21.9s	remaining: 2.62s
893:	learn: 0.2405639	test: 0.4115273	best: 0.4033697 (226)	total: 21.9s	remaining: 2.6s
894:	learn: 0.2403508	test: 0.4115092	best: 0.4033697 (226)	total: 21.9s	remaining: 2.57s
895:	learn: 0.2403165	test: 0.4115820	best: 0.4033697 (226)	total: 22s	remaining: 2.55s
896:	learn: 0.

983:	learn: 0.2313120	test: 0.4134952	best: 0.4033697 (226)	total: 23.8s	remaining: 387ms
984:	learn: 0.2312492	test: 0.4134489	best: 0.4033697 (226)	total: 23.9s	remaining: 363ms
985:	learn: 0.2312270	test: 0.4134507	best: 0.4033697 (226)	total: 23.9s	remaining: 339ms
986:	learn: 0.2311431	test: 0.4133954	best: 0.4033697 (226)	total: 23.9s	remaining: 315ms
987:	learn: 0.2310413	test: 0.4133825	best: 0.4033697 (226)	total: 23.9s	remaining: 291ms
988:	learn: 0.2309079	test: 0.4133129	best: 0.4033697 (226)	total: 23.9s	remaining: 266ms
989:	learn: 0.2308599	test: 0.4133424	best: 0.4033697 (226)	total: 24s	remaining: 242ms
990:	learn: 0.2308497	test: 0.4133638	best: 0.4033697 (226)	total: 24s	remaining: 218ms
991:	learn: 0.2308209	test: 0.4133808	best: 0.4033697 (226)	total: 24s	remaining: 194ms
992:	learn: 0.2306568	test: 0.4133449	best: 0.4033697 (226)	total: 24s	remaining: 169ms
993:	learn: 0.2304716	test: 0.4134612	best: 0.4033697 (226)	total: 24s	remaining: 145ms
994:	learn: 0.230418

0.4033696885474043