In [1]:
import pandas as pd
import numpy as np

In [2]:
data_por = pd.read_csv("student-por.csv", sep = ';')

In [3]:
pd.set_option('display.max_columns', None)

### categorical transform

In [4]:
from sklearn.preprocessing import LabelEncoder

In [5]:
def categorical_transform(df):
    column_name = list(df.columns)
    transform_dic = {}
    for i in range(len(column_name)):
        labelencoder = LabelEncoder()
        if type(df[column_name[i]][1]) == str:
            df[column_name[i]] = labelencoder.fit_transform(df[column_name[i]])
            transform_dic[column_name[i]] = list(labelencoder.classes_)
    return df, transform_dic

In [6]:
data_por_2 = data_por.copy()
data_por_2, categorical_dic = categorical_transform(data_por_2)

In [7]:
data_por_2.head()

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,0,0,18,1,0,0,4,4,0,4,0,1,2,2,0,1,0,0,0,1,1,0,0,4,3,4,1,1,3,4,0,11,11
1,0,0,17,1,0,1,1,1,0,2,0,0,1,2,0,0,1,0,0,0,1,1,0,5,3,3,1,1,3,2,9,11,11
2,0,0,15,1,1,1,1,1,0,2,2,1,1,2,0,1,0,0,0,1,1,1,0,4,3,2,2,3,3,6,12,13,12
3,0,0,15,1,0,1,4,2,1,3,1,1,1,3,0,0,1,0,1,1,1,1,1,3,2,2,1,1,5,0,14,14,14
4,0,0,16,1,0,1,3,3,2,2,1,0,1,2,0,0,1,0,0,1,1,0,0,4,3,2,1,2,5,0,11,13,13


### pass or fail(based on result*60%)

In [8]:
def result_to_pass_fail(data, total_mark, pass_percentage):
    pass_fail = {}
    if type(data) == pd.DataFrame:
        for i in range(len(data)):
            result_sum = sum(data.iloc[i])
            if result_sum >= total_mark*pass_percentage:
                pass_fail[i] = "1"
            else:
                pass_fail[i] = "0"
    else:
        for i in range(len(data)):
            if data[i] >=  total_mark*pass_percentage:
                pass_fail[i] = "1"
            else:
                pass_fail[i] = "0"
    return pass_fail

In [9]:
total_result = data_por_2[["G1", "G2", "G3"]]
por_pass_fail = result_to_pass_fail(total_result, 60, 0.6)

In [10]:
data_por_2['pass_fail'] = por_pass_fail.values()

### run  model

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score

In [12]:
data_por_3 = data_por_2.copy()
x = data_por_3.drop(columns = ["G1", "G2", "G3", "pass_fail"])
y = data_por_3["pass_fail"]
train_x, test_x, train_y, test_y = train_test_split(x, y, train_size = 0.8, random_state = 1039)

In [13]:
rd_classifier = RandomForestClassifier()
rd_classifier.fit(train_x, train_y)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [14]:
pred_y = rd_classifier.predict(test_x)
print(classification_report(pred_y, test_y))
print(confusion_matrix(test_y, pred_y))

              precision    recall  f1-score   support

           0       0.79      0.74      0.76        72
           1       0.70      0.76      0.73        58

    accuracy                           0.75       130
   macro avg       0.74      0.75      0.74       130
weighted avg       0.75      0.75      0.75       130

[[53 14]
 [19 44]]


### based on random forest future importance

In [15]:
importances = rd_classifier.feature_importances_
forest_importances = pd.Series(importances, index=x.columns)
print(forest_importances)

school        0.033561
sex           0.021950
age           0.047102
address       0.027643
famsize       0.015131
Pstatus       0.010409
Medu          0.045969
Fedu          0.044137
Mjob          0.044858
Fjob          0.031945
reason        0.041383
guardian      0.023811
traveltime    0.031295
studytime     0.044737
failures      0.062260
schoolsup     0.014000
famsup        0.018335
paid          0.011040
activities    0.018790
nursery       0.012350
higher        0.037678
internet      0.016605
romantic      0.016465
famrel        0.037216
freetime      0.039527
goout         0.045109
Dalc          0.042804
Walc          0.048426
health        0.049522
absences      0.065943
dtype: float64


In [16]:
choosing_feature = []
feature_name = list(x.columns)
for i in range(len(forest_importances)):
    if forest_importances[i]>= 0.03:
        choosing_feature.append(feature_name[i])

In [17]:
data_por_4 = data_por_3[choosing_feature]
x_new = data_por_4
y_new = data_por_3["pass_fail"]
train_x_new, test_x_new, train_y_new, test_y_new = train_test_split(x_new, y_new, train_size = 0.8, random_state = 1039)

In [18]:
rd_new_classifier = RandomForestClassifier()
rd_new_classifier.fit(train_x_new, train_y_new)
pred_y_new = rd_new_classifier.predict(test_x_new)
print(classification_report(pred_y_new, test_y_new))
print(confusion_matrix(test_y_new, pred_y_new))

              precision    recall  f1-score   support

           0       0.78      0.70      0.74        74
           1       0.65      0.73      0.69        56

    accuracy                           0.72       130
   macro avg       0.71      0.72      0.71       130
weighted avg       0.72      0.72      0.72       130

[[52 15]
 [22 41]]


### onehot_encoding and result

In [99]:
data_por_3 = data_por.copy()
data_pre = data_por_3[['Mjob','Fjob','reason']]

In [100]:
data_onehot = pd.DataFrame(pd.get_dummies(data_pre)) 
data_por_3 = data_por_3.drop(columns = ['Mjob','Fjob','reason'])
data_por_3 = pd.concat([data_por_3,data_onehot], axis = 1)

In [101]:
data_por_3

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3,Mjob_at_home,Mjob_health,Mjob_other,Mjob_services,Mjob_teacher,Fjob_at_home,Fjob_health,Fjob_other,Fjob_services,Fjob_teacher,reason_course,reason_home,reason_other,reason_reputation
0,GP,F,18,U,GT3,A,4,4,mother,2,2,0,yes,no,no,no,yes,yes,no,no,4,3,4,1,1,3,4,0,11,11,1,0,0,0,0,0,0,0,0,1,1,0,0,0
1,GP,F,17,U,GT3,T,1,1,father,1,2,0,no,yes,no,no,no,yes,yes,no,5,3,3,1,1,3,2,9,11,11,1,0,0,0,0,0,0,1,0,0,1,0,0,0
2,GP,F,15,U,LE3,T,1,1,mother,1,2,0,yes,no,no,no,yes,yes,yes,no,4,3,2,2,3,3,6,12,13,12,1,0,0,0,0,0,0,1,0,0,0,0,1,0
3,GP,F,15,U,GT3,T,4,2,mother,1,3,0,no,yes,no,yes,yes,yes,yes,yes,3,2,2,1,1,5,0,14,14,14,0,1,0,0,0,0,0,0,1,0,0,1,0,0
4,GP,F,16,U,GT3,T,3,3,father,1,2,0,no,yes,no,no,yes,yes,no,no,4,3,2,1,2,5,0,11,13,13,0,0,1,0,0,0,0,1,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
644,MS,F,19,R,GT3,T,2,3,mother,1,3,1,no,no,no,yes,no,yes,yes,no,5,4,2,1,2,5,4,10,11,10,0,0,0,1,0,0,0,1,0,0,1,0,0,0
645,MS,F,18,U,LE3,T,3,1,mother,1,2,0,no,yes,no,no,yes,yes,yes,no,4,3,4,1,1,1,4,15,15,16,0,0,0,0,1,0,0,0,1,0,1,0,0,0
646,MS,F,18,U,GT3,T,1,1,mother,2,2,0,no,no,no,yes,yes,yes,no,no,1,1,1,1,1,5,6,11,12,9,0,0,1,0,0,0,0,1,0,0,1,0,0,0
647,MS,M,17,U,LE3,T,3,1,mother,2,1,0,no,no,no,no,no,yes,yes,no,2,4,5,3,4,2,6,10,10,10,0,0,0,1,0,0,0,0,1,0,1,0,0,0


In [102]:
data_por_3, a = categorical_transform(data_por_3)
total_result = data_por_3[["G1", "G2", "G3"]]
por_pass_fail = result_to_pass_fail(total_result, 60, 0.6)
data_por_3['pass_fail'] = por_pass_fail.values()

In [103]:
x = data_por_3.drop(columns = ["G1", "G2", "G3", "pass_fail"])
y = data_por_3["pass_fail"]
train_x, test_x, train_y, test_y = train_test_split(x, y, train_size = 0.8, random_state = 1039)

In [104]:
rd_classifier = RandomForestClassifier()
rd_classifier.fit(train_x, train_y)
pred_y = rd_classifier.predict(test_x)
print(classification_report(pred_y, test_y))
print(confusion_matrix(test_y, pred_y))

              precision    recall  f1-score   support

           0       0.79      0.72      0.75        74
           1       0.67      0.75      0.71        56

    accuracy                           0.73       130
   macro avg       0.73      0.73      0.73       130
weighted avg       0.74      0.73      0.73       130

[[53 14]
 [21 42]]


In [111]:
scores = cross_val_score(rd_classifier, x, y, cv=5)
print(scores)
print(np.mean(scores))

[0.64615385 0.73076923 0.72307692 0.67692308 0.53488372]
0.6623613595706619


In [28]:
importances = rd_classifier.feature_importances_
forest_importances = pd.Series(importances, index=x.columns)
print(forest_importances)

school               0.030950
sex                  0.023112
age                  0.044733
address              0.025810
famsize              0.014905
Pstatus              0.012136
Medu                 0.045041
Fedu                 0.037180
guardian             0.022372
traveltime           0.027433
studytime            0.039495
failures             0.052200
schoolsup            0.013177
famsup               0.018754
paid                 0.009093
activities           0.019609
nursery              0.013427
higher               0.041006
internet             0.016819
romantic             0.015594
famrel               0.037996
freetime             0.036466
goout                0.041308
Dalc                 0.038392
Walc                 0.047082
health               0.047185
absences             0.061723
Mjob_at_home         0.016134
Mjob_health          0.006814
Mjob_other           0.014261
Mjob_services        0.013454
Mjob_teacher         0.009702
Fjob_at_home         0.007496
Fjob_healt

### drop G2 G3 = 0 and its estimation result

In [91]:
data_por_4 = data_por.copy()

In [92]:
result_0 = data_por_4[data_por_4["G3"]==0].index
data_por_4 = data_por_4.drop(index = result_0)

In [93]:
data_pre = data_por_4[['Mjob','Fjob','reason']]

In [94]:
data_onehot = pd.DataFrame(pd.get_dummies(data_pre)) 
data_por_4 = data_por_4.drop(columns = ['Mjob','Fjob','reason'])
data_por_4 = pd.concat([data_por_4,data_onehot], axis = 1)

In [95]:
data_por_4, a = categorical_transform(data_por_4)
total_result = data_por_4[["G1", "G2", "G3"]]
por_pass_fail = result_to_pass_fail(total_result, 60, 0.6)
data_por_4['pass_fail'] = por_pass_fail.values()

In [96]:
x = data_por_4.drop(columns = ["G1", "G2", "G3", "pass_fail"])
y = data_por_4["pass_fail"]
train_x, test_x, train_y, test_y = train_test_split(x, y, train_size = 0.8, random_state = 1039)

In [97]:
rd_classifier = RandomForestClassifier()
rd_classifier.fit(train_x, train_y)
pred_y = rd_classifier.predict(test_x)
print(classification_report(pred_y, test_y))
print(confusion_matrix(test_y, pred_y))

              precision    recall  f1-score   support

           0       0.74      0.77      0.75        60
           1       0.78      0.76      0.77        67

    accuracy                           0.76       127
   macro avg       0.76      0.76      0.76       127
weighted avg       0.76      0.76      0.76       127

[[46 16]
 [14 51]]


In [98]:
scores = cross_val_score(rd_classifier, x, y, cv=5)
print(scores)
print(np.mean(scores))

[0.62204724 0.70866142 0.71653543 0.65354331 0.55555556]
0.6512685914260716


In [88]:
importances = rd_classifier.feature_importances_
forest_importances = pd.Series(importances, index=x.columns)
print(forest_importances)

school               0.026483
sex                  0.023906
age                  0.044026
address              0.020090
famsize              0.015484
Pstatus              0.012299
Medu                 0.046405
Fedu                 0.043612
guardian             0.018766
traveltime           0.027426
studytime            0.047346
failures             0.063722
schoolsup            0.015227
famsup               0.016144
paid                 0.009613
activities           0.019194
nursery              0.014404
higher               0.030898
internet             0.016999
romantic             0.016640
famrel               0.032678
freetime             0.037637
goout                0.037845
Dalc                 0.034117
Walc                 0.046688
health               0.049462
absences             0.063687
Mjob_at_home         0.020591
Mjob_health          0.007927
Mjob_other           0.013953
Mjob_services        0.012277
Mjob_teacher         0.009943
Fjob_at_home         0.006667
Fjob_healt

### catboost model

In [29]:
from catboost import CatBoostClassifier, Pool

In [30]:
train_pool = Pool(data = train_x, label = train_y)
clf = CatBoostClassifier(learning_rate = 0.01)
clf.fit(train_pool)

0:	learn: 0.6904282	total: 61ms	remaining: 1m
1:	learn: 0.6878889	total: 64.4ms	remaining: 32.1s
2:	learn: 0.6855097	total: 67.7ms	remaining: 22.5s
3:	learn: 0.6822365	total: 71ms	remaining: 17.7s
4:	learn: 0.6798539	total: 76.9ms	remaining: 15.3s
5:	learn: 0.6767681	total: 81.9ms	remaining: 13.6s
6:	learn: 0.6740264	total: 85.1ms	remaining: 12.1s
7:	learn: 0.6712006	total: 90.5ms	remaining: 11.2s
8:	learn: 0.6688729	total: 94.5ms	remaining: 10.4s
9:	learn: 0.6659267	total: 99.4ms	remaining: 9.84s
10:	learn: 0.6637157	total: 102ms	remaining: 9.21s
11:	learn: 0.6614146	total: 106ms	remaining: 8.72s
12:	learn: 0.6589030	total: 109ms	remaining: 8.29s
13:	learn: 0.6566371	total: 112ms	remaining: 7.89s
14:	learn: 0.6542336	total: 116ms	remaining: 7.59s
15:	learn: 0.6517810	total: 119ms	remaining: 7.33s
16:	learn: 0.6493349	total: 128ms	remaining: 7.37s
17:	learn: 0.6467922	total: 132ms	remaining: 7.21s
18:	learn: 0.6445657	total: 135ms	remaining: 6.98s
19:	learn: 0.6421758	total: 139ms	rema

177:	learn: 0.4548247	total: 867ms	remaining: 4s
178:	learn: 0.4543017	total: 881ms	remaining: 4.04s
179:	learn: 0.4536811	total: 885ms	remaining: 4.03s
180:	learn: 0.4531996	total: 893ms	remaining: 4.04s
181:	learn: 0.4521597	total: 897ms	remaining: 4.03s
182:	learn: 0.4514817	total: 901ms	remaining: 4.02s
183:	learn: 0.4509428	total: 913ms	remaining: 4.05s
184:	learn: 0.4501024	total: 923ms	remaining: 4.07s
185:	learn: 0.4496580	total: 931ms	remaining: 4.08s
186:	learn: 0.4487961	total: 936ms	remaining: 4.07s
187:	learn: 0.4482584	total: 944ms	remaining: 4.08s
188:	learn: 0.4476434	total: 947ms	remaining: 4.06s
189:	learn: 0.4468497	total: 951ms	remaining: 4.05s
190:	learn: 0.4463075	total: 956ms	remaining: 4.05s
191:	learn: 0.4457740	total: 960ms	remaining: 4.04s
192:	learn: 0.4451716	total: 965ms	remaining: 4.03s
193:	learn: 0.4444273	total: 978ms	remaining: 4.07s
194:	learn: 0.4441331	total: 982ms	remaining: 4.05s
195:	learn: 0.4428151	total: 988ms	remaining: 4.05s
196:	learn: 0.4

379:	learn: 0.3434296	total: 1.66s	remaining: 2.71s
380:	learn: 0.3430351	total: 1.67s	remaining: 2.71s
381:	learn: 0.3426785	total: 1.67s	remaining: 2.71s
382:	learn: 0.3424118	total: 1.68s	remaining: 2.7s
383:	learn: 0.3423212	total: 1.68s	remaining: 2.69s
384:	learn: 0.3417959	total: 1.68s	remaining: 2.69s
385:	learn: 0.3414151	total: 1.69s	remaining: 2.69s
386:	learn: 0.3408657	total: 1.69s	remaining: 2.68s
387:	learn: 0.3406393	total: 1.7s	remaining: 2.67s
388:	learn: 0.3403920	total: 1.7s	remaining: 2.67s
389:	learn: 0.3399764	total: 1.7s	remaining: 2.66s
390:	learn: 0.3395294	total: 1.71s	remaining: 2.67s
391:	learn: 0.3393309	total: 1.72s	remaining: 2.66s
392:	learn: 0.3388706	total: 1.72s	remaining: 2.65s
393:	learn: 0.3385685	total: 1.72s	remaining: 2.65s
394:	learn: 0.3381974	total: 1.73s	remaining: 2.64s
395:	learn: 0.3376938	total: 1.73s	remaining: 2.64s
396:	learn: 0.3372254	total: 1.74s	remaining: 2.64s
397:	learn: 0.3370495	total: 1.75s	remaining: 2.65s
398:	learn: 0.33

548:	learn: 0.2789697	total: 2.33s	remaining: 1.91s
549:	learn: 0.2786454	total: 2.34s	remaining: 1.91s
550:	learn: 0.2783437	total: 2.34s	remaining: 1.91s
551:	learn: 0.2779287	total: 2.34s	remaining: 1.9s
552:	learn: 0.2776127	total: 2.35s	remaining: 1.9s
553:	learn: 0.2772741	total: 2.35s	remaining: 1.89s
554:	learn: 0.2770366	total: 2.35s	remaining: 1.89s
555:	learn: 0.2765320	total: 2.35s	remaining: 1.88s
556:	learn: 0.2761639	total: 2.36s	remaining: 1.88s
557:	learn: 0.2759489	total: 2.36s	remaining: 1.87s
558:	learn: 0.2757476	total: 2.37s	remaining: 1.87s
559:	learn: 0.2751207	total: 2.37s	remaining: 1.86s
560:	learn: 0.2746514	total: 2.37s	remaining: 1.86s
561:	learn: 0.2743982	total: 2.38s	remaining: 1.85s
562:	learn: 0.2740721	total: 2.38s	remaining: 1.84s
563:	learn: 0.2737481	total: 2.38s	remaining: 1.84s
564:	learn: 0.2733176	total: 2.38s	remaining: 1.83s
565:	learn: 0.2729742	total: 2.38s	remaining: 1.83s
566:	learn: 0.2725002	total: 2.39s	remaining: 1.82s
567:	learn: 0.

728:	learn: 0.2179138	total: 2.95s	remaining: 1.1s
729:	learn: 0.2176767	total: 2.95s	remaining: 1.09s
730:	learn: 0.2172713	total: 2.96s	remaining: 1.09s
731:	learn: 0.2169885	total: 2.96s	remaining: 1.08s
732:	learn: 0.2165835	total: 2.96s	remaining: 1.08s
733:	learn: 0.2163438	total: 2.97s	remaining: 1.07s
734:	learn: 0.2158005	total: 2.97s	remaining: 1.07s
735:	learn: 0.2154811	total: 2.97s	remaining: 1.07s
736:	learn: 0.2152930	total: 2.98s	remaining: 1.06s
737:	learn: 0.2150729	total: 2.98s	remaining: 1.06s
738:	learn: 0.2146453	total: 2.98s	remaining: 1.05s
739:	learn: 0.2144741	total: 2.98s	remaining: 1.05s
740:	learn: 0.2143054	total: 2.99s	remaining: 1.04s
741:	learn: 0.2141040	total: 2.99s	remaining: 1.04s
742:	learn: 0.2137964	total: 2.99s	remaining: 1.03s
743:	learn: 0.2136090	total: 3s	remaining: 1.03s
744:	learn: 0.2133228	total: 3s	remaining: 1.03s
745:	learn: 0.2130971	total: 3s	remaining: 1.02s
746:	learn: 0.2126411	total: 3.01s	remaining: 1.02s
747:	learn: 0.2123250	

917:	learn: 0.1667908	total: 3.59s	remaining: 321ms
918:	learn: 0.1664808	total: 3.59s	remaining: 317ms
919:	learn: 0.1662889	total: 3.6s	remaining: 313ms
920:	learn: 0.1661302	total: 3.6s	remaining: 309ms
921:	learn: 0.1659534	total: 3.6s	remaining: 305ms
922:	learn: 0.1656772	total: 3.61s	remaining: 301ms
923:	learn: 0.1653711	total: 3.61s	remaining: 297ms
924:	learn: 0.1651532	total: 3.61s	remaining: 293ms
925:	learn: 0.1650368	total: 3.62s	remaining: 289ms
926:	learn: 0.1649345	total: 3.62s	remaining: 285ms
927:	learn: 0.1647783	total: 3.63s	remaining: 282ms
928:	learn: 0.1644705	total: 3.63s	remaining: 278ms
929:	learn: 0.1641303	total: 3.63s	remaining: 274ms
930:	learn: 0.1638609	total: 3.64s	remaining: 270ms
931:	learn: 0.1637641	total: 3.64s	remaining: 266ms
932:	learn: 0.1634546	total: 3.64s	remaining: 262ms
933:	learn: 0.1632425	total: 3.65s	remaining: 258ms
934:	learn: 0.1631027	total: 3.65s	remaining: 254ms
935:	learn: 0.1628115	total: 3.65s	remaining: 250ms
936:	learn: 0.1

<catboost.core.CatBoostClassifier at 0x2889ee0e278>

In [31]:
pred_y = clf.predict(test_x)
print(classification_report(pred_y, test_y))
print(confusion_matrix(test_y, pred_y))

              precision    recall  f1-score   support

           0       0.72      0.71      0.71        68
           1       0.68      0.69      0.69        62

    accuracy                           0.70       130
   macro avg       0.70      0.70      0.70       130
weighted avg       0.70      0.70      0.70       130

[[48 19]
 [20 43]]


In [32]:
scores = cross_val_score(clf, x, y, cv=5)
print(scores)
print(np.mean(scores))

0:	learn: 0.6894961	total: 4.58ms	remaining: 4.57s
1:	learn: 0.6859476	total: 8.15ms	remaining: 4.07s
2:	learn: 0.6826059	total: 11.2ms	remaining: 3.73s
3:	learn: 0.6788276	total: 15.4ms	remaining: 3.83s
4:	learn: 0.6758048	total: 18.3ms	remaining: 3.65s
5:	learn: 0.6718788	total: 21.7ms	remaining: 3.6s
6:	learn: 0.6682069	total: 26.2ms	remaining: 3.71s
7:	learn: 0.6652682	total: 34.8ms	remaining: 4.32s
8:	learn: 0.6624577	total: 37.7ms	remaining: 4.15s
9:	learn: 0.6592796	total: 41.2ms	remaining: 4.07s
10:	learn: 0.6559642	total: 44.3ms	remaining: 3.98s
11:	learn: 0.6531803	total: 48.3ms	remaining: 3.98s
12:	learn: 0.6500736	total: 52.1ms	remaining: 3.96s
13:	learn: 0.6475582	total: 55.1ms	remaining: 3.88s
14:	learn: 0.6447779	total: 58.3ms	remaining: 3.83s
15:	learn: 0.6421362	total: 61.3ms	remaining: 3.77s
16:	learn: 0.6387414	total: 64.1ms	remaining: 3.71s
17:	learn: 0.6356697	total: 67.7ms	remaining: 3.69s
18:	learn: 0.6324366	total: 70.4ms	remaining: 3.63s
19:	learn: 0.6296515	to

183:	learn: 0.4235817	total: 650ms	remaining: 2.88s
184:	learn: 0.4228657	total: 653ms	remaining: 2.88s
185:	learn: 0.4219820	total: 657ms	remaining: 2.88s
186:	learn: 0.4215364	total: 660ms	remaining: 2.87s
187:	learn: 0.4203387	total: 663ms	remaining: 2.86s
188:	learn: 0.4195463	total: 666ms	remaining: 2.86s
189:	learn: 0.4185916	total: 669ms	remaining: 2.85s
190:	learn: 0.4176267	total: 673ms	remaining: 2.85s
191:	learn: 0.4169077	total: 676ms	remaining: 2.84s
192:	learn: 0.4162189	total: 678ms	remaining: 2.84s
193:	learn: 0.4155096	total: 681ms	remaining: 2.83s
194:	learn: 0.4148861	total: 685ms	remaining: 2.83s
195:	learn: 0.4144151	total: 687ms	remaining: 2.82s
196:	learn: 0.4131972	total: 690ms	remaining: 2.81s
197:	learn: 0.4123917	total: 693ms	remaining: 2.81s
198:	learn: 0.4118201	total: 701ms	remaining: 2.82s
199:	learn: 0.4111902	total: 703ms	remaining: 2.81s
200:	learn: 0.4106210	total: 706ms	remaining: 2.81s
201:	learn: 0.4100061	total: 709ms	remaining: 2.8s
202:	learn: 0

351:	learn: 0.3310356	total: 1.3s	remaining: 2.39s
352:	learn: 0.3306536	total: 1.3s	remaining: 2.39s
353:	learn: 0.3302461	total: 1.31s	remaining: 2.39s
354:	learn: 0.3298786	total: 1.31s	remaining: 2.39s
355:	learn: 0.3294340	total: 1.32s	remaining: 2.38s
356:	learn: 0.3292434	total: 1.32s	remaining: 2.38s
357:	learn: 0.3288486	total: 1.32s	remaining: 2.37s
358:	learn: 0.3284518	total: 1.33s	remaining: 2.37s
359:	learn: 0.3278968	total: 1.33s	remaining: 2.37s
360:	learn: 0.3274558	total: 1.34s	remaining: 2.37s
361:	learn: 0.3270620	total: 1.34s	remaining: 2.37s
362:	learn: 0.3267391	total: 1.35s	remaining: 2.37s
363:	learn: 0.3263148	total: 1.35s	remaining: 2.37s
364:	learn: 0.3259081	total: 1.36s	remaining: 2.36s
365:	learn: 0.3253664	total: 1.37s	remaining: 2.37s
366:	learn: 0.3246801	total: 1.37s	remaining: 2.36s
367:	learn: 0.3244989	total: 1.37s	remaining: 2.36s
368:	learn: 0.3240207	total: 1.38s	remaining: 2.36s
369:	learn: 0.3237165	total: 1.39s	remaining: 2.37s
370:	learn: 0.

521:	learn: 0.2680618	total: 1.92s	remaining: 1.76s
522:	learn: 0.2675212	total: 1.93s	remaining: 1.76s
523:	learn: 0.2671815	total: 1.93s	remaining: 1.75s
524:	learn: 0.2669255	total: 1.93s	remaining: 1.75s
525:	learn: 0.2664187	total: 1.94s	remaining: 1.75s
526:	learn: 0.2661993	total: 1.95s	remaining: 1.75s
527:	learn: 0.2658343	total: 1.95s	remaining: 1.74s
528:	learn: 0.2654501	total: 1.95s	remaining: 1.74s
529:	learn: 0.2652326	total: 1.96s	remaining: 1.73s
530:	learn: 0.2649182	total: 1.96s	remaining: 1.73s
531:	learn: 0.2645763	total: 1.96s	remaining: 1.72s
532:	learn: 0.2642634	total: 1.96s	remaining: 1.72s
533:	learn: 0.2640001	total: 1.97s	remaining: 1.72s
534:	learn: 0.2633186	total: 1.97s	remaining: 1.72s
535:	learn: 0.2628149	total: 1.98s	remaining: 1.71s
536:	learn: 0.2624849	total: 1.98s	remaining: 1.71s
537:	learn: 0.2621491	total: 1.98s	remaining: 1.7s
538:	learn: 0.2618052	total: 1.99s	remaining: 1.7s
539:	learn: 0.2614907	total: 1.99s	remaining: 1.69s
540:	learn: 0.

717:	learn: 0.2035743	total: 2.56s	remaining: 1s
718:	learn: 0.2034242	total: 2.56s	remaining: 1s
719:	learn: 0.2032288	total: 2.56s	remaining: 997ms
720:	learn: 0.2028487	total: 2.57s	remaining: 993ms
721:	learn: 0.2025497	total: 2.57s	remaining: 990ms
722:	learn: 0.2021703	total: 2.57s	remaining: 986ms
723:	learn: 0.2018816	total: 2.58s	remaining: 982ms
724:	learn: 0.2017095	total: 2.58s	remaining: 978ms
725:	learn: 0.2013548	total: 2.58s	remaining: 975ms
726:	learn: 0.2011728	total: 2.59s	remaining: 971ms
727:	learn: 0.2008090	total: 2.59s	remaining: 968ms
728:	learn: 0.2005060	total: 2.59s	remaining: 964ms
729:	learn: 0.2001292	total: 2.6s	remaining: 960ms
730:	learn: 0.1997851	total: 2.6s	remaining: 957ms
731:	learn: 0.1995138	total: 2.6s	remaining: 953ms
732:	learn: 0.1993279	total: 2.61s	remaining: 952ms
733:	learn: 0.1989149	total: 2.62s	remaining: 949ms
734:	learn: 0.1985186	total: 2.62s	remaining: 945ms
735:	learn: 0.1980858	total: 2.63s	remaining: 942ms
736:	learn: 0.1978442

893:	learn: 0.1602054	total: 3.2s	remaining: 380ms
894:	learn: 0.1599452	total: 3.21s	remaining: 376ms
895:	learn: 0.1596840	total: 3.21s	remaining: 373ms
896:	learn: 0.1595449	total: 3.21s	remaining: 369ms
897:	learn: 0.1592674	total: 3.22s	remaining: 365ms
898:	learn: 0.1591058	total: 3.22s	remaining: 362ms
899:	learn: 0.1588530	total: 3.22s	remaining: 358ms
900:	learn: 0.1587265	total: 3.23s	remaining: 355ms
901:	learn: 0.1584814	total: 3.23s	remaining: 351ms
902:	learn: 0.1583280	total: 3.23s	remaining: 348ms
903:	learn: 0.1581787	total: 3.24s	remaining: 344ms
904:	learn: 0.1578368	total: 3.24s	remaining: 340ms
905:	learn: 0.1576557	total: 3.25s	remaining: 337ms
906:	learn: 0.1575359	total: 3.25s	remaining: 333ms
907:	learn: 0.1573399	total: 3.25s	remaining: 329ms
908:	learn: 0.1571331	total: 3.25s	remaining: 326ms
909:	learn: 0.1569480	total: 3.26s	remaining: 322ms
910:	learn: 0.1567005	total: 3.26s	remaining: 318ms
911:	learn: 0.1566489	total: 3.26s	remaining: 315ms
912:	learn: 0

70:	learn: 0.5464262	total: 319ms	remaining: 4.18s
71:	learn: 0.5451109	total: 322ms	remaining: 4.15s
72:	learn: 0.5436026	total: 325ms	remaining: 4.13s
73:	learn: 0.5425064	total: 334ms	remaining: 4.17s
74:	learn: 0.5415171	total: 341ms	remaining: 4.2s
75:	learn: 0.5402676	total: 344ms	remaining: 4.18s
76:	learn: 0.5390290	total: 347ms	remaining: 4.16s
77:	learn: 0.5380092	total: 350ms	remaining: 4.13s
78:	learn: 0.5371818	total: 352ms	remaining: 4.11s
79:	learn: 0.5357563	total: 355ms	remaining: 4.08s
80:	learn: 0.5345509	total: 358ms	remaining: 4.07s
81:	learn: 0.5333661	total: 361ms	remaining: 4.04s
82:	learn: 0.5320194	total: 363ms	remaining: 4.01s
83:	learn: 0.5306651	total: 366ms	remaining: 3.99s
84:	learn: 0.5294165	total: 369ms	remaining: 3.97s
85:	learn: 0.5281383	total: 372ms	remaining: 3.95s
86:	learn: 0.5268226	total: 375ms	remaining: 3.93s
87:	learn: 0.5256090	total: 377ms	remaining: 3.91s
88:	learn: 0.5245988	total: 380ms	remaining: 3.89s
89:	learn: 0.5234243	total: 383m

254:	learn: 0.3913743	total: 1s	remaining: 2.93s
255:	learn: 0.3909066	total: 1.01s	remaining: 2.94s
256:	learn: 0.3899667	total: 1.02s	remaining: 2.96s
257:	learn: 0.3893723	total: 1.03s	remaining: 2.96s
258:	learn: 0.3886323	total: 1.05s	remaining: 3s
259:	learn: 0.3881712	total: 1.06s	remaining: 3.02s
260:	learn: 0.3878083	total: 1.07s	remaining: 3.03s
261:	learn: 0.3872276	total: 1.08s	remaining: 3.04s
262:	learn: 0.3867914	total: 1.08s	remaining: 3.04s
263:	learn: 0.3861648	total: 1.1s	remaining: 3.06s
264:	learn: 0.3857157	total: 1.11s	remaining: 3.1s
265:	learn: 0.3852731	total: 1.12s	remaining: 3.1s
266:	learn: 0.3848090	total: 1.14s	remaining: 3.12s
267:	learn: 0.3842884	total: 1.15s	remaining: 3.13s
268:	learn: 0.3832269	total: 1.15s	remaining: 3.14s
269:	learn: 0.3827769	total: 1.17s	remaining: 3.16s
270:	learn: 0.3820519	total: 1.18s	remaining: 3.17s
271:	learn: 0.3811716	total: 1.19s	remaining: 3.18s
272:	learn: 0.3806300	total: 1.2s	remaining: 3.18s
273:	learn: 0.3801801	

425:	learn: 0.3070473	total: 2.21s	remaining: 2.98s
426:	learn: 0.3067640	total: 2.22s	remaining: 2.98s
427:	learn: 0.3064019	total: 2.23s	remaining: 2.98s
428:	learn: 0.3060410	total: 2.23s	remaining: 2.97s
429:	learn: 0.3056461	total: 2.24s	remaining: 2.97s
430:	learn: 0.3055750	total: 2.24s	remaining: 2.96s
431:	learn: 0.3053991	total: 2.25s	remaining: 2.96s
432:	learn: 0.3046000	total: 2.25s	remaining: 2.95s
433:	learn: 0.3044232	total: 2.26s	remaining: 2.94s
434:	learn: 0.3039258	total: 2.26s	remaining: 2.94s
435:	learn: 0.3034152	total: 2.26s	remaining: 2.93s
436:	learn: 0.3030930	total: 2.27s	remaining: 2.92s
437:	learn: 0.3027864	total: 2.27s	remaining: 2.92s
438:	learn: 0.3024309	total: 2.28s	remaining: 2.91s
439:	learn: 0.3022081	total: 2.28s	remaining: 2.9s
440:	learn: 0.3015824	total: 2.35s	remaining: 2.97s
441:	learn: 0.3013453	total: 2.36s	remaining: 2.98s
442:	learn: 0.3009690	total: 2.37s	remaining: 2.98s
443:	learn: 0.3006734	total: 2.37s	remaining: 2.97s
444:	learn: 0

589:	learn: 0.2505974	total: 3.03s	remaining: 2.11s
590:	learn: 0.2502011	total: 3.04s	remaining: 2.1s
591:	learn: 0.2498650	total: 3.06s	remaining: 2.1s
592:	learn: 0.2496151	total: 3.07s	remaining: 2.1s
593:	learn: 0.2492321	total: 3.07s	remaining: 2.1s
594:	learn: 0.2487940	total: 3.08s	remaining: 2.09s
595:	learn: 0.2486167	total: 3.08s	remaining: 2.09s
596:	learn: 0.2481738	total: 3.08s	remaining: 2.08s
597:	learn: 0.2477078	total: 3.09s	remaining: 2.08s
598:	learn: 0.2474961	total: 3.1s	remaining: 2.08s
599:	learn: 0.2470302	total: 3.11s	remaining: 2.07s
600:	learn: 0.2468662	total: 3.11s	remaining: 2.06s
601:	learn: 0.2467235	total: 3.12s	remaining: 2.06s
602:	learn: 0.2465234	total: 3.13s	remaining: 2.06s
603:	learn: 0.2460601	total: 3.14s	remaining: 2.06s
604:	learn: 0.2458415	total: 3.14s	remaining: 2.05s
605:	learn: 0.2455010	total: 3.15s	remaining: 2.05s
606:	learn: 0.2451580	total: 3.16s	remaining: 2.05s
607:	learn: 0.2449913	total: 3.17s	remaining: 2.04s
608:	learn: 0.244

771:	learn: 0.1961710	total: 3.82s	remaining: 1.13s
772:	learn: 0.1960193	total: 3.83s	remaining: 1.12s
773:	learn: 0.1956410	total: 3.83s	remaining: 1.12s
774:	learn: 0.1954352	total: 3.83s	remaining: 1.11s
775:	learn: 0.1953021	total: 3.84s	remaining: 1.11s
776:	learn: 0.1951119	total: 3.84s	remaining: 1.1s
777:	learn: 0.1949615	total: 3.85s	remaining: 1.1s
778:	learn: 0.1946744	total: 3.85s	remaining: 1.09s
779:	learn: 0.1945260	total: 3.85s	remaining: 1.09s
780:	learn: 0.1943225	total: 3.86s	remaining: 1.08s
781:	learn: 0.1941130	total: 3.86s	remaining: 1.08s
782:	learn: 0.1938670	total: 3.87s	remaining: 1.07s
783:	learn: 0.1936937	total: 3.88s	remaining: 1.07s
784:	learn: 0.1935072	total: 3.88s	remaining: 1.06s
785:	learn: 0.1931333	total: 3.88s	remaining: 1.06s
786:	learn: 0.1928125	total: 3.89s	remaining: 1.05s
787:	learn: 0.1925601	total: 3.89s	remaining: 1.05s
788:	learn: 0.1921714	total: 3.9s	remaining: 1.04s
789:	learn: 0.1919387	total: 3.91s	remaining: 1.04s
790:	learn: 0.1

953:	learn: 0.1549774	total: 4.62s	remaining: 223ms
954:	learn: 0.1548332	total: 4.64s	remaining: 219ms
955:	learn: 0.1547373	total: 4.64s	remaining: 214ms
956:	learn: 0.1544257	total: 4.69s	remaining: 211ms
957:	learn: 0.1541013	total: 4.69s	remaining: 206ms
958:	learn: 0.1538388	total: 4.7s	remaining: 201ms
959:	learn: 0.1535900	total: 4.7s	remaining: 196ms
960:	learn: 0.1533624	total: 4.71s	remaining: 191ms
961:	learn: 0.1532756	total: 4.72s	remaining: 186ms
962:	learn: 0.1531684	total: 4.72s	remaining: 181ms
963:	learn: 0.1531275	total: 4.72s	remaining: 176ms
964:	learn: 0.1529618	total: 4.73s	remaining: 172ms
965:	learn: 0.1528573	total: 4.74s	remaining: 167ms
966:	learn: 0.1527831	total: 4.74s	remaining: 162ms
967:	learn: 0.1526456	total: 4.74s	remaining: 157ms
968:	learn: 0.1524374	total: 4.75s	remaining: 152ms
969:	learn: 0.1522463	total: 4.75s	remaining: 147ms
970:	learn: 0.1520341	total: 4.75s	remaining: 142ms
971:	learn: 0.1517728	total: 4.76s	remaining: 137ms
972:	learn: 0.

158:	learn: 0.4623976	total: 476ms	remaining: 2.52s
159:	learn: 0.4617698	total: 479ms	remaining: 2.52s
160:	learn: 0.4607161	total: 483ms	remaining: 2.52s
161:	learn: 0.4596312	total: 486ms	remaining: 2.51s
162:	learn: 0.4588696	total: 490ms	remaining: 2.51s
163:	learn: 0.4581978	total: 493ms	remaining: 2.51s
164:	learn: 0.4575669	total: 496ms	remaining: 2.51s
165:	learn: 0.4567562	total: 501ms	remaining: 2.52s
166:	learn: 0.4562421	total: 504ms	remaining: 2.51s
167:	learn: 0.4551792	total: 507ms	remaining: 2.51s
168:	learn: 0.4546622	total: 509ms	remaining: 2.5s
169:	learn: 0.4536261	total: 512ms	remaining: 2.5s
170:	learn: 0.4530755	total: 517ms	remaining: 2.51s
171:	learn: 0.4520800	total: 520ms	remaining: 2.5s
172:	learn: 0.4511858	total: 523ms	remaining: 2.5s
173:	learn: 0.4505412	total: 526ms	remaining: 2.5s
174:	learn: 0.4497972	total: 528ms	remaining: 2.49s
175:	learn: 0.4490509	total: 532ms	remaining: 2.49s
176:	learn: 0.4482163	total: 534ms	remaining: 2.48s
177:	learn: 0.447

357:	learn: 0.3446430	total: 1.12s	remaining: 2s
358:	learn: 0.3442665	total: 1.12s	remaining: 2s
359:	learn: 0.3437606	total: 1.12s	remaining: 2s
360:	learn: 0.3430370	total: 1.13s	remaining: 1.99s
361:	learn: 0.3425644	total: 1.13s	remaining: 1.99s
362:	learn: 0.3421119	total: 1.13s	remaining: 1.99s
363:	learn: 0.3417162	total: 1.14s	remaining: 1.99s
364:	learn: 0.3413896	total: 1.14s	remaining: 1.98s
365:	learn: 0.3410703	total: 1.14s	remaining: 1.98s
366:	learn: 0.3406705	total: 1.15s	remaining: 1.98s
367:	learn: 0.3403498	total: 1.15s	remaining: 1.97s
368:	learn: 0.3399845	total: 1.15s	remaining: 1.97s
369:	learn: 0.3397299	total: 1.16s	remaining: 1.97s
370:	learn: 0.3390937	total: 1.16s	remaining: 1.97s
371:	learn: 0.3387863	total: 1.16s	remaining: 1.97s
372:	learn: 0.3384592	total: 1.17s	remaining: 1.96s
373:	learn: 0.3381085	total: 1.17s	remaining: 1.96s
374:	learn: 0.3374736	total: 1.18s	remaining: 1.96s
375:	learn: 0.3368537	total: 1.18s	remaining: 1.96s
376:	learn: 0.3364696

554:	learn: 0.2708239	total: 1.75s	remaining: 1.4s
555:	learn: 0.2704632	total: 1.75s	remaining: 1.4s
556:	learn: 0.2700548	total: 1.75s	remaining: 1.39s
557:	learn: 0.2695608	total: 1.75s	remaining: 1.39s
558:	learn: 0.2691074	total: 1.76s	remaining: 1.39s
559:	learn: 0.2686010	total: 1.76s	remaining: 1.38s
560:	learn: 0.2683700	total: 1.76s	remaining: 1.38s
561:	learn: 0.2680151	total: 1.77s	remaining: 1.38s
562:	learn: 0.2678640	total: 1.77s	remaining: 1.38s
563:	learn: 0.2675144	total: 1.77s	remaining: 1.37s
564:	learn: 0.2670867	total: 1.78s	remaining: 1.37s
565:	learn: 0.2667082	total: 1.78s	remaining: 1.36s
566:	learn: 0.2664154	total: 1.78s	remaining: 1.36s
567:	learn: 0.2663136	total: 1.78s	remaining: 1.36s
568:	learn: 0.2658787	total: 1.79s	remaining: 1.35s
569:	learn: 0.2657683	total: 1.79s	remaining: 1.35s
570:	learn: 0.2656917	total: 1.79s	remaining: 1.35s
571:	learn: 0.2652895	total: 1.79s	remaining: 1.34s
572:	learn: 0.2648968	total: 1.8s	remaining: 1.34s
573:	learn: 0.2

753:	learn: 0.2049154	total: 2.39s	remaining: 779ms
754:	learn: 0.2046800	total: 2.39s	remaining: 776ms
755:	learn: 0.2045492	total: 2.39s	remaining: 772ms
756:	learn: 0.2042528	total: 2.4s	remaining: 769ms
757:	learn: 0.2039306	total: 2.4s	remaining: 766ms
758:	learn: 0.2037193	total: 2.4s	remaining: 763ms
759:	learn: 0.2032755	total: 2.4s	remaining: 760ms
760:	learn: 0.2031075	total: 2.41s	remaining: 757ms
761:	learn: 0.2027426	total: 2.41s	remaining: 754ms
762:	learn: 0.2025933	total: 2.42s	remaining: 750ms
763:	learn: 0.2024523	total: 2.42s	remaining: 747ms
764:	learn: 0.2023026	total: 2.42s	remaining: 743ms
765:	learn: 0.2020921	total: 2.42s	remaining: 740ms
766:	learn: 0.2018812	total: 2.42s	remaining: 737ms
767:	learn: 0.2017360	total: 2.43s	remaining: 734ms
768:	learn: 0.2014993	total: 2.43s	remaining: 730ms
769:	learn: 0.2011399	total: 2.43s	remaining: 727ms
770:	learn: 0.2008057	total: 2.44s	remaining: 724ms
771:	learn: 0.2004718	total: 2.44s	remaining: 720ms
772:	learn: 0.20

918:	learn: 0.1599317	total: 2.86s	remaining: 252ms
919:	learn: 0.1596861	total: 2.86s	remaining: 249ms
920:	learn: 0.1594375	total: 2.87s	remaining: 246ms
921:	learn: 0.1591647	total: 2.87s	remaining: 243ms
922:	learn: 0.1589123	total: 2.87s	remaining: 240ms
923:	learn: 0.1587345	total: 2.88s	remaining: 237ms
924:	learn: 0.1584575	total: 2.88s	remaining: 234ms
925:	learn: 0.1582582	total: 2.89s	remaining: 231ms
926:	learn: 0.1578243	total: 2.89s	remaining: 228ms
927:	learn: 0.1576728	total: 2.9s	remaining: 225ms
928:	learn: 0.1575240	total: 2.9s	remaining: 222ms
929:	learn: 0.1571038	total: 2.9s	remaining: 219ms
930:	learn: 0.1568739	total: 2.91s	remaining: 215ms
931:	learn: 0.1566280	total: 2.91s	remaining: 212ms
932:	learn: 0.1563896	total: 2.91s	remaining: 209ms
933:	learn: 0.1561475	total: 2.92s	remaining: 206ms
934:	learn: 0.1558283	total: 2.92s	remaining: 203ms
935:	learn: 0.1556026	total: 2.92s	remaining: 200ms
936:	learn: 0.1553950	total: 2.93s	remaining: 197ms
937:	learn: 0.1

94:	learn: 0.5215148	total: 313ms	remaining: 2.98s
95:	learn: 0.5206087	total: 316ms	remaining: 2.98s
96:	learn: 0.5198561	total: 320ms	remaining: 2.97s
97:	learn: 0.5188441	total: 322ms	remaining: 2.97s
98:	learn: 0.5175022	total: 325ms	remaining: 2.96s
99:	learn: 0.5169586	total: 329ms	remaining: 2.96s
100:	learn: 0.5159956	total: 332ms	remaining: 2.95s
101:	learn: 0.5148137	total: 335ms	remaining: 2.95s
102:	learn: 0.5133432	total: 338ms	remaining: 2.95s
103:	learn: 0.5120264	total: 342ms	remaining: 2.94s
104:	learn: 0.5109797	total: 346ms	remaining: 2.94s
105:	learn: 0.5098649	total: 348ms	remaining: 2.94s
106:	learn: 0.5091563	total: 352ms	remaining: 2.94s
107:	learn: 0.5081352	total: 355ms	remaining: 2.93s
108:	learn: 0.5072510	total: 358ms	remaining: 2.92s
109:	learn: 0.5064288	total: 361ms	remaining: 2.92s
110:	learn: 0.5055775	total: 363ms	remaining: 2.91s
111:	learn: 0.5048687	total: 366ms	remaining: 2.9s
112:	learn: 0.5040451	total: 369ms	remaining: 2.9s
113:	learn: 0.502720

294:	learn: 0.3811624	total: 955ms	remaining: 2.28s
295:	learn: 0.3809107	total: 961ms	remaining: 2.28s
296:	learn: 0.3801130	total: 964ms	remaining: 2.28s
297:	learn: 0.3798248	total: 971ms	remaining: 2.29s
298:	learn: 0.3791901	total: 974ms	remaining: 2.28s
299:	learn: 0.3788398	total: 978ms	remaining: 2.28s
300:	learn: 0.3784727	total: 982ms	remaining: 2.28s
301:	learn: 0.3779594	total: 987ms	remaining: 2.28s
302:	learn: 0.3774273	total: 993ms	remaining: 2.28s
303:	learn: 0.3770009	total: 996ms	remaining: 2.28s
304:	learn: 0.3764658	total: 1000ms	remaining: 2.28s
305:	learn: 0.3761792	total: 1s	remaining: 2.27s
306:	learn: 0.3756635	total: 1.01s	remaining: 2.27s
307:	learn: 0.3750748	total: 1.01s	remaining: 2.27s
308:	learn: 0.3743031	total: 1.01s	remaining: 2.27s
309:	learn: 0.3737194	total: 1.02s	remaining: 2.26s
310:	learn: 0.3732491	total: 1.02s	remaining: 2.26s
311:	learn: 0.3728085	total: 1.02s	remaining: 2.26s
312:	learn: 0.3725427	total: 1.03s	remaining: 2.25s
313:	learn: 0.

469:	learn: 0.3061989	total: 1.58s	remaining: 1.78s
470:	learn: 0.3058332	total: 1.58s	remaining: 1.78s
471:	learn: 0.3055916	total: 1.61s	remaining: 1.8s
472:	learn: 0.3051374	total: 1.62s	remaining: 1.8s
473:	learn: 0.3049922	total: 1.62s	remaining: 1.8s
474:	learn: 0.3045323	total: 1.62s	remaining: 1.79s
475:	learn: 0.3043420	total: 1.63s	remaining: 1.79s
476:	learn: 0.3036488	total: 1.63s	remaining: 1.79s
477:	learn: 0.3033781	total: 1.64s	remaining: 1.78s
478:	learn: 0.3031382	total: 1.64s	remaining: 1.78s
479:	learn: 0.3027400	total: 1.64s	remaining: 1.78s
480:	learn: 0.3023202	total: 1.65s	remaining: 1.77s
481:	learn: 0.3020910	total: 1.65s	remaining: 1.77s
482:	learn: 0.3014986	total: 1.65s	remaining: 1.77s
483:	learn: 0.3009079	total: 1.66s	remaining: 1.77s
484:	learn: 0.3005842	total: 1.66s	remaining: 1.76s
485:	learn: 0.3001525	total: 1.66s	remaining: 1.76s
486:	learn: 0.2999246	total: 1.67s	remaining: 1.76s
487:	learn: 0.2993319	total: 1.67s	remaining: 1.76s
488:	learn: 0.2

672:	learn: 0.2373267	total: 2.38s	remaining: 1.16s
673:	learn: 0.2371237	total: 2.39s	remaining: 1.15s
674:	learn: 0.2366992	total: 2.39s	remaining: 1.15s
675:	learn: 0.2363113	total: 2.39s	remaining: 1.15s
676:	learn: 0.2361193	total: 2.4s	remaining: 1.14s
677:	learn: 0.2360969	total: 2.4s	remaining: 1.14s
678:	learn: 0.2357819	total: 2.4s	remaining: 1.14s
679:	learn: 0.2353955	total: 2.41s	remaining: 1.13s
680:	learn: 0.2350071	total: 2.41s	remaining: 1.13s
681:	learn: 0.2347396	total: 2.41s	remaining: 1.13s
682:	learn: 0.2344278	total: 2.42s	remaining: 1.12s
683:	learn: 0.2342010	total: 2.42s	remaining: 1.12s
684:	learn: 0.2337189	total: 2.42s	remaining: 1.11s
685:	learn: 0.2332603	total: 2.42s	remaining: 1.11s
686:	learn: 0.2329814	total: 2.43s	remaining: 1.11s
687:	learn: 0.2326466	total: 2.43s	remaining: 1.1s
688:	learn: 0.2324341	total: 2.44s	remaining: 1.1s
689:	learn: 0.2321458	total: 2.45s	remaining: 1.1s
690:	learn: 0.2318994	total: 2.45s	remaining: 1.1s
691:	learn: 0.23134

851:	learn: 0.1889513	total: 3.01s	remaining: 524ms
852:	learn: 0.1888958	total: 3.02s	remaining: 520ms
853:	learn: 0.1886066	total: 3.02s	remaining: 516ms
854:	learn: 0.1882898	total: 3.02s	remaining: 513ms
855:	learn: 0.1880738	total: 3.03s	remaining: 509ms
856:	learn: 0.1876491	total: 3.03s	remaining: 506ms
857:	learn: 0.1873739	total: 3.03s	remaining: 502ms
858:	learn: 0.1871146	total: 3.04s	remaining: 499ms
859:	learn: 0.1868176	total: 3.04s	remaining: 495ms
860:	learn: 0.1866386	total: 3.05s	remaining: 492ms
861:	learn: 0.1864282	total: 3.05s	remaining: 488ms
862:	learn: 0.1862157	total: 3.05s	remaining: 485ms
863:	learn: 0.1859273	total: 3.06s	remaining: 481ms
864:	learn: 0.1856124	total: 3.06s	remaining: 477ms
865:	learn: 0.1853792	total: 3.06s	remaining: 474ms
866:	learn: 0.1851711	total: 3.06s	remaining: 470ms
867:	learn: 0.1849169	total: 3.07s	remaining: 466ms
868:	learn: 0.1845658	total: 3.07s	remaining: 463ms
869:	learn: 0.1841970	total: 3.07s	remaining: 459ms
870:	learn: 

56:	learn: 0.5578187	total: 165ms	remaining: 2.72s
57:	learn: 0.5566163	total: 167ms	remaining: 2.72s
58:	learn: 0.5548219	total: 170ms	remaining: 2.71s
59:	learn: 0.5533207	total: 173ms	remaining: 2.71s
60:	learn: 0.5522886	total: 176ms	remaining: 2.71s
61:	learn: 0.5508273	total: 179ms	remaining: 2.71s
62:	learn: 0.5498134	total: 182ms	remaining: 2.7s
63:	learn: 0.5482604	total: 185ms	remaining: 2.7s
64:	learn: 0.5470798	total: 189ms	remaining: 2.71s
65:	learn: 0.5453865	total: 192ms	remaining: 2.71s
66:	learn: 0.5437184	total: 197ms	remaining: 2.75s
67:	learn: 0.5421111	total: 200ms	remaining: 2.75s
68:	learn: 0.5410393	total: 206ms	remaining: 2.77s
69:	learn: 0.5401073	total: 213ms	remaining: 2.83s
70:	learn: 0.5384906	total: 216ms	remaining: 2.82s
71:	learn: 0.5365361	total: 219ms	remaining: 2.83s
72:	learn: 0.5350823	total: 222ms	remaining: 2.81s
73:	learn: 0.5336392	total: 225ms	remaining: 2.81s
74:	learn: 0.5320337	total: 228ms	remaining: 2.81s
75:	learn: 0.5306662	total: 232ms

235:	learn: 0.4014436	total: 797ms	remaining: 2.58s
236:	learn: 0.4008979	total: 801ms	remaining: 2.58s
237:	learn: 0.4002053	total: 803ms	remaining: 2.57s
238:	learn: 0.3994415	total: 811ms	remaining: 2.58s
239:	learn: 0.3989593	total: 817ms	remaining: 2.59s
240:	learn: 0.3982887	total: 821ms	remaining: 2.59s
241:	learn: 0.3976080	total: 824ms	remaining: 2.58s
242:	learn: 0.3970495	total: 827ms	remaining: 2.58s
243:	learn: 0.3964869	total: 838ms	remaining: 2.6s
244:	learn: 0.3959656	total: 843ms	remaining: 2.6s
245:	learn: 0.3955111	total: 846ms	remaining: 2.59s
246:	learn: 0.3947065	total: 850ms	remaining: 2.59s
247:	learn: 0.3943613	total: 854ms	remaining: 2.59s
248:	learn: 0.3939720	total: 857ms	remaining: 2.58s
249:	learn: 0.3932512	total: 859ms	remaining: 2.58s
250:	learn: 0.3928089	total: 864ms	remaining: 2.58s
251:	learn: 0.3924009	total: 867ms	remaining: 2.57s
252:	learn: 0.3920110	total: 870ms	remaining: 2.57s
253:	learn: 0.3913942	total: 873ms	remaining: 2.56s
254:	learn: 0.

413:	learn: 0.3159370	total: 1.43s	remaining: 2.02s
414:	learn: 0.3155353	total: 1.43s	remaining: 2.02s
415:	learn: 0.3151957	total: 1.44s	remaining: 2.02s
416:	learn: 0.3148821	total: 1.44s	remaining: 2.01s
417:	learn: 0.3143746	total: 1.44s	remaining: 2.01s
418:	learn: 0.3140654	total: 1.45s	remaining: 2.01s
419:	learn: 0.3136624	total: 1.45s	remaining: 2s
420:	learn: 0.3133657	total: 1.46s	remaining: 2s
421:	learn: 0.3130259	total: 1.46s	remaining: 2s
422:	learn: 0.3124589	total: 1.46s	remaining: 2s
423:	learn: 0.3120423	total: 1.47s	remaining: 1.99s
424:	learn: 0.3116725	total: 1.47s	remaining: 1.99s
425:	learn: 0.3115211	total: 1.47s	remaining: 1.99s
426:	learn: 0.3110061	total: 1.48s	remaining: 1.98s
427:	learn: 0.3105356	total: 1.48s	remaining: 1.98s
428:	learn: 0.3100507	total: 1.49s	remaining: 1.98s
429:	learn: 0.3095560	total: 1.49s	remaining: 1.97s
430:	learn: 0.3090876	total: 1.49s	remaining: 1.97s
431:	learn: 0.3084900	total: 1.5s	remaining: 1.98s
432:	learn: 0.3082262	tot

595:	learn: 0.2534218	total: 2.06s	remaining: 1.4s
596:	learn: 0.2531635	total: 2.06s	remaining: 1.39s
597:	learn: 0.2530258	total: 2.07s	remaining: 1.39s
598:	learn: 0.2525909	total: 2.07s	remaining: 1.39s
599:	learn: 0.2521941	total: 2.08s	remaining: 1.38s
600:	learn: 0.2518072	total: 2.08s	remaining: 1.38s
601:	learn: 0.2515139	total: 2.08s	remaining: 1.38s
602:	learn: 0.2511996	total: 2.09s	remaining: 1.37s
603:	learn: 0.2508401	total: 2.09s	remaining: 1.37s
604:	learn: 0.2504774	total: 2.1s	remaining: 1.37s
605:	learn: 0.2502631	total: 2.1s	remaining: 1.36s
606:	learn: 0.2496821	total: 2.1s	remaining: 1.36s
607:	learn: 0.2494367	total: 2.11s	remaining: 1.36s
608:	learn: 0.2491759	total: 2.11s	remaining: 1.35s
609:	learn: 0.2489895	total: 2.12s	remaining: 1.35s
610:	learn: 0.2487319	total: 2.12s	remaining: 1.35s
611:	learn: 0.2484284	total: 2.12s	remaining: 1.34s
612:	learn: 0.2481387	total: 2.13s	remaining: 1.34s
613:	learn: 0.2478960	total: 2.13s	remaining: 1.34s
614:	learn: 0.24

775:	learn: 0.1989923	total: 2.7s	remaining: 779ms
776:	learn: 0.1988310	total: 2.7s	remaining: 776ms
777:	learn: 0.1987037	total: 2.71s	remaining: 773ms
778:	learn: 0.1983810	total: 2.71s	remaining: 769ms
779:	learn: 0.1979867	total: 2.71s	remaining: 765ms
780:	learn: 0.1976834	total: 2.72s	remaining: 762ms
781:	learn: 0.1973469	total: 2.72s	remaining: 758ms
782:	learn: 0.1969001	total: 2.73s	remaining: 755ms
783:	learn: 0.1966797	total: 2.73s	remaining: 752ms
784:	learn: 0.1964655	total: 2.73s	remaining: 749ms
785:	learn: 0.1961261	total: 2.74s	remaining: 746ms
786:	learn: 0.1957359	total: 2.74s	remaining: 742ms
787:	learn: 0.1953568	total: 2.75s	remaining: 739ms
788:	learn: 0.1950920	total: 2.75s	remaining: 736ms
789:	learn: 0.1948603	total: 2.75s	remaining: 732ms
790:	learn: 0.1945915	total: 2.76s	remaining: 729ms
791:	learn: 0.1943320	total: 2.76s	remaining: 726ms
792:	learn: 0.1940851	total: 2.77s	remaining: 722ms
793:	learn: 0.1937690	total: 2.77s	remaining: 718ms
794:	learn: 0.

934:	learn: 0.1598046	total: 3.33s	remaining: 231ms
935:	learn: 0.1593822	total: 3.33s	remaining: 228ms
936:	learn: 0.1591695	total: 3.33s	remaining: 224ms
937:	learn: 0.1591134	total: 3.34s	remaining: 221ms
938:	learn: 0.1587983	total: 3.34s	remaining: 217ms
939:	learn: 0.1584361	total: 3.35s	remaining: 214ms
940:	learn: 0.1581854	total: 3.35s	remaining: 210ms
941:	learn: 0.1579505	total: 3.37s	remaining: 207ms
942:	learn: 0.1576451	total: 3.37s	remaining: 204ms
943:	learn: 0.1575607	total: 3.38s	remaining: 200ms
944:	learn: 0.1571959	total: 3.38s	remaining: 197ms
945:	learn: 0.1571603	total: 3.39s	remaining: 193ms
946:	learn: 0.1570464	total: 3.39s	remaining: 190ms
947:	learn: 0.1567895	total: 3.4s	remaining: 187ms
948:	learn: 0.1565191	total: 3.41s	remaining: 183ms
949:	learn: 0.1561963	total: 3.41s	remaining: 180ms
950:	learn: 0.1559698	total: 3.42s	remaining: 176ms
951:	learn: 0.1557294	total: 3.42s	remaining: 172ms
952:	learn: 0.1554900	total: 3.42s	remaining: 169ms
953:	learn: 0

### PCA function

In [33]:
from sklearn.decomposition import PCA

In [34]:
pca=PCA(n_components=10)

In [35]:
x_new_2 = pca.fit(x).transform(x)

In [37]:
sum(pca.explained_variance_ratio_)

0.8678194290416649

In [38]:
train_x2_new, test_x2_new, train_y2_new, test_y2_new = train_test_split(x_new_2, y, train_size = 0.8, random_state = 1039)
rd2_new_classifier = RandomForestClassifier()
rd2_new_classifier.fit(train_x2_new, train_y_new)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [39]:
pred_y2_new = rd2_new_classifier.predict(test_x2_new)
print(classification_report(pred_y2_new, test_y2_new))
print(confusion_matrix(test_y2_new, pred_y2_new))

              precision    recall  f1-score   support

           0       0.70      0.73      0.72        64
           1       0.73      0.70      0.71        66

    accuracy                           0.72       130
   macro avg       0.72      0.72      0.72       130
weighted avg       0.72      0.72      0.72       130

[[47 20]
 [17 46]]


# Regression Part

### all work based on G3

In [40]:
data_por_3

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3,Mjob_at_home,Mjob_health,Mjob_other,Mjob_services,Mjob_teacher,Fjob_at_home,Fjob_health,Fjob_other,Fjob_services,Fjob_teacher,reason_course,reason_home,reason_other,reason_reputation,pass_fail
0,0,0,18,1,0,0,4,4,1,2,2,0,1,0,0,0,1,1,0,0,4,3,4,1,1,3,4,0,11,11,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0
1,0,0,17,1,0,1,1,1,0,1,2,0,0,1,0,0,0,1,1,0,5,3,3,1,1,3,2,9,11,11,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0
2,0,0,15,1,1,1,1,1,1,1,2,0,1,0,0,0,1,1,1,0,4,3,2,2,3,3,6,12,13,12,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1
3,0,0,15,1,0,1,4,2,1,1,3,0,0,1,0,1,1,1,1,1,3,2,2,1,1,5,0,14,14,14,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1
4,0,0,16,1,0,1,3,3,0,1,2,0,0,1,0,0,1,1,0,0,4,3,2,1,2,5,0,11,13,13,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
644,1,0,19,0,0,1,2,3,1,1,3,1,0,0,0,1,0,1,1,0,5,4,2,1,2,5,4,10,11,10,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0
645,1,0,18,1,1,1,3,1,1,1,2,0,0,1,0,0,1,1,1,0,4,3,4,1,1,1,4,15,15,16,0,0,0,0,1,0,0,0,1,0,1,0,0,0,1
646,1,0,18,1,0,1,1,1,1,2,2,0,0,0,0,1,1,1,0,0,1,1,1,1,1,5,6,11,12,9,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0
647,1,1,17,1,1,1,3,1,1,2,1,0,0,0,0,0,0,1,1,0,2,4,5,3,4,2,6,10,10,10,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0


In [41]:
data_por_3["G3"].value_counts()

11    104
10     97
13     82
12     72
14     63
15     49
16     36
9      35
8      35
17     29
0      15
18     15
7      10
6       3
19      2
5       1
1       1
Name: G3, dtype: int64

In [42]:
data_por_3["G2"].value_counts()

11    103
12     86
10     83
13     80
9      72
14     54
8      40
15     38
16     25
17     20
7      16
18     14
6       7
0       7
5       3
19      1
Name: G2, dtype: int64

In [43]:
data_por_3["G1"].value_counts()

10    95
11    91
12    82
13    72
14    71
9     65
8     42
15    35
7     33
16    22
17    16
6      9
18     7
5      5
4      2
19     1
0      1
Name: G1, dtype: int64

In [45]:
data_por_3[data_por_3["G3"]==0]

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3,Mjob_at_home,Mjob_health,Mjob_other,Mjob_services,Mjob_teacher,Fjob_at_home,Fjob_health,Fjob_other,Fjob_services,Fjob_teacher,reason_course,reason_home,reason_other,reason_reputation,pass_fail
163,0,1,18,1,1,1,1,1,1,1,1,2,0,0,0,0,1,0,1,1,2,3,5,2,5,4,0,11,9,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0
440,1,1,16,1,0,1,1,1,1,2,2,0,0,1,0,1,1,1,0,1,5,4,5,4,5,3,0,7,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0
519,1,1,16,0,0,1,2,1,1,2,2,0,0,0,0,1,1,1,1,0,5,2,1,1,1,2,0,8,7,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0
563,1,1,17,1,0,1,2,2,1,1,1,1,0,0,0,1,1,1,0,1,1,2,1,2,3,5,0,7,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0
567,1,1,18,0,0,1,3,2,1,1,1,1,0,0,0,0,1,0,1,0,2,3,1,2,2,5,0,4,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0
583,1,0,18,0,0,1,2,2,1,2,1,1,0,0,0,0,1,0,1,1,5,5,5,1,1,3,0,8,6,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0
586,1,0,17,1,0,1,4,2,1,1,2,0,1,1,0,1,1,1,1,0,5,5,5,1,3,5,0,8,8,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0
597,1,0,18,0,0,1,2,2,1,3,2,1,0,0,0,1,1,1,0,1,4,3,3,1,1,4,0,9,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0
603,1,0,18,0,1,0,4,2,1,1,2,0,0,0,0,1,1,1,1,1,5,3,1,1,1,5,0,5,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0
605,1,0,19,1,0,1,1,1,0,2,1,1,0,0,0,0,1,0,0,0,5,5,5,2,3,2,0,5,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0


In [46]:
categorical_dic

{'Fjob': ['at_home', 'health', 'other', 'services', 'teacher'],
 'Mjob': ['at_home', 'health', 'other', 'services', 'teacher'],
 'Pstatus': ['A', 'T'],
 'activities': ['no', 'yes'],
 'address': ['R', 'U'],
 'famsize': ['GT3', 'LE3'],
 'famsup': ['no', 'yes'],
 'guardian': ['father', 'mother', 'other'],
 'higher': ['no', 'yes'],
 'internet': ['no', 'yes'],
 'nursery': ['no', 'yes'],
 'paid': ['no', 'yes'],
 'reason': ['course', 'home', 'other', 'reputation'],
 'romantic': ['no', 'yes'],
 'school': ['GP', 'MS'],
 'schoolsup': ['no', 'yes'],
 'sex': ['F', 'M']}

### svm regression model

In [48]:
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
import math
from sklearn.model_selection import cross_validate

In [47]:
x = data_por_3.drop(columns = ["G1", "G2", "G3", "pass_fail"])
y = data_por_3["G3"]
train_x, test_x, train_y, test_y = train_test_split(x, y, train_size = 0.8, random_state = 1039)

In [49]:
svm_regressor = SVR(C = 3, epsilon = 0.01)
svm_regressor.fit(train_x, train_y)
svm_pred_y = svm_regressor.predict(test_x)
print(mean_squared_error(test_y, svm_pred_y))
print(math.sqrt(mean_squared_error(test_y, svm_pred_y)))

7.899623790867218
2.8106269391129124


In [50]:
cv_results = cross_validate(svm_regressor, x, y, cv=5, scoring = 'neg_mean_squared_error')
print(cv_results['test_score'])
print(np.mean(cv_results['test_score']))

[ -3.80888857  -5.61785441  -6.62724716  -8.04942985 -16.86012097]
-8.192708191140516


### gradient boosting regression model

In [51]:
from sklearn.ensemble import GradientBoostingRegressor

In [52]:
grad_regression = GradientBoostingRegressor(learning_rate = 0.01, n_estimators = 1000)
grad_regression.fit(train_x, train_y)

GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                          init=None, learning_rate=0.01, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=1000,
                          n_iter_no_change=None, presort='deprecated',
                          random_state=None, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

In [53]:
grad_pred_y = grad_regression.predict(test_x)
print(mean_squared_error(test_y, grad_pred_y))
print(math.sqrt(mean_squared_error(test_y, grad_pred_y)))

8.32931304206392
2.8860549270698086


In [54]:
cv_results = cross_validate(grad_regression, x, y, cv=5, scoring = 'neg_mean_squared_error')
print(cv_results['test_score'])
print(np.mean(cv_results['test_score']))

[ -4.80704021  -5.59146993  -6.23011006  -9.40621312 -16.32071436]
-8.471109535161492


In [55]:
importances = grad_regression.feature_importances_
gradient_importances = pd.Series(importances, index=x.columns)
print(gradient_importances)

school               0.046107
sex                  0.014218
age                  0.039059
address              0.012542
famsize              0.006167
Pstatus              0.003321
Medu                 0.019343
Fedu                 0.028950
guardian             0.007580
traveltime           0.014605
studytime            0.028637
failures             0.252166
schoolsup            0.025451
famsup               0.004447
paid                 0.002158
activities           0.007983
nursery              0.005340
higher               0.065282
internet             0.016668
romantic             0.003436
famrel               0.016353
freetime             0.025845
goout                0.026916
Dalc                 0.076670
Walc                 0.040564
health               0.023079
absences             0.082687
Mjob_at_home         0.003556
Mjob_health          0.002694
Mjob_other           0.005676
Mjob_services        0.006156
Mjob_teacher         0.013457
Fjob_at_home         0.002383
Fjob_healt

In [56]:
choosing_feature = []
feature_name = list(x.columns)
for i in range(len(gradient_importances)):
    if forest_importances[i]>= 0.03:
        choosing_feature.append(feature_name[i])
print(choosing_feature)

['school', 'age', 'Medu', 'Fedu', 'studytime', 'failures', 'higher', 'famrel', 'freetime', 'goout', 'Dalc', 'Walc', 'health', 'absences']


### random forest regression

In [57]:
from sklearn.ensemble import RandomForestRegressor

In [58]:
random_regression = RandomForestRegressor(max_depth = 5, n_estimators = 300)
random_regression.fit(train_x, train_y)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=5, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=300, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [59]:
random_pred_y = random_regression.predict(test_x)
print(mean_squared_error(test_y, random_pred_y))
print(math.sqrt(mean_squared_error(test_y, random_pred_y)))

7.672460917037736
2.7699207420137015


In [60]:
cv_results = cross_validate(random_regression, x, y, cv=5, scoring = 'neg_mean_squared_error')
print(cv_results['test_score'])
print(np.mean(cv_results['test_score']))

[ -3.95223505  -5.89913413  -6.80768157  -8.83737914 -15.9866045 ]
-8.296606878393458


In [61]:
importances = random_regression.feature_importances_
forest_importances = pd.Series(importances, index=x.columns)
print(forest_importances)

school               0.046516
sex                  0.007468
age                  0.035880
address              0.012658
famsize              0.005072
Pstatus              0.001359
Medu                 0.022665
Fedu                 0.025050
guardian             0.013620
traveltime           0.008636
studytime            0.028026
failures             0.330616
schoolsup            0.015152
famsup               0.006644
paid                 0.001704
activities           0.004947
nursery              0.004936
higher               0.072053
internet             0.010264
romantic             0.007178
famrel               0.014290
freetime             0.014303
goout                0.026113
Dalc                 0.061557
Walc                 0.038720
health               0.015705
absences             0.092505
Mjob_at_home         0.003285
Mjob_health          0.002433
Mjob_other           0.004975
Mjob_services        0.003773
Mjob_teacher         0.004637
Fjob_at_home         0.001951
Fjob_healt

In [63]:
choosing_feature = []
feature_name = list(x.columns)
for i in range(len(forest_importances)):
    if forest_importances[i]>= 0.03:
        choosing_feature.append(feature_name[i])
print(choosing_feature)

['school', 'age', 'failures', 'higher', 'Dalc', 'Walc', 'absences']


### remove G2, G3 result = 0 sample and its estimated result

In [64]:
data_por_4 = data_por_3.copy()

In [65]:
data_por_4[data_por_3["G3"]==0]

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3,Mjob_at_home,Mjob_health,Mjob_other,Mjob_services,Mjob_teacher,Fjob_at_home,Fjob_health,Fjob_other,Fjob_services,Fjob_teacher,reason_course,reason_home,reason_other,reason_reputation,pass_fail
163,0,1,18,1,1,1,1,1,1,1,1,2,0,0,0,0,1,0,1,1,2,3,5,2,5,4,0,11,9,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0
440,1,1,16,1,0,1,1,1,1,2,2,0,0,1,0,1,1,1,0,1,5,4,5,4,5,3,0,7,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0
519,1,1,16,0,0,1,2,1,1,2,2,0,0,0,0,1,1,1,1,0,5,2,1,1,1,2,0,8,7,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0
563,1,1,17,1,0,1,2,2,1,1,1,1,0,0,0,1,1,1,0,1,1,2,1,2,3,5,0,7,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0
567,1,1,18,0,0,1,3,2,1,1,1,1,0,0,0,0,1,0,1,0,2,3,1,2,2,5,0,4,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0
583,1,0,18,0,0,1,2,2,1,2,1,1,0,0,0,0,1,0,1,1,5,5,5,1,1,3,0,8,6,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0
586,1,0,17,1,0,1,4,2,1,1,2,0,1,1,0,1,1,1,1,0,5,5,5,1,3,5,0,8,8,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0
597,1,0,18,0,0,1,2,2,1,3,2,1,0,0,0,1,1,1,0,1,4,3,3,1,1,4,0,9,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0
603,1,0,18,0,1,0,4,2,1,1,2,0,0,0,0,1,1,1,1,1,5,3,1,1,1,5,0,5,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0
605,1,0,19,1,0,1,1,1,0,2,1,1,0,0,0,0,1,0,0,0,5,5,5,2,3,2,0,5,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0


In [66]:
result_0 = data_por_4[data_por_4["G3"]==0].index
data_por_4 = data_por_4.drop(index = result_0)

In [67]:
x = data_por_4.drop(columns = ["G1", "G2", "G3", "pass_fail"])
y = data_por_4["G3"]
train_x, test_x, train_y, test_y = train_test_split(x, y, train_size = 0.8, random_state = 1039)

In [68]:
random_regression = RandomForestRegressor(max_depth = 5, n_estimators = 300)
random_regression.fit(train_x, train_y)
random_pred_y = random_regression.predict(test_x)
print(mean_squared_error(test_y, random_pred_y))
print(math.sqrt(mean_squared_error(test_y, random_pred_y)))

4.686710116573054
2.164881086012129


In [69]:
cv_results = cross_validate(random_regression, x, y, cv=5, scoring = 'neg_mean_squared_error')
print(cv_results['test_score'])
print(np.mean(cv_results['test_score']))

[-3.88791196 -4.93730754 -6.11274627 -4.45811616 -6.92477304]
-5.264170994114235


In [70]:
importances = random_regression.feature_importances_
forest_importances = pd.Series(importances, index=x.columns)
print(forest_importances)

school               0.020052
sex                  0.009286
age                  0.026747
address              0.004295
famsize              0.004625
Pstatus              0.002986
Medu                 0.065979
Fedu                 0.026436
guardian             0.003189
traveltime           0.007725
studytime            0.061208
failures             0.354947
schoolsup            0.028379
famsup               0.006945
paid                 0.002090
activities           0.005108
nursery              0.005770
higher               0.095050
internet             0.008031
romantic             0.003041
famrel               0.007586
freetime             0.016950
goout                0.010543
Dalc                 0.019846
Walc                 0.031135
health               0.035171
absences             0.050131
Mjob_at_home         0.011654
Mjob_health          0.001853
Mjob_other           0.003614
Mjob_services        0.005422
Mjob_teacher         0.004872
Fjob_at_home         0.006899
Fjob_healt

In [71]:
choosing_feature = []
feature_point = []
feature_name = list(x.columns)
for i in range(len(gradient_importances)):
    if forest_importances[i]>= 0.03:
        choosing_feature.append(feature_name[i])
        feature_point.append(forest_importances[i])
print(choosing_feature)
print(sum(feature_point))

['Medu', 'studytime', 'failures', 'higher', 'Walc', 'health', 'absences']
0.6936216117382524


In [72]:
grad_regression = GradientBoostingRegressor(learning_rate = 0.01, n_estimators = 1000)
grad_regression.fit(train_x, train_y)
grad_pred_y = grad_regression.predict(test_x)
print(mean_squared_error(test_y, grad_pred_y))
print(math.sqrt(mean_squared_error(test_y, grad_pred_y)))

4.462192320630213
2.1123901913780543


In [73]:
cv_results = cross_validate(grad_regression, x, y, cv=5, scoring = 'neg_mean_squared_error')
print(cv_results['test_score'])
print(np.mean(cv_results['test_score']))

[-4.08216232 -5.03384567 -5.86502388 -5.31662828 -6.91507104]
-5.442546236769104


In [74]:
importances = grad_regression.feature_importances_
gradient_importances = pd.Series(importances, index=x.columns)
print(gradient_importances)

school               0.025836
sex                  0.013530
age                  0.032172
address              0.009961
famsize              0.006062
Pstatus              0.001941
Medu                 0.064630
Fedu                 0.033891
guardian             0.003800
traveltime           0.023266
studytime            0.067796
failures             0.232425
schoolsup            0.027051
famsup               0.015080
paid                 0.004708
activities           0.007999
nursery              0.013687
higher               0.075015
internet             0.010932
romantic             0.007547
famrel               0.013116
freetime             0.010221
goout                0.015033
Dalc                 0.023655
Walc                 0.037853
health               0.042996
absences             0.062658
Mjob_at_home         0.012490
Mjob_health          0.007924
Mjob_other           0.003051
Mjob_services        0.013279
Mjob_teacher         0.004613
Fjob_at_home         0.005435
Fjob_healt

In [75]:
choosing_feature = []
feature_point = []
feature_name = list(x.columns)
for i in range(len(gradient_importances)):
    if gradient_importances[i]>= 0.03:
        choosing_feature.append(feature_name[i])
        feature_point.append(gradient_importances[i])
print(choosing_feature)
print(sum(feature_point))

['age', 'Medu', 'Fedu', 'studytime', 'failures', 'higher', 'Walc', 'health', 'absences', 'Fjob_teacher']
0.680477955615138
