In [1]:
#!coding:utf-8
#Load the librarys
import pandas as pd #To work with dataset
import numpy as np #Math library
import seaborn as sns #Graph library that use matplot in background
import matplotlib.pyplot as plt #to plot some parameters in seaborn
from sklearn.model_selection import train_test_split, KFold, cross_val_score # to split the data
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, fbeta_score #To evaluate our model
from sklearn import metrics
from sklearn.model_selection import GridSearchCV

# Algorithmns models to be compared
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from xgboost import XGBClassifier

#Importing the data
df_credit = pd.read_csv("./input/german_credit_data.csv",index_col=0)

In [2]:
df_credit[:10]

Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,67,male,2,own,,little,1169,6,radio/TV,good
1,22,female,2,own,little,moderate,5951,48,radio/TV,bad
2,49,male,1,own,little,,2096,12,education,good
3,45,male,2,free,little,little,7882,42,furniture/equipment,good
4,53,male,2,free,little,little,4870,24,car,bad
5,35,male,1,free,,,9055,36,education,good
6,53,male,2,own,quite rich,,2835,24,furniture/equipment,good
7,35,male,3,rent,little,moderate,6948,36,car,good
8,61,male,1,own,rich,,3059,12,radio/TV,good
9,28,male,3,own,little,moderate,5234,30,car,bad


In [3]:
print(df_credit.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1000 entries, 0 to 999
Data columns (total 10 columns):
Age                 1000 non-null int64
Sex                 1000 non-null object
Job                 1000 non-null int64
Housing             1000 non-null object
Saving accounts     817 non-null object
Checking account    606 non-null object
Credit amount       1000 non-null int64
Duration            1000 non-null int64
Purpose             1000 non-null object
Risk                1000 non-null object
dtypes: int64(4), object(6)
memory usage: 85.9+ KB
None


In [4]:
def one_hot_encoder(df, nan_as_category = False):
    original_columns = list(df.columns)
    categorical_columns = [col for col in df.columns if df[col].dtype == 'object']
    df = pd.get_dummies(df, columns= categorical_columns, dummy_na= nan_as_category, drop_first=True)
    new_columns = [c for c in df.columns if c not in original_columns]
    return df, new_columns

In [5]:
interval = (18, 25, 35, 60, 120)

cats = ['Student', 'Young', 'Adult', 'Senior']
df_credit["Age_cat"] = pd.cut(df_credit.Age, interval, labels=cats)

In [6]:
df_credit['Saving accounts'] = df_credit['Saving accounts'].fillna('no_inf')
df_credit['Checking account'] = df_credit['Checking account'].fillna('no_inf')

#Purpose to Dummies Variable
df_credit = df_credit.merge(pd.get_dummies(df_credit.Purpose, drop_first=True, prefix='Purpose'), left_index=True, right_index=True)
#Sex feature in dummies
df_credit = df_credit.merge(pd.get_dummies(df_credit.Sex, drop_first=True, prefix='Sex'), left_index=True, right_index=True)
# Housing get dummies
df_credit = df_credit.merge(pd.get_dummies(df_credit.Housing, drop_first=True, prefix='Housing'), left_index=True, right_index=True)
# Housing get Saving Accounts
df_credit = df_credit.merge(pd.get_dummies(df_credit["Saving accounts"], drop_first=True, prefix='Savings'), left_index=True, right_index=True)
# Housing get Risk
df_credit = df_credit.merge(pd.get_dummies(df_credit.Risk, prefix='Risk'), left_index=True, right_index=True)
# Housing get Checking Account
df_credit = df_credit.merge(pd.get_dummies(df_credit["Checking account"], drop_first=True, prefix='Check'), left_index=True, right_index=True)
# Housing get Age categorical
df_credit = df_credit.merge(pd.get_dummies(df_credit["Age_cat"], drop_first=True, prefix='Age_cat'), left_index=True, right_index=True)

In [7]:
#Excluding the missing columns
del df_credit["Saving accounts"]
del df_credit["Checking account"]
del df_credit["Purpose"]
del df_credit["Sex"]
del df_credit["Housing"]
del df_credit["Age_cat"]
del df_credit["Risk"]
del df_credit['Risk_good']

In [8]:
df_credit['Credit amount'] = np.log(df_credit['Credit amount'])

In [9]:
X = df_credit.drop('Risk_bad', 1).values
y = df_credit["Risk_bad"].values

In [10]:
# Spliting X and y into train and test version


In [11]:
import xgboost as xgb

kf = KFold(n_splits=10, shuffle=True, random_state=42)

In [12]:
params = {
    "booster":"gbtree",
    "objective":"binary:logistic",
    "eval_metiic":"logloss",
    "eta":0.1,
    "max_depth":10,
    "missing":0,
    "seed":0,
    "silent":1
}

In [13]:
auc_score = []
acc_score = []
recall_score = []
prec_score = []
f1_score = []
beta_score = []

In [14]:
for train_index, test_index in kf.split(X):
    data_train = xgb.DMatrix(X[train_index], y[train_index])
    data_test = xgb.DMatrix(X[test_index],y[test_index])
    watch_list = [(data_test,"eval"),(data_train,"train")]
    bst = xgb.train(params,data_train,num_boost_round=20000,evals = watch_list,early_stopping_rounds = 10)
    ypred = bst.predict(data_test)
    y_pred = (ypred >= 0.5)*1
    print(y_pred)
    auc_score.append(metrics.roc_auc_score(y[test_index],ypred))
    acc_score.append(metrics.accuracy_score(y[test_index],y_pred))
    recall_score.append(metrics.recall_score(y[test_index],y_pred))
    prec_score.append(metrics.precision_score(y[test_index],y_pred))
    f1_score.append(metrics.f1_score(y[test_index],y_pred))
    beta_score.append(fbeta_score(y[test_index], y_pred, beta=2))

[0]	eval-error:0.36	train-error:0.138889
Multiple eval metrics have been passed: 'train-error' will be used for early stopping.

Will train until train-error hasn't improved in 10 rounds.
[1]	eval-error:0.32	train-error:0.132222
[2]	eval-error:0.31	train-error:0.123333
[3]	eval-error:0.27	train-error:0.116667
[4]	eval-error:0.26	train-error:0.113333
[5]	eval-error:0.26	train-error:0.115556
[6]	eval-error:0.25	train-error:0.11
[7]	eval-error:0.26	train-error:0.11
[8]	eval-error:0.25	train-error:0.106667
[9]	eval-error:0.25	train-error:0.104444
[10]	eval-error:0.25	train-error:0.106667
[11]	eval-error:0.24	train-error:0.101111
[12]	eval-error:0.25	train-error:0.097778
[13]	eval-error:0.24	train-error:0.09
[14]	eval-error:0.24	train-error:0.086667
[15]	eval-error:0.24	train-error:0.08
[16]	eval-error:0.24	train-error:0.08
[17]	eval-error:0.24	train-error:0.077778
[18]	eval-error:0.22	train-error:0.074444
[19]	eval-error:0.22	train-error:0.074444
[20]	eval-error:0.22	train-error:0.068889
[

[83]	eval-error:0.24	train-error:0.004444
[84]	eval-error:0.24	train-error:0.004444
[85]	eval-error:0.24	train-error:0.004444
[86]	eval-error:0.24	train-error:0.004444
[87]	eval-error:0.24	train-error:0.004444
[88]	eval-error:0.24	train-error:0.004444
[89]	eval-error:0.24	train-error:0.004444
[90]	eval-error:0.24	train-error:0.003333
[91]	eval-error:0.24	train-error:0.003333
[92]	eval-error:0.24	train-error:0.003333
[93]	eval-error:0.24	train-error:0.003333
[94]	eval-error:0.24	train-error:0.003333
[95]	eval-error:0.24	train-error:0.003333
[96]	eval-error:0.24	train-error:0.003333
[97]	eval-error:0.24	train-error:0.003333
[98]	eval-error:0.24	train-error:0.002222
[99]	eval-error:0.24	train-error:0.002222
[100]	eval-error:0.24	train-error:0.002222
[101]	eval-error:0.24	train-error:0.002222
[102]	eval-error:0.24	train-error:0.002222
[103]	eval-error:0.24	train-error:0.002222
[104]	eval-error:0.24	train-error:0.002222
[105]	eval-error:0.24	train-error:0.002222
[106]	eval-error:0.24	train-

[58]	eval-error:0.27	train-error:0.012222
[59]	eval-error:0.25	train-error:0.012222
[60]	eval-error:0.26	train-error:0.011111
[61]	eval-error:0.26	train-error:0.011111
[62]	eval-error:0.25	train-error:0.011111
[63]	eval-error:0.26	train-error:0.011111
[64]	eval-error:0.26	train-error:0.008889
[65]	eval-error:0.26	train-error:0.008889
[66]	eval-error:0.26	train-error:0.008889
[67]	eval-error:0.26	train-error:0.007778
[68]	eval-error:0.26	train-error:0.006667
[69]	eval-error:0.26	train-error:0.006667
[70]	eval-error:0.26	train-error:0.006667
[71]	eval-error:0.26	train-error:0.006667
[72]	eval-error:0.25	train-error:0.006667
[73]	eval-error:0.25	train-error:0.004444
[74]	eval-error:0.24	train-error:0.004444
[75]	eval-error:0.24	train-error:0.004444
[76]	eval-error:0.24	train-error:0.004444
[77]	eval-error:0.24	train-error:0.004444
[78]	eval-error:0.24	train-error:0.004444
[79]	eval-error:0.24	train-error:0.004444
[80]	eval-error:0.24	train-error:0.004444
[81]	eval-error:0.24	train-error:0

[53]	eval-error:0.39	train-error:0.017778
[54]	eval-error:0.39	train-error:0.018889
[55]	eval-error:0.39	train-error:0.018889
[56]	eval-error:0.39	train-error:0.017778
[57]	eval-error:0.39	train-error:0.016667
[58]	eval-error:0.4	train-error:0.015556
[59]	eval-error:0.39	train-error:0.015556
[60]	eval-error:0.39	train-error:0.014444
[61]	eval-error:0.38	train-error:0.014444
[62]	eval-error:0.38	train-error:0.014444
[63]	eval-error:0.38	train-error:0.014444
[64]	eval-error:0.38	train-error:0.014444
[65]	eval-error:0.39	train-error:0.011111
[66]	eval-error:0.39	train-error:0.011111
[67]	eval-error:0.39	train-error:0.01
[68]	eval-error:0.39	train-error:0.01
[69]	eval-error:0.39	train-error:0.008889
[70]	eval-error:0.39	train-error:0.006667
[71]	eval-error:0.39	train-error:0.007778
[72]	eval-error:0.39	train-error:0.006667
[73]	eval-error:0.39	train-error:0.006667
[74]	eval-error:0.39	train-error:0.006667
[75]	eval-error:0.39	train-error:0.006667
[76]	eval-error:0.39	train-error:0.005556
[

[34]	eval-error:0.26	train-error:0.048889
[35]	eval-error:0.26	train-error:0.048889
[36]	eval-error:0.27	train-error:0.048889
[37]	eval-error:0.26	train-error:0.045556
[38]	eval-error:0.25	train-error:0.043333
[39]	eval-error:0.25	train-error:0.043333
[40]	eval-error:0.26	train-error:0.04
[41]	eval-error:0.26	train-error:0.037778
[42]	eval-error:0.26	train-error:0.036667
[43]	eval-error:0.26	train-error:0.035556
[44]	eval-error:0.26	train-error:0.035556
[45]	eval-error:0.26	train-error:0.035556
[46]	eval-error:0.26	train-error:0.036667
[47]	eval-error:0.26	train-error:0.037778
[48]	eval-error:0.26	train-error:0.035556
[49]	eval-error:0.26	train-error:0.034444
[50]	eval-error:0.26	train-error:0.033333
[51]	eval-error:0.26	train-error:0.032222
[52]	eval-error:0.26	train-error:0.031111
[53]	eval-error:0.26	train-error:0.028889
[54]	eval-error:0.26	train-error:0.027778
[55]	eval-error:0.26	train-error:0.026667
[56]	eval-error:0.26	train-error:0.025556
[57]	eval-error:0.26	train-error:0.024

Will train until train-error hasn't improved in 10 rounds.
[1]	eval-error:0.25	train-error:0.137778
[2]	eval-error:0.24	train-error:0.131111
[3]	eval-error:0.24	train-error:0.125556
[4]	eval-error:0.26	train-error:0.125556
[5]	eval-error:0.29	train-error:0.11
[6]	eval-error:0.24	train-error:0.111111
[7]	eval-error:0.29	train-error:0.101111
[8]	eval-error:0.29	train-error:0.092222
[9]	eval-error:0.29	train-error:0.088889
[10]	eval-error:0.28	train-error:0.083333
[11]	eval-error:0.28	train-error:0.076667
[12]	eval-error:0.27	train-error:0.083333
[13]	eval-error:0.26	train-error:0.08
[14]	eval-error:0.28	train-error:0.076667
[15]	eval-error:0.29	train-error:0.076667
[16]	eval-error:0.29	train-error:0.076667
[17]	eval-error:0.29	train-error:0.073333
[18]	eval-error:0.29	train-error:0.067778
[19]	eval-error:0.28	train-error:0.066667
[20]	eval-error:0.29	train-error:0.063333
[21]	eval-error:0.29	train-error:0.065556
[22]	eval-error:0.29	train-error:0.067778
[23]	eval-error:0.29	train-error:0

In [54]:
# to feed the random state
seed = 7
# prepare models
models = []
models.append(('LR', LogisticRegression()))
#models.append(('LDA', LinearDiscriminantAnalysis()))
#models.append(('KNN', KNeighborsClassifier()))
#models.append(('CART', DecisionTreeClassifier()))
#models.append(('NB', GaussianNB()))
models.append(('RF', RandomForestClassifier()))
models.append(('SVM', SVC(gamma='auto')))
#models.append(('XGB', XGBClassifier()))

# evaluate each model in turn
results = []
names = []
scoring = 'f1'
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state=42)
for name, model in models:
        kfold = KFold(n_splits=10, random_state=42)
        cv_results = cross_val_score(model, X_train, y_train, cv=kfold, scoring=scoring)
        results.append(cv_results)
        names.append(name)
        msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
        print(msg)
        

LR: 0.467397 (0.063640)
RF: 0.347039 (0.105530)
SVM: 0.266798 (0.114255)


In [55]:
compare_result = pd.DataFrame(results).T
compare_result.columns= names

In [58]:
compare_result["XGBOOST"] = pd.DataFrame(f1_score)

In [59]:
compare_result

Unnamed: 0,LR,RF,SVM,XGBOOST
0,0.45,0.176471,0.294118,0.576923
1,0.580645,0.275862,0.173913,0.538462
2,0.461538,0.263158,0.242424,0.491228
3,0.444444,0.378378,0.285714,0.509804
4,0.391304,0.372093,0.263158,0.431373
5,0.457143,0.2,0.296296,0.349206
6,0.432432,0.451613,0.148148,0.708333
7,0.451613,0.5,0.428571,0.472727
8,0.594595,0.424242,0.466667,0.461538
9,0.410256,0.428571,0.068966,0.634921


In [60]:
names.append("xgboost")

In [61]:
# it's a library that we work with plotly
import plotly.offline as py 
py.init_notebook_mode(connected=True) # this code, allow us to work with offline plotly version
import plotly.graph_objs as go # it's like "plt" of matplot
import plotly.tools as tls # It's useful to we get some tools of plotly
import warnings # This library will be used to ignore some warnings
from collections import Counter # To do counter of some features

trace0 = go.Box(
    y= compare_result["LR"].values.tolist(),
    name='LR',
    marker=dict(
        color='#3D9970'
    )
)

trace1 = go.Box(
    y= compare_result["RF"].values.tolist(),
    x= name,
    name='RF',
    marker=dict(
        color='#3D9900'
    )
)

trace2 = go.Box(
    y= compare_result["SVM"].values.tolist(),
    x= name,
    name='SVM',
    marker=dict(
        color='#FF4136'
    )
)
trace3 = go.Box(
    y= compare_result["XGBOOST"].values.tolist(),
    x= name,
    name='XGBOOST',
    marker=dict(
        color='#3D0070'
    )
)
   
data = [trace0,trace1,trace2,trace3]

layout = go.Layout(
    yaxis=dict(
        title='Credit Amount (US Dollar)',
        zeroline=False
    ),
    xaxis=dict(
        title='Age Categorical'
    ),
    boxmode='group'
)
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='box-age-cat')

In [22]:
results

[array([ 0.45      ,  0.58064516,  0.46153846,  0.44444444,  0.39130435,
         0.45714286,  0.43243243,  0.4516129 ,  0.59459459,  0.41025641]),
 array([ 0.42857143,  0.58064516,  0.5       ,  0.46153846,  0.38297872,
         0.48648649,  0.43902439,  0.45714286,  0.66666667,  0.47619048]),
 array([ 0.35      ,  0.32258065,  0.23809524,  0.35294118,  0.31818182,
         0.25806452,  0.25641026,  0.36363636,  0.375     ,  0.40909091]),
 array([ 0.625     ,  0.57777778,  0.35087719,  0.41860465,  0.48979592,
         0.55813953,  0.43478261,  0.40816327,  0.49056604,  0.60714286]),
 array([ 0.60377358,  0.56410256,  0.52      ,  0.54166667,  0.58461538,
         0.35555556,  0.59259259,  0.625     ,  0.54166667,  0.48      ]),
 array([ 0.51282051,  0.25806452,  0.43243243,  0.35      ,  0.20512821,
         0.51612903,  0.5       ,  0.47058824,  0.42105263,  0.27027027]),
 array([ 0.29411765,  0.17391304,  0.24242424,  0.28571429,  0.26315789,
         0.2962963 ,  0.14814815,  0.42

In [None]:
## 实验2

In [18]:
params = {
    "booster":"gbtree",
    "objective":"binary:logistic",
    "eta":0.1,
    "max_depth":10,
    "missing":0,
    "seed":0,
    "silent":1
}

In [19]:
betas = [0.1,0.2,0.3,0.4]

In [20]:
for beta in betas:
    def weightloss(preds,dtrain):
        y = dtrain.get_label()
        p = 1.0 / (1.0 + np.exp(-preds ))
        grad = p * (beta + y - beta*y) - y
        hess = p*(1-p)*(beta + y - beta*y)
        return grad,hess
    bst_weightloss= xgb.train(params,data_train,num_boost_round=2000,evals = watch_list,early_stopping_rounds = 100,
                      obj=weightloss,maximize=False)
    ypred = bst_weightloss.predict(data_test)
    y_pred = (ypred >= 0.5)*1
  
    
    auc_score.append(metrics.roc_auc_score(y_test,ypred))
    acc_score.append(metrics.accuracy_score(y_test,y_pred))
    recall_score.append(metrics.recall_score(y_test,y_pred))
    prec_score.append(metrics.precision_score(y_test,y_pred))
    f1_score.append(metrics.f1_score(y_test,y_pred))
    beta_score.append(fbeta_score(y_test, y_pred, beta=2))

[0]	eval-error:0.472	train-error:0.425333
Multiple eval metrics have been passed: 'train-error' will be used for early stopping.

Will train until train-error hasn't improved in 100 rounds.
[1]	eval-error:0.44	train-error:0.406667
[2]	eval-error:0.464	train-error:0.422667
[3]	eval-error:0.44	train-error:0.406667
[4]	eval-error:0.44	train-error:0.406667
[5]	eval-error:0.452	train-error:0.405333
[6]	eval-error:0.452	train-error:0.405333
[7]	eval-error:0.452	train-error:0.404
[8]	eval-error:0.452	train-error:0.402667
[9]	eval-error:0.46	train-error:0.409333
[10]	eval-error:0.456	train-error:0.408
[11]	eval-error:0.456	train-error:0.409333
[12]	eval-error:0.464	train-error:0.416
[13]	eval-error:0.456	train-error:0.410667
[14]	eval-error:0.464	train-error:0.417333
[15]	eval-error:0.456	train-error:0.414667
[16]	eval-error:0.456	train-error:0.414667
[17]	eval-error:0.464	train-error:0.416
[18]	eval-error:0.456	train-error:0.416
[19]	eval-error:0.464	train-error:0.417333
[20]	eval-error:0.464

[192]	eval-error:0.44	train-error:0.34
[193]	eval-error:0.44	train-error:0.34
[194]	eval-error:0.44	train-error:0.34
[195]	eval-error:0.44	train-error:0.34
[196]	eval-error:0.44	train-error:0.34
[197]	eval-error:0.44	train-error:0.34
[198]	eval-error:0.44	train-error:0.34
[199]	eval-error:0.44	train-error:0.34
[200]	eval-error:0.44	train-error:0.34
[201]	eval-error:0.44	train-error:0.34
[202]	eval-error:0.44	train-error:0.34
[203]	eval-error:0.44	train-error:0.34
[204]	eval-error:0.44	train-error:0.338667
[205]	eval-error:0.44	train-error:0.338667
[206]	eval-error:0.444	train-error:0.338667
[207]	eval-error:0.444	train-error:0.338667
[208]	eval-error:0.444	train-error:0.337333
[209]	eval-error:0.444	train-error:0.337333
[210]	eval-error:0.444	train-error:0.337333
[211]	eval-error:0.444	train-error:0.337333
[212]	eval-error:0.444	train-error:0.337333
[213]	eval-error:0.444	train-error:0.337333
[214]	eval-error:0.444	train-error:0.338667
[215]	eval-error:0.444	train-error:0.338667
[216]	

[118]	eval-error:0.332	train-error:0.201333
[119]	eval-error:0.332	train-error:0.201333
[120]	eval-error:0.328	train-error:0.201333
[121]	eval-error:0.328	train-error:0.201333
[122]	eval-error:0.328	train-error:0.201333
[123]	eval-error:0.328	train-error:0.201333
[124]	eval-error:0.328	train-error:0.202667
[125]	eval-error:0.328	train-error:0.201333
[126]	eval-error:0.328	train-error:0.201333
[127]	eval-error:0.328	train-error:0.201333
[128]	eval-error:0.328	train-error:0.201333
[129]	eval-error:0.328	train-error:0.202667
[130]	eval-error:0.332	train-error:0.201333
[131]	eval-error:0.332	train-error:0.2
[132]	eval-error:0.332	train-error:0.201333
[133]	eval-error:0.332	train-error:0.2
[134]	eval-error:0.332	train-error:0.201333
[135]	eval-error:0.332	train-error:0.201333
[136]	eval-error:0.332	train-error:0.201333
[137]	eval-error:0.332	train-error:0.2
[138]	eval-error:0.332	train-error:0.2
[139]	eval-error:0.332	train-error:0.201333
[140]	eval-error:0.336	train-error:0.202667
[141]	ev

[67]	eval-error:0.308	train-error:0.153333
[68]	eval-error:0.308	train-error:0.156
[69]	eval-error:0.312	train-error:0.154667
[70]	eval-error:0.308	train-error:0.154667
[71]	eval-error:0.312	train-error:0.154667
[72]	eval-error:0.312	train-error:0.153333
[73]	eval-error:0.308	train-error:0.150667
[74]	eval-error:0.312	train-error:0.154667
[75]	eval-error:0.312	train-error:0.157333
[76]	eval-error:0.312	train-error:0.154667
[77]	eval-error:0.312	train-error:0.154667
[78]	eval-error:0.308	train-error:0.154667
[79]	eval-error:0.308	train-error:0.153333
[80]	eval-error:0.308	train-error:0.153333
[81]	eval-error:0.304	train-error:0.154667
[82]	eval-error:0.308	train-error:0.154667
[83]	eval-error:0.308	train-error:0.154667
[84]	eval-error:0.308	train-error:0.154667
[85]	eval-error:0.304	train-error:0.156
[86]	eval-error:0.304	train-error:0.154667
[87]	eval-error:0.312	train-error:0.156
[88]	eval-error:0.312	train-error:0.156
[89]	eval-error:0.304	train-error:0.154667
[90]	eval-error:0.312	t

[259]	eval-error:0.284	train-error:0.136
[260]	eval-error:0.284	train-error:0.136
[261]	eval-error:0.284	train-error:0.136
[262]	eval-error:0.288	train-error:0.136
[263]	eval-error:0.288	train-error:0.136
[264]	eval-error:0.288	train-error:0.136
[265]	eval-error:0.284	train-error:0.133333
[266]	eval-error:0.284	train-error:0.134667
[267]	eval-error:0.288	train-error:0.134667
[268]	eval-error:0.288	train-error:0.134667
[269]	eval-error:0.288	train-error:0.134667
[270]	eval-error:0.288	train-error:0.133333
[271]	eval-error:0.288	train-error:0.136
[272]	eval-error:0.288	train-error:0.136
[273]	eval-error:0.292	train-error:0.136
[274]	eval-error:0.292	train-error:0.136
[275]	eval-error:0.292	train-error:0.137333
[276]	eval-error:0.292	train-error:0.137333
[277]	eval-error:0.288	train-error:0.137333
[278]	eval-error:0.288	train-error:0.137333
[279]	eval-error:0.288	train-error:0.136
[280]	eval-error:0.288	train-error:0.137333
[281]	eval-error:0.292	train-error:0.134667
[282]	eval-error:0.29

[451]	eval-error:0.288	train-error:0.125333
[452]	eval-error:0.288	train-error:0.125333
[453]	eval-error:0.288	train-error:0.125333
[454]	eval-error:0.288	train-error:0.125333
[455]	eval-error:0.288	train-error:0.125333
[456]	eval-error:0.288	train-error:0.125333
[457]	eval-error:0.288	train-error:0.125333
[458]	eval-error:0.288	train-error:0.125333
[459]	eval-error:0.284	train-error:0.125333
[460]	eval-error:0.284	train-error:0.124
[461]	eval-error:0.284	train-error:0.125333
[462]	eval-error:0.284	train-error:0.125333
[463]	eval-error:0.284	train-error:0.125333
[464]	eval-error:0.284	train-error:0.125333
[465]	eval-error:0.28	train-error:0.125333
[466]	eval-error:0.28	train-error:0.125333
[467]	eval-error:0.28	train-error:0.125333
[468]	eval-error:0.28	train-error:0.125333
[469]	eval-error:0.28	train-error:0.125333
[470]	eval-error:0.28	train-error:0.125333
[471]	eval-error:0.28	train-error:0.125333
[472]	eval-error:0.28	train-error:0.125333
[473]	eval-error:0.28	train-error:0.125333


[33]	eval-error:0.288	train-error:0.141333
[34]	eval-error:0.284	train-error:0.138667
[35]	eval-error:0.28	train-error:0.138667
[36]	eval-error:0.28	train-error:0.138667
[37]	eval-error:0.28	train-error:0.137333
[38]	eval-error:0.276	train-error:0.137333
[39]	eval-error:0.28	train-error:0.138667
[40]	eval-error:0.276	train-error:0.137333
[41]	eval-error:0.276	train-error:0.136
[42]	eval-error:0.276	train-error:0.136
[43]	eval-error:0.272	train-error:0.136
[44]	eval-error:0.272	train-error:0.137333
[45]	eval-error:0.272	train-error:0.136
[46]	eval-error:0.272	train-error:0.137333
[47]	eval-error:0.272	train-error:0.137333
[48]	eval-error:0.272	train-error:0.137333
[49]	eval-error:0.272	train-error:0.138667
[50]	eval-error:0.272	train-error:0.138667
[51]	eval-error:0.272	train-error:0.14
[52]	eval-error:0.268	train-error:0.137333
[53]	eval-error:0.268	train-error:0.137333
[54]	eval-error:0.268	train-error:0.137333
[55]	eval-error:0.276	train-error:0.138667
[56]	eval-error:0.276	train-err

[225]	eval-error:0.264	train-error:0.117333
[226]	eval-error:0.264	train-error:0.117333
[227]	eval-error:0.264	train-error:0.117333
[228]	eval-error:0.264	train-error:0.117333
[229]	eval-error:0.264	train-error:0.117333
[230]	eval-error:0.264	train-error:0.117333
[231]	eval-error:0.264	train-error:0.117333
[232]	eval-error:0.264	train-error:0.117333
[233]	eval-error:0.264	train-error:0.117333
[234]	eval-error:0.264	train-error:0.117333
[235]	eval-error:0.264	train-error:0.117333
[236]	eval-error:0.264	train-error:0.117333
[237]	eval-error:0.264	train-error:0.117333
[238]	eval-error:0.264	train-error:0.117333
[239]	eval-error:0.264	train-error:0.117333
[240]	eval-error:0.264	train-error:0.117333
Stopping. Best iteration:
[140]	eval-error:0.28	train-error:0.113333



In [21]:
f1_score

[0.49600000000000005,
 0.51304347826086949,
 0.54444444444444451,
 0.52702702702702708,
 0.51470588235294112]

In [22]:
betas.insert(0,0)

In [23]:
result = pd.DataFrame([auc_score,acc_score,recall_score,prec_score,f1_score,beta_score]).T
result.columns = ["auc_score","acc_score","recall_score","prec_score","f1_score","beta_score"]

In [24]:
result

Unnamed: 0,auc_score,acc_score,recall_score,prec_score,f1_score,beta_score
0,0.731976,0.748,0.430556,0.584906,0.496,0.454545
1,0.713561,0.552,0.819444,0.373418,0.513043,0.661435
2,0.721403,0.672,0.680556,0.453704,0.544444,0.618687
3,0.723237,0.72,0.541667,0.513158,0.527027,0.535714
4,0.709699,0.736,0.486111,0.546875,0.514706,0.497159


In [25]:
# it's a library that we work with plotly
import plotly.offline as py 
py.init_notebook_mode(connected=True) # this code, allow us to work with offline plotly version
import plotly.graph_objs as go # it's like "plt" of matplot
import plotly.tools as tls # It's useful to we get some tools of plotly
import warnings # This library will be used to ignore some warnings
from collections import Counter # To do counter of some features

trace0 = go.Bar(
            x = betas,
            y = auc_score,
            name='auc_score'
    )

trace1 = go.Bar(
            x = betas ,
            y = acc_score,
            name='acc_score'
    )
trace2 = go.Bar(
            x = betas ,
            y = recall_score,
            name='recall_score'
    )
trace3 = go.Bar(
            x = betas ,
            y = prec_score,
            name='prec_score'
    )
trace4 = go.Bar(
            x = betas ,
            y = f1_score,
            name='f1_score'
    )

trace5 = go.Bar(
            x = betas ,
            y = beta_score,
            name='beta_score'
    )
data = [trace0, trace1,trace2,trace3,trace4,trace5]

layout = go.Layout(
    
)

layout = go.Layout(
    yaxis=dict(
        title='Count'
    ),
    xaxis=dict(
        title='Risk Variable'
    ),
    title='Target variable distribution'
)

fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='grouped-bar')

In [None]:
result.index= betas

In [None]:
%matplotlib inline
result.plot(kind='bar',figsize=(10,6.18))

In [None]:
import numpy as np
beta = 0.2
def weightloss(preds,dtrain):
    y = dtrain.get_label()
    p = 1.0 / (1.0 + np.exp(-preds ))
    grad = p * (beta + y - beta*y) - y
    hess = p*(1-p)*(beta + y - beta*y)
    return grad,hess
    

In [None]:
params = {
    "booster":"gbtree",
    "objective":"binary:logistic",
    "eta":0.1,
    "max_depth":10,
    "missing":0,
    "seed":0,
    "silent":1
}

In [None]:
bst_weightloss= xgb.train(params,data_train,num_boost_round=2000,evals = watch_list,early_stopping_rounds = 100,
                          obj=weightloss,maximize=False)

In [22]:
import numpy as np
lamda = 0.1
def general_logloss(preds,dtrain):
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))
    grads = preds - labels + lamda * (preds - obj_series)
    hess = preds * (1.0 - preds) + lamda *(preds *(1.0 -preds))
    return grad,hess