In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

!pip install optuna 

# category variable
category = ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']

# continuous variable
continuous = ['cont0', 'cont1', 'cont2', 'cont3', 'cont4', 'cont5', 'cont6', 
              'cont7', 'cont8', 'cont9', 'cont10', 'cont11', 'cont12', 'cont13']

You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [2]:
#features = ['cat1', 'cont0', 'cat2', 'cont11', 'cont13', 'cat8', 'cont8', 'cont1', 'cat9',
#            'cont9', 'cont5', 'cat3', 'cat0', 'cat6', 'cont3', 'cat5', 'cont4', 'cont2', 'cont12']

train_dataset = pd.read_csv('../input/tabular-playground-series-feb-2021/train.csv')
test_data = pd.read_csv('../input/tabular-playground-series-feb-2021/test.csv')
dataset = pd.concat([train_dataset, test_data])

# あきらかな外れ値は削除
# [166042]はあきらかな外れ値
outlier=[166042]
for x in outlier:
    dataset = dataset.loc[dataset['id'] != x, :]

# idとtargetは避難させておく
id = dataset['id']
target = dataset['target']
# 避難させたので遠慮なく削除
dataset = dataset.drop(columns=['id', 'target'])
# 相関が高いものだけを使用
# この手法は有効だったと思うが、Catboostでのスコアは一定以上伸びなかった。

# 重要度を確認するためにはLabelEncodingが有効
from sklearn.preprocessing import LabelEncoder, RobustScaler, StandardScaler
encoder = LabelEncoder()
scaler = RobustScaler()
#scaler = StandardScaler()

for x in dataset.columns:
    # notebookみると、みんなLabelEncoder使っているなぁ
    if dataset[x].dtype == object:
        #dataset[x] = encoder.fit_transform(dataset[x])
        dataset = pd.get_dummies(dataset, columns=[x], drop_first=True)
        
dataset[continuous] = scaler.fit_transform(dataset.loc[:,continuous].values)

# データセットにidとtargetを元に戻して
dataset = pd.concat([id,dataset,target], axis=1)
# targetのあるなしでtrainとtestを分割
train = dataset.loc[dataset['target'].notnull(), :]
test  = dataset.loc[dataset['target'].isnull(), :]

X = train.drop(columns=['id', 'target'])
y = train['target']
X_prediction = test.drop(columns=['id', 'target'])
prediction_id = test.loc[:,'id']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [None]:
## Lightgbm Optuna

import optuna.integration.lightgbm as lgb

trains = lgb.Dataset(X_train, y_train)
tests = lgb.Dataset(X_test, y_test)

params = {'objective': 'mean_squared_error',
         'metric': 'rmse'}

lgb_model = lgb.train(params, trains, valid_sets=tests, early_stopping_rounds=10)
best_params = lgb_model.params


import lightgbm as lgb

lgb_model = lgb.train(best_params, trains, valid_sets=tests)
y_pred = lgb_model.predict(X_test)

from sklearn.metrics import mean_squared_error
print(mean_squared_error(y_test, y_pred))

In [None]:
from sklearn.model_selection import KFold
import lightgbm as lgb

kf = KFold(n_splits=5,random_state=48,shuffle=True)
rmse=[]  # list contains rmse for each fold
n=0
preds=0

# kf.splitはindexを返すことに注意。データ自体じゃないよ!
for train, test in kf.split(X, y):
    model = lgb.LGBMRegressor(**best_params)
    x_tr = X.iloc[train, :].values
    x_te = X.iloc[test,  :].values
    y_tr = y.iloc[train].values
    y_te = y.iloc[test].values
    
    model.fit(x_tr,y_tr,eval_set=[(x_te,y_te)],early_stopping_rounds=100,verbose=False)
    rmse.append(mean_squared_error(y_te, model.predict(x_te), squared=False))
    preds+=model.predict(X_prediction)
    print(n+1,rmse[n])
    n+=1

y_pred = preds/kf.n_splits
import statistics as stat
print(f'RMSE mean = {stat.mean(rmse)}')
print(f'RMSE = {rmse}')

In [7]:
import lightgbm as lgb
from sklearn.metrics import r2_score, mean_squared_error
import optuna

trains = lgb.Dataset(X_train, y_train)
tests = lgb.Dataset(X_test, y_test)

def objective(trial):
    params = {
        'objective': 'regression',
        'num_leaves': trial.suggest_int('num_leaves', 5, 200),
        'max_depth': trial.suggest_int('max_depth', 4, 10),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.1),
        'n_estimators': trial.suggest_int('n_estimators', 100, 3000),
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 0.001, 0.1),
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 0.001, 0.1),
        'colsample_bytree': trial.suggest_loguniform('colsample_bytree', 0.5, 1),
        'random_state': 42
    }  
    
    
    lgb_model = lgb.LGBMRegressor(**params)
    lgb_model.fit(X=X_train, y=y_train, eval_set = [(X_test, y_test)], eval_metric='rmse',  early_stopping_rounds=10)
    y_pred = lgb_model.predict(X_test)
    
    return np.sqrt(mean_squared_error(y_test, y_pred))

study = optuna.create_study()
study.optimize(objective, n_trials=50, n_jobs=-1)
lgb_best = study.best_params
print(lgb_best)

[32m[I 2021-02-07 00:53:45,230][0m A new study created in memory with name: no-name-1fe06757-d926-4f30-9628-7e9345ff0e88[0m


[1]	valid_0's rmse: 0.883142	valid_0's l2: 0.77994
Training until validation scores don't improve for 10 rounds
[2]	valid_0's rmse: 0.882845	valid_0's l2: 0.779416
[3]	valid_0's rmse: 0.882407	valid_0's l2: 0.778643
[4]	valid_0's rmse: 0.88197	valid_0's l2: 0.777871
[5]	valid_0's rmse: 0.881543	valid_0's l2: 0.777118
[6]	valid_0's rmse: 0.881283	valid_0's l2: 0.776659
[7]	valid_0's rmse: 0.880881	valid_0's l2: 0.775952
[8]	valid_0's rmse: 0.880608	valid_0's l2: 0.77547
[9]	valid_0's rmse: 0.880237	valid_0's l2: 0.774817
[10]	valid_0's rmse: 0.879866	valid_0's l2: 0.774164
[11]	valid_0's rmse: 0.879523	valid_0's l2: 0.773562
[12]	valid_0's rmse: 0.879145	valid_0's l2: 0.772895
[13]	valid_0's rmse: 0.878853	valid_0's l2: 0.772383
[14]	valid_0's rmse: 0.878525	valid_0's l2: 0.771807
[15]	valid_0's rmse: 0.878155	valid_0's l2: 0.771156
[16]	valid_0's rmse: 0.877866	valid_0's l2: 0.770648
[17]	valid_0's rmse: 0.877527	valid_0's l2: 0.770054
[18]	valid_0's rmse: 0.877247	valid_0's l2: 0.7695

[161]	valid_0's rmse: 0.856928	valid_0's l2: 0.734326
[162]	valid_0's rmse: 0.856865	valid_0's l2: 0.734217
[163]	valid_0's rmse: 0.85681	valid_0's l2: 0.734123
[164]	valid_0's rmse: 0.856746	valid_0's l2: 0.734013
[165]	valid_0's rmse: 0.856675	valid_0's l2: 0.733892
[166]	valid_0's rmse: 0.856609	valid_0's l2: 0.733779
[167]	valid_0's rmse: 0.856549	valid_0's l2: 0.733677
[168]	valid_0's rmse: 0.856486	valid_0's l2: 0.733568
[169]	valid_0's rmse: 0.856422	valid_0's l2: 0.733459
[170]	valid_0's rmse: 0.856368	valid_0's l2: 0.733366
[171]	valid_0's rmse: 0.85632	valid_0's l2: 0.733284
[172]	valid_0's rmse: 0.856259	valid_0's l2: 0.73318
[173]	valid_0's rmse: 0.856203	valid_0's l2: 0.733084
[174]	valid_0's rmse: 0.856151	valid_0's l2: 0.732995
[175]	valid_0's rmse: 0.8561	valid_0's l2: 0.732907
[176]	valid_0's rmse: 0.856035	valid_0's l2: 0.732796
[177]	valid_0's rmse: 0.855978	valid_0's l2: 0.732699
[178]	valid_0's rmse: 0.855925	valid_0's l2: 0.732608
[179]	valid_0's rmse: 0.855869	va

[320]	valid_0's rmse: 0.850579	valid_0's l2: 0.723485
[321]	valid_0's rmse: 0.850547	valid_0's l2: 0.72343
[322]	valid_0's rmse: 0.850526	valid_0's l2: 0.723394
[323]	valid_0's rmse: 0.850505	valid_0's l2: 0.723358
[324]	valid_0's rmse: 0.85048	valid_0's l2: 0.723317
[325]	valid_0's rmse: 0.850459	valid_0's l2: 0.72328
[326]	valid_0's rmse: 0.850433	valid_0's l2: 0.723237
[327]	valid_0's rmse: 0.850413	valid_0's l2: 0.723202
[328]	valid_0's rmse: 0.850383	valid_0's l2: 0.723152
[329]	valid_0's rmse: 0.850358	valid_0's l2: 0.723109
[330]	valid_0's rmse: 0.850324	valid_0's l2: 0.723051
[331]	valid_0's rmse: 0.850298	valid_0's l2: 0.723006
[332]	valid_0's rmse: 0.85027	valid_0's l2: 0.722959
[333]	valid_0's rmse: 0.850245	valid_0's l2: 0.722917
[334]	valid_0's rmse: 0.850224	valid_0's l2: 0.722882
[335]	valid_0's rmse: 0.850209	valid_0's l2: 0.722856
[336]	valid_0's rmse: 0.850187	valid_0's l2: 0.722818
[337]	valid_0's rmse: 0.85016	valid_0's l2: 0.722771
[338]	valid_0's rmse: 0.850143	va

[477]	valid_0's rmse: 0.847547	valid_0's l2: 0.718336
[478]	valid_0's rmse: 0.847523	valid_0's l2: 0.718295
[479]	valid_0's rmse: 0.847508	valid_0's l2: 0.71827
[480]	valid_0's rmse: 0.8475	valid_0's l2: 0.718257
[481]	valid_0's rmse: 0.847491	valid_0's l2: 0.718241
[482]	valid_0's rmse: 0.847481	valid_0's l2: 0.718224
[483]	valid_0's rmse: 0.847465	valid_0's l2: 0.718197
[484]	valid_0's rmse: 0.84745	valid_0's l2: 0.718172
[485]	valid_0's rmse: 0.847431	valid_0's l2: 0.718139
[486]	valid_0's rmse: 0.847417	valid_0's l2: 0.718115
[487]	valid_0's rmse: 0.847405	valid_0's l2: 0.718095
[488]	valid_0's rmse: 0.847398	valid_0's l2: 0.718083
[489]	valid_0's rmse: 0.847383	valid_0's l2: 0.718059
[490]	valid_0's rmse: 0.847368	valid_0's l2: 0.718032
[491]	valid_0's rmse: 0.847348	valid_0's l2: 0.717998
[492]	valid_0's rmse: 0.847337	valid_0's l2: 0.71798
[493]	valid_0's rmse: 0.847322	valid_0's l2: 0.717955
[494]	valid_0's rmse: 0.847315	valid_0's l2: 0.717943
[495]	valid_0's rmse: 0.847306	va

[638]	valid_0's rmse: 0.845676	valid_0's l2: 0.715168
[639]	valid_0's rmse: 0.84567	valid_0's l2: 0.715157
[640]	valid_0's rmse: 0.845659	valid_0's l2: 0.715139
[641]	valid_0's rmse: 0.845635	valid_0's l2: 0.715099
[642]	valid_0's rmse: 0.845624	valid_0's l2: 0.71508
[643]	valid_0's rmse: 0.845617	valid_0's l2: 0.715068
[644]	valid_0's rmse: 0.845611	valid_0's l2: 0.715058
[645]	valid_0's rmse: 0.845606	valid_0's l2: 0.71505
[646]	valid_0's rmse: 0.845587	valid_0's l2: 0.715017
[647]	valid_0's rmse: 0.845578	valid_0's l2: 0.715003
[648]	valid_0's rmse: 0.845571	valid_0's l2: 0.714991
[649]	valid_0's rmse: 0.845563	valid_0's l2: 0.714977
[650]	valid_0's rmse: 0.845559	valid_0's l2: 0.71497
[651]	valid_0's rmse: 0.845549	valid_0's l2: 0.714954
[652]	valid_0's rmse: 0.845541	valid_0's l2: 0.71494
[653]	valid_0's rmse: 0.845521	valid_0's l2: 0.714906
[654]	valid_0's rmse: 0.84551	valid_0's l2: 0.714887
[655]	valid_0's rmse: 0.845501	valid_0's l2: 0.714873
[656]	valid_0's rmse: 0.845496	val

[796]	valid_0's rmse: 0.844355	valid_0's l2: 0.712936
[797]	valid_0's rmse: 0.84435	valid_0's l2: 0.712927
[798]	valid_0's rmse: 0.844346	valid_0's l2: 0.71292
[799]	valid_0's rmse: 0.844338	valid_0's l2: 0.712906
[800]	valid_0's rmse: 0.844328	valid_0's l2: 0.71289
[801]	valid_0's rmse: 0.844325	valid_0's l2: 0.712885
[802]	valid_0's rmse: 0.844314	valid_0's l2: 0.712866
[803]	valid_0's rmse: 0.844309	valid_0's l2: 0.712857
[804]	valid_0's rmse: 0.844302	valid_0's l2: 0.712846
[805]	valid_0's rmse: 0.844295	valid_0's l2: 0.712834
[806]	valid_0's rmse: 0.844286	valid_0's l2: 0.712818
[807]	valid_0's rmse: 0.844282	valid_0's l2: 0.712812
[808]	valid_0's rmse: 0.844276	valid_0's l2: 0.712801
[809]	valid_0's rmse: 0.84427	valid_0's l2: 0.712793
[810]	valid_0's rmse: 0.844265	valid_0's l2: 0.712783
[811]	valid_0's rmse: 0.844262	valid_0's l2: 0.712778
[812]	valid_0's rmse: 0.844254	valid_0's l2: 0.712764
[813]	valid_0's rmse: 0.844247	valid_0's l2: 0.712753
[814]	valid_0's rmse: 0.844238	v

[949]	valid_0's rmse: 0.843423	valid_0's l2: 0.711362
[950]	valid_0's rmse: 0.843419	valid_0's l2: 0.711355
[951]	valid_0's rmse: 0.843414	valid_0's l2: 0.711348
[952]	valid_0's rmse: 0.843413	valid_0's l2: 0.711345
[953]	valid_0's rmse: 0.843407	valid_0's l2: 0.711335
[954]	valid_0's rmse: 0.843405	valid_0's l2: 0.711331
[955]	valid_0's rmse: 0.843395	valid_0's l2: 0.711316
[956]	valid_0's rmse: 0.843389	valid_0's l2: 0.711306
[957]	valid_0's rmse: 0.843388	valid_0's l2: 0.711303
[958]	valid_0's rmse: 0.843385	valid_0's l2: 0.711299
[959]	valid_0's rmse: 0.84338	valid_0's l2: 0.71129
[960]	valid_0's rmse: 0.843373	valid_0's l2: 0.711278
[961]	valid_0's rmse: 0.843369	valid_0's l2: 0.711271
[962]	valid_0's rmse: 0.843366	valid_0's l2: 0.711267
[963]	valid_0's rmse: 0.843361	valid_0's l2: 0.711257
[964]	valid_0's rmse: 0.843355	valid_0's l2: 0.711248
[965]	valid_0's rmse: 0.843354	valid_0's l2: 0.711246
[966]	valid_0's rmse: 0.843348	valid_0's l2: 0.711235
[967]	valid_0's rmse: 0.843343

[1106]	valid_0's rmse: 0.842776	valid_0's l2: 0.710272
[1107]	valid_0's rmse: 0.842774	valid_0's l2: 0.710268
[1108]	valid_0's rmse: 0.842774	valid_0's l2: 0.710268
[1109]	valid_0's rmse: 0.842769	valid_0's l2: 0.71026
[1110]	valid_0's rmse: 0.842758	valid_0's l2: 0.71024
[1111]	valid_0's rmse: 0.842756	valid_0's l2: 0.710237
[1112]	valid_0's rmse: 0.842752	valid_0's l2: 0.710231
[1113]	valid_0's rmse: 0.842748	valid_0's l2: 0.710224
[1114]	valid_0's rmse: 0.842746	valid_0's l2: 0.710221
[1115]	valid_0's rmse: 0.842741	valid_0's l2: 0.710213
[1116]	valid_0's rmse: 0.842739	valid_0's l2: 0.710209
[1117]	valid_0's rmse: 0.842734	valid_0's l2: 0.710201
[1118]	valid_0's rmse: 0.842732	valid_0's l2: 0.710198
[1119]	valid_0's rmse: 0.842731	valid_0's l2: 0.710195
[1120]	valid_0's rmse: 0.842726	valid_0's l2: 0.710188
[1121]	valid_0's rmse: 0.842719	valid_0's l2: 0.710175
[1122]	valid_0's rmse: 0.842716	valid_0's l2: 0.71017
[1123]	valid_0's rmse: 0.842702	valid_0's l2: 0.710147
[1124]	valid_

[1257]	valid_0's rmse: 0.842345	valid_0's l2: 0.709545
[1258]	valid_0's rmse: 0.842343	valid_0's l2: 0.709542
[1259]	valid_0's rmse: 0.842342	valid_0's l2: 0.70954
[1260]	valid_0's rmse: 0.842341	valid_0's l2: 0.709539
[1261]	valid_0's rmse: 0.842339	valid_0's l2: 0.709536
[1262]	valid_0's rmse: 0.842338	valid_0's l2: 0.709534
[1263]	valid_0's rmse: 0.842336	valid_0's l2: 0.709529
[1264]	valid_0's rmse: 0.842334	valid_0's l2: 0.709527
[1265]	valid_0's rmse: 0.842331	valid_0's l2: 0.709522
[1266]	valid_0's rmse: 0.842327	valid_0's l2: 0.709515
[1267]	valid_0's rmse: 0.842323	valid_0's l2: 0.709508
[1268]	valid_0's rmse: 0.842321	valid_0's l2: 0.709504
[1269]	valid_0's rmse: 0.842317	valid_0's l2: 0.709499
[1270]	valid_0's rmse: 0.842309	valid_0's l2: 0.709485
[1271]	valid_0's rmse: 0.842307	valid_0's l2: 0.709481
[1272]	valid_0's rmse: 0.842306	valid_0's l2: 0.709479
[1273]	valid_0's rmse: 0.842302	valid_0's l2: 0.709472
[1274]	valid_0's rmse: 0.842304	valid_0's l2: 0.709476
[1275]	vali

[1413]	valid_0's rmse: 0.842031	valid_0's l2: 0.709017
[1414]	valid_0's rmse: 0.842027	valid_0's l2: 0.70901
[1415]	valid_0's rmse: 0.842029	valid_0's l2: 0.709012
[1416]	valid_0's rmse: 0.842026	valid_0's l2: 0.709008
[1417]	valid_0's rmse: 0.842023	valid_0's l2: 0.709003
[1418]	valid_0's rmse: 0.842021	valid_0's l2: 0.708999
[1419]	valid_0's rmse: 0.842017	valid_0's l2: 0.708993
[1420]	valid_0's rmse: 0.842014	valid_0's l2: 0.708988
[1421]	valid_0's rmse: 0.842014	valid_0's l2: 0.708987
[1422]	valid_0's rmse: 0.842014	valid_0's l2: 0.708988
[1423]	valid_0's rmse: 0.842011	valid_0's l2: 0.708982
[1424]	valid_0's rmse: 0.84201	valid_0's l2: 0.70898
[1425]	valid_0's rmse: 0.842004	valid_0's l2: 0.708971
[1426]	valid_0's rmse: 0.842001	valid_0's l2: 0.708966
[1427]	valid_0's rmse: 0.841996	valid_0's l2: 0.708957
[1428]	valid_0's rmse: 0.841991	valid_0's l2: 0.708949
[1429]	valid_0's rmse: 0.841988	valid_0's l2: 0.708944
[1430]	valid_0's rmse: 0.841987	valid_0's l2: 0.708942
[1431]	valid_

[1563]	valid_0's rmse: 0.841784	valid_0's l2: 0.708601
[1564]	valid_0's rmse: 0.841783	valid_0's l2: 0.708598
[1565]	valid_0's rmse: 0.841779	valid_0's l2: 0.708591
[1566]	valid_0's rmse: 0.841778	valid_0's l2: 0.708591
[1567]	valid_0's rmse: 0.841773	valid_0's l2: 0.708581
[1568]	valid_0's rmse: 0.841772	valid_0's l2: 0.70858
[1569]	valid_0's rmse: 0.841771	valid_0's l2: 0.708579
[1570]	valid_0's rmse: 0.841768	valid_0's l2: 0.708574
[1571]	valid_0's rmse: 0.841769	valid_0's l2: 0.708575
[1572]	valid_0's rmse: 0.841768	valid_0's l2: 0.708574
[1573]	valid_0's rmse: 0.841767	valid_0's l2: 0.708572
[1574]	valid_0's rmse: 0.841763	valid_0's l2: 0.708564
[1575]	valid_0's rmse: 0.84176	valid_0's l2: 0.708559
[1576]	valid_0's rmse: 0.841755	valid_0's l2: 0.708551
[1577]	valid_0's rmse: 0.841752	valid_0's l2: 0.708546
[1578]	valid_0's rmse: 0.841751	valid_0's l2: 0.708545
[1579]	valid_0's rmse: 0.841749	valid_0's l2: 0.708542
[1580]	valid_0's rmse: 0.841746	valid_0's l2: 0.708536
[1581]	valid

[1720]	valid_0's rmse: 0.841604	valid_0's l2: 0.708297
[1721]	valid_0's rmse: 0.841603	valid_0's l2: 0.708295
[1722]	valid_0's rmse: 0.841603	valid_0's l2: 0.708296
[1723]	valid_0's rmse: 0.841603	valid_0's l2: 0.708295
[1724]	valid_0's rmse: 0.841596	valid_0's l2: 0.708284
[1725]	valid_0's rmse: 0.841594	valid_0's l2: 0.70828
[1726]	valid_0's rmse: 0.841591	valid_0's l2: 0.708276
[1727]	valid_0's rmse: 0.841592	valid_0's l2: 0.708277
[1728]	valid_0's rmse: 0.841594	valid_0's l2: 0.70828
[1729]	valid_0's rmse: 0.841589	valid_0's l2: 0.708273
[1730]	valid_0's rmse: 0.841587	valid_0's l2: 0.708269
[1731]	valid_0's rmse: 0.841584	valid_0's l2: 0.708263
[1732]	valid_0's rmse: 0.841579	valid_0's l2: 0.708255
[1733]	valid_0's rmse: 0.841577	valid_0's l2: 0.708252
[1734]	valid_0's rmse: 0.841575	valid_0's l2: 0.708248
[1735]	valid_0's rmse: 0.841576	valid_0's l2: 0.708251
[1736]	valid_0's rmse: 0.841578	valid_0's l2: 0.708253
[1737]	valid_0's rmse: 0.841577	valid_0's l2: 0.708252
[1738]	valid

[1871]	valid_0's rmse: 0.841458	valid_0's l2: 0.708051
[1872]	valid_0's rmse: 0.841455	valid_0's l2: 0.708047
[1873]	valid_0's rmse: 0.841454	valid_0's l2: 0.708044
[1874]	valid_0's rmse: 0.841455	valid_0's l2: 0.708046
[1875]	valid_0's rmse: 0.841454	valid_0's l2: 0.708045
[1876]	valid_0's rmse: 0.841453	valid_0's l2: 0.708044
[1877]	valid_0's rmse: 0.841453	valid_0's l2: 0.708043
[1878]	valid_0's rmse: 0.841454	valid_0's l2: 0.708045
[1879]	valid_0's rmse: 0.841456	valid_0's l2: 0.708049
[1880]	valid_0's rmse: 0.841457	valid_0's l2: 0.70805
[1881]	valid_0's rmse: 0.841457	valid_0's l2: 0.70805
[1882]	valid_0's rmse: 0.841458	valid_0's l2: 0.708052
[1883]	valid_0's rmse: 0.841457	valid_0's l2: 0.70805
[1884]	valid_0's rmse: 0.841457	valid_0's l2: 0.708049
[1885]	valid_0's rmse: 0.841455	valid_0's l2: 0.708047
[1886]	valid_0's rmse: 0.841455	valid_0's l2: 0.708046
[1887]	valid_0's rmse: 0.841456	valid_0's l2: 0.708048
Early stopping, best iteration is:
[1877]	valid_0's rmse: 0.841453	v

[32m[I 2021-02-07 00:54:46,236][0m Trial 0 finished with value: 0.8414530380594016 and parameters: {'num_leaves': 66, 'max_depth': 6, 'learning_rate': 0.012190298323401755, 'n_estimators': 2439, 'reg_alpha': 0.004096274174939093, 'reg_lambda': 0.002375343433382987, 'colsample_bytree': 0.7655501600151997}. Best is trial 0 with value: 0.8414530380594016.[0m


{'num_leaves': 66, 'max_depth': 6, 'learning_rate': 0.012190298323401755, 'n_estimators': 2439, 'reg_alpha': 0.004096274174939093, 'reg_lambda': 0.002375343433382987, 'colsample_bytree': 0.7655501600151997}


In [13]:
import xgboost as xgb
from sklearn.metrics import r2_score, mean_squared_error
import optuna

# XGBoost Parameters — xgboost 1.4.0-SNAPSHOT documentation https://xgboost.readthedocs.io/en/latest/parameter.html

def objective(trial):
    params = {
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'tree_method': 'gpu_hist',
        'eta': trial.suggest_loguniform('eta', 0.1, 1.0),
        'gamma': trial.suggest_loguniform('gamma', 0.001, 5.),
        'max_depth': trial.suggest_int('max_depth', 4, 10),
        'sub_sample': trial.suggest_loguniform('sub_sample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_loguniform('colsample_bytree', 0.5, 1.0),
        'lambda': trial.suggest_loguniform('lambda', 0.001, 0.01),
        'alpha': trial.suggest_loguniform('alpha', 0.001, 0.01),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.0001, 0.1),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000)
    }  
    
    xgb_model = xgb.XGBRegressor(**params)
    xgb_model.fit(X_train, y_train, eval_set = [(X_test, y_test)],  early_stopping_rounds=10)
    y_pred = xgb_model.predict(X_test)
    
    return np.sqrt(mean_squared_error(y_test, y_pred))

study = optuna.create_study()
study.optimize(objective, n_trials=50, n_jobs=-1)
xgb_best = study.best_params
print(xgb_best)

[32m[I 2021-02-07 00:57:54,091][0m A new study created in memory with name: no-name-d797930f-9cf0-40ff-8a41-9cc6ea04e1c5[0m


[0]	validation_0-rmse:6.99276
[1]	validation_0-rmse:6.97746
[2]	validation_0-rmse:6.96220
[3]	validation_0-rmse:6.94698
[4]	validation_0-rmse:6.93179
[5]	validation_0-rmse:6.91663
[6]	validation_0-rmse:6.90151
[7]	validation_0-rmse:6.88643
[8]	validation_0-rmse:6.87137
[9]	validation_0-rmse:6.85635
[10]	validation_0-rmse:6.84137
[11]	validation_0-rmse:6.82641
[12]	validation_0-rmse:6.81150
[13]	validation_0-rmse:6.79661
[14]	validation_0-rmse:6.78176
[15]	validation_0-rmse:6.76694
[16]	validation_0-rmse:6.75216
[17]	validation_0-rmse:6.73741
[18]	validation_0-rmse:6.72269
[19]	validation_0-rmse:6.70801
[20]	validation_0-rmse:6.69336
[21]	validation_0-rmse:6.67874
[22]	validation_0-rmse:6.66416
[23]	validation_0-rmse:6.64961
[24]	validation_0-rmse:6.63509
[25]	validation_0-rmse:6.62060
[26]	validation_0-rmse:6.60615
[27]	validation_0-rmse:6.59173
[28]	validation_0-rmse:6.57734
[29]	validation_0-rmse:6.56298
[30]	validation_0-rmse:6.54866
[31]	validation_0-rmse:6.53437
[32]	validation_0-

[260]	validation_0-rmse:3.98670
[261]	validation_0-rmse:3.97825
[262]	validation_0-rmse:3.96982
[263]	validation_0-rmse:3.96142
[264]	validation_0-rmse:3.95303
[265]	validation_0-rmse:3.94466
[266]	validation_0-rmse:3.93632
[267]	validation_0-rmse:3.92799
[268]	validation_0-rmse:3.91968
[269]	validation_0-rmse:3.91139
[270]	validation_0-rmse:3.90312
[271]	validation_0-rmse:3.89487
[272]	validation_0-rmse:3.88664
[273]	validation_0-rmse:3.87843
[274]	validation_0-rmse:3.87023
[275]	validation_0-rmse:3.86206
[276]	validation_0-rmse:3.85390
[277]	validation_0-rmse:3.84577
[278]	validation_0-rmse:3.83765
[279]	validation_0-rmse:3.82955
[280]	validation_0-rmse:3.82147
[281]	validation_0-rmse:3.81341
[282]	validation_0-rmse:3.80537
[283]	validation_0-rmse:3.79735
[284]	validation_0-rmse:3.78934
[285]	validation_0-rmse:3.78136
[286]	validation_0-rmse:3.77339
[287]	validation_0-rmse:3.76544
[288]	validation_0-rmse:3.75751
[289]	validation_0-rmse:3.74960
[290]	validation_0-rmse:3.74171
[291]	va

[517]	validation_0-rmse:2.35935
[518]	validation_0-rmse:2.35480
[519]	validation_0-rmse:2.35026
[520]	validation_0-rmse:2.34572
[521]	validation_0-rmse:2.34121
[522]	validation_0-rmse:2.33670
[523]	validation_0-rmse:2.33220
[524]	validation_0-rmse:2.32772
[525]	validation_0-rmse:2.32324
[526]	validation_0-rmse:2.31878
[527]	validation_0-rmse:2.31433
[528]	validation_0-rmse:2.30989
[529]	validation_0-rmse:2.30546
[530]	validation_0-rmse:2.30104
[531]	validation_0-rmse:2.29663
[532]	validation_0-rmse:2.29224
[533]	validation_0-rmse:2.28785
[534]	validation_0-rmse:2.28348
[535]	validation_0-rmse:2.27912
[536]	validation_0-rmse:2.27476
[537]	validation_0-rmse:2.27042
[538]	validation_0-rmse:2.26609
[539]	validation_0-rmse:2.26177
[540]	validation_0-rmse:2.25746
[541]	validation_0-rmse:2.25317
[542]	validation_0-rmse:2.24888
[543]	validation_0-rmse:2.24461
[544]	validation_0-rmse:2.24034
[545]	validation_0-rmse:2.23609
[546]	validation_0-rmse:2.23185
[547]	validation_0-rmse:2.22762
[548]	va

[774]	validation_0-rmse:1.50676
[775]	validation_0-rmse:1.50448
[776]	validation_0-rmse:1.50221
[777]	validation_0-rmse:1.49995
[778]	validation_0-rmse:1.49770
[779]	validation_0-rmse:1.49545
[780]	validation_0-rmse:1.49321
[781]	validation_0-rmse:1.49097
[782]	validation_0-rmse:1.48874
[783]	validation_0-rmse:1.48652
[784]	validation_0-rmse:1.48430
[785]	validation_0-rmse:1.48210
[786]	validation_0-rmse:1.47989
[787]	validation_0-rmse:1.47770
[788]	validation_0-rmse:1.47551
[789]	validation_0-rmse:1.47332
[790]	validation_0-rmse:1.47115
[791]	validation_0-rmse:1.46898
[792]	validation_0-rmse:1.46681
[793]	validation_0-rmse:1.46466


[32m[I 2021-02-07 00:58:02,284][0m Trial 0 finished with value: 1.4646548373270436 and parameters: {'eta': 0.20102298083783426, 'gamma': 0.005716990925643276, 'max_depth': 6, 'sub_sample': 0.6219730920308847, 'colsample_bytree': 0.9754618517365117, 'lambda': 0.0014723269810255602, 'alpha': 0.0012374454206099512, 'learning_rate': 0.0022195678551731415, 'n_estimators': 794}. Best is trial 0 with value: 1.4646548373270436.[0m


{'eta': 0.20102298083783426, 'gamma': 0.005716990925643276, 'max_depth': 6, 'sub_sample': 0.6219730920308847, 'colsample_bytree': 0.9754618517365117, 'lambda': 0.0014723269810255602, 'alpha': 0.0012374454206099512, 'learning_rate': 0.0022195678551731415, 'n_estimators': 794}


In [None]:
## CatBoost Optuna
from catboost import CatBoostRegressor
from catboost import Pool
from sklearn.metrics import r2_score, mean_squared_error
import optuna

cat_train = Pool(X_train, y_train)
cat_test = Pool(X_test, y_test)

def objective(trial):
    params = {
        'iterations': trial.suggest_int('iterations', 100, 500),
        'depth': trial.suggest_int('depth', 4, 10),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 0.1),
        'l2_leaf_reg': trial.suggest_int('l2_leaf', 1,10),
        'eval_metric': trial.suggest_categorical('eval_metric', ['RMSE']),
        'random_strength': trial.suggest_int('random_strength', 0, 100),
        'bagging_temperature': trial.suggest_loguniform('bagging_temperature', 0.01, 100.00),
        'od_type': trial.suggest_categorical('od_type', ['IncToDec', 'Iter']),
        'od_wait': trial.suggest_int('od_wait', 10, 50),
        'use_best_model': True
    }  
    
    model = CatBoostRegressor(**params)
    model.fit(cat_train, eval_set = cat_test)
    y_pred = model.predict(cat_test)
    
    return np.sqrt(mean_squared_error(y_test, y_pred))

study = optuna.create_study()
study.optimize(objective, n_trials=20, n_jobs=-1)
cat_best = study.best_params
print(cat_best)

In [None]:
from sklearn.linear_model import LinearRegression
reg = LinearRegression()


In [None]:
import tensorflow as tf
from tensorflow import keras

def rmse(y_true, y_pred):
    return tf.sqrt(tf.losses.mean_squared_error(y_true, y_pred))

### ***********************************************************###
model = tf.keras.Sequential()

#model.add(tf.keras.layers.Dense(1024, activation='relu', input_shape=(X.shape[1],)))
#model.add(tf.keras.layers.LeakyReLU())
#model.add(tf.keras.layers.BatchNormalization())
#model.add(tf.keras.layers.Dropout(0.5))

#model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dense(64, kernel_regularizer=keras.regularizers.l2(0.001), activation='relu',input_shape=(X.shape[1],)))
model.add(tf.keras.layers.LeakyReLU())
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(0.4))

#model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dense(64, kernel_regularizer=keras.regularizers.l2(0.001), activation='relu'))
model.add(tf.keras.layers.LeakyReLU())
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(0.4))

#model.add(tf.keras.layers.Dense(units=256, activation='relu'))
model.add(tf.keras.layers.Dense(32, kernel_regularizer=keras.regularizers.l2(0.001), activation='relu'))
model.add(tf.keras.layers.LeakyReLU())
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(0.3))

#model.add(tf.keras.layers.Dense(units=256, activation='relu'))
model.add(tf.keras.layers.Dense(32, kernel_regularizer=keras.regularizers.l2(0.001), activation='relu'))
model.add(tf.keras.layers.LeakyReLU())
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(0.3))

#model.add(tf.keras.layers.Dense(units=128, activation='relu'))
model.add(tf.keras.layers.Dense(16, kernel_regularizer=keras.regularizers.l2(0.001), activation='relu'))
model.add(tf.keras.layers.LeakyReLU())

model.add(tf.keras.layers.Dense(units=1, activation='linear'))
### ***********************************************************###

optimizer = tf.keras.optimizers.Adam(lr=0.005, decay=5e-4)
model.compile(optimizer = optimizer, loss = 'mae', metrics = ['mse', 'mae'])

#checkpoint_name = 'Model/{epoch:03d}-{val_loss:.5f}.hdf5'
checkpoint_name = 'DNN_BestModel.hdf5'
checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_name, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')
callback_list = [checkpoint]

history = model.fit(X_train, y_train, validation_split=0.2, epochs = 500, batch_size = 1024,
                    validation_data=(X_test, y_test), callbacks=callback_list)
y_pred = model.predict(X_test).reshape(-1)
RMSE = rmse(y_test, y_pred)
print(f'RMSE = {RMSE}')

fig = plt.figure(figsize=(8,5))
ax = fig.add_subplot(111)
ax.set_ylim(0.6, 0.75)

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.legend(['Train', 'Test'], loc='upper right')
plt.savefig(fname='1024 neurons enable LearningRate.png')

In [None]:
# CatBoosting
'''
from catboost import CatBoostRegressor
from sklearn.metrics import r2_score, mean_squared_error

from sklearn.model_selection import StratifiedKFold, KFold
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

from sklearn.model_selection import GridSearchCV
param_cat = {'depth':[6,8,10],
            'learning_rate':[0.005, 0.001],
            'l2_leaf_reg':[1,4,9],
            'iterations':[100],
            'cat_features':[feature_cat],
            'eval_metric':['RMSE']
            }

grid_result = GridSearchCV(estimator=CatBoostRegressor(),param_grid=param_cat, cv=kfold, scoring='neg_mean_squared_error', n_jobs = -1, verbose=2)
grid_result.fit(X_train, y_train)
grid_param = grid_result.best_params_
print(grid_param)


cat = CatBoostRegressor(task_type='GPU', iterations=8000, use_best_model=True, depth=10, eval_metric='RMSE', l2_leaf_reg=1, learning_rate=0.001, early_stopping_rounds=10)
cat.fit(X_train, y_train, cat_features=feature_cat, eval_set = (X_test, y_test))
y_pred = cat.predict(X_test)
print(f'RMSE: {mean_squared_error(y_test, y_pred)}')


'''

In [None]:
#y_pred=regressor.predict(X_prediction)
output = pd.DataFrame({'id': prediction_id, 'target': y_pred})
output.to_csv('my_submission.csv', index=False)
print("Your submission was successfully saved!")