In [46]:
import os
import warnings
import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import lightgbm as lgb

import optuna

warnings.filterwarnings('ignore')

In [44]:
random_state=42
np.random.seed(42)

In [2]:
train_path = ('./data/FIFA_train.csv')
test_path = ('./data/FIFA_test.csv')
submission_path = ('./data/submission.csv')

In [3]:
train = pd.read_csv(train_path)
test = pd.read_csv(test_path)
submission = pd.read_csv(submission_path)

In [4]:
train.head()

Unnamed: 0,id,name,age,continent,contract_until,position,prefer_foot,reputation,stat_overall,stat_potential,stat_skill_moves,value
0,0,L. Messi,31,south america,2021,ST,left,5.0,94,94,4.0,110500000.0
1,3,De Gea,27,europe,2020,GK,right,4.0,91,93,1.0,72000000.0
2,7,L. Suárez,31,south america,2021,ST,right,5.0,91,91,3.0,80000000.0
3,8,Sergio Ramos,32,europe,2020,DF,right,4.0,91,91,3.0,51000000.0
4,9,J. Oblak,25,europe,2021,GK,right,3.0,90,93,1.0,68000000.0


In [5]:
test.head()

Unnamed: 0,id,name,age,continent,contract_until,position,prefer_foot,reputation,stat_overall,stat_potential,stat_skill_moves
0,1,Cristiano Ronaldo,33,europe,2022,ST,right,5.0,94,94,5.0
1,2,Neymar Jr,26,south america,2022,ST,right,5.0,92,93,5.0
2,4,K. De Bruyne,27,europe,2023,MF,right,4.0,91,92,4.0
3,5,E. Hazard,27,europe,2020,ST,right,4.0,91,91,4.0
4,6,L. Modrić,32,europe,2020,MF,right,4.0,91,91,4.0


In [6]:
submission.head()

Unnamed: 0,id,value
0,1,0
1,2,0
2,4,0
3,5,0
4,6,0


In [7]:
# column의 차이를 비교할 때
set(train.columns) - set(test.columns)

{'value'}

In [8]:
y_train = train['value']

In [9]:
# train, test 병합
data = pd.concat([train.drop('value', axis=1), test]).reset_index(drop=True)

In [10]:
print(train.shape)
print(test.shape)
print(data.shape)

(8932, 12)
(3828, 11)
(12760, 11)


In [11]:
data.tail()

Unnamed: 0,id,name,age,continent,contract_until,position,prefer_foot,reputation,stat_overall,stat_potential,stat_skill_moves
12755,16924,R. Takae,20,asia,2021,MF,right,1.0,48,63,2.0
12756,16929,L. Wahlstedt,18,europe,2018,GK,right,1.0,48,65,1.0
12757,16932,Y. Góez,18,south america,2021,MF,right,1.0,48,65,2.0
12758,16937,A. Kaltner,18,europe,2020,ST,right,1.0,47,61,2.0
12759,16943,K. Fujikawa,19,asia,2021,MF,right,1.0,47,61,2.0


In [12]:
# id와 name은 필요없기 때문에 제거
data = data.drop(["id", "name"], axis=1)
data.head()

Unnamed: 0,age,continent,contract_until,position,prefer_foot,reputation,stat_overall,stat_potential,stat_skill_moves
0,31,south america,2021,ST,left,5.0,94,94,4.0
1,27,europe,2020,GK,right,4.0,91,93,1.0
2,31,south america,2021,ST,right,5.0,91,91,3.0
3,32,europe,2020,DF,right,4.0,91,91,3.0
4,25,europe,2021,GK,right,3.0,90,93,1.0


In [13]:
# null값이 있는지 확인
data.isnull().sum()

age                 0
continent           0
contract_until      0
position            0
prefer_foot         0
reputation          0
stat_overall        0
stat_potential      0
stat_skill_moves    0
dtype: int64

In [14]:
# column 별 unique 값 갯수 확인
for col in data.columns:
    print(f"column : {col}")
    print(f"The number of unique : {data[col].nunique()}")
    print()

column : age
The number of unique : 26

column : continent
The number of unique : 5

column : contract_until
The number of unique : 19

column : position
The number of unique : 4

column : prefer_foot
The number of unique : 2

column : reputation
The number of unique : 5

column : stat_overall
The number of unique : 47

column : stat_potential
The number of unique : 46

column : stat_skill_moves
The number of unique : 5



In [15]:
# column 별 unique 값 확인
for col in data.columns:
    print(f"column : {col}")
    print(f"unique : {data[col].unique()}")
    print()

column : age
unique : [31 27 32 25 26 29 33 30 40 24 28 34 23 22 35 36 21 18 19 37 20 39 17 38
 16 42]

column : continent
unique : ['south america' 'europe' 'africa' 'asia' 'oceania']

column : contract_until
unique : ['2021' '2020' '2019' '2023' '2022' '2024' 'Jun 30, 2019' '2026'
 'Dec 31, 2018' '2018' '2025' 'Jun 30, 2020' 'May 31, 2020' 'May 31, 2019'
 'Jan 31, 2019' 'Jan 1, 2019' 'Jan 12, 2019' 'Dec 31, 2019' 'Jun 1, 2019']

column : position
unique : ['ST' 'GK' 'DF' 'MF']

column : prefer_foot
unique : ['left' 'right']

column : reputation
unique : [5. 4. 3. 1. 2.]

column : stat_overall
unique : [94 91 90 89 88 87 86 85 84 83 82 81 80 79 78 77 76 75 74 73 72 71 70 69
 68 67 66 65 64 63 62 61 60 59 58 57 56 55 54 53 52 51 50 49 48 47 92]

column : stat_potential
unique : [94 93 91 90 92 89 88 87 86 85 84 83 82 81 80 79 78 77 76 75 74 73 72 71
 70 69 68 67 66 65 64 63 62 61 60 59 58 57 56 55 54 53 52 50 48 95]

column : stat_skill_moves
unique : [4. 1. 3. 2. 5.]



In [16]:
# 계약기간 연도로 통일
data['contract_until'] = data['contract_until'].map(lambda x: x[-4:])
data['contract_until'].unique()

array(['2021', '2020', '2019', '2023', '2022', '2024', '2026', '2018',
       '2025'], dtype=object)

In [17]:
# Categorical data One-Hot Encoding, 데이터 타입은 string 타입이어야 한다.
data = pd.get_dummies(data, columns=['contract_until', 'continent', 'position', 'prefer_foot'])

In [18]:
data.shape

(12760, 25)

In [19]:
data.head()

Unnamed: 0,age,reputation,stat_overall,stat_potential,stat_skill_moves,contract_until_2018,contract_until_2019,contract_until_2020,contract_until_2021,contract_until_2022,...,continent_asia,continent_europe,continent_oceania,continent_south america,position_DF,position_GK,position_MF,position_ST,prefer_foot_left,prefer_foot_right
0,31,5.0,94,94,4.0,0,0,0,1,0,...,0,0,0,1,0,0,0,1,1,0
1,27,4.0,91,93,1.0,0,0,1,0,0,...,0,1,0,0,0,1,0,0,0,1
2,31,5.0,91,91,3.0,0,0,0,1,0,...,0,0,0,1,0,0,0,1,0,1
3,32,4.0,91,91,3.0,0,0,1,0,0,...,0,1,0,0,1,0,0,0,0,1
4,25,3.0,90,93,1.0,0,0,0,1,0,...,0,1,0,0,0,1,0,0,0,1


In [20]:
X_train = data[:len(train)]
X_test = data[len(train):].reset_index(drop=True)

In [21]:
print(X_train.shape)
print(X_test.shape)

(8932, 25)
(3828, 25)


In [22]:
X_train.tail()

Unnamed: 0,age,reputation,stat_overall,stat_potential,stat_skill_moves,contract_until_2018,contract_until_2019,contract_until_2020,contract_until_2021,contract_until_2022,...,continent_asia,continent_europe,continent_oceania,continent_south america,position_DF,position_GK,position_MF,position_ST,prefer_foot_left,prefer_foot_right
8927,18,1.0,48,63,3.0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,1
8928,19,1.0,47,59,2.0,0,0,1,0,0,...,0,1,0,0,1,0,0,0,0,1
8929,18,1.0,47,64,2.0,0,0,0,1,0,...,0,0,0,1,1,0,0,0,0,1
8930,18,1.0,47,65,1.0,0,0,0,1,0,...,0,1,0,0,0,1,0,0,0,1
8931,19,1.0,47,63,2.0,0,0,1,0,0,...,0,1,0,0,0,0,0,1,0,1


In [23]:
X_test.tail()

Unnamed: 0,age,reputation,stat_overall,stat_potential,stat_skill_moves,contract_until_2018,contract_until_2019,contract_until_2020,contract_until_2021,contract_until_2022,...,continent_asia,continent_europe,continent_oceania,continent_south america,position_DF,position_GK,position_MF,position_ST,prefer_foot_left,prefer_foot_right
3823,20,1.0,48,63,2.0,0,0,0,1,0,...,1,0,0,0,0,0,1,0,0,1
3824,18,1.0,48,65,1.0,1,0,0,0,0,...,0,1,0,0,0,1,0,0,0,1
3825,18,1.0,48,65,2.0,0,0,0,1,0,...,0,0,0,1,0,0,1,0,0,1
3826,18,1.0,47,61,2.0,0,0,1,0,0,...,0,1,0,0,0,0,0,1,0,1
3827,19,1.0,47,61,2.0,0,0,0,1,0,...,1,0,0,0,0,0,1,0,0,1


In [24]:
params= {
    "n_estimators": 300,
    "random_state": 42,
}

In [25]:
rf = RandomForestRegressor(**params)

In [26]:
# log scaling
y_train = np.log1p(y_train)

In [27]:
rf.fit(X_train, y_train)

RandomForestRegressor(n_estimators=300, random_state=42)

In [28]:
pred = rf.predict(X_test)

In [29]:
pred = np.expm1(pred)

In [30]:
submission['value'] = pred

In [31]:
submission

Unnamed: 0,id,value
0,1,5.588749e+07
1,2,7.823982e+07
2,4,6.710032e+07
3,5,7.032970e+07
4,6,6.163380e+07
...,...,...
3823,16924,5.945552e+04
3824,16929,5.065921e+04
3825,16932,5.949694e+04
3826,16937,4.487307e+04


In [32]:
# submission 파일로 저장
submission.to_csv("./data/submission_baseline_rf.csv", index=False)

In [33]:
pd.read_csv('./data/submission_baseline_rf.csv')

Unnamed: 0,id,value
0,1,5.588749e+07
1,2,7.823982e+07
2,4,6.710032e+07
3,5,7.032970e+07
4,6,6.163380e+07
...,...,...
3823,16924,5.945552e+04
3824,16929,5.065921e+04
3825,16932,5.949694e+04
3826,16937,4.487307e+04


In [59]:
def objective(trial):

    train_x, test_x, train_y, test_y = train_test_split(X_train, y_train, random_state=random_state, test_size=0.1)
    dtrain = lgb.Dataset(train_x, label=train_y)
    dval = lgb.Dataset(test_x, label=test_y)
 
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'force_row_wise': True,
        'random_state': random_state,
        'early_stopping_round': 50,
        'verbose': -1,
        'tree_learner': 'voting',
        'boosting': 'gbdt',
        'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.1),
        'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 1.0),
        'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 1.0),
        'num_leaves': trial.suggest_int('num_leaves', 31, 511),
        'feature_fraction': trial.suggest_uniform('feature_fraction', 0.3, 0.7),
        'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.7, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 11),
        'min_child_samples': trial.suggest_int('min_child_samples', 2, 64),
    }
 
    gbm = lgb.train(params, dtrain, 3000, dval, verbose_eval=1000)
    preds = gbm.predict(test_x)
    accuracy = np.sqrt(mean_squared_error(test_y, preds))
    
    return accuracy

In [60]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)

print('Best Value:', study.best_value)
print('Best trial:', study.best_trial.params)

[32m[I 2021-04-13 02:02:28,111][0m A new study created in memory with name: no-name-b9f3fb62-a7fe-4687-9143-3a0c665f873a[0m


Training until validation scores don't improve for 50 rounds
[1000]	valid_0's rmse: 0.132558
[2000]	valid_0's rmse: 0.115269


[32m[I 2021-04-13 02:02:30,104][0m Trial 0 finished with value: 0.10699216047253297 and parameters: {'learning_rate': 0.012724433776858394, 'lambda_l1': 0.8332516197508926, 'lambda_l2': 1.2954761860456232e-06, 'num_leaves': 89, 'feature_fraction': 0.31347294995432146, 'bagging_fraction': 0.7244975487098971, 'bagging_freq': 6, 'min_child_samples': 44}. Best is trial 0 with value: 0.10699216047253297.[0m


[3000]	valid_0's rmse: 0.106992
Did not meet early stopping. Best iteration is:
[3000]	valid_0's rmse: 0.106992
Training until validation scores don't improve for 50 rounds
[1000]	valid_0's rmse: 0.0888509
[2000]	valid_0's rmse: 0.0833754
[3000]	valid_0's rmse: 0.0806926
Did not meet early stopping. Best iteration is:
[3000]	valid_0's rmse: 0.0806926


[32m[I 2021-04-13 02:02:34,597][0m Trial 1 finished with value: 0.08069256707391334 and parameters: {'learning_rate': 0.03742429236641126, 'lambda_l1': 0.04491269149745811, 'lambda_l2': 0.009385838035626636, 'num_leaves': 259, 'feature_fraction': 0.6596266066999787, 'bagging_fraction': 0.8372874909375961, 'bagging_freq': 3, 'min_child_samples': 59}. Best is trial 1 with value: 0.08069256707391334.[0m


Training until validation scores don't improve for 50 rounds
[1000]	valid_0's rmse: 0.0975698
[2000]	valid_0's rmse: 0.0888612
[3000]	valid_0's rmse: 0.0848616
Did not meet early stopping. Best iteration is:
[2999]	valid_0's rmse: 0.0848613


[32m[I 2021-04-13 02:02:40,898][0m Trial 2 finished with value: 0.08486125946512804 and parameters: {'learning_rate': 0.02003492754548486, 'lambda_l1': 0.011523743445165439, 'lambda_l2': 0.09780427551413329, 'num_leaves': 254, 'feature_fraction': 0.5084937659319694, 'bagging_fraction': 0.8999295927477391, 'bagging_freq': 11, 'min_child_samples': 36}. Best is trial 1 with value: 0.08069256707391334.[0m


Training until validation scores don't improve for 50 rounds
[1000]	valid_0's rmse: 0.0872328
Early stopping, best iteration is:
[1080]	valid_0's rmse: 0.0870648


[32m[I 2021-04-13 02:02:44,668][0m Trial 3 finished with value: 0.08706477687664033 and parameters: {'learning_rate': 0.07980904804896945, 'lambda_l1': 0.0009992597688732404, 'lambda_l2': 1.1511685181518396e-08, 'num_leaves': 239, 'feature_fraction': 0.43913670347734524, 'bagging_fraction': 0.9319454115977162, 'bagging_freq': 9, 'min_child_samples': 18}. Best is trial 1 with value: 0.08069256707391334.[0m


Training until validation scores don't improve for 50 rounds
[1000]	valid_0's rmse: 0.0883105


[32m[I 2021-04-13 02:02:47,291][0m Trial 4 finished with value: 0.0853262555101639 and parameters: {'learning_rate': 0.0605678920642094, 'lambda_l1': 9.260472724323685e-06, 'lambda_l2': 2.4100830582189533e-05, 'num_leaves': 386, 'feature_fraction': 0.4937543438039733, 'bagging_fraction': 0.7535194139752937, 'bagging_freq': 9, 'min_child_samples': 47}. Best is trial 1 with value: 0.08069256707391334.[0m


Early stopping, best iteration is:
[1450]	valid_0's rmse: 0.0853263
Training until validation scores don't improve for 50 rounds
[1000]	valid_0's rmse: 0.0796745


[32m[I 2021-04-13 02:02:48,556][0m Trial 5 finished with value: 0.07700753170529034 and parameters: {'learning_rate': 0.08541312967823016, 'lambda_l1': 0.020334465841254947, 'lambda_l2': 6.383494611641645e-07, 'num_leaves': 37, 'feature_fraction': 0.43628098424195805, 'bagging_fraction': 0.8927153923985023, 'bagging_freq': 9, 'min_child_samples': 31}. Best is trial 5 with value: 0.07700753170529034.[0m


Early stopping, best iteration is:
[1369]	valid_0's rmse: 0.0770075
Training until validation scores don't improve for 50 rounds
[1000]	valid_0's rmse: 0.108226
[2000]	valid_0's rmse: 0.0966466
[3000]	valid_0's rmse: 0.0914894
Did not meet early stopping. Best iteration is:
[2990]	valid_0's rmse: 0.091476


[32m[I 2021-04-13 02:02:51,706][0m Trial 6 finished with value: 0.09147598164257606 and parameters: {'learning_rate': 0.03235377048980083, 'lambda_l1': 0.053366564912103356, 'lambda_l2': 6.270697123806469e-08, 'num_leaves': 52, 'feature_fraction': 0.4669796768675324, 'bagging_fraction': 0.7836586743385469, 'bagging_freq': 1, 'min_child_samples': 62}. Best is trial 5 with value: 0.07700753170529034.[0m


Training until validation scores don't improve for 50 rounds
[1000]	valid_0's rmse: 0.109684
[2000]	valid_0's rmse: 0.0966556
[3000]	valid_0's rmse: 0.0914321
Did not meet early stopping. Best iteration is:
[3000]	valid_0's rmse: 0.0914321


[32m[I 2021-04-13 02:02:56,781][0m Trial 7 finished with value: 0.09143210863307748 and parameters: {'learning_rate': 0.022077054908958005, 'lambda_l1': 5.550903303351942e-07, 'lambda_l2': 7.043567830176033e-07, 'num_leaves': 248, 'feature_fraction': 0.3436106042066707, 'bagging_fraction': 0.9043064779460739, 'bagging_freq': 4, 'min_child_samples': 50}. Best is trial 5 with value: 0.07700753170529034.[0m


Training until validation scores don't improve for 50 rounds


[32m[I 2021-04-13 02:02:58,425][0m Trial 8 finished with value: 0.07538703959640199 and parameters: {'learning_rate': 0.04808421560828722, 'lambda_l1': 0.05070055956614441, 'lambda_l2': 0.00027153927115777636, 'num_leaves': 101, 'feature_fraction': 0.6033947797902528, 'bagging_fraction': 0.9388348649625382, 'bagging_freq': 10, 'min_child_samples': 9}. Best is trial 8 with value: 0.07538703959640199.[0m


Early stopping, best iteration is:
[707]	valid_0's rmse: 0.075387
Training until validation scores don't improve for 50 rounds
[1000]	valid_0's rmse: 0.102701
[2000]	valid_0's rmse: 0.0929153
[3000]	valid_0's rmse: 0.0888456
Did not meet early stopping. Best iteration is:
[2999]	valid_0's rmse: 0.0888411


[32m[I 2021-04-13 02:03:03,986][0m Trial 9 finished with value: 0.08884110844337308 and parameters: {'learning_rate': 0.025274283454176375, 'lambda_l1': 5.806482314683408e-05, 'lambda_l2': 1.1474247549055463e-05, 'num_leaves': 511, 'feature_fraction': 0.4610485426894455, 'bagging_fraction': 0.8587187883348603, 'bagging_freq': 3, 'min_child_samples': 59}. Best is trial 8 with value: 0.07538703959640199.[0m


Best Value: 0.07538703959640199
Best trial: {'learning_rate': 0.04808421560828722, 'lambda_l1': 0.05070055956614441, 'lambda_l2': 0.00027153927115777636, 'num_leaves': 101, 'feature_fraction': 0.6033947797902528, 'bagging_fraction': 0.9388348649625382, 'bagging_freq': 10, 'min_child_samples': 9}


In [61]:
study.trials_dataframe().sort_values(['value']).head(20)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_bagging_fraction,params_bagging_freq,params_feature_fraction,params_lambda_l1,params_lambda_l2,params_learning_rate,params_min_child_samples,params_num_leaves,state
8,8,0.075387,2021-04-13 02:02:56.782655,2021-04-13 02:02:58.424748,0 days 00:00:01.642093,0.938835,10,0.603395,0.05070056,0.0002715393,0.048084,9,101,COMPLETE
5,5,0.077008,2021-04-13 02:02:47.292626,2021-04-13 02:02:48.555624,0 days 00:00:01.262998,0.892715,9,0.436281,0.02033447,6.383495e-07,0.085413,31,37,COMPLETE
1,1,0.080693,2021-04-13 02:02:30.105626,2021-04-13 02:02:34.596624,0 days 00:00:04.490998,0.837287,3,0.659627,0.04491269,0.009385838,0.037424,59,259,COMPLETE
2,2,0.084861,2021-04-13 02:02:34.597623,2021-04-13 02:02:40.897624,0 days 00:00:06.300001,0.89993,11,0.508494,0.01152374,0.09780428,0.020035,36,254,COMPLETE
4,4,0.085326,2021-04-13 02:02:44.669623,2021-04-13 02:02:47.290626,0 days 00:00:02.621003,0.753519,9,0.493754,9.260473e-06,2.410083e-05,0.060568,47,386,COMPLETE
3,3,0.087065,2021-04-13 02:02:40.898625,2021-04-13 02:02:44.668623,0 days 00:00:03.769998,0.931945,9,0.439137,0.0009992598,1.151169e-08,0.079809,18,239,COMPLETE
9,9,0.088841,2021-04-13 02:02:58.425748,2021-04-13 02:03:03.986405,0 days 00:00:05.560657,0.858719,3,0.461049,5.806482e-05,1.147425e-05,0.025274,59,511,COMPLETE
7,7,0.091432,2021-04-13 02:02:51.707623,2021-04-13 02:02:56.781656,0 days 00:00:05.074033,0.904306,4,0.343611,5.550903e-07,7.043568e-07,0.022077,50,248,COMPLETE
6,6,0.091476,2021-04-13 02:02:48.556624,2021-04-13 02:02:51.706625,0 days 00:00:03.150001,0.783659,1,0.46698,0.05336656,6.270697e-08,0.032354,62,52,COMPLETE
0,0,0.106992,2021-04-13 02:02:28.112628,2021-04-13 02:02:30.104623,0 days 00:00:01.991995,0.724498,6,0.313473,0.8332516,1.295476e-06,0.012724,44,89,COMPLETE


In [62]:
study.best_params

{'learning_rate': 0.04808421560828722,
 'lambda_l1': 0.05070055956614441,
 'lambda_l2': 0.00027153927115777636,
 'num_leaves': 101,
 'feature_fraction': 0.6033947797902528,
 'bagging_fraction': 0.9388348649625382,
 'bagging_freq': 10,
 'min_child_samples': 9}

In [63]:
study.best_value

0.07538703959640199

In [64]:
params = study.best_params

In [65]:
params

{'learning_rate': 0.04808421560828722,
 'lambda_l1': 0.05070055956614441,
 'lambda_l2': 0.00027153927115777636,
 'num_leaves': 101,
 'feature_fraction': 0.6033947797902528,
 'bagging_fraction': 0.9388348649625382,
 'bagging_freq': 10,
 'min_child_samples': 9}

In [67]:
lgbm = lgb.LGBMRegressor(**params, n_estimators=3000)

In [69]:
lgbm.fit(X_train, y_train)



LGBMRegressor(bagging_fraction=0.9388348649625382, bagging_freq=10,
              feature_fraction=0.6033947797902528,
              lambda_l1=0.05070055956614441, lambda_l2=0.00027153927115777636,
              learning_rate=0.04808421560828722, min_child_samples=9,
              n_estimators=3000, num_leaves=101)

In [71]:
pred = lgbm.predict(X_test)

In [73]:
pred = np.expm1(pred)

In [75]:
submission['value'] = pred

In [76]:
submission

Unnamed: 0,id,value
0,1,4.445015e+07
1,2,8.604490e+07
2,4,7.829531e+07
3,5,8.089341e+07
4,6,6.279681e+07
...,...,...
3823,16924,5.950746e+04
3824,16929,5.025932e+04
3825,16932,5.791141e+04
3826,16937,5.123261e+04


In [77]:
# submission 파일로 저장
submission.to_csv("./data/submission_lightgbm.csv", index=False)