In [155]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.multioutput import RegressorChain
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import KFold
from sklearn.linear_model import ElasticNet
import tensorflow.keras as keras
import tensorflow as tf

In [156]:
path = "D:/LG_Radar"
train = pd.read_csv(path+"/train.csv")
test = pd.read_csv(path+"/test.csv")
submission = pd.read_csv(path+"/sample_submission.csv")

In [157]:
def lg_nrmse(gt, preds):
    # 각 Y Feature별 NRMSE 총합
    # Y_01 ~ Y_08 까지 20% 가중치 부여
    all_nrmse = []
    for idx in range(14): # ignore 'ID'
        rmse = mean_squared_error(gt[:,idx], preds[:,idx], squared=False)
        nrmse = rmse/np.mean(np.abs(gt[:,idx]))
        all_nrmse.append(nrmse)
    score = 1.2 * np.sum(all_nrmse[:8]) + 1.0 * np.sum(all_nrmse[8:14])
    print(all_nrmse)
    return score

In [158]:
train

Unnamed: 0,ID,X_01,X_02,X_03,X_04,X_05,X_06,X_07,X_08,X_09,...,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,TRAIN_00001,70.544,103.320,67.47,1,101.892,74.983,29.45,62.38,245.71,...,29.632,16.083,4.276,-25.381,-25.529,-22.769,23.792,-25.470,-25.409,-25.304
1,TRAIN_00002,69.524,103.321,65.17,1,101.944,72.943,28.73,61.23,233.61,...,33.179,16.736,3.229,-26.619,-26.523,-22.574,24.691,-26.253,-26.497,-26.438
2,TRAIN_00003,72.583,103.320,64.07,1,103.153,72.943,28.81,105.77,272.20,...,31.801,17.080,2.839,-26.238,-26.216,-22.169,24.649,-26.285,-26.215,-26.370
3,TRAIN_00004,71.563,103.320,67.57,1,101.971,77.022,28.92,115.21,255.36,...,34.503,17.143,3.144,-25.426,-25.079,-21.765,24.913,-25.254,-25.021,-25.345
4,TRAIN_00005,69.524,103.320,63.57,1,101.981,70.904,29.68,103.38,241.46,...,32.602,17.569,3.138,-25.376,-25.242,-21.072,25.299,-25.072,-25.195,-24.974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39602,TRAIN_39603,66.465,103.320,62.27,1,103.150,66.825,30.20,77.83,298.05,...,29.194,16.582,3.410,-26.486,-26.581,-22.772,24.261,-26.491,-26.584,-26.580
39603,TRAIN_39604,66.465,103.321,62.77,1,102.021,66.825,29.21,102.25,270.67,...,29.859,15.659,3.406,-27.308,-27.203,-24.674,23.427,-27.250,-27.334,-27.325
39604,TRAIN_39605,68.504,103.320,64.67,1,103.144,68.864,29.96,102.61,198.07,...,24.720,16.823,3.215,-26.502,-26.687,-22.577,24.301,-26.388,-26.425,-26.601
39605,TRAIN_39606,66.465,103.320,63.67,1,102.025,67.845,30.30,112.60,275.52,...,26.412,15.757,4.216,-26.760,-26.634,-24.066,23.305,-26.536,-26.751,-26.635


In [159]:
train.columns

Index(['ID', 'X_01', 'X_02', 'X_03', 'X_04', 'X_05', 'X_06', 'X_07', 'X_08',
       'X_09', 'X_10', 'X_11', 'X_12', 'X_13', 'X_14', 'X_15', 'X_16', 'X_17',
       'X_18', 'X_19', 'X_20', 'X_21', 'X_22', 'X_23', 'X_24', 'X_25', 'X_26',
       'X_27', 'X_28', 'X_29', 'X_30', 'X_31', 'X_32', 'X_33', 'X_34', 'X_35',
       'X_36', 'X_37', 'X_38', 'X_39', 'X_40', 'X_41', 'X_42', 'X_43', 'X_44',
       'X_45', 'X_46', 'X_47', 'X_48', 'X_49', 'X_50', 'X_51', 'X_52', 'X_53',
       'X_54', 'X_55', 'X_56', 'Y_01', 'Y_02', 'Y_03', 'Y_04', 'Y_05', 'Y_06',
       'Y_07', 'Y_08', 'Y_09', 'Y_10', 'Y_11', 'Y_12', 'Y_13', 'Y_14'],
      dtype='object')

In [160]:
X = np.array(train[list(train.columns[1:57])])
y = np.array(train[list(train.columns[57:])])

In [161]:
X_train, X_test, y_train, y_test = train_test_split(X, np.array(y), random_state=42)

In [162]:
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test) 

# Make base models

각각의 모델의 에러를 가중치로 평균내어 예측한다.

In [43]:
models_list=[]
rf_model = RandomForestRegressor()
rf_model.fit(X_train, y_train)
print(lg_nrmse(y_train, rf_model.predict(X_train)))
print(lg_nrmse(y_test, rf_model.predict(X_test)))

[0.09670271964051644, 0.1345721632517782, 0.13186672359605406, 0.07192073482021387, 0.029781159088212893, 0.03930302386084485, 0.04861211090853905, 0.009036646958175666, 0.008965430882527782, 0.014479332867538401, 0.012568602167582417, 0.009024673411063831, 0.008997954369851504, 0.009004457764971593]
0.7371947900127376
[0.25766138905404445, 0.35886539520721955, 0.3505850821448126, 0.1875901595996728, 0.08019460711328945, 0.11834099431345166, 0.12938286333961457, 0.02375178956958381, 0.023517120082524347, 0.03924488944474016, 0.03335460764072608, 0.023655385261588856, 0.023577889904899667, 0.023706885184489487]
1.974703513928995


In [10]:
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test) 
svm_model = MultiOutputRegressor(SVR())
svm_model.fit(X_train_scaled, y_train)
print(lg_nrmse(y_test, svm_model.predict(X_test_scaled)))

In [25]:
xgb_model = MultiOutputRegressor(XGBRegressor(max_depth=5, learning_rate=0.1, alpha=4, gamma=1, reg_lambda=7))
xgb_model.fit(X_train, y_train)
print(lg_nrmse(y_train, xgb_model.predict(X_train)))
print(lg_nrmse(y_test, xgb_model.predict(X_test)))

[0.2523191488237303, 0.3471716072995733, 0.34291181589665465, 0.17924742818676148, 0.07528309089874936, 0.0861067120607161, 0.12551482815256845, 0.023022719935640458, 0.022867724240378702, 0.03635485902932705, 0.03195408792574978, 0.022998638186077818, 0.022940338792546144, 0.02299854675715869]
1.878007016436511
[0.2561240472193856, 0.35727586271824735, 0.3479654264452222, 0.18822887493532348, 0.08036300316087812, 0.11843145716290114, 0.12995804292448906, 0.023770709108455572, 0.02353078587312592, 0.03925235171604544, 0.033328623048510524, 0.02364746084163248, 0.023614672274852578, 0.02370478280264867]
1.9696195849666984


# Make k-fold model list

## Random Forest 

In [163]:
def prediction(models):
    pred=[]
    for i in range(5):
        pred.append(models[i].predict(X_test))
    return pred

In [164]:
def prediction_scaled(models):
    pred=[]
    for i in range(5):
        pred.append(models[i].predict(X_test_scaled))
    return pred

In [188]:
kf = KFold(n_splits=5)
rf_list=[]
for train_index, test_index in kf.split(X_train):
    X_train_, X_test_ = X_train[train_index], X_train[test_index]
    y_train_, y_test_ = y_train[train_index], y_train[test_index]
    rf_model = RandomForestRegressor()
    rf_model.fit(X_train_, y_train_)
    print("Train: ", lg_nrmse(y_train_, rf_model.predict(X_train_)))
    print("Test: ",lg_nrmse(y_test_, rf_model.predict(X_test_)))
    rf_list.append(rf_model)

[0.09711345383913565, 0.13494851921814457, 0.13213970731400418, 0.07004882595550954, 0.029761169192260333, 0.04092875869913585, 0.04851139852424882, 0.008976826567268064, 0.008889307287162819, 0.014601051388266464, 0.012548639788612943, 0.008946563985608221, 0.008941940606451246, 0.00895561310374235]
Train:  0.7377975073314924
[0.2569866804208162, 0.3578086701477715, 0.351529788508981, 0.19016105121085475, 0.08062259832910071, 0.10436382023304734, 0.13077898270630167, 0.024444033079800902, 0.0241961915413168, 0.03875044055934521, 0.03403738038418211, 0.024345289625690446, 0.024283430181934375, 0.024317099125741343]
Test:  1.9659645809822188
[0.09677630592561719, 0.1343313744486164, 0.1314299422039107, 0.07031942724984508, 0.02993624274031539, 0.04107744229881379, 0.04872202926411644, 0.009032698419000324, 0.008950524857914482, 0.01459489128951228, 0.012571139032946415, 0.008999879508704038, 0.008986067823331789, 0.009005771018591886]
Train:  0.7370588285912832
[0.2559911470400626, 0.36

In [189]:
pred_1 = prediction(rf_list)
pred_1 = np.sum(pred_1, axis=0)/5

In [190]:
lg_nrmse(y_test, pred_1)

[0.2575272647375289, 0.3568267555551742, 0.3492567690636384, 0.19818914733020532, 0.07960847842557174, 0.10603936876278743, 0.1293288357743404, 0.02399925184489039, 0.02390722832869722, 0.03841129692110332, 0.033229850271742904, 0.02405595630958492, 0.023916455829717315, 0.023941167932194866]


1.9683930013860047

## XGBoost 

In [237]:
X.shape

(39607, 56)

In [191]:
kf = KFold(n_splits=5)
xgb_list=[]

for train_index, test_index in kf.split(X_train):
    X_train_, X_test_ = X_train[train_index], X_train[test_index]
    y_train_, y_test_ = y_train[train_index], y_train[test_index]
    xgb_model = MultiOutputRegressor(XGBRegressor(max_depth=5, learning_rate=0.1, alpha=3, gamma=1, reg_lambda=5)) 
    xgb_model.fit(X_train_, y_train_)
    print("Train: ", lg_nrmse(y_train_, xgb_model.predict(X_train_)))
    print("Test: ",lg_nrmse(y_test_, xgb_model.predict(X_test_)))
    xgb_list.append(xgb_model)

[0.25039825023777484, 0.3459121614462364, 0.34331208889305864, 0.17397916250215978, 0.07409821228605683, 0.08076762099009527, 0.12474184122326001, 0.022587352507464457, 0.022363473861795536, 0.035973113247755484, 0.031510378303881176, 0.022531517572114225, 0.022450198573860446, 0.022573819846319784]
Train:  1.8563585295090541
[0.25566753942814296, 0.3565125224718744, 0.3495452697757995, 0.19056332935001005, 0.0804079465569588, 0.10460429961784706, 0.13099940872386875, 0.02447906808004786, 0.024276160559778957, 0.038911403462723745, 0.034005915694117914, 0.024432091239806088, 0.024365504307862274, 0.02444203534233309]
Test:  1.9617683714120815
[0.25197629470619487, 0.34721417909880026, 0.34261395698250857, 0.17560653368758636, 0.07486242488575161, 0.07851058634916143, 0.12416402340488528, 0.02272535738196857, 0.02257934908360841, 0.036113151622959846, 0.03162148602196337, 0.0227034576220021, 0.02267187419576066, 0.022752911672707227]
Train:  1.8596502580152299
[0.2528307421208037, 0.358

In [192]:
pred_2 = prediction(xgb_list)
pred_2 = np.sum(pred_2, axis=0)/5

In [193]:
lg_nrmse(y_test, pred_2)

[0.2558822529743317, 0.3549029217754941, 0.34790117737876264, 0.1991030318408405, 0.07967861321862739, 0.1047342048211401, 0.129434573235479, 0.024024011510679597, 0.02392021506458476, 0.0383312251962411, 0.0332238238543668, 0.02404897247172401, 0.023917766018484818, 0.023969489114200358]


1.962204435826028

## Linear model 

In [194]:
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import BayesianRidge

In [222]:
kf = KFold(n_splits=5)
linear_list=[]

for train_index, test_index in kf.split(X_train_scaled):
    X_train_, X_test_ = X_train_scaled[train_index], X_train_scaled[test_index]
    y_train_, y_test_ = y_train[train_index], y_train[test_index]
    linear_model = MultiOutputRegressor(HuberRegressor(epsilon=1.5)) 
    linear_model.fit(X_train_, y_train_)
    print("Train: ", lg_nrmse(y_train_, linear_model.predict(X_train_)))
    print("Test: ",lg_nrmse(y_test_, linear_model.predict(X_test_)))
    linear_list.append(linear_model)

[0.258716824485236, 0.3594733162038395, 0.351889366126246, 0.19014780118749172, 0.08017248675241707, 0.11492686741096554, 0.13072414224985154, 0.024278651978358406, 0.024043685395963497, 0.039960143841755676, 0.03376594676653666, 0.02418996266965008, 0.024181933162410536, 0.024221899724928893]
Train:  1.9827589192345323
[0.25786288933791024, 0.3583531405712616, 0.3517367680118784, 0.19418695982268896, 0.08148726390497141, 0.1138267325939211, 0.13196808516286337, 0.025033464168910304, 0.024789326810417945, 0.040434976047898245, 0.03459282434770728, 0.024952382320682788, 0.024858423865327856, 0.02491982768088713]
Test:  1.9918941253622076
[0.2592493082384143, 0.3586615926517625, 0.3517627011928164, 0.19140304726394153, 0.08058207231285346, 0.11718584523537144, 0.1311710759931446, 0.02448155985902052, 0.024237964230008326, 0.040260823962386505, 0.03394163909120841, 0.024408839065083246, 0.02436235743763121, 0.024399592475278634]
Train:  1.9890078595583862
[0.25559875504063334, 0.361453480

In [223]:
pred_3 = prediction_scaled(linear_list)
pred_3 = np.sum(pred_3, axis=0)/5

In [224]:
lg_nrmse(y_test, pred_3)

[0.2588878186300962, 0.35830850055644875, 0.3509603085928431, 0.20252504605089944, 0.08086028866450429, 0.11163731055117335, 0.13058312958773138, 0.024567403188024323, 0.024450465576942446, 0.039690564704165494, 0.033850688648893304, 0.024603547828312988, 0.024479487477700495, 0.024488893480535987]


1.9935594147026159

## ANN 

In [214]:
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor = "val_loss", factor=0.5, patience=3, verbose=0)
es = keras.callbacks.EarlyStopping(monitor = "val_loss", patience=10)
mc = keras.callbacks.ModelCheckpoint(monitor = "val_loss", verbose=3, save_best_only=True,
                                     mode="min", filepath="./checkpoint.h5")

In [215]:
kf = KFold(n_splits=5)
model_list=[]
for train_index, test_index in kf.split(X_train_scaled):
    X_train, X_test = X_train_scaled[train_index], X_train_scaled[test_index]
    y_train_, y_test_ = y_train[train_index], y_train[test_index]

    Input = keras.layers.Input(shape=X_train.shape[1])
    x = keras.layers.Dense(32, activation="relu", kernel_initializer=keras.initializers.he_normal)(Input)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dense(32, activation="relu", kernel_initializer=keras.initializers.he_normal)(x)
    x = keras.layers.BatchNormalization()(x)

    Output = keras.layers.Dense(14, activation="linear", kernel_initializer=keras.initializers.he_normal)(x)


    model = keras.models.Model(inputs = Input, outputs = Output)
    model.compile(loss="mse", optimizer=keras.optimizers.Adam(learning_rate=0.01), metrics=["mse", "mae"])
    model.fit(X_train, y_train_, epochs=70, validation_data = (X_test, y_test_), batch_size=128, callbacks=[reduce_lr, es], verbose=0)
    print(lg_nrmse(y_train_, model.predict(X_train)))
    print(lg_nrmse(y_test_, model.predict(X_test)))    
    model_list.append(model)

[0.2583364450746405, 0.35907560737341887, 0.35166575359987823, 0.1843719837404005, 0.07798579675427528, 0.11200231495370963, 0.12945017850155355, 0.023899091878034937, 0.023650531823685938, 0.03892826612583198, 0.03317502074443401, 0.0237733695838119, 0.023784718939337087, 0.023886007720138942]
1.9633425211883333
[0.25833746556473386, 0.3581758396979343, 0.35187330652025595, 0.19273007259384717, 0.08100642529159169, 0.11249010550499303, 0.13160946803622192, 0.024883526762319825, 0.02454761720915585, 0.03975969931892908, 0.034459147362224565, 0.02475515948015997, 0.024681115469372607, 0.024778621786173717]
1.986308812592293
[0.25995916785554685, 0.3601451959065731, 0.3537910197237443, 0.18565223643936768, 0.07835110724473088, 0.1133466271097082, 0.13014231859744774, 0.02405271980930187, 0.023818962856591415, 0.03897699268127956, 0.0334671668827628, 0.02401410391049979, 0.023946191741074763, 0.02392886527831073]
1.9746807545742238
[0.25761904834067695, 0.3643025020702629, 0.3545167757637

In [216]:
pred_4 = prediction_scaled(model_list)
pred_4 = np.sum(pred_4, axis=0)/5

In [217]:
lg_nrmse(y_test, pred_4)

[0.2589138539962884, 0.3583680360603153, 0.3510731064799994, 0.19966994623390213, 0.07987238862756645, 0.10961651991275326, 0.1304997282588233, 0.024281577608257002, 0.024144656736569663, 0.03892045253052741, 0.03354363242586301, 0.024324059440830454, 0.02418459248861957, 0.02417606413973205]


1.9840476463756285

In [272]:
lg_nrmse(y_test, pred_1*0.4 + pred_2*0.6)

[0.2560991801054071, 0.3550564249061046, 0.3479628023966397, 0.1983620498910887, 0.07946811375113932, 0.10477375065972319, 0.12913267105380344, 0.023950399524338677, 0.023852854754404667, 0.03820184428433655, 0.03315208313829512, 0.023991053391723673, 0.02385568315668482, 0.02389623038473636]


1.9607162198560748

## SVM 

In [8]:
# kf = KFold(n_splits=5)
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X)
# model_list=[]
# for train_index, test_index in kf.split(X_scaled):
#     print("TRAIN:", train_index, "TEST:", test_index)
#     X_train_, X_test_ = X_scaled[train_index], X_scaled[test_index]
#     y_train_, y_test_ = y[train_index], y[test_index]
#     svm_model = MultiOutputRegressor(SVR())
#     svm_model.fit(X_train_, y_train_)
# #     print(lg_nrmse(y_train_, svm_model.predict(X_train_)))
#     print(lg_nrmse(y_test_, svm_model.predict(X_test_)))
#     model_list.append(svm_model)

# Make model for submission

In [258]:
import joblib
joblib.dump(rf_list, "./re_list.pkl")


KeyboardInterrupt



In [252]:
test = test[list(test.columns[1:])]

In [253]:
test = test[list(test.columns[1:])]

def prediction(models):
    pred=[]
    for i in range(5):
        pred.append(models[i].predict(test))
    return pred

In [240]:
kf = KFold(n_splits=5)
xgb_list=[]

for train_index, test_index in kf.split(X):
    X_train_, X_test_ = X[train_index], X[test_index]
    y_train_, y_test_ = y[train_index], y[test_index]
    xgb_model = MultiOutputRegressor(XGBRegressor(max_depth=5, learning_rate=0.1, alpha=3, gamma=1, reg_lambda=5)) 
    xgb_model.fit(X_train_, y_train_)
    print("Train: ", lg_nrmse(y_train_, xgb_model.predict(X_train_)))
    print("Test: ",lg_nrmse(y_test_, xgb_model.predict(X_test_)))
    xgb_list.append(xgb_model)

[0.24786014431474163, 0.3471249833155379, 0.34152587903900444, 0.18064812941068414, 0.0744083289391349, 0.06402768829929066, 0.12267149835575084, 0.02231509513244782, 0.022141829736179532, 0.03505551291085125, 0.03156890088870052, 0.02229232122813398, 0.02224047147654206, 0.0222627774079735]
Train:  1.8362599098162917
[0.27242541927797087, 0.35857731087290623, 0.3557742835314258, 0.18439869437140782, 0.08461587282061873, 0.17909073847429324, 0.1416518816606074, 0.026718132515478794, 0.026422126325348495, 0.04468628791446024, 0.03495779780706339, 0.026485936485242206, 0.026615479558030633, 0.02655537115561898]
Test:  2.1096257994754146
[0.24999424378307988, 0.3504096298156244, 0.3439635929170658, 0.1810455247153, 0.0752174685633724, 0.07331766954557595, 0.12320778167586995, 0.02255321370253246, 0.022413706661189876, 0.035935382771867364, 0.03169600864102319, 0.022593567944554117, 0.022501408145343838, 0.022520837501125678]
Train:  1.8613118613272086
[0.25917298292154445, 0.3473869139550

In [256]:
xgb_pred = prediction(xgb_list)
xgb_pred = np.sum(xgb_pred, axis=0)/5

In [247]:
svm_pred = joblib.load("./svm_pred_test.pkl")
svm_pred = np.sum(svm_pred, axis=0)/5

In [248]:
pd.DataFrame(svm_pred)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1.409819,1.252312,1.163701,13.512946,31.008136,16.635279,3.299466,-26.310833,-26.330778,-22.271583,24.359146,-26.188435,-26.190361,-26.306680
1,1.415609,1.250920,1.147291,12.757977,31.704951,16.662048,3.183028,-26.128144,-26.183824,-22.251028,24.489667,-26.072201,-26.078579,-26.063945
2,1.564963,1.100217,1.134266,14.258767,31.306923,16.866931,3.428086,-25.977658,-26.054103,-22.115169,24.587552,-25.961441,-26.002260,-25.966161
3,1.668950,1.354370,1.267215,15.352608,32.625482,17.454612,3.025557,-25.503381,-25.574858,-21.553498,25.203769,-25.508678,-25.486801,-25.516549
4,1.286316,0.939406,0.803248,15.372050,32.355104,17.091091,3.059540,-25.535459,-25.482908,-21.988857,24.910916,-25.368966,-25.368402,-25.472581
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,1.253778,0.802493,1.018040,13.490981,30.958652,16.737182,3.480894,-26.443106,-26.416641,-22.642595,24.415092,-26.296327,-26.288961,-26.306986
39604,1.183491,0.719369,0.934206,12.902357,31.205385,16.707389,3.415066,-26.545678,-26.556356,-23.007535,24.359420,-26.463875,-26.521575,-26.559217
39605,1.209365,0.909526,1.004215,13.393399,31.922727,16.778812,3.173302,-26.346610,-26.440189,-22.660900,24.503594,-26.398515,-26.415295,-26.454039
39606,1.078828,0.797547,0.853858,13.073846,31.515225,16.854181,3.326421,-26.457356,-26.537674,-22.713752,24.508952,-26.481735,-26.422933,-26.510171


In [257]:
pd.DataFrame(xgb_pred)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1.400740,1.191919,1.067861,13.824954,31.322250,16.597258,3.149719,-26.152908,-26.173376,-22.200495,24.479355,-26.091381,-26.105927,-26.055267
1,1.493244,1.194573,1.135296,13.697275,31.192072,16.576202,3.144413,-26.166401,-26.181250,-22.299389,24.384298,-26.120701,-26.120825,-26.115902
2,1.386967,1.091242,1.073729,14.559009,32.127338,16.937084,3.046861,-25.897882,-25.828592,-22.077620,24.615082,-25.758932,-25.818579,-25.867197
3,1.444038,1.137749,1.059399,15.176462,32.601620,17.036249,3.033588,-25.650751,-25.700232,-21.784954,24.936899,-25.632898,-25.623577,-25.699432
4,1.315189,1.000293,0.924035,14.923152,31.629135,16.932186,3.125610,-25.748081,-25.791937,-22.100204,24.777075,-25.672480,-25.716684,-25.707270
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,1.284306,0.965738,1.010752,13.065999,31.275232,16.706198,3.157528,-26.509151,-26.477982,-22.792858,24.450790,-26.428436,-26.399389,-26.397385
39604,1.261379,0.902333,0.981407,14.815043,31.352224,16.734900,3.188740,-26.424097,-26.381802,-22.748774,24.463482,-26.329123,-26.370712,-26.336264
39605,1.245729,0.910043,0.976458,12.908163,31.119501,16.695415,3.146627,-26.499918,-26.501791,-22.829569,24.356607,-26.475494,-26.460205,-26.436710
39606,1.249103,0.865900,0.957562,13.408760,31.384037,16.712229,3.188548,-26.574184,-26.521839,-22.774195,24.448502,-26.485046,-26.484020,-26.478649


In [259]:
joblib.dump(svm_pred, "svm_submission.pkl")

['svm_submission.pkl']

In [260]:
joblib.dump(xgb_pred, "xgb_submission.pkl")

['xgb_submission.pkl']

# Submission

In [22]:
import joblib
svm_model = joblib.load("./svm.pkl")
pred_svm = joblib.load("./pred_svm.pkl")

In [12]:
X_test_submission = test[list(test.columns[1:57])]
X_test_submission_scaled = scaler.transform(X_test_submission)

In [20]:
pred_1 = rf_model.predict(X_test_submission)*0.4

NameError: name 'rf_model' is not defined

In [None]:
pred_2 = pred_svm * 0.2

In [21]:
pred_3 = xgb_model.predict(X_test_submission)*0.4

NameError: name 'xgb_model' is not defined

In [25]:
submission_data = pred_1 + pred_3

In [28]:
submission[list(submission.columns[1:])] = submission_data

In [31]:
submission

Unnamed: 0,ID,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,TEST_00001,1.409515,1.210701,1.083776,13.933078,31.433623,16.756577,3.122686,-26.144023,-26.197439,-22.219550,24.426465,-26.089073,-26.061156,-26.105292
1,TEST_00002,1.461119,1.187049,1.120219,13.868451,31.126204,16.657150,3.105777,-26.138793,-26.162941,-22.235692,24.417072,-26.103904,-26.091830,-26.132909
2,TEST_00003,1.366390,1.038038,1.003808,14.027303,31.619934,16.673731,3.119544,-25.962618,-25.939933,-22.145309,24.470158,-25.925116,-25.893393,-25.882908
3,TEST_00004,1.425013,1.124561,1.045093,14.876444,32.198437,17.084132,3.110118,-25.639896,-25.654441,-21.805200,24.879766,-25.687802,-25.557868,-25.690587
4,TEST_00005,1.339584,1.039257,0.969167,14.823126,31.544785,17.015359,3.142881,-25.675690,-25.739396,-22.102114,24.768741,-25.578993,-25.634174,-25.639194
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,TEST_39604,1.294672,0.998447,1.012693,12.803720,31.132021,16.575491,3.206131,-26.549717,-26.525038,-22.732996,24.272350,-26.438003,-26.461607,-26.466054
39604,TEST_39605,1.226949,0.896240,0.933956,14.105540,30.992607,16.591816,3.215134,-26.489779,-26.498661,-22.826320,24.361330,-26.442771,-26.478468,-26.412184
39605,TEST_39606,1.264823,0.910235,0.958471,13.050433,31.079248,16.587271,3.161601,-26.567321,-26.585213,-22.770649,24.162510,-26.500153,-26.524040,-26.507245
39606,TEST_39607,1.215846,0.927153,0.943064,12.815964,30.535386,16.598558,3.203439,-26.573993,-26.576417,-22.888135,24.211090,-26.511960,-26.530781,-26.531343


In [30]:
submission.to_csv("./ensemble_submission.csv", index=None)