In [270]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.multioutput import RegressorChain
from sklearn.multioutput import MultiOutputRegressor
import tensorflow.keras as keras
import tensorflow as tf
from sklearn.model_selection import KFold

In [271]:
path = "D:/LG_Radar"
train = pd.read_csv(path+"/train.csv")
test = pd.read_csv(path+"/test.csv")
submission = pd.read_csv(path+"/sample_submission.csv")

In [272]:
def lg_nrmse(gt, preds):
    # 각 Y Feature별 NRMSE 총합
    # Y_01 ~ Y_08 까지 20% 가중치 부여
    all_nrmse = []
    for idx in range(14): # ignore 'ID'
        rmse = mean_squared_error(gt[:,idx], preds[:,idx], squared=False)
        nrmse = rmse/np.mean(np.abs(gt[:,idx]))
        all_nrmse.append(nrmse)
    score = 1.2 * np.sum(all_nrmse[:8]) + 1.0 * np.sum(all_nrmse[8:14])
    print(all_nrmse)
    return score

In [273]:
train

Unnamed: 0,ID,X_01,X_02,X_03,X_04,X_05,X_06,X_07,X_08,X_09,...,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,TRAIN_00001,70.544,103.320,67.47,1,101.892,74.983,29.45,62.38,245.71,...,29.632,16.083,4.276,-25.381,-25.529,-22.769,23.792,-25.470,-25.409,-25.304
1,TRAIN_00002,69.524,103.321,65.17,1,101.944,72.943,28.73,61.23,233.61,...,33.179,16.736,3.229,-26.619,-26.523,-22.574,24.691,-26.253,-26.497,-26.438
2,TRAIN_00003,72.583,103.320,64.07,1,103.153,72.943,28.81,105.77,272.20,...,31.801,17.080,2.839,-26.238,-26.216,-22.169,24.649,-26.285,-26.215,-26.370
3,TRAIN_00004,71.563,103.320,67.57,1,101.971,77.022,28.92,115.21,255.36,...,34.503,17.143,3.144,-25.426,-25.079,-21.765,24.913,-25.254,-25.021,-25.345
4,TRAIN_00005,69.524,103.320,63.57,1,101.981,70.904,29.68,103.38,241.46,...,32.602,17.569,3.138,-25.376,-25.242,-21.072,25.299,-25.072,-25.195,-24.974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39602,TRAIN_39603,66.465,103.320,62.27,1,103.150,66.825,30.20,77.83,298.05,...,29.194,16.582,3.410,-26.486,-26.581,-22.772,24.261,-26.491,-26.584,-26.580
39603,TRAIN_39604,66.465,103.321,62.77,1,102.021,66.825,29.21,102.25,270.67,...,29.859,15.659,3.406,-27.308,-27.203,-24.674,23.427,-27.250,-27.334,-27.325
39604,TRAIN_39605,68.504,103.320,64.67,1,103.144,68.864,29.96,102.61,198.07,...,24.720,16.823,3.215,-26.502,-26.687,-22.577,24.301,-26.388,-26.425,-26.601
39605,TRAIN_39606,66.465,103.320,63.67,1,102.025,67.845,30.30,112.60,275.52,...,26.412,15.757,4.216,-26.760,-26.634,-24.066,23.305,-26.536,-26.751,-26.635


In [274]:
train.columns

Index(['ID', 'X_01', 'X_02', 'X_03', 'X_04', 'X_05', 'X_06', 'X_07', 'X_08',
       'X_09', 'X_10', 'X_11', 'X_12', 'X_13', 'X_14', 'X_15', 'X_16', 'X_17',
       'X_18', 'X_19', 'X_20', 'X_21', 'X_22', 'X_23', 'X_24', 'X_25', 'X_26',
       'X_27', 'X_28', 'X_29', 'X_30', 'X_31', 'X_32', 'X_33', 'X_34', 'X_35',
       'X_36', 'X_37', 'X_38', 'X_39', 'X_40', 'X_41', 'X_42', 'X_43', 'X_44',
       'X_45', 'X_46', 'X_47', 'X_48', 'X_49', 'X_50', 'X_51', 'X_52', 'X_53',
       'X_54', 'X_55', 'X_56', 'Y_01', 'Y_02', 'Y_03', 'Y_04', 'Y_05', 'Y_06',
       'Y_07', 'Y_08', 'Y_09', 'Y_10', 'Y_11', 'Y_12', 'Y_13', 'Y_14'],
      dtype='object')

In [275]:
X = train[list(train.columns[1:57])]
y = train[list(train.columns[57:])]
y = np.array(y)

In [276]:
X_train, X_test, y_train, y_test = train_test_split(X,y)

# Make base models

각각의 모델의 에러를 가중치로 평균내어 예측한다.

In [322]:
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor = "val_loss", factor=0.5, patience=3, verbose=3)
es = keras.callbacks.EarlyStopping(monitor = "val_loss", patience=10)
mc = keras.callbacks.ModelCheckpoint(monitor = "val_loss", verbose=3, save_best_only=True,
                                     mode="min", filepath="./checkpoint.h5")

In [277]:
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test)

In [495]:
kf = KFold(n_splits=5)
model_list=[]
for train_index, test_index in kf.split(X_train_scaled):
    X_train, X_test = X_train_scaled[train_index], X_train_scaled[test_index]
    y_train_, y_test_ = y_train[train_index], y_train[test_index]

    Input = keras.layers.Input(shape=X_train.shape[1])
    x = keras.layers.Dense(32, activation="relu", kernel_initializer=keras.initializers.he_normal)(Input)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dense(32, activation="relu", kernel_initializer=keras.initializers.he_normal)(x)
    x = keras.layers.BatchNormalization()(x)

    Output = keras.layers.Dense(14, activation="linear", kernel_initializer=keras.initializers.he_normal)(x)


    model = keras.models.Model(inputs = Input, outputs = Output)
    model.compile(loss="mse", optimizer=keras.optimizers.Adam(learning_rate=0.01), metrics=["mse", "mae"])
    model.fit(X_train, y_train_, epochs=70, validation_data = (X_test, y_test_), batch_size=128, callbacks=[reduce_lr, es], verbose=0)
    print(lg_nrmse(y_train_, model.predict(X_train)))
    print(lg_nrmse(y_test_, model.predict(X_test)))    
    model_list.append(model)


Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.

Epoch 00016: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.

Epoch 00022: ReduceLROnPlateau reducing learning rate to 0.0012499999720603228.

Epoch 00030: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.

Epoch 00033: ReduceLROnPlateau reducing learning rate to 0.0003124999930150807.

Epoch 00036: ReduceLROnPlateau reducing learning rate to 0.00015624999650754035.
[0.2578274245634931, 0.35941457609819133, 0.3518269246718135, 0.18375679397269445, 0.07795451216409248, 0.1044228822742948, 0.1287621998829742, 0.023838861663792805, 0.02367351467052767, 0.038234717994075865, 0.03321270826254574, 0.023821558101719025, 0.023742642362077366, 0.023778610512081282]
1.951828762252643
[0.2628197348875235, 0.35856748975641634, 0.3532133305125901, 0.1916021182624245, 0.0799183332209255, 0.1420741533219168, 0.13347433490037658, 0.024589879320090022, 0.024498276503584587, 0.04194513705

In [496]:
pred=[]
for i in range(len(model_list)):
    pred.append(model_list[i].predict(X_test_scaled))

In [497]:
lg_nrmse(y_test, model_list[0].predict(X_test_scaled)*0.2 + model_list[1].predict(X_test_scaled)*0.2 + model_list[2].predict(X_test_scaled)*0.2 + model_list[3].predict(X_test_scaled)*0.2 + model_list[4].predict(X_test_scaled)*0.2 )

[0.2579675057305892, 0.35983434048308727, 0.35257505072733747, 0.19732038771223753, 0.07913340783060467, 0.10327848197332934, 0.13033117701221966, 0.024064688804649986, 0.02391677997276643, 0.03828000964887613, 0.03358500654905552, 0.024042124017197908, 0.023999631355785506, 0.024018111243114194]


1.9732477111156617

In [498]:
lg_nrmse(y_test, np.sum(pred, axis=0)/5)

[0.25796750547982294, 0.3598343396165711, 0.35257505164232406, 0.19732038749308217, 0.07913340842930658, 0.10327848264108869, 0.13033117756189697, 0.024064688503433327, 0.023916779483540176, 0.038280009539930375, 0.03358500620660196, 0.024042123661841157, 0.02399963136866422, 0.024018112025927074]


1.9732477119275358

In [481]:
Input = keras.layers.Input(shape=X_train.shape[1])
x = keras.layers.Dense(32, activation="relu", kernel_initializer=keras.initializers.he_normal)(Input)
x_1 = keras.layers.BatchNormalization()(x)
x_2 = keras.layers.Dense(32, activation="relu", kernel_initializer=keras.initializers.he_normal)(x_1)
x= keras.layers.BatchNormalization()(x_2)

# x = keras.layers.Add()([x_1, x_2])
# x = keras.layers.BatchNormalization()(x)

Output = keras.layers.Dense(14, activation="linear", kernel_initializer=keras.initializers.he_normal)(x)

model = keras.models.Model(inputs = Input, outputs = Output)
model.compile(loss="mse", optimizer=keras.optimizers.Adam(learning_rate=0.05), metrics=["mse", "mae"])

In [485]:
Input = keras.layers.Input(shape=X_train.shape[1])
x = keras.layers.Dense(32, activation="relu", kernel_initializer=keras.initializers.he_normal)(Input)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Dense(32, activation="relu", kernel_initializer=keras.initializers.he_normal)(x)
x = keras.layers.BatchNormalization()(x)

Output = keras.layers.Dense(14, activation="linear", kernel_initializer=keras.initializers.he_normal)(x)

model = keras.models.Model(inputs = Input, outputs = Output)
model.compile(loss="mse", optimizer=keras.optimizers.Adam(learning_rate=0.01), metrics=["mse", "mae"])

In [488]:
model.fit(X_train_scaled, y_train, epochs=70, validation_data = (X_test_scaled, y_test), batch_size=256, callbacks=[reduce_lr, es])

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70

Epoch 00015: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70

Epoch 00021: ReduceLROnPlateau reducing learning rate to 0.0012499999720603228.
Epoch 22/70
Epoch 23/70
Epoch 24/70

Epoch 00024: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 25/70
Epoch 26/70
Epoch 27/70

Epoch 00027: ReduceLROnPlateau reducing learning rate to 0.0003124999930150807.
Epoch 28/70


<keras.callbacks.History at 0x19ed7c8ed30>

In [489]:
lg_nrmse(y_train, model.predict(X_train_scaled))

[0.2595140024890998, 0.3601153734183418, 0.3530520242441651, 0.18675111789412252, 0.07891708119237315, 0.11491165563047351, 0.13032811649405382, 0.02411649147389635, 0.02393450481760124, 0.039281182968370985, 0.033552245346686084, 0.024044466242581813, 0.024058385509737885, 0.02405155612177991]


1.9781693764105892

In [490]:
lg_nrmse(y_test, model.predict(X_test_scaled))

[0.2598716478394388, 0.3622498358743623, 0.35490040792564254, 0.19864034728027008, 0.07942686690116546, 0.10456161123152158, 0.13110728541433286, 0.02435067247956746, 0.024180521319207116, 0.038566905424082636, 0.033809961684162435, 0.02429498865456671, 0.024285057295465762, 0.024304235572907305]


1.9875720798859533

# XGBResressor

In [8]:
def prediction(X):
    pred=[]
    for i in range(14):
        pred.append(models[i].predict(X[list(X.columns)]))
    return np.transpose(pred)

In [34]:
models= []
for i in range(14):
    model = XGBRegressor()
    model.fit(X_train, y_train[:,i])
    models.append(model)

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


In [37]:
pred = prediction(X_test)

In [38]:
lg_nrmse(np.array(y_test),pred)

[0.262495545679322, 0.367331189787829, 0.3581213245182812, 0.19204247825260873, 0.08058056594375078, 0.11879452646607998, 0.13381720492578428, 0.02461084828458363, 0.024420097046054634, 0.03978019663952962, 0.03419074584699197, 0.024568896779369763, 0.024551394882750443, 0.024606731634419284]


2.0174704834590034

# Predict Y_11 First

In [50]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [51]:
y_train_11 = y_train["Y_11"]
y_test_11 = y_test["Y_11"]

In [52]:
model = RandomForestRegressor()
model.fit(X_train, y_train_11)

RandomForestRegressor()

In [53]:
X_train["Y_11"] = model.predict(X_train)
model_2 = RandomForestRegressor()
model_2.fit(X_train, y_train)

RandomForestRegressor()

In [59]:
# X_test["Y_11"] = model.predict(X_test)
pred = model_2.predict(X_test)

In [65]:
y_test

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
26267,1.100,0.669,0.497,13.258,30.600,16.605,4.063,-26.046,-26.311,-23.005,24.146,-26.301,-26.348,-26.237
9686,1.264,0.548,0.707,12.993,33.641,16.809,3.230,-26.418,-26.428,-22.225,24.515,-26.330,-26.322,-26.353
3845,1.211,0.883,0.673,15.075,33.486,16.623,3.140,-25.767,-25.704,-21.973,24.793,-25.621,-25.673,-25.775
19533,1.668,1.553,1.150,15.068,32.486,17.674,2.764,-25.829,-26.002,-21.193,25.282,-25.871,-25.864,-25.874
37747,1.056,0.828,0.752,15.572,32.294,16.999,3.331,-25.683,-25.652,-21.734,24.761,-25.741,-25.431,-25.682
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32197,1.841,1.431,1.453,14.111,28.704,17.051,3.700,-25.328,-25.481,-21.489,24.761,-25.130,-25.312,-25.263
19726,1.747,1.529,1.356,12.844,27.614,17.400,3.217,-25.904,-25.910,-21.907,24.939,-25.759,-25.784,-25.738
16537,0.824,0.641,0.426,10.815,27.039,15.235,3.395,-27.305,-27.270,-23.855,22.744,-27.300,-27.030,-27.208
11381,1.133,0.802,0.798,16.117,32.786,16.120,2.882,-27.529,-27.363,-23.028,23.727,-27.142,-27.105,-27.171


In [66]:
pred.shape

(9902, 14)

In [69]:
lg_nrmse(np.array(y_test), pred)

[0.2585291730802704, 0.36219962829722213, 0.35592982005569895, 0.19109341376725766, 0.08059765128520041, 0.12004935054679504, 0.13253389894506237, 0.024076788441728697, 0.023886355660720598, 0.0396287596597299, 0.03345471538767088, 0.023998868839385686, 0.023919889964008376, 0.02404848325845188]


1.9989487420730498

In [79]:
pd.DataFrame([0.2594777429390943, 0.3608651752575464, 0.35299554221448104, 0.18707286611813304, 0.07910317604116744, 0.11971384182649097, 0.13004823447152736, 0.024185815130866747, 0.023799344141429164, 0.0394765219333084, 0.03337109441235436, 0.023992849164211377, 0.02396454569120152, 0.02398859789548287], y.columns, ).transpose()

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,0.259478,0.360865,0.352996,0.187073,0.079103,0.119714,0.130048,0.024186,0.023799,0.039477,0.033371,0.023993,0.023965,0.023989


In [38]:
X_test["Y_11"] = model.predict()

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,2.056,1.456,1.680,10.502,29.632,16.083,4.276,-25.381,-25.529,-22.769,23.792,-25.470,-25.409,-25.304
1,1.446,1.184,1.268,18.507,33.179,16.736,3.229,-26.619,-26.523,-22.574,24.691,-26.253,-26.497,-26.438
2,1.251,0.665,0.782,14.082,31.801,17.080,2.839,-26.238,-26.216,-22.169,24.649,-26.285,-26.215,-26.370
3,1.464,1.079,1.052,16.975,34.503,17.143,3.144,-25.426,-25.079,-21.765,24.913,-25.254,-25.021,-25.345
4,0.983,0.646,0.689,15.047,32.602,17.569,3.138,-25.376,-25.242,-21.072,25.299,-25.072,-25.195,-24.974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39602,1.382,1.215,1.263,10.874,29.194,16.582,3.410,-26.486,-26.581,-22.772,24.261,-26.491,-26.584,-26.580
39603,1.482,0.606,1.083,8.759,29.859,15.659,3.406,-27.308,-27.203,-24.674,23.427,-27.250,-27.334,-27.325
39604,1.117,1.154,0.993,13.159,24.720,16.823,3.215,-26.502,-26.687,-22.577,24.301,-26.388,-26.425,-26.601
39605,0.895,0.187,0.477,9.123,26.412,15.757,4.216,-26.760,-26.634,-24.066,23.305,-26.536,-26.751,-26.635


In [73]:
test["Y_11"] = model.predict(test[list(test.columns[1:])])
pred = model_2.predict(test[list(test.columns[1:])])

# Submission

In [81]:
submission[list(submission.columns[1:])] = model.predict(test[list(test.columns[1:57])])

ValueError: Columns must be same length as key

In [11]:
submission[list(submission.columns[1:])]

Unnamed: 0,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,1.37042,1.08067,1.02674,14.13674,31.41828,16.65789,3.08395,-26.19325,-26.22382,-22.29272,24.39037,-26.18821,-26.14404,-26.17236
1,1.42886,1.17319,1.09452,13.65158,31.15232,16.22329,3.15314,-26.19807,-26.24616,-22.39741,24.30276,-26.15097,-26.16346,-26.16413
2,1.21642,0.95266,0.86042,14.38804,31.22530,16.52038,3.18037,-26.02520,-26.06254,-22.37890,24.24936,-25.99856,-25.96044,-26.01583
3,1.40970,1.11500,1.02503,14.51187,31.67424,16.93885,3.26058,-25.85866,-25.86165,-22.01656,24.63577,-25.81670,-25.80881,-25.79592
4,1.44573,1.14124,1.03202,15.46684,32.48300,17.30291,3.03163,-25.54372,-25.56196,-21.71003,25.04654,-25.48317,-25.45379,-25.47709
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,1.23272,0.89982,0.91766,12.98898,30.89798,16.54172,3.28001,-26.44070,-26.43320,-22.63336,24.22475,-26.36413,-26.35827,-26.37433
39604,1.29859,0.92517,0.97948,13.00220,30.86109,16.49300,3.28951,-26.47837,-26.49514,-22.71105,24.17363,-26.42971,-26.43080,-26.43203
39605,1.28119,0.94297,0.95854,12.92747,30.85459,16.48530,3.17961,-26.53564,-26.54602,-22.82558,24.17078,-26.47484,-26.45665,-26.49213
39606,1.23907,0.88723,0.92271,13.16475,30.97334,16.53911,3.21328,-26.51936,-26.50770,-22.76473,24.21642,-26.46513,-26.44715,-26.45363


In [36]:
submission.to_csv("./base_submission.csv", index=None)

In [76]:
submission[list(submission.columns[1:])] = pred

In [77]:
submission

Unnamed: 0,ID,Y_01,Y_02,Y_03,Y_04,Y_05,Y_06,Y_07,Y_08,Y_09,Y_10,Y_11,Y_12,Y_13,Y_14
0,TEST_00001,1.35715,1.03671,0.99336,14.09131,31.69138,16.72478,3.07788,-26.13267,-26.13365,-22.18535,24.48519,-26.05869,-26.03910,-26.07058
1,TEST_00002,1.42831,1.12109,1.02897,13.69207,31.43461,16.85669,3.10026,-26.02814,-26.04793,-22.01384,24.56186,-25.95946,-25.98444,-25.99097
2,TEST_00003,1.36997,1.06288,1.02683,13.41389,31.46871,16.40396,3.07858,-26.24907,-26.27106,-22.34511,24.45382,-26.17200,-26.19355,-26.18957
3,TEST_00004,1.44211,1.18059,1.05732,14.99515,32.40908,16.55465,2.92662,-25.73173,-25.75175,-21.90083,25.04108,-25.70984,-25.70497,-25.70435
4,TEST_00005,1.41916,1.09096,1.01876,14.41536,32.13287,17.22580,3.01528,-25.73981,-25.75782,-21.84433,24.96186,-25.70938,-25.72326,-25.71942
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39603,TEST_39604,1.28790,0.92080,0.91305,14.44741,32.25306,16.99792,3.03814,-26.19200,-26.19888,-22.31393,24.70607,-26.12315,-26.10704,-26.12000
39604,TEST_39605,1.32271,0.97324,0.97803,15.14405,31.78722,16.99181,3.07120,-26.20886,-26.22867,-22.33916,24.68012,-26.13987,-26.13783,-26.14408
39605,TEST_39606,1.25544,0.93182,0.90208,14.36359,32.09843,16.98801,3.04539,-26.18726,-26.16501,-22.36627,24.69319,-26.11795,-26.09544,-26.14106
39606,TEST_39607,1.37600,1.03907,1.01584,14.97587,32.87738,17.18376,2.95024,-26.11946,-26.13365,-22.08794,24.88659,-26.06411,-26.05527,-26.05518


In [78]:
submission.to_csv("./base_submission.csv", index=None)