In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from matplotlib import pyplot as plt
from matplotlib import rcParams
import numpy as np
from pathlib import Path
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
import seaborn as sns
from tensorflow import keras
from tensorflow.keras import Input, Model, Sequential
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import plot_model, to_categorical
import warnings

In [3]:
rcParams['figure.figsize'] = (16, 8)
plt.style.use('fivethirtyeight')
pd.set_option('max_columns', 100)
pd.set_option("display.precision", 4)
warnings.simplefilter('ignore')


# 학습데이터 로드. 
전에 했었던 feature .csv 를 이용해서 진행할 것이다. 

In [4]:
data_dir = Path('./Dacon')
feature_dir = Path('./Dacon')
val_dir = Path('./Dacon')
tst_dir = Path('./Dacon')
sub_dir = Path('./Dacon')

trn_file = data_dir / 'train.csv'
tst_file = data_dir / 'test.csv'
sample_file = data_dir / 'sample_submission.csv'

target_col = 'class'
n_fold = 5
n_class = 3
seed = 42

In [5]:
algo_name = 'nncv'
feature_name = 'polyfeature'
model_name = f'{algo_name}_{feature_name}'

feature_file = feature_dir / f'{feature_name}.csv'
p_val_file = val_dir / f'{model_name}.val.csv'
p_tst_file = tst_dir / f'{model_name}.tst.csv'
sub_file = sub_dir / f'{model_name}.csv'

In [6]:
df = pd.read_csv(feature_dir / 'feature_class2_feature.csv', index_col=0)
print(df.shape)
df.head()

(400000, 20)


Unnamed: 0_level_0,z,redshift,dered_u,dered_g,dered_r,dered_i,dered_z,nObserve,airmass_u,class,d_dered_u,d_dered_g,d_dered_r,d_dered_i,d_dered_z,d_dered_ig,d_dered_zg,d_dered_rz,d_dered_iz,d_obs_det
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0,16.9396,-8.1086e-05,23.1243,20.2578,18.9551,17.6321,16.9089,18,1.1898,0.0,-0.1397,-0.079,-0.0544,-0.0403,-0.0307,-2.6257,-3.3488,2.0462,0.7232,0
1,13.1689,0.0045061,14.9664,14.0045,13.4114,13.2363,13.1347,1,1.2533,1.0,-0.0857,-0.0574,-0.041,-0.0322,-0.0343,-0.7683,-0.8698,0.2767,0.1016,0
2,15.35,0.00047198,16.6076,15.6866,15.44,15.3217,15.2961,2,1.0225,0.0,-0.1787,-0.1388,-0.0963,-0.0718,-0.054,-0.3649,-0.3905,0.144,0.0257,0
3,19.6346,5.8143e-06,25.3536,20.9947,20.0873,19.7947,19.5552,4,1.2054,0.0,-0.307,-0.1941,-0.1339,-0.1003,-0.0795,-1.2,-1.4395,0.5321,0.2395,1
4,17.9826,-3.3247e-05,23.7714,20.4338,18.863,18.1903,17.8759,13,1.1939,0.0,-0.682,-0.2653,-0.1794,-0.1339,-0.1067,-2.2436,-2.5579,0.9871,0.3144,1


# fit_transform 이란? 
scaler = standardscaler 는 데이터들을 다 standard scaler 를 실행시키는 것. 
fit 과 transform 을 한번에 실행하는 것임. 두번 적지 않아서 훨씬 효율적이다.
target_col 을 제외하고 transform 을 진행하게 시킨다. 

# feature_names 
를 통해서 새롭게 생성된 feature 들의 이름들을 바로 뽑아 낼 수 있다.

In [7]:
scaler = StandardScaler()
poly = PolynomialFeatures(2)
X = poly.fit_transform(scaler.fit_transform(df.drop(target_col, axis=1)))
feature_names = poly.get_feature_names(df.columns)
feature_names

['1',
 'z',
 'redshift',
 'dered_u',
 'dered_g',
 'dered_r',
 'dered_i',
 'dered_z',
 'nObserve',
 'airmass_u',
 'class',
 'd_dered_u',
 'd_dered_g',
 'd_dered_r',
 'd_dered_i',
 'd_dered_z',
 'd_dered_ig',
 'd_dered_zg',
 'd_dered_rz',
 'd_dered_iz',
 'z^2',
 'z redshift',
 'z dered_u',
 'z dered_g',
 'z dered_r',
 'z dered_i',
 'z dered_z',
 'z nObserve',
 'z airmass_u',
 'z class',
 'z d_dered_u',
 'z d_dered_g',
 'z d_dered_r',
 'z d_dered_i',
 'z d_dered_z',
 'z d_dered_ig',
 'z d_dered_zg',
 'z d_dered_rz',
 'z d_dered_iz',
 'redshift^2',
 'redshift dered_u',
 'redshift dered_g',
 'redshift dered_r',
 'redshift dered_i',
 'redshift dered_z',
 'redshift nObserve',
 'redshift airmass_u',
 'redshift class',
 'redshift d_dered_u',
 'redshift d_dered_g',
 'redshift d_dered_r',
 'redshift d_dered_i',
 'redshift d_dered_z',
 'redshift d_dered_ig',
 'redshift d_dered_zg',
 'redshift d_dered_rz',
 'redshift d_dered_iz',
 'dered_u^2',
 'dered_u dered_g',
 'dered_u dered_r',
 'dered_u dered

In [8]:
df_poly = pd.DataFrame(data=X, columns=feature_names, index=df.index)
df_poly[target_col] = df[target_col]
df_poly.head()
df_poly.to_csv(feature_file)

del 은 삭제 하는 함수 임. 

In [9]:
del df_poly, df

In [10]:
df=pd.read_csv(feature_file, index_col=0)
print(df.shape)
df.head()

(400000, 210)


Unnamed: 0_level_0,1,z,redshift,dered_u,dered_g,dered_r,dered_i,dered_z,nObserve,airmass_u,class,d_dered_u,d_dered_g,d_dered_r,d_dered_i,d_dered_z,d_dered_ig,d_dered_zg,d_dered_rz,d_dered_iz,z^2,z redshift,z dered_u,z dered_g,z dered_r,z dered_i,z dered_z,z nObserve,z airmass_u,z class,z d_dered_u,z d_dered_g,z d_dered_r,z d_dered_i,z d_dered_z,z d_dered_ig,z d_dered_zg,z d_dered_rz,z d_dered_iz,redshift^2,redshift dered_u,redshift dered_g,redshift dered_r,redshift dered_i,redshift dered_z,redshift nObserve,redshift airmass_u,redshift class,redshift d_dered_u,redshift d_dered_g,...,class d_dered_z,class d_dered_ig,class d_dered_zg,class d_dered_rz,class d_dered_iz,d_dered_u^2,d_dered_u d_dered_g,d_dered_u d_dered_r,d_dered_u d_dered_i,d_dered_u d_dered_z,d_dered_u d_dered_ig,d_dered_u d_dered_zg,d_dered_u d_dered_rz,d_dered_u d_dered_iz,d_dered_g^2,d_dered_g d_dered_r,d_dered_g d_dered_i,d_dered_g d_dered_z,d_dered_g d_dered_ig,d_dered_g d_dered_zg,d_dered_g d_dered_rz,d_dered_g d_dered_iz,d_dered_r^2,d_dered_r d_dered_i,d_dered_r d_dered_z,d_dered_r d_dered_ig,d_dered_r d_dered_zg,d_dered_r d_dered_rz,d_dered_r d_dered_iz,d_dered_i^2,d_dered_i d_dered_z,d_dered_i d_dered_ig,d_dered_i d_dered_zg,d_dered_i d_dered_rz,d_dered_i d_dered_iz,d_dered_z^2,d_dered_z d_dered_ig,d_dered_z d_dered_zg,d_dered_z d_dered_rz,d_dered_z d_dered_iz,d_dered_ig^2,d_dered_ig d_dered_zg,d_dered_ig d_dered_rz,d_dered_ig d_dered_iz,d_dered_zg^2,d_dered_zg d_dered_rz,d_dered_zg d_dered_iz,d_dered_rz^2,d_dered_rz d_dered_iz,d_dered_iz^2
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
0,1.0,0.0002,-0.1093,1.817,0.127,0.9086,0.0208,0.0011,1.3136,0.12,0.0,0.0063,0.1783,-0.0002,0.0008,-0.0981,-0.0626,0.0367,0.0156,-0.1895,3.4181e-08,-2.0203e-05,0.0003,2.3488e-05,0.0002,3.8455e-06,2.0594e-07,0.0002,2.2191e-05,2.2235e-05,1.1669e-06,3.296e-05,-3.749e-08,1.4942e-07,-1.8144e-05,-1.1575e-05,6.7832e-06,2.8794e-06,-3.5042e-05,0.0119,-0.1986,-0.0139,-0.0993,-0.0023,-0.0001,-0.1435,-0.0131,-0.0131,-0.0007,-0.0195,...,-0.0118,-0.0075,0.0044,0.0019,-0.0228,3.9834e-05,0.0011252,-1.2798e-06,5.1009e-06,-0.00061941,-0.00039513,0.00023156,9.8297e-05,-0.0012,0.0318,-3.615e-05,0.00014408,-0.0175,-0.011161,0.0065,0.0027766,-0.0338,4.1119e-08,-1.6389e-07,1.9901e-05,1.2695e-05,-7.4398e-06,-3.1582e-06,3.8434e-05,6.5318e-07,-7.9317e-05,-5.0598e-05,2.9652e-05,1.2587e-05,-0.00015318,0.0096316,0.0061442,-0.0036007,-0.0015285,0.0186,0.0039195,-0.002297,-0.00097505,0.0119,0.0013461,0.00057142,-0.007,0.00024256,-0.003,0.0359
1,1.0,-0.0719,-0.1007,-2.4251,-0.2656,-2.8534,-0.1751,-0.1054,-0.601,0.6658,1.0,0.0081,0.2144,0.0001,0.0007,0.019,0.0156,-0.0133,-0.0071,-0.1895,0.0051738,0.0072464,0.1744,0.019104,0.2052,0.012597,0.0075809,0.0432,-0.047893,-0.0142,-0.00058349,-0.015419,-8.7153e-06,-5.1123e-05,-0.0013637,-0.0011239,0.0009546,0.00050978,0.013633,0.0101,0.2443,0.0268,0.2875,0.0176,0.0106,0.0605,-0.0671,-0.0199,-0.0008,-0.0216,...,0.0037,0.0031,-0.0026,-0.0014,-0.0374,6.5806e-05,0.0017389,9.829e-07,5.7656e-06,0.00015379,0.00012675,-0.00010766,-5.7493e-05,-0.0015,0.046,2.5973e-05,0.00015236,0.0041,0.0033493,-0.0028,-0.0015193,-0.0406,1.4681e-08,8.6118e-08,2.2971e-06,1.8932e-06,-1.6081e-06,-8.5874e-07,-2.2965e-05,5.0517e-07,1.3475e-05,1.1105e-05,-9.4327e-06,-5.0373e-06,-0.00013471,0.00035943,0.00029622,-0.00025161,-0.00013437,-0.0036,0.00024412,-0.00020736,-0.00011074,-0.003,0.00017613,9.406e-05,0.0025,5.023e-05,0.0013,0.0359
2,1.0,-0.0302,-0.1082,-1.5717,-0.16,-1.4768,-0.0822,-0.0444,-0.4883,-1.3179,0.0,0.0013,0.065,-0.0015,0.0002,0.0444,0.0308,-0.017,-0.0099,-0.1895,0.00091298,0.0032708,0.0475,0.0048338,0.0446,0.002483,0.0013416,0.0148,0.039822,-0.0019476,-3.9487e-05,-0.0019652,4.4024e-05,-5.3424e-06,-0.0013413,-0.00092915,0.00051426,0.00029778,0.0057269,0.0117,0.1701,0.0173,0.1599,0.0089,0.0048,0.0529,0.1427,-0.007,-0.0001,-0.007,...,0.0029,0.002,-0.0011,-0.0006,-0.0122,1.7078e-06,8.4993e-05,-1.904e-06,2.3106e-07,5.8012e-05,4.0186e-05,-2.2242e-05,-1.2879e-05,-0.0002,0.0042,-9.4759e-05,1.1499e-05,0.0029,0.002,-0.0011,-0.00064096,-0.0123,2.1228e-06,-2.5761e-07,-6.4677e-05,-4.4803e-05,2.4797e-05,1.4359e-05,0.00027615,3.1262e-08,7.8488e-06,5.437e-06,-3.0093e-06,-1.7425e-06,-3.3512e-05,0.0019706,0.0013651,-0.00075553,-0.00043748,-0.0084,0.00094561,-0.00052337,-0.00030305,-0.0058,0.00028967,0.00016773,0.0032,9.7124e-05,0.0019,0.0359
3,1.0,0.0517,-0.1091,2.9763,0.1733,1.6769,0.1172,0.0758,-0.2631,0.2544,0.0,-0.0033,-0.0366,-0.0026,-0.0005,-0.0083,-0.0024,-0.0061,-0.0021,0.7897,0.0026757,-0.0056442,0.154,0.008965,0.0867,0.0060617,0.0039204,-0.0136,0.013162,-0.00615,-0.00017133,-0.0018939,-0.0001341,-2.6646e-05,-0.00042725,-0.00012163,-0.00031351,-0.00010655,0.040851,0.0119,-0.3248,-0.0189,-0.183,-0.0128,-0.0083,0.0287,-0.0278,0.013,0.0004,0.004,...,0.001,0.0003,0.0007,0.0002,-0.0939,1.0971e-05,0.00012127,8.5865e-06,1.7062e-06,2.7358e-05,7.7883e-06,2.0075e-05,6.8228e-06,-0.0026,0.0013,9.4914e-05,1.886e-05,0.0003,8.609e-05,0.0002,7.5418e-05,-0.0289,6.7204e-06,1.3354e-06,2.1412e-05,6.0957e-06,1.5712e-05,5.34e-06,-0.0020473,2.6535e-07,4.2547e-06,1.2112e-06,3.1221e-06,1.0611e-06,-0.00040681,6.8222e-05,1.9422e-05,5.0061e-05,1.7014e-05,-0.0065,5.529e-06,1.4251e-05,4.8436e-06,-0.0019,3.6734e-05,1.2485e-05,-0.0048,4.2431e-06,-0.0016,0.6237
4,1.0,0.0201,-0.1092,2.1535,0.1381,0.8461,0.0457,0.0284,0.7505,0.156,0.0,-0.0093,-0.1596,-0.0039,-0.0013,-0.0741,-0.0376,0.0068,0.0007,0.7897,0.00040534,-0.0021983,0.0434,0.0027804,0.017,0.00091963,0.00057181,0.0151,0.0031404,-0.013188,-0.00018663,-0.0032124,-7.9125e-05,-2.5252e-05,-0.0014909,-0.00075793,0.00013661,1.3497e-05,0.0159,0.0119,-0.2351,-0.0151,-0.0924,-0.005,-0.0031,-0.0819,-0.017,0.0715,0.001,0.0174,...,0.0485,0.0247,-0.0044,-0.0004,-0.5173,8.5934e-05,0.0014791,3.6433e-05,1.1627e-05,0.00068645,0.00034898,-6.2902e-05,-6.2147e-06,-0.0073,0.0255,0.00062708,0.00020013,0.0118,0.0060068,-0.0011,-0.00010697,-0.126,1.5446e-05,4.9295e-06,0.00029103,0.00014796,-2.6668e-05,-2.6348e-06,-0.0031038,1.5732e-06,9.288e-05,4.7219e-05,-8.5109e-06,-8.4088e-07,-0.00099055,0.0054834,0.0027877,-0.00050247,-4.9644e-05,-0.0585,0.0014173,-0.00025545,-2.5238e-05,-0.0297,4.6043e-05,4.549e-06,0.0054,4.4944e-07,0.0005,0.6237


In [12]:
print(df.shape)
df.head()

(400000, 210)


Unnamed: 0_level_0,1,z,redshift,dered_u,dered_g,dered_r,dered_i,dered_z,nObserve,airmass_u,class,d_dered_u,d_dered_g,d_dered_r,d_dered_i,d_dered_z,d_dered_ig,d_dered_zg,d_dered_rz,d_dered_iz,z^2,z redshift,z dered_u,z dered_g,z dered_r,z dered_i,z dered_z,z nObserve,z airmass_u,z class,z d_dered_u,z d_dered_g,z d_dered_r,z d_dered_i,z d_dered_z,z d_dered_ig,z d_dered_zg,z d_dered_rz,z d_dered_iz,redshift^2,redshift dered_u,redshift dered_g,redshift dered_r,redshift dered_i,redshift dered_z,redshift nObserve,redshift airmass_u,redshift class,redshift d_dered_u,redshift d_dered_g,...,class d_dered_z,class d_dered_ig,class d_dered_zg,class d_dered_rz,class d_dered_iz,d_dered_u^2,d_dered_u d_dered_g,d_dered_u d_dered_r,d_dered_u d_dered_i,d_dered_u d_dered_z,d_dered_u d_dered_ig,d_dered_u d_dered_zg,d_dered_u d_dered_rz,d_dered_u d_dered_iz,d_dered_g^2,d_dered_g d_dered_r,d_dered_g d_dered_i,d_dered_g d_dered_z,d_dered_g d_dered_ig,d_dered_g d_dered_zg,d_dered_g d_dered_rz,d_dered_g d_dered_iz,d_dered_r^2,d_dered_r d_dered_i,d_dered_r d_dered_z,d_dered_r d_dered_ig,d_dered_r d_dered_zg,d_dered_r d_dered_rz,d_dered_r d_dered_iz,d_dered_i^2,d_dered_i d_dered_z,d_dered_i d_dered_ig,d_dered_i d_dered_zg,d_dered_i d_dered_rz,d_dered_i d_dered_iz,d_dered_z^2,d_dered_z d_dered_ig,d_dered_z d_dered_zg,d_dered_z d_dered_rz,d_dered_z d_dered_iz,d_dered_ig^2,d_dered_ig d_dered_zg,d_dered_ig d_dered_rz,d_dered_ig d_dered_iz,d_dered_zg^2,d_dered_zg d_dered_rz,d_dered_zg d_dered_iz,d_dered_rz^2,d_dered_rz d_dered_iz,d_dered_iz^2
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1
0,1.0,0.0002,-0.1093,1.817,0.127,0.9086,0.0208,0.0011,1.3136,0.12,0.0,0.0063,0.1783,-0.0002,0.0008,-0.0981,-0.0626,0.0367,0.0156,-0.1895,3.4181e-08,-2.0203e-05,0.0003,2.3488e-05,0.0002,3.8455e-06,2.0594e-07,0.0002,2.2191e-05,2.2235e-05,1.1669e-06,3.296e-05,-3.749e-08,1.4942e-07,-1.8144e-05,-1.1575e-05,6.7832e-06,2.8794e-06,-3.5042e-05,0.0119,-0.1986,-0.0139,-0.0993,-0.0023,-0.0001,-0.1435,-0.0131,-0.0131,-0.0007,-0.0195,...,-0.0118,-0.0075,0.0044,0.0019,-0.0228,3.9834e-05,0.0011252,-1.2798e-06,5.1009e-06,-0.00061941,-0.00039513,0.00023156,9.8297e-05,-0.0012,0.0318,-3.615e-05,0.00014408,-0.0175,-0.011161,0.0065,0.0027766,-0.0338,4.1119e-08,-1.6389e-07,1.9901e-05,1.2695e-05,-7.4398e-06,-3.1582e-06,3.8434e-05,6.5318e-07,-7.9317e-05,-5.0598e-05,2.9652e-05,1.2587e-05,-0.00015318,0.0096316,0.0061442,-0.0036007,-0.0015285,0.0186,0.0039195,-0.002297,-0.00097505,0.0119,0.0013461,0.00057142,-0.007,0.00024256,-0.003,0.0359
1,1.0,-0.0719,-0.1007,-2.4251,-0.2656,-2.8534,-0.1751,-0.1054,-0.601,0.6658,1.0,0.0081,0.2144,0.0001,0.0007,0.019,0.0156,-0.0133,-0.0071,-0.1895,0.0051738,0.0072464,0.1744,0.019104,0.2052,0.012597,0.0075809,0.0432,-0.047893,-0.0142,-0.00058349,-0.015419,-8.7153e-06,-5.1123e-05,-0.0013637,-0.0011239,0.0009546,0.00050978,0.013633,0.0101,0.2443,0.0268,0.2875,0.0176,0.0106,0.0605,-0.0671,-0.0199,-0.0008,-0.0216,...,0.0037,0.0031,-0.0026,-0.0014,-0.0374,6.5806e-05,0.0017389,9.829e-07,5.7656e-06,0.00015379,0.00012675,-0.00010766,-5.7493e-05,-0.0015,0.046,2.5973e-05,0.00015236,0.0041,0.0033493,-0.0028,-0.0015193,-0.0406,1.4681e-08,8.6118e-08,2.2971e-06,1.8932e-06,-1.6081e-06,-8.5874e-07,-2.2965e-05,5.0517e-07,1.3475e-05,1.1105e-05,-9.4327e-06,-5.0373e-06,-0.00013471,0.00035943,0.00029622,-0.00025161,-0.00013437,-0.0036,0.00024412,-0.00020736,-0.00011074,-0.003,0.00017613,9.406e-05,0.0025,5.023e-05,0.0013,0.0359
2,1.0,-0.0302,-0.1082,-1.5717,-0.16,-1.4768,-0.0822,-0.0444,-0.4883,-1.3179,0.0,0.0013,0.065,-0.0015,0.0002,0.0444,0.0308,-0.017,-0.0099,-0.1895,0.00091298,0.0032708,0.0475,0.0048338,0.0446,0.002483,0.0013416,0.0148,0.039822,-0.0019476,-3.9487e-05,-0.0019652,4.4024e-05,-5.3424e-06,-0.0013413,-0.00092915,0.00051426,0.00029778,0.0057269,0.0117,0.1701,0.0173,0.1599,0.0089,0.0048,0.0529,0.1427,-0.007,-0.0001,-0.007,...,0.0029,0.002,-0.0011,-0.0006,-0.0122,1.7078e-06,8.4993e-05,-1.904e-06,2.3106e-07,5.8012e-05,4.0186e-05,-2.2242e-05,-1.2879e-05,-0.0002,0.0042,-9.4759e-05,1.1499e-05,0.0029,0.002,-0.0011,-0.00064096,-0.0123,2.1228e-06,-2.5761e-07,-6.4677e-05,-4.4803e-05,2.4797e-05,1.4359e-05,0.00027615,3.1262e-08,7.8488e-06,5.437e-06,-3.0093e-06,-1.7425e-06,-3.3512e-05,0.0019706,0.0013651,-0.00075553,-0.00043748,-0.0084,0.00094561,-0.00052337,-0.00030305,-0.0058,0.00028967,0.00016773,0.0032,9.7124e-05,0.0019,0.0359
3,1.0,0.0517,-0.1091,2.9763,0.1733,1.6769,0.1172,0.0758,-0.2631,0.2544,0.0,-0.0033,-0.0366,-0.0026,-0.0005,-0.0083,-0.0024,-0.0061,-0.0021,0.7897,0.0026757,-0.0056442,0.154,0.008965,0.0867,0.0060617,0.0039204,-0.0136,0.013162,-0.00615,-0.00017133,-0.0018939,-0.0001341,-2.6646e-05,-0.00042725,-0.00012163,-0.00031351,-0.00010655,0.040851,0.0119,-0.3248,-0.0189,-0.183,-0.0128,-0.0083,0.0287,-0.0278,0.013,0.0004,0.004,...,0.001,0.0003,0.0007,0.0002,-0.0939,1.0971e-05,0.00012127,8.5865e-06,1.7062e-06,2.7358e-05,7.7883e-06,2.0075e-05,6.8228e-06,-0.0026,0.0013,9.4914e-05,1.886e-05,0.0003,8.609e-05,0.0002,7.5418e-05,-0.0289,6.7204e-06,1.3354e-06,2.1412e-05,6.0957e-06,1.5712e-05,5.34e-06,-0.0020473,2.6535e-07,4.2547e-06,1.2112e-06,3.1221e-06,1.0611e-06,-0.00040681,6.8222e-05,1.9422e-05,5.0061e-05,1.7014e-05,-0.0065,5.529e-06,1.4251e-05,4.8436e-06,-0.0019,3.6734e-05,1.2485e-05,-0.0048,4.2431e-06,-0.0016,0.6237
4,1.0,0.0201,-0.1092,2.1535,0.1381,0.8461,0.0457,0.0284,0.7505,0.156,0.0,-0.0093,-0.1596,-0.0039,-0.0013,-0.0741,-0.0376,0.0068,0.0007,0.7897,0.00040534,-0.0021983,0.0434,0.0027804,0.017,0.00091963,0.00057181,0.0151,0.0031404,-0.013188,-0.00018663,-0.0032124,-7.9125e-05,-2.5252e-05,-0.0014909,-0.00075793,0.00013661,1.3497e-05,0.0159,0.0119,-0.2351,-0.0151,-0.0924,-0.005,-0.0031,-0.0819,-0.017,0.0715,0.001,0.0174,...,0.0485,0.0247,-0.0044,-0.0004,-0.5173,8.5934e-05,0.0014791,3.6433e-05,1.1627e-05,0.00068645,0.00034898,-6.2902e-05,-6.2147e-06,-0.0073,0.0255,0.00062708,0.00020013,0.0118,0.0060068,-0.0011,-0.00010697,-0.126,1.5446e-05,4.9295e-06,0.00029103,0.00014796,-2.6668e-05,-2.6348e-06,-0.0031038,1.5732e-06,9.288e-05,4.7219e-05,-8.5109e-06,-8.4088e-07,-0.00099055,0.0054834,0.0027877,-0.00050247,-4.9644e-05,-0.0585,0.0014173,-0.00025545,-2.5238e-05,-0.0297,4.6043e-05,4.549e-06,0.0054,4.4944e-07,0.0005,0.6237


In [13]:
y = df[target_col].values[:320000]
df.drop(target_col, axis=1, inplace = True)
trn = df.iloc[:320000].values
tst = df.iloc[320000:].values
feature_name = df.columns.tolist()
print(y.shape, trn.shape,tst.shape)

(320000,) (320000, 209) (80000, 209)


# stratified K-Fold Cross validation

In [14]:
cv= StratifiedKFold(n_splits=n_fold,shuffle=True, random_state=seed)

# Keras Model

# Sequential API 

손쉽게 신경망을 생성할 수 있는 API 이다.
입력층에서 출력층까지 차례로 레이어를 추가 할 수 있는 것이다. 


쉽게 함수로써 신경망을 생성 할 수 있다.
첫줄 에서, 첫번째 hidden layer 에서
units=64 라는 것은, 신경망에서 neuron 를 64 개를 만들고 activation fun 은 relu 를 사용한다. 
그 다음은 output layer. unit=n_class 에서는 우리 데이콘 에는 class =3 개 있기 때문에 출력측에 N_class 로 3개의 neuron 이 있고, activation func 으로는 softmax 를 사용한다.
compile 이라는 단계가 있는데, 손실함수를 정의해 주고, 학습 할 때 사용하는 optimizer. 일반적인 optimizer 를 사용할 것인지. ada,adam 같은 룰을 사용할 수 잇따. 
제일 많이 사용하는 것이 adam 을 많이 사용한다. 잘 모르면 이거 써라. 

학습을 할때는 동일하게 fit 으로 학습을 한다. 
주의 할 점은 multiclass 에서는 xgboost, lightgbm 에서는 label 인코딩 된 종속 변수를 fit 의 입력 변수로 사용했었으나, 
keras 에서는 one-hot encoding 을 해 줘야된다. 
그 것을 해주는 유틸리티 함수가 to-catogorical 이다. 이렇게 해서 학습을 시키는 것이다. 
sequential 을 통해서 만들어낸 것을 시각화 하면 , 
input layer, dense layer (hidden layer), 출력 layer (3개의 아웃풋)
fit 에서 y 를 따로 만들었는데, 이 로 인해서 1차 스칼라 값이 나오는 것이 아니라. 
2차 매트릭스로 뽑아 내는 것이다. 

In [15]:
def get_model():
    mode= Sequential()
    
    model.add(Dense(units=64, activation='relu'))
    model.add(Dense(units=n_class, activation = 'softmax'))
    model.compile(loss='categorical_crossentropy',optimizer='adam')
    return model

# functional API 

sequential 은 layer 하나 하나를 순차적으로 무조건 정의를 해야되는데, 
functional 의 경우에는 그렇지가 않다. 
순차적으로만 구성하는게 아니라, 다양한 종류의 input 갖고 있다. 
ppt 13 쪽에 나와있는 그림 처럼
input 값들이 여러개를 갖고, 새로운 Layer 를만들고 ... 
sequence 가 명확하지 않기 때문에 다른식으로 만들어야된다. 



그러나 예시로는, 앞에서 sequenctial 을 기반으로 functional 을 만들 것이다. 
복잡하게 만드는 functional 은 링크예시를 통해서 확인을 해라. 


input 을 정의해라. sequence 에서는 input layer 를 정의할때 input size 를 정의 했었었다. 
64개의 뉴런을 사용할 것이고, input layer 를 입력 받을 것이고, 동일하게 64 개의 뉴런을 사용. 
출력층을 define 할 때는 input 층, output 층을 정의해 주고, 
compile 을 해주고, fit, predict 함수를 이용해서 구현하게 되는 것이다. 

더 복잡한 neurla network 를 구현할 때는 functional api 를 사용하는 것이다. 

In [16]:
def get_model():
    inputs = Input(shape=(len(feature_name),))
    #input size 를 입력
    x = Dense(64, activation='relu')(inputs)
    #input 값을 맨 뒤에 적어서 정해 주는 것이다. 
    outputs = Dense(n_class, activation = 'softmax')(x)
    #x 를 통해서 input 값을 정하는 것이다. 
    model= Model(inputs=inputs, outputs=outputs)
    #모델을 마지막으로 정할 때는 input , output 이 뭔지 정해 주고. 
    model.compile(loss='categorical_crossentropy',optimizer='adam')
    return model
    

# 신경망 모델 학습!

# Callbacks


earlystoping : 검증 셋 성능이 향상 되지 않으면 학습 중단.

reduceLROnPlateau : 검증셋 서능이 향상 되지 않으면, learning rate 감소

Learning Rate Scheduler : Learning Rate 을 커스텀 하게 변경.

ModelCheckpoinmt : 모델 저장.

TensorBoard : 학습 과정 시각화.



In [19]:
p_val = np.zeros((trn.shape[0],n_class))
p_tst = np.zeros((tst.shape[0],n_class))

for i, (i_trn,i_val) in enumerate(cv.split(trn,y),1):
    print(f'training model for CV #{i}')
    clf=get_model()
    
    es=EarlyStopping(monitor='val_loss', min_delta=0.001, 
                     patience=5, verbose=1,mode='min',
                    baseline=None, restore_best_weights=True)
    rlr=ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                         patience=3,min_lr=1e-6,mode='min',
                         verbose=1)
    
    clf.fit(trn[i_trn], to_categorical(y[i_trn]),
           validation_data=(trn[i_val], to_categorical(y[i_val])),
           epochs=10,
            callbacks=[es,rlr])
    p_val[i_val, :] = clf.predict(trn[i_val])
    p_tst += clf.predict(tst) / n_fold

training model for CV #1
Train on 256000 samples, validate on 64000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
training model for CV #2
Train on 256000 samples, validate on 64000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 5/10
Epoch 6/10
Epoch 00006: early stopping
training model for CV #3
Train on 256000 samples, validate on 64000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
training model for CV #4
Train on 256000 samples, validate on 64000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 9/10
Epoch 10/10
Epoch 00010: early stopping
training model for CV #5
Train on 256000 samples, validate on 64000 samples
Epoch

In [20]:
print(f'{accuracy_score(y,np.argmax(p_val,axis=1)) *100:.4f}%')

91.3141%


In [21]:
print(p_val.shape, p_tst.shape)

(320000, 3) (80000, 3)


In [22]:
np.savetxt(p_val_file, p_val, fmt='%.6f',delimiter=',')
np.savetxt(p_tst_file, p_tst, fmt='%.6f',delimiter=',')

# 신경망 모델 시각화

In [23]:
clf.summary()

Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 209)]             0         
_________________________________________________________________
dense_8 (Dense)              (None, 64)                13440     
_________________________________________________________________
dense_9 (Dense)              (None, 3)                 195       
Total params: 13,635
Trainable params: 13,635
Non-trainable params: 0
_________________________________________________________________


In [25]:
# plot_model(clf)

# 제출 파일 생성

In [26]:
sub=pd.read_csv(sample_file, index_col=0)
print(sub.shape)
sub.head()

(80000, 1)


Unnamed: 0_level_0,class
id,Unnamed: 1_level_1
320000,0
320001,0
320002,0
320003,0
320004,0


In [27]:
sub[target_col] = np.argmax(p_tst, axis=1)
sub.head()

Unnamed: 0_level_0,class
id,Unnamed: 1_level_1
320000,2
320001,0
320002,2
320003,0
320004,2


In [28]:
sub[target_col].value_counts()

2    41956
0    30315
1     7729
Name: class, dtype: int64

In [29]:
sub.to_csv(sub_file)