In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

plt.rc('font', family='Malgun Gothic')  # 폰트 지정
plt.rc('axes', unicode_minus=False)  # 마이너스 폰트 설정
%config InlineBackend.figure_format = 'retina'  # 그래프 글씨 뚜렷

In [2]:
df_train = pd.read_csv("../data/df_train_004.csv")
df_test = pd.read_csv("../data/df_test_004.csv")

df_train['target'] = df_train['사망자수']*10 + df_train['중상자수']*5 + df_train['경상자수']*3 + df_train['부상자수']
df_train.drop(columns = ['사망자수','중상자수','경상자수','부상자수'], axis=1, inplace = True)

In [3]:
X = df_train.drop(columns = 'target',axis=1)
y = df_train['target']

In [4]:
x_train, x_test, y_train , y_test = train_test_split(X, y, test_size = 0.3, random_state=42 )

# again

In [34]:
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.metrics import mean_squared_error
from keras.optimizers import Adam

In [35]:
# 모델 생성 함수
def create_model(activation='relu', optimizer='adam', learning_rate=0.001):
    model = Sequential()
    model.add(Dense(128, input_dim=33, activation=activation))
    model.add(Dense(64, activation=activation))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer=optimizer(learning_rate=learning_rate))
    return model

In [36]:
# KerasRegressor를 사용하여 모델을 Scikit-learn 추정기로 래핑
model = KerasRegressor(build_fn=create_model, epochs=50, batch_size=32, verbose=0)

  model = KerasRegressor(build_fn=create_model, epochs=50, batch_size=32, verbose=0)


In [37]:
# 탐색할 매개변수 그리드 정의
param_grid = {
    'activation': ['relu', 'leaky_relu', 'prelu', 'swish'],
    'optimizer': [Adam, 'rmsprop', 'sgd', 'adagrad'],
    'learning_rate': [0.001, 0.01, 0.1]
}

In [38]:
# GridSearchCV를 사용하여 최적의 매개변수 찾기
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=3)
grid_result = grid.fit(x_train, y_train)

117 fits failed out of a total of 144.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
81 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\USER\AppData\Roaming\Python\Python39\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\ProgramData\Anaconda3\lib\site-packages\keras\wrappers\scikit_learn.py", line 164, in fit
    self.model = self.build_fn(**self.filter_sk_params(self.build_fn))
  File "C:\Users\USER\AppData\Local\Temp\ipykernel_15356\2403317239.py", line 7, in create_model
    model.compile(loss='mean_squared_error', optimizer=optimizer(learning_rate=learning_rate))
TypeError: 'str' object is not ca

In [42]:
# 결과 출력
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: -9.953925 using {'activation': 'leaky_relu', 'learning_rate': 0.01, 'optimizer': <class 'keras.optimizers.legacy.adam.Adam'>}


In [39]:
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: -9.953925 using {'activation': 'leaky_relu', 'learning_rate': 0.01, 'optimizer': <class 'keras.optimizers.legacy.adam.Adam'>}
-10.475194 (0.237600) with: {'activation': 'relu', 'learning_rate': 0.001, 'optimizer': <class 'keras.optimizers.legacy.adam.Adam'>}
nan (nan) with: {'activation': 'relu', 'learning_rate': 0.001, 'optimizer': 'rmsprop'}
nan (nan) with: {'activation': 'relu', 'learning_rate': 0.001, 'optimizer': 'sgd'}
nan (nan) with: {'activation': 'relu', 'learning_rate': 0.001, 'optimizer': 'adagrad'}
-10.175064 (0.143651) with: {'activation': 'relu', 'learning_rate': 0.01, 'optimizer': <class 'keras.optimizers.legacy.adam.Adam'>}
nan (nan) with: {'activation': 'relu', 'learning_rate': 0.01, 'optimizer': 'rmsprop'}
nan (nan) with: {'activation': 'relu', 'learning_rate': 0.01, 'optimizer': 'sgd'}
nan (nan) with: {'activation': 'relu', 'learning_rate': 0.01, 'optimizer': 'adagrad'}
-10.106832 (0.332372) with: {'activation': 'relu', 'learning_rate': 0.1, 'optimizer': <class

In [44]:
best_params = {'activation': 'leaky_relu', 'optimizer': Adam(learning_rate=0.01)}

In [45]:
# 최적의 매개변수로 모델 생성
model = Sequential()
model.add(Dense(128, input_dim=33, activation=best_params['activation']))
model.add(Dense(64, activation=best_params['activation']))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer=best_params['optimizer'])

In [48]:
from keras.callbacks import EarlyStopping

In [51]:
early_stopping = EarlyStopping(monitor = 'loss')

In [60]:
# 모델 훈련
model.fit(x_train, y_train,validation_data=(x_test, y_test), epochs=50, batch_size=32, verbose=2, callbacks=[early_stopping])

Epoch 1/50
867/867 - 3s - loss: 9.9091 - val_loss: 10.5240 - 3s/epoch - 3ms/step
Epoch 2/50
867/867 - 3s - loss: 9.9345 - val_loss: 10.4541 - 3s/epoch - 3ms/step


<keras.callbacks.History at 0x1daecde0490>

In [61]:
# 테스트 데이터에 대한 예측
y_pred = model.predict(x_test)



In [63]:
model.save('../model/007_Leaky-model.h5')

In [64]:
sub_file = pd.read_csv("../data/sample_submission.csv")

In [65]:
sub_file['ECLO'] = model.predict(df_test)
sub_file['ECLO'] = round(sub_file['ECLO'])



In [66]:
sub_file.ECLO.value_counts()

5.0    7451
4.0    3196
6.0     223
3.0      93
Name: ECLO, dtype: int64

In [78]:
sub_file.to_csv("../data/sub_file/010tf_Huber_lr.csv", index=False)

In [None]:
# 평가
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error on Test Data: {mse}')