In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
import h5py


In [9]:

# 데이터 불러오기
data = pd.read_csv('train.csv') 

# 입력과 출력 데이터 나누기
X = data.drop(['FloodProbability','id'], axis=1).values  # 입력 특성
y = data['FloodProbability'].values  # 출력 타겟

# 데이터 정규화
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 훈련, 검증, 테스트 데이터 나누기
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.3, random_state=35)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=35)

# 모델 설계
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))

# 모델 컴파일
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# EarlyStopping 콜백 설정
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss')

# 모델 훈련
history = model.fit(X_train, y_train, epochs=100, batch_size=64, validation_data=(X_val, y_val), callbacks=[early_stopping, model_checkpoint])

# 모델 평가
loss, mae = model.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test MAE:", mae)

# Test Loss: 0.0003612188156694174
# Test MAE: 0.014995004050433636
# 기본

# Test Loss: 0.0003593358560465276
# Test MAE: 0.014905383810400963
# best_model.h5 추가

# Test Loss: 0.00035975821083411574
# Test MAE: 0.01483593787997961
# adam -> rmsprop

# Test Loss: 0.00035775540163740516
# Test MAE: 0.0148658761754632
# batch size 32 -> 64

# Test Loss: 0.00035533090704120696
# Test MAE: 0.01483441423624754
# rmsprop -> adam

# Test Loss: 0.0003565888910088688
# Test MAE: 0.014893828891217709
# 뉴런 64 -> 128

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Test Loss: 0.0003565888910088688
Test MAE: 0.014893828891217709


In [17]:
# 데이터 불러오기
data_train = pd.read_csv('train.csv')
data_test = pd.read_csv('test.csv')

# 입력과 출력 데이터 분할
X_train = data_train.drop(['FloodProbability', 'id'], axis=1).values  # 훈련 데이터 입력 특성
y_train = data_train['FloodProbability'].values  # 훈련 데이터 타겟 변수
X_test = data_test.drop(['id'], axis=1).values  # 테스트 데이터 입력 특성

# 데이터 정규화
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 모델 설계
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))

# 모델 컴파일
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# EarlyStopping 콜백 설정
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss')

# 모델 훈련
history = model.fit(X_train_scaled, y_train, epochs=100, batch_size=64, validation_data=(X_val, y_val), callbacks=[early_stopping, model_checkpoint])

# 테스트 데이터의 타겟 변수 예측
y_pred = model.predict(X_test_scaled)

# 예측 결과를 데이터프레임으로 변환
submission_df = pd.DataFrame({'id': data_test['id'], 'FloodProbability': y_pred.flatten()})

submission_df.head()

# CSV 파일로 저장
submission_df.to_csv('submission_ann.csv', index=False)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100


Unnamed: 0,id,FloodProbability
0,1117957,0.576352
1,1117958,0.451702
2,1117959,0.454445
3,1117960,0.472365
4,1117961,0.47229


In [18]:

submission_df.to_csv('submission_ann.csv', index=False)

In [11]:
y_pred

array([[-0.0712075 ],
       [-0.07862947],
       [-0.16359659],
       ...,
       [ 0.00411901],
       [-0.00835801],
       [ 0.09335136]], dtype=float32)

In [3]:
data = pd.read_csv('train.csv') 

X = data.drop(['FloodProbability', 'id'], axis=1).values
y = data['FloodProbability'].values

# 데이터 정규화
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 훈련, 검증, 테스트 데이터 나누기
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.3, random_state=35)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=35)

# 모델 설계
# 은닉층 추가
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))

# 모델 컴파일
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

# 조기 종료 및 모델 체크포인트 콜백 설정
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss')

# 모델 훈련
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_val, y_val), callbacks=[early_stopping, model_checkpoint])

# 가장 좋은 모델 로드
model.load_weights('best_model.h5')

# 모델 평가
loss, mae = model.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test MAE:", mae)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Test Loss: 0.00044515429181046784
Test MAE: 0.016903633251786232


In [6]:
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV

# 데이터 불러오기
data = pd.read_csv('train.csv') 

# 입력과 출력 데이터 나누기
X = data.drop(['FloodProbability','id'], axis=1).values
y = data['FloodProbability'].values

# 데이터 정규화
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 훈련, 검증, 테스트 데이터 나누기
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.3, random_state=35)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=35)

from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV
from keras.optimizers import Adam, RMSprop

# 모델 생성 함수
def create_model(optimizer='adam'):
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
    return model

# KerasRegressor 모델 생성
model = KerasRegressor(build_fn=create_model, verbose=0)

# 하이퍼파라미터 그리드
param_grid = {'batch_size': [16, 32, 64],
              'epochs': [50, 100, 200],
              'optimizer': [Adam(), RMSprop()]}

# 그리드 서치
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=3)
grid_result = grid.fit(X_train, y_train)

# 결과 출력
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

  model = KerasRegressor(build_fn=create_model, verbose=0)


In [None]:
# 데이터 불러오기
data_train = pd.read_csv('train.csv')
data_test = pd.read_csv('test.csv')

# 입력과 출력 데이터 분할
X_train = data_train.drop(['FloodProbability', 'id'], axis=1).values
y_train = data_train['FloodProbability'].values

X_test = data_test.drop(['id'], axis=1).values

# 데이터 정규화
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 모델 설계
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))

# 모델 컴파일
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# 모델 훈련
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=16, validation_split=0.2)

# 테스트 데이터의 타겟 변수 예측
y_pred = model.predict(X_test_scaled)

# 예측 결과를 데이터프레임으로 변환
submission_df = pd.DataFrame({'id': data_test['id'], 'FloodProbability': y_pred.flatten()})

# CSV 파일로 저장
submission_df.to_csv('submission3.csv', index=False)

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
data_train = pd.read_csv('train.csv')
data_test = pd.read_csv('test.csv')

In [20]:
from keras.models import Sequential
from keras.layers import Dense


# 입력과 출력 데이터 나누기
X = data.drop(['FloodProbability','id'], axis=1).values
y = data['FloodProbability'].values

# 데이터 정규화
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 훈련, 검증, 테스트 데이터 나누기
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.3, random_state=35)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=35)

# 모델 설계
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1))

# 모델 컴파일
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# 모델 훈련
history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_val, y_val))

# 모델 평가
loss, mae = model.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test MAE:", mae)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test Loss: 0.0003681324888020754
Test MAE: 0.015425443649291992


In [3]:
# 입력과 출력 데이터 분할
X_train = data_train.drop(['FloodProbability', 'id'], axis=1).values
y_train = data_train['FloodProbability'].values

X_test = data_test.drop(['id'], axis=1).values

# 데이터 정규화
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 모델 설계
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1))

# 모델 컴파일
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# 모델 훈련
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2)

# 테스트 데이터의 타겟 변수 예측
y_pred = model.predict(X_test_scaled)

# 예측 결과를 데이터프레임으로 변환
submission_df = pd.DataFrame({'id': data_test['id'], 'FloodProbability': y_pred.flatten()})

# CSV 파일로 저장
submission_df.to_csv('submission2.csv', index=False)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
