In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense
from sklearn.metrics import mean_squared_error

file_path = "dataset/seoul_pm10.csv"
data = pd.read_csv(file_path, encoding="cp949")
data['date'] = pd.to_datetime(data["date"]) #데이트 타입으로 변환
data = data.sort_values(by='date') #날짜순으로 정렬
data = data.dropna(subset=['pm10']) #pm10 컬럼이랑 pm2.5컬럼 결측치 제거

#데이터 미리 분리(왜인지 모름)
target = 'pm10'
X = data[target].values

In [16]:
scaler = MinMaxScaler()#데이터 정규화
X_scaled = scaler.fit_transform(X.reshape(-1,1)) #2차원 배열 형태로 변환

In [17]:
#시계열 데이터 생성
def create_time_series_data(X, time_step=24):
    Xs,ys = [], []
    for i in range(len(X) - time_step):
        Xs.append(X[i:i + time_step])
        ys.append(X[i + time_step])
    return np.array(Xs), np.array(ys)

In [18]:
time_steps = 24 #하루기준
X_seq, y_seq = create_time_series_data(X_scaled, time_steps) #데이터 생성호출

In [19]:
#데이터 8:2 분할
X_train, X_test = X_seq[:int(len(X_seq) * 0.8)] , X_seq[int(len(X_seq) * 0.8):]
y_train, y_test = y_seq[:int(len(y_seq) * 0.8)] , y_seq[int(len(y_seq) * 0.8):]

In [20]:
#GRU 모델 정의
model = Sequential()
model.add(GRU(50, activation="tanh", input_shape=(time_steps, 1))) #gru 레이어 추가
model.add(Dense(1))

  super().__init__(**kwargs)


In [21]:
model.compile(optimizer='adam', loss='mse')

history = model.fit(X_train,y_train, epochs=20, batch_size=32, validation_data=(X_test,y_test), verbose=1)

Epoch 1/20
[1m5571/5571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 4ms/step - loss: 2.2728e-04 - val_loss: 2.5586e-04
Epoch 2/20
[1m5571/5571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4ms/step - loss: 1.7294e-04 - val_loss: 2.4817e-04
Epoch 3/20
[1m5571/5571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4ms/step - loss: 1.7136e-04 - val_loss: 2.4989e-04
Epoch 4/20
[1m5571/5571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4ms/step - loss: 1.7066e-04 - val_loss: 2.5166e-04
Epoch 5/20
[1m5571/5571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4ms/step - loss: 1.6917e-04 - val_loss: 2.4967e-04
Epoch 6/20
[1m5571/5571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 3ms/step - loss: 1.6788e-04 - val_loss: 2.4534e-04
Epoch 7/20
[1m5571/5571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4ms/step - loss: 1.6984e-04 - val_loss: 2.4802e-04
Epoch 8/20
[1m5571/5571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4ms

In [None]:
#평가 예측하기
y_pred = model.predict(X_test)
y_pred_inverse = scaler.inverse_transform(y_pred) #역정규화
y_test_inverse = scaler.inverse_transform(y_test.reshape(-1,1)) #실제값 역정규화

In [None]:
mse = mean_squared_error(y_test_inverse, y_pred_inverse)
rmse = np.sqrt(mean_squared_error(y_test_inverse, y_pred_inverse))
print(f"RMSE: {rmse:.2f}")
print(f"MSE: {mse:.2f}")

In [None]:
plt.figure(figsize=(14,6))
plt.plot(y_test_inverse[:100], label="실제 PM10 농도", color="blue")
plt.plot(y_pred_inverse[:100], label="예측 PM10 농도", color="red")
plt.title("PM 예측 결과 (GRU모델)")
plt.xlabel("샘플 (시간순서)")
plt.ylabel("pm10 농도")
plt.legend()
plt.grid()
plt.show()
