In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error
from math import sqrt

# 데이터 로드
df = pd.read_csv('/content/sst_data.csv')

# 위도와 경도를 기반으로 데이터 그룹화 및 평균화
df_grouped = df.groupby(['latitude', 'longitude', 'time']).mean().reset_index()

In [2]:
# 데이터 스케일링
scaler = MinMaxScaler(feature_range=(-1, 1))
df_grouped['scaled_sst'] = scaler.fit_transform(df_grouped['sst'].values.reshape(-1,1))

# 학습 데이터셋 생성
def create_dataset(df, seq_len):
    data = []
    for i in range(len(df)-seq_len):
        data.append(df[i:i+seq_len])
    return np.array(data)

seq_len = 15
data = create_dataset(df_grouped['scaled_sst'], seq_len)
train_set_size = int(np.round(0.9*data.shape[0]))
train_set = data[:train_set_size]
test_set = data[train_set_size:]

X_train = train_set[:,:-1]
y_train = train_set[:,-1]
X_test = test_set[:,:-1]
y_test = test_set[:,-1]

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [3]:
# LSTM 모델 정의
model = Sequential()
model.add(LSTM(100, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True, activation='relu'))
model.add(LSTM(50, return_sequences=False, activation='relu'))
model.add(Dense(1))

# Adam 옵티마이저 생성
adam = Adam(learning_rate=0.01)

# 모델 컴파일
model.compile(loss='mean_squared_error', optimizer=adam)

# 모델 학습
model.fit(X_train, y_train, epochs=100, batch_size=64)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7adfecec7e80>

In [4]:
# 예측
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

# 스케일링 되돌리기
y_train_inv = scaler.inverse_transform(y_train.reshape(1, -1))
y_pred_train_inv = scaler.inverse_transform(y_pred_train)
y_test_inv = scaler.inverse_transform(y_test.reshape(1, -1))
y_pred_test_inv = scaler.inverse_transform(y_pred_test)

# 평가 지표 계산
train_rmse = sqrt(mean_squared_error(y_train_inv.flatten(), y_pred_train_inv.flatten()))
test_rmse = sqrt(mean_squared_error(y_test_inv.flatten(), y_pred_test_inv.flatten()))

train_r2 = r2_score(y_train_inv.flatten(), y_pred_train_inv.flatten())
test_r2 = r2_score(y_test_inv.flatten(), y_pred_test_inv.flatten())

train_mape = mean_absolute_percentage_error(y_train_inv.flatten(), y_pred_train_inv.flatten())
test_mape = mean_absolute_percentage_error(y_test_inv.flatten(), y_pred_test_inv.flatten())

print('Train RMSE: ', train_rmse)
print('Test RMSE: ', test_rmse)

print('Train R^2 Score: ', train_r2)
print('Test R^2 Score: ', test_r2)

print('Train MAPE: ', train_mape)
print('Test MAPE: ', test_mape)

Train RMSE:  0.32000464455083266
Test RMSE:  0.3374520311539128
Train R^2 Score:  0.9957097815653033
Test R^2 Score:  0.9942776913648289
Train MAPE:  0.011623423149717153
Test MAPE:  0.012519059993342095


In [15]:
# 'time' 열을 datetime 형태로 변환
df_grouped['time'] = pd.to_datetime(df_grouped['time'])

# 2023년 7월부터 9월까지의 데이터 선택
df_2023_summer = df_grouped[(df_grouped['time'].dt.year == 2023) & (df_grouped['time'].dt.month >= 7) & (df_grouped['time'].dt.month <= 9)]

# 'sst' 데이터 스케일링
df_2023_summer['scaled_sst'] = scaler.transform(df_2023_summer['sst'].values.reshape(-1,1))

# 데이터셋 생성
data_2023_summer = create_dataset(df_2023_summer['scaled_sst'], seq_len)

# 데이터 형태 변환
X_2023_summer = np.reshape(data_2023_summer, (data_2023_summer.shape[0], data_2023_summer.shape[1], 1))

# 예측
y_pred_2023_summer = model.predict(X_2023_summer)

# 스케일링 되돌리기
y_pred_2023_summer_inv = scaler.inverse_transform(y_pred_2023_summer)

# 실제 값 추출
y_true_2023_summer = df_2023_summer['sst'].values[seq_len:]

# 평가 지표 계산
rmse = sqrt(mean_squared_error(y_true_2023_summer, y_pred_2023_summer_inv.flatten()))
r2 = r2_score(y_true_2023_summer, y_pred_2023_summer_inv.flatten())
mape = mean_absolute_percentage_error(y_true_2023_summer, y_pred_2023_summer_inv.flatten())

# 결과 출력
print('RMSE: ', rmse)
print('R^2 Score: ', r2)
print('MAPE: ', mape)

# 예측 결과를 DataFrame으로 변환
df_result = pd.DataFrame(y_pred_2023_summer_inv, columns=['Predicted_SST'])

# 'time', 'latitude', 'longitude' 열 추가
df_result['time'] = df_2023_summer['time'].values[seq_len:]
df_result['latitude'] = df_2023_summer['latitude'].values[seq_len:]
df_result['longitude'] = df_2023_summer['longitude'].values[seq_len:]

# 결과를 CSV 파일로 저장
df_result.to_csv('2023_summer_sst_prediction.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2023_summer['scaled_sst'] = scaler.transform(df_2023_summer['sst'].values.reshape(-1,1))


RMSE:  0.42062118939626403
R^2 Score:  0.939473367042511
MAPE:  0.010449106097144832


In [16]:
model.save('my_model.h5')

  saving_api.save_model(
