<a href="https://colab.research.google.com/github/ianara01/Park-Sangjin/blob/master/Environment_TorchLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 환경 요소 데이터를 읽고 전처리(결측치 처리) 후 데이터 정규화
- 윈도우 분할
- LSTM 모델 정의
- 모델 인스턴스 생성
- 손실 함수와 최적화 알고리즘 생성

In [7]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler

In [8]:
# Path of the CSV file to load.
# NOTE(review): hard-coded absolute path (looks like a Colab runtime location) — confirm, or move to a config cell.
csv_file_path = '/content/sample_data/2021_Env_Int.csv'

# Read the CSV; the file is decoded with the cp949 codec.
df = pd.read_csv(csv_file_path, encoding='cp949')


# Missing-value handling (preprocessing)
def fill_missing_values(df, columns):
    """Fill NaNs with the running mean of the values observed earlier in the column.

    For each column in ``columns``, every missing entry is replaced by the mean
    of the originally non-missing values that precede it in row order. Filled
    values never feed back into that mean. A missing entry with no earlier
    observation (leading NaNs) is set to 0.

    Args:
        df: DataFrame to fill. It is modified in place.
        columns: Iterable of column labels to process. Columns are expected to
            be numeric.

    Returns:
        The same ``df`` object that was passed in, after filling.
    """
    for col in columns:
        series = df[col]
        missing = series.isna().to_numpy()
        if not missing.any():
            # Nothing to fill: leave the column (and its dtype) untouched.
            continue

        # Cumulative sum / count of the observed values only. At a missing row
        # these describe exactly the observations that came before it, because
        # the missing row itself contributes 0 to both.
        running_sum = series.fillna(0).to_numpy(dtype=float).cumsum()
        running_count = (~missing).cumsum()

        # Rows with no earlier observation keep the 0 default instead of
        # dividing 0 by 0.
        fill_values = np.zeros(len(series), dtype=float)
        np.divide(running_sum, running_count, out=fill_values, where=running_count > 0)

        # Positional boolean mask, so this works for any index, not only 0..n-1.
        df.loc[missing, col] = fill_values[missing]
    return df
# Coerce every column to numeric; with errors='coerce', values that cannot be
# parsed as numbers become NaN.
df = df.apply(pd.to_numeric, errors='coerce')

# Sensor columns whose gaps are filled.
columns_to_fill = [
    'Temp_Ext',
    'WindSpe_Ext',
    'Insolation_Ext',
    'CumInsolation_Ext',
    'Temp_Int',
    'Humidity_Int',
    'Res_CO2',
]
filled_df = fill_missing_values(df, columns_to_fill)

In [9]:
# Convert the DataFrame to a NumPy array
numpy_array = filled_df.to_numpy()

# Show both representations, each under its own heading line.
print("Pandas DataFrame:", filled_df, sep="\n")
print("\nNumPy Array:", numpy_array, sep="\n")

Pandas DataFrame:
        State  County  Item  Cultivation  Farmer  Time   Temp_Ext  \
0         NaN     NaN   NaN            1       3   NaN   3.200000   
1         NaN     NaN   NaN            1       3   NaN   4.400000   
2         NaN     NaN   NaN            1       3   NaN   4.000000   
3         NaN     NaN   NaN            1       3   NaN   3.700000   
4         NaN     NaN   NaN            1       3   NaN   3.800000   
...       ...     ...   ...          ...     ...   ...        ...   
827075    NaN     NaN   NaN            1      18   NaN  12.234304   
827076    NaN     NaN   NaN            1      18   NaN  12.234304   
827077    NaN     NaN   NaN            1      18   NaN  12.234304   
827078    NaN     NaN   NaN            1      18   NaN  12.234304   
827079    NaN     NaN   NaN            1      18   NaN  12.234304   

        WindDir_Ext  WindSpe_Ext  Insolation_Ext  CumInsolation_Ext  RainDet  \
0               NaN     2.000000        1.000000         997.000000      

In [None]:
# Data normalization
# The model below is univariate (input_size = 1), so exactly one series is
# scaled and windowed. Flattening the whole frame would interleave unrelated
# columns (including columns that are entirely NaN) into a single sequence.
# NOTE(review): 'Temp_Int' is assumed to be the series to forecast — confirm.
target_column = 'Temp_Int'
series_values = filled_df[[target_column]].to_numpy()  # shape (n_rows, 1)

scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(series_values)
data_normalized = torch.FloatTensor(data_normalized).view(-1)

# Split the time series into sliding windows
window_size = 10
if len(data_normalized) <= window_size:
    raise ValueError(
        f"Need more than {window_size} samples to build windows, got {len(data_normalized)}"
    )

# unfold yields every length-`window_size` slice; the last one has no
# following value to predict, so it is dropped.
windows = data_normalized.unfold(0, window_size, 1)[:-1]

# (num_windows, window_size, 1): the layout nn.LSTM(batch_first=True) expects.
input_data = windows.unsqueeze(-1).contiguous()
# (num_windows, 1): the value right after each window, matching the model output.
target_data = data_normalized[window_size:].unsqueeze(-1)

# LSTM model definition
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer that emits one value per sequence."""

    def __init__(self, input_size, hidden_size, num_layers):
        """Build the recurrent stack and the single-output regression head.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of each LSTM layer's hidden state.
            num_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Run the LSTM over ``x`` and project the final time step.

        Args:
            x: Batch-first input, indexed as (batch, time, feature).

        Returns:
            Tensor with one value per batch element, shape (batch, 1).
        """
        hidden_seq, _state = self.lstm(x)
        last_step = hidden_seq[:, -1, :]  # top-layer output at the final time step
        return self.fc(last_step)

# Create the model instance
input_size = 1
hidden_size = 64
num_layers = 2
model = LSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

# Loss function and optimization algorithm
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
num_epochs = 100

# nn.LSTM(batch_first=True) needs (batch, seq_len, input_size). Windows stored
# as (batch, seq_len) get a trailing feature axis added here.
train_inputs = input_data if input_data.dim() == 3 else input_data.unsqueeze(-1)
# The model emits (batch, 1). A (batch,) target would make MSELoss broadcast to
# (batch, batch) and silently compute the wrong loss.
train_targets = target_data.reshape(-1, 1)

# NOTE(review): every epoch is one full-batch step over all windows; with a
# long series this may not fit in memory — consider mini-batches.
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(train_inputs)
    loss = criterion(outputs, train_targets)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Forecast: roll the model forward autoregressively for `future` steps.
future = 50
# Seed the history with the last observed window; predictions are appended to
# it, so pred[window_size:] holds only the forecast values.
pred = data_normalized[-window_size:].tolist()
with torch.no_grad():
    for _ in range(future):
        # One sequence of window_size steps with one feature: (1, window_size, 1).
        seq = torch.FloatTensor(pred[-window_size:]).view(1, window_size, 1)
        pred.append(model(seq).item())

# Visualize the forecast next to the normalized history.
history = np.asarray(data_normalized)
forecast_steps = np.arange(len(history), len(history) + future)

fig, ax = plt.subplots()
ax.plot(history, label='Original Data')
ax.plot(forecast_steps, pred[window_size:], label='Predicted Data')
ax.set_xlabel('Time step')
ax.set_ylabel('Normalized value')
ax.set_title('LSTM forecast')
ax.legend()
plt.show()