<a href="https://colab.research.google.com/github/YMGYM/TSE_Learning/blob/master/Beijing_air_plollution_2(final).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Introduction

이 파일은 황철현, 신강욱의
`미세먼지 예측 성능 개선을 위한 CNN-LSTM 결합 방법`
논문의 구현 연습 파일입니다.

데이터셋은 [Beijing PM2.5 데이터셋](https://www.kaggle.com/djhavera/beijing-pm25-data-data-set)
을 사용했습니다.

# Import All

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras as K
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Load Data

In [3]:
! unzip /content/drive/My\ Drive/Datasets/beijing_air.zip -d data

Archive:  /content/drive/My Drive/Datasets/beijing_air.zip
  inflating: data/PRSA_data_2010.1.1-2014.12.31.csv  


In [7]:
def get_data(path='/content/data/PRSA_data_2010.1.1-2014.12.31.csv'):
  """Load the Beijing PM2.5 CSV and separate the target from the features.

  Args:
    path: Location of the CSV file. Defaults to the location that was
      previously hard-coded in this function.

  Returns:
    A ``(pm25, features)`` tuple: the ``pm2.5`` column as a Series, and a
    DataFrame with the columns that remain after dropping the row-number
    and date/time columns and removing ``pm2.5``.
  """
  all_data = pd.read_csv(path)  # full dataset
  # Drop the columns that are not needed (row number and date/time fields).
  dropped_data = all_data.drop(columns=['No', 'year', 'month', 'day', 'hour'])
  # pop() removes the fine-dust column from the frame and returns it.
  pm25 = dropped_data.pop('pm2.5')

  return pm25, dropped_data

In [8]:
# Load the PM2.5 series and the remaining feature columns (kept as `proxy`).
pm25, proxy = get_data()

# NaN Data fix

In [9]:
# Forward-fill gaps with the previous reading, then back-fill whatever is
# still missing at the start of the series, where forward fill has no earlier
# value to copy. `fillna(method='pad')` is deprecated in recent pandas.
pm25 = pm25.ffill().bfill()

# Make Normalized Dataset

In [13]:
class PmScaler:
  """Min-max scale a series and split it, in order, into train/val/test."""

  def __init__(self):
    self.scaler = MinMaxScaler()

  def make_norlized_dataset(self, x, rate):
    """Scale ``x`` and split it into train, validation and test arrays.

    The data is reshaped to a single column, scaled with ``self.scaler`` and
    then cut without shuffling: the last ``int(len(x) * rate)`` rows become
    the test set, the same number of rows before them the validation set,
    and everything earlier the training set.

    Args:
      x: The values to scale (NumPy array, pandas object or plain sequence).
      rate: Fraction of ``len(x)`` used for each of the validation and test
        sets.

    Returns:
      A ``(train, val, test)`` tuple of column arrays. When
      ``int(len(x) * rate)`` is 0, validation and test are empty and all rows
      go to train.
    """
    arrlen = int(len(x) * rate)
    reshaped = np.asarray(x).reshape(-1, 1)
    # NOTE(review): the scaler is fitted on the whole series, so validation
    # and test values influence the scaling applied to the training data.
    scaled_data = self.scaler.fit_transform(reshaped)

    # Use clamped positive indices instead of negative slices: with
    # arrlen == 0 a slice such as [:-0] is empty and [-0:] is the whole
    # array, which would hand every row to the test set.
    total = len(scaled_data)
    val_start = max(total - 2 * arrlen, 0)
    test_start = max(total - arrlen, 0)

    train = scaled_data[:val_start]
    val = scaled_data[val_start:test_start]
    test = scaled_data[test_start:]

    return train, val, test

  def invert_scale(self, x):
    """Map scaled values back through ``self.scaler.inverse_transform``."""
    inverse = self.scaler.inverse_transform(x)
    return inverse

In [14]:
# Scale the PM2.5 series and split it with a rate of 0.1: the last 10% of the
# rows form the test set, the 10% before them the validation set, and the
# rest the training set.
scaler = PmScaler()
lstm_x_train, lstm_x_val, lstm_x_test = scaler.make_norlized_dataset(pm25, 0.1)

# Entire Model

In [None]:
class EntireModel():
  """Holds the CNN and LSTM models together with the LSTM window generators.

  NOTE(review): the window generators read the module-level ``lstm_x_train``,
  ``lstm_x_val`` and ``lstm_x_test`` arrays instead of deriving them from the
  constructor arguments, and the CNN builder reads a module-level ``x_train``.
  Those names must exist before the class is instantiated - confirm that this
  is the intended wiring.
  """

  # Number of past time steps in one LSTM sample. Shared by the window
  # generators and the LSTM input layer so the two always agree (they were
  # previously 15 and 16, which do not match).
  WINDOW_LENGTH = 15

  def __init__(self, pm25, proxydata, pmScaler, proxyScaler):
    """Build both models, the LSTM window generators and the callbacks.

    Args:
      pm25: PM2.5 data, stored as ``self.pm25``.
      proxydata: Additional feature data, stored as ``self.proxydata``.
      pmScaler: Scaler belonging to the PM2.5 data.
      proxyScaler: Scaler belonging to the feature data.
    """
    self.pm25 = pm25
    self.proxydata = proxydata
    # Kept on the instance; nothing in this class reads them yet.
    self.pmScaler = pmScaler
    self.proxyScaler = proxyScaler

    # The builders are methods, so they have to be called through self.
    self.cnn_model = self._get_cnn_model()
    self.lstm_model = self._get_lstm_model()

    # Each generator uses the same array as input and target: every sample is
    # a window of WINDOW_LENGTH values and its target is the value that
    # follows the window.
    self.lstm_train_data_gen = K.preprocessing.sequence.TimeseriesGenerator(
        lstm_x_train, lstm_x_train, length=self.WINDOW_LENGTH, shuffle=True)
    self.lstm_val_data_gen = K.preprocessing.sequence.TimeseriesGenerator(
        lstm_x_val, lstm_x_val, length=self.WINDOW_LENGTH, shuffle=True)
    # Test windows are served one at a time and in their original order.
    self.lstm_test_data_gen = K.preprocessing.sequence.TimeseriesGenerator(
        lstm_x_test, lstm_x_test, length=self.WINDOW_LENGTH, batch_size=1,
        shuffle=False)

    self.lstm_callbacks = [K.callbacks.TensorBoard(log_dir='lstm_logs')]
    self.cnn_callbacks = [K.callbacks.TensorBoard(log_dir='cnn_logs')]

  def cnn_model_fit(self, epochs=1):
    """Train the CNN model (not implemented yet).

    The previous body was a placeholder that called ``fit()`` without any
    data, and this class prepares no CNN training data, so the method fails
    explicitly instead of failing inside Keras.

    Args:
      epochs: Number of training epochs (currently unused).

    Raises:
      NotImplementedError: Always.
    """
    raise NotImplementedError(
        "cnn_model_fit: no CNN training data is prepared in EntireModel yet")

  def lstm_model_fit(self, epochs=1):
    """Train the LSTM model on the training windows.

    Validates on the validation windows and logs through
    ``self.lstm_callbacks``.

    Args:
      epochs: Number of training epochs.

    Returns:
      The value returned by ``self.lstm_model.fit``.
    """
    return self.lstm_model.fit(
        self.lstm_train_data_gen,
        validation_data=self.lstm_val_data_gen,
        epochs=epochs,
        callbacks=self.lstm_callbacks)

  def _get_cnn_model(self):
    """Build, summarize and compile the CNN model.

    Returns:
      The compiled Keras ``Sequential`` model.
    """
    cnnModel = K.Sequential()
    # NOTE(review): the input shape comes from a module-level `x_train`,
    # which is expected to be a 3-D array - confirm where it is defined.
    cnnModel.add(K.layers.Conv2DTranspose(32, (2,2), input_shape=(1,x_train.shape[1],x_train.shape[2]), activation="relu"))
    cnnModel.add(K.layers.MaxPool2D(strides=2))
    cnnModel.add(K.layers.Flatten())
    cnnModel.add(K.layers.Dropout(0.1))
    cnnModel.add(K.layers.Dense(100, activation="relu"))
    cnnModel.add(K.layers.ReLU())
    cnnModel.add(K.layers.Dense(5, activation="softmax"))
    cnnModel.summary()
    cnnModel.compile(optimizer="adam", loss="MSE")

    return cnnModel

  def _get_lstm_model(self):
    """Build, summarize and compile the LSTM model.

    The model takes windows of ``WINDOW_LENGTH`` single-feature time steps
    and outputs one sigmoid-activated value.

    Returns:
      The compiled Keras ``Sequential`` model.
    """
    lstm_model = K.Sequential()
    # The input length must equal the window length the generators produce.
    lstm_model.add(K.layers.LSTM(216, input_shape=(self.WINDOW_LENGTH, 1)))
    lstm_model.add(K.layers.Dropout(0.3))
    lstm_model.add(K.layers.Dense(128, activation="relu"))
    lstm_model.add(K.layers.Dropout(0.3))
    lstm_model.add(K.layers.Dense(1, activation="sigmoid"))
    lstm_model.summary()
    lstm_model.compile(optimizer="adam", loss="MSE")

    return lstm_model




        