In [None]:
from google.colab import drive
import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import LabelEncoder, RobustScaler
import numpy as np
import joblib
# Mount Google Drive at /content/gdrive; every model, scaler and dataset path
# used below lives under this mount point.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Saved Keras models, one file per pollutant.
pm10_model_path = "/content/gdrive/MyDrive/Model/pm10_model.h5"
so2_model_path = "/content/gdrive/MyDrive/Model/so2_model.h5"
co_model_path = "/content/gdrive/MyDrive/Model/co_model.h5"
o3_model_path = "/content/gdrive/MyDrive/Model/o3_model.h5"
no2_model_path = "/content/gdrive/MyDrive/Model/no2_model.h5"

# Persisted preprocessing objects: the wind encoder, the scaler for the full
# feature table, and one scaler per pollutant.
wind_encoder_path = "/content/gdrive/MyDrive/scaler/wind encoder.gz"
all_scaler_path = "/content/gdrive/MyDrive/scaler/all scaler.gz"
co_scaler_path = "/content/gdrive/MyDrive/scaler/co scaler.gz"
no2_scaler_path = "/content/gdrive/MyDrive/scaler/no2 scaler.gz"
o3_scaler_path = "/content/gdrive/MyDrive/scaler/o3 scaler.gz"
pm10_scaler_path = "/content/gdrive/MyDrive/scaler/pm10 scaler.gz"
so2_scaler_path = "/content/gdrive/MyDrive/scaler/so2 scaler.gz"

# Load the models. The names on the left line up one-to-one with the paths
# in the tuple, and the files are read in that order.
pm10_model, so2_model, co_model, o3_model, no2_model = (
    tf.keras.models.load_model(model_path)
    for model_path in (
        pm10_model_path,
        so2_model_path,
        co_model_path,
        o3_model_path,
        no2_model_path,
    )
)

# Load the encoder and scalers, again matched position-by-position.
encoder, scaler, co_scaler, no2_scaler, o3_scaler, pm10_scaler, so2_scaler = (
    joblib.load(artifact_path)
    for artifact_path in (
        wind_encoder_path,
        all_scaler_path,
        co_scaler_path,
        no2_scaler_path,
        o3_scaler_path,
        pm10_scaler_path,
        so2_scaler_path,
    )
)

In [None]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a time series as a table of lagged inputs and future outputs.

    Each row of the result holds, side by side, the observations at
    t-n_in ... t-1 followed by the observations at t ... t+n_out-1.

    Args:
        data: Observations ordered in time, in any form ``pd.DataFrame``
            accepts: a flat list, a 1-D or 2-D array, a list of rows, or a
            DataFrame. Rows are time steps, columns are variables.
        n_in: Number of lagged time steps placed before time t.
        n_out: Number of time steps starting at t placed after the lags.
        dropnan: When true, rows containing NaN are removed. Shifting
            introduces NaN at the start and end of the table.

    Returns:
        A DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)``
        and ``var<j>(t+<i>)``, with ``j`` counting variables from 1.
    """
    df = pd.DataFrame(data)
    # Count variables on the frame itself rather than on the raw input, so
    # 1-D arrays and lists of rows get the right number of column names.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg = agg.dropna()
    return agg

In [None]:
def conc_predict(gas_name, scaled_data):
  """Predict one pollutant and map the prediction back to its original scale.

  Args:
    gas_name: One of "pm10", "so2", "co", "o3" or "no2"; selects the
      model/scaler pair loaded earlier in the notebook.
    scaled_data: Model input passed unchanged to ``model.predict``.

  Returns:
    The output of the pollutant scaler's ``inverse_transform`` applied to
    the model prediction.

  Raises:
    ValueError: If ``gas_name`` is not one of the supported pollutants.
  """
  # An if-chain (not a dict of all models) so that only the requested
  # model and scaler have to exist in the notebook namespace.
  if gas_name == "pm10":
    model, target_scaler = pm10_model, pm10_scaler
  elif gas_name == "so2":
    model, target_scaler = so2_model, so2_scaler
  elif gas_name == "co":
    model, target_scaler = co_model, co_scaler
  elif gas_name == "o3":
    model, target_scaler = o3_model, o3_scaler
  elif gas_name == "no2":
    model, target_scaler = no2_model, no2_scaler
  else:
    # Previously an unknown name fell through and returned None silently.
    raise ValueError(f"unknown gas name: {gas_name!r}")

  y_pred = model.predict(scaled_data)
  if gas_name == "pm10":
    # Only the pm10 prediction is reduced to its first output column before
    # inverse scaling. NOTE(review): presumably the pm10 model emits more
    # than one output column - confirm against the model definition.
    y_pred = y_pred[:, 0].reshape(-1, 1)
  return target_scaler.inverse_transform(y_pred)

In [None]:
# Read the full table from Drive; the first CSV column is used as the row index.
dataset_path = "/content/gdrive/MyDrive/dokumentasi/dataset_full.csv"
dataset = pd.read_csv(dataset_path, index_col=0)
dataset

Unnamed: 0_level_0,Temperature,Humidity,Precipitation,Wind Speed,Wind Direction,pm10,so2,co,o3,no2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
01-01-2010,28.700000,75.000000,0.0,4.000000,W,60.0,4.0,73.0,27.0,14.0
02-01-2010,28.700000,73.000000,6.6,2.000000,NW,32.0,2.0,16.0,33.0,9.0
03-01-2010,28.800000,72.000000,0.0,3.000000,NE,27.0,2.0,19.0,20.0,9.0
04-01-2010,29.900000,70.000000,0.0,2.000000,SE,22.0,2.0,16.0,15.0,6.0
05-01-2010,28.500000,70.000000,0.4,3.000000,E,25.0,2.0,17.0,15.0,8.0
...,...,...,...,...,...,...,...,...,...,...
27-12-2020,29.400000,70.000000,0.0,3.000000,NW,44.0,26.0,44.0,48.0,8.0
28-12-2020,29.100000,70.000000,0.0,3.000000,W,45.0,27.0,35.0,53.0,17.0
29-12-2020,28.300000,73.000000,0.0,2.000000,C,26.0,23.0,27.0,45.0,19.0
30-12-2020,28.000000,75.000000,0.0,2.000000,W,39.0,22.0,21.0,54.0,7.0


In [None]:
values = dataset.values
# Integer-encode the direction column (index 4) with the encoder that was
# loaded from Drive, instead of fitting a fresh LabelEncoder here. Re-fitting
# shadowed the persisted object and made its load a dead statement.
values[:,4] = encoder.transform(values[:,4])
# ensure all data is float
values = values.astype('float32')
# Normalize features with the persisted scaler (transform only), so the model
# inputs are scaled by the stored object rather than one re-fitted on this table.
# NOTE(review): assumes "all scaler.gz" was fitted on this same 10-column
# layout - confirm against the training notebook.
scaled = scaler.transform(values)

In [None]:
# For each pollutant: positions, counted from the end of the reframed table, of
# the columns to drop. Nine of the last ten columns are removed, so exactly one
# column at time t remains after the lagged inputs.
# NOTE(review): given the column order in the dataset preview, the position
# missing from each list appears to be that pollutant's own column - confirm.
param = {"pm10":[-1,-2,-3,-4, -6, -7, -8,-9,-10], 
         "so2":[-1,-2,-3,-5, -6, -7, -8,-9,-10], 
         "co":[-1,-2,-4,-5, -6, -7, -8,-9,-10], 
         "o3":[-1, -3,-4,-5, -6, -7, -8,-9,-10],
         "no2":[-2,-3,-4,-5, -6, -7, -8,-9,-10]}

# Windowing constants; n_days and n_train_days are also read by later cells.
n_days = 7
n_features = 10
n_train_days = 365*10 
# number of lagged input columns at the front of every reframed row
n_obs = n_days * n_features

# frame as supervised learning once; the result does not depend on the gas
reframed_all = series_to_supervised(scaled, n_days, 1)

predicted_conc = {}
for gas_name in param.keys():
  print(f"predict: {gas_name}")
  # drop the unused time-t columns for this gas (new frame, reframed_all is kept)
  reframed = reframed_all.drop(reframed_all.columns[param[gas_name]], axis=1)
  print(reframed.shape)
  # keep only the rows after the training period
  test = reframed.values[n_train_days:, :]
  # split into input and outputs
  test_X, test_y = test[:, :n_obs], test[:, -1:]
  # reshape input to be 3D [samples, timesteps, features]
  test_X = test_X.reshape((test_X.shape[0], n_days, n_features))
  print(test_X.shape, test_y.shape)
  predicted_conc[gas_name] = conc_predict(gas_name, test_X)

predict: pm10
(4006, 71)
(356, 7, 10) (356, 1)
predict: so2
(4006, 71)
(356, 7, 10) (356, 1)
predict: co
(4006, 71)
(356, 7, 10) (356, 1)
predict: o3
(4006, 71)
(356, 7, 10) (356, 1)
predict: no2
(4006, 71)
(356, 7, 10) (356, 1)


In [None]:
# Order matters: when two pollutants tie for the maximum, the one listed
# first wins, as it did with the original index-based lookup.
gas_order = ["pm10", "so2", "co", "o3", "no2"]

# One row per predicted sample, one column per pollutant. The row count comes
# from the predictions themselves instead of a hard-coded 356.
# NOTE(review): assumes each prediction array holds one value per sample.
pred_matrix = np.column_stack([np.ravel(predicted_conc[gas]) for gas in gas_order])

# Name of the pollutant with the highest predicted value in each row.
max_conc = [gas_order[row.index(max(row))] for row in pred_matrix.tolist()]


In [None]:
# Column order doubles as the tie-break order: the first maximum wins.
gas_columns = ["pm10", "so2", "co", "o3", "no2"]

# Skip the training rows plus the n_days rows used as lag history, so the
# remaining rows line up with the test samples (previously a literal 7).
# NOTE(review): alignment assumes only the first n_days rows were dropped when
# the series was reframed - confirm the dataset has no other NaN rows.
actual_conc = dataset[gas_columns][n_train_days + n_days:].values

# Name of the pollutant with the highest observed value in each row.
actual_max = [gas_columns[row.index(max(row))] for row in actual_conc.tolist()]

In [None]:
# zip() stops at the shorter list, so a misaligned test window would silently
# score only part of the data; fail loudly instead.
assert len(actual_max) == len(max_conc), (
    f"label count mismatch: {len(actual_max)} actual vs {len(max_conc)} predicted"
)

# Count the samples where the predicted dominant pollutant matches the actual one.
correct = sum(act == pred for act, pred in zip(actual_max, max_conc))
wrong = len(actual_max) - correct

print(f"Accuracy {(correct)/(correct+wrong)*100}")

Accuracy 66.01123595505618
