<a href="https://colab.research.google.com/github/ele9996/Temperature-and-Humidity-forecasting/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Temperature and humidity forecasting

In [144]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
import pandas as pd
import matplotlib.pyplot as plt
import argparse
import os
from zipfile import ZipFile

In [145]:
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

In [146]:
#parser = argparse.ArgumentParser()
#parser.add_argument('--model', type=str, required=True, help='model name')
#parser.add_argument('--labels', type=int, required=True, help='model output')
#args = parser.parse_args()

#per prova
label_to_predict= 0
model_name= "my_model"

##Import Dataset

In [147]:
uri = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip"
zip_path = keras.utils.get_file(origin=uri, fname="jena_climate_2009_2016.csv.zip")
zip_file = ZipFile(zip_path)
zip_file.extractall()
csv_path = "jena_climate_2009_2016.csv"

df = pd.read_csv(csv_path)
df

Unnamed: 0,Date Time,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),wv (m/s),max. wv (m/s),wd (deg)
0,01.01.2009 00:10:00,996.52,-8.02,265.40,-8.90,93.30,3.33,3.11,0.22,1.94,3.12,1307.75,1.03,1.75,152.3
1,01.01.2009 00:20:00,996.57,-8.41,265.01,-9.28,93.40,3.23,3.02,0.21,1.89,3.03,1309.80,0.72,1.50,136.1
2,01.01.2009 00:30:00,996.53,-8.51,264.91,-9.31,93.90,3.21,3.01,0.20,1.88,3.02,1310.24,0.19,0.63,171.6
3,01.01.2009 00:40:00,996.51,-8.31,265.12,-9.07,94.20,3.26,3.07,0.19,1.92,3.08,1309.19,0.34,0.50,198.0
4,01.01.2009 00:50:00,996.51,-8.27,265.15,-9.04,94.10,3.27,3.08,0.19,1.92,3.09,1309.00,0.32,0.63,214.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
420546,31.12.2016 23:20:00,1000.07,-4.05,269.10,-8.13,73.10,4.52,3.30,1.22,2.06,3.30,1292.98,0.67,1.52,240.0
420547,31.12.2016 23:30:00,999.93,-3.35,269.81,-8.06,69.71,4.77,3.32,1.44,2.07,3.32,1289.44,1.14,1.92,234.3
420548,31.12.2016 23:40:00,999.82,-3.16,270.01,-8.21,67.91,4.84,3.28,1.55,2.05,3.28,1288.39,1.08,2.00,215.2
420549,31.12.2016 23:50:00,999.81,-4.23,268.94,-8.53,71.80,4.46,3.20,1.26,1.99,3.20,1293.56,1.49,2.16,225.8


In [148]:
selected_feat=['T (degC)','rh (%)']
selected_df=df[selected_feat]    #df.loc[:,selected_feat].values

selected_df

Unnamed: 0,T (degC),rh (%)
0,-8.02,93.30
1,-8.41,93.40
2,-8.51,93.90
3,-8.31,94.20
4,-8.27,94.10
...,...,...
420546,-4.05,73.10
420547,-3.35,69.71
420548,-3.16,67.91
420549,-4.23,71.80


In [149]:
#Data conversion to a 32-bit float numpy array

selected_array=selected_df.to_numpy(dtype=np.float32)   #.astype(np.float32)
selected_array

array([[-8.02, 93.3 ],
       [-8.41, 93.4 ],
       [-8.51, 93.9 ],
       ...,
       [-3.16, 67.91],
       [-4.23, 71.8 ],
       [-4.82, 75.7 ]], dtype=float32)

In [150]:
#I define training (70%),validation (10%) and test data set (20%)

n=len(selected_array)
train_data=selected_array[0:int(n*0.7)]
val_data=selected_array[int(n*0.7):int(n*0.8)]
test_data = selected_array[int(n*0.8):]


In [151]:
#I define mean and standard deviation (for normalization)
mean = train_data.mean(axis=0)
std = train_data.std(axis=0)

#I define the size of the window and Label Options (which is a number)
#LABEL_OPTIONS= is a number used to make my model understand if I'm predicting only temperature, only humidity or humidity and temperature together.
#              is 0 for temperature, 1 for humidity, 2 for both

input_width = 6
LABEL_OPTIONS =label_to_predict
#LABEL_OPTIONS = args.labels


##Window Generation


In [152]:
#features: temperature, humidity (x6 values)
#one temperature value (the one corresponding to the next time interval)


class WindowGenerator:
    def __init__(self, input_width, label_options, mean, std):
        self.input_width = input_width
        self.label_options = label_options
        self.mean = tf.reshape(tf.convert_to_tensor(mean), [1, 1, 2])
        self.std = tf.reshape(tf.convert_to_tensor(std), [1, 1, 2])

    def split_window(self, features):
        input_indeces = np.arange(self.input_width)
        inputs = features[:, :-1, :]

        if self.label_options < 2:
            labels = features[:, -1, self.label_options]
            labels = tf.expand_dims(labels, -1)
            num_labels = 1
        else:
            labels = features[:, -1, :]
            num_labels = 2

        inputs.set_shape([None, self.input_width, 2])
        labels.set_shape([None, num_labels])

        return inputs, labels

    def normalize(self, features):
        features = (features - self.mean) / (self.std + 1.e-6)

        return features

    def preprocess(self, features):
        inputs, labels = self.split_window(features)
        inputs = self.normalize(inputs)

        return inputs, labels

    def make_dataset(self, data, train):
        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
                data=data,
                targets=None,
                sequence_length=input_width+1,
                sequence_stride=1,
                batch_size=32)
        ds = ds.map(self.preprocess)
        ds = ds.cache()
        if train is True:
            ds = ds.shuffle(100, reshuffle_each_iteration=True)

        return ds

In [153]:
generator = WindowGenerator(input_width, LABEL_OPTIONS, mean, std)
train_ds = generator.make_dataset(train_data, True)
val_ds = generator.make_dataset(val_data, False)
test_ds = generator.make_dataset(test_data, False)




##Dataset Generation

In [157]:
# split datasets into input (X) and output (y) variables

#train
for x_train,y_train in train_ds.take(1):
  print(x_train.shape)
  print(y_train.shape)


#validation
for x_val,y_val in val_ds.take(1):
  print(x_val.shape)
  print(y_val.shape)


#test
for x_test,y_test in test_ds.take(1):
  print(x_test.shape)
  print(y_test.shape)


(32, 6, 2)
(32, 1)
(32, 6, 2)
(32, 1)
(32, 6, 2)
(32, 1)


In [155]:
#CNN_1D Model definition

cnn_1d_model = keras.Sequential([
    keras.layers.Conv1D(filters=64,kernel_size=3, activation='relu'),
    keras.layers.Flatten(),
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dense(units=1)
])

In [156]:
#LSTM Model definition

lstm_model = keras.Sequential([
    keras.layers.LSTM(units=64),
    keras.layers.Flatten(),
    keras.layers.Dense(units=1)
])

##MPL Model

In [154]:
#MLP Model definition
mlp_model = keras.Sequential([
    keras.layers.Flatten(),
    keras.layers.Dense(units=128, activation='relu'),
    keras.layers.Dense(units=128, activation='relu'),
    keras.layers.Dense(units=1)
])

In [158]:
#Training, evaluation and testing with mlp model

mlp_model.compile(loss='mse', optimizer='adam', metrics=['mse'])
mlp_model.fit(x_train, y_train, epochs=20)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fe17bd38f60>

In [159]:
#model evaluation on validation set

mlp_model.compile(loss='mae', optimizer='adam')
val_loss= mlp_model.evaluate(x_val, y_val) 



In [160]:
#test the model
#model evaluation on validation set

mlp_model.compile(loss='mae', optimizer='adam')
test_loss= mlp_model.evaluate(x_test, y_test) 





In [161]:
#check 
predictions = mlp_model.predict(x_test)
# summarize the first 5 cases
for i in range(5):
	print('%s => %d (expected %d)' % (x_test[i].numpy().tolist(), predictions[i], y_test[i]))

[[0.9651238322257996, -0.9859264492988586], [0.9535689353942871, -0.9665994048118591], [0.9293032288551331, -0.9146581292152405], [0.9027267098426819, -0.8609045147895813], [0.8761501312255859, -0.7860122323036194], [0.8553510308265686, -0.7286349534988403]] => 0 (expected 16)
[[0.9535689353942871, -0.9665994048118591], [0.9293032288551331, -0.9146581292152405], [0.9027267098426819, -0.8609045147895813], [0.8761501312255859, -0.7860122323036194], [0.8553510308265686, -0.7286349534988403], [0.8276189565658569, -0.6501188278198242]] => 0 (expected 16)
[[0.9293032288551331, -0.9146581292152405], [0.9027267098426819, -0.8609045147895813], [0.8761501312255859, -0.7860122323036194], [0.8553510308265686, -0.7286349534988403], [0.8276189565658569, -0.6501188278198242], [0.8068197965621948, -0.5909294486045837]] => 0 (expected 15)
[[0.9027267098426819, -0.8609045147895813], [0.8761501312255859, -0.7860122323036194], [0.8553510308265686, -0.7286349534988403], [0.8276189565658569, -0.650118827819