In [1]:
import os
import pandas as pd
import numpy as np

# Directory that holds the preprocessed CSV splits (train / val / test).
path = 'preprocessed_datasets'

# os.listdir returns entries in arbitrary order; sort them so the printed
# listing is the same on every run and every platform.
files = sorted(file for file in os.listdir(path) if file.endswith('.csv'))
print(files)

# Build every file location from `path` so the data directory is defined in
# exactly one place instead of being repeated in each read_csv call.
NO3_train = pd.read_csv(os.path.join(path, 'NO3_train.csv'))
NO3_val = pd.read_csv(os.path.join(path, 'NO3_val.csv'))
NO3_test = pd.read_csv(os.path.join(path, 'NO3_test.csv'))

print(f"n NO3_train: {NO3_train.head()}")

['NO1_test.csv', 'NO1_train.csv', 'NO1_val.csv', 'NO2_test.csv', 'NO2_train.csv', 'NO2_val.csv', 'NO3_test.csv', 'NO3_train.csv', 'NO3_val.csv', 'NO4_test.csv', 'NO4_train.csv', 'NO4_val.csv', 'NO5_test.csv', 'NO5_train.csv', 'NO5_val.csv']
n NO3_train:                    timestamp  consumption  temperature  time_of_day  \
0  2017-05-02 00:00:00+00:00     0.227701     0.450877     0.000000   
1  2017-05-02 01:00:00+00:00     0.222187     0.450877     0.043478   
2  2017-05-02 02:00:00+00:00     0.226947     0.443860     0.086957   
3  2017-05-02 03:00:00+00:00     0.236823     0.431579     0.130435   
4  2017-05-02 04:00:00+00:00     0.287226     0.426316     0.173913   

   time_of_week  time_of_year  lag_24_hours  lag_1_hour  
0      0.166667      0.331507      0.246874    0.236276  
1      0.166667      0.331507      0.248087    0.227701  
2      0.166667      0.331507      0.248142    0.222187  
3      0.166667      0.331507      0.250252    0.226947  
4      0.166667      0.331507

In [2]:
def _split_features_target(frame, target='consumption'):
    """Split a DataFrame into float32 feature and target arrays.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame containing the `target` column plus the feature columns.
    target : str
        Name of the column to predict.

    Returns
    -------
    tuple of numpy.ndarray
        (features, targets): every column except `target` as a 2-D float32
        array, and the `target` column as a 1-D float32 array.
    """
    features = frame.drop(columns=target).to_numpy(dtype=np.float32)
    targets = frame[target].to_numpy(dtype=np.float32)
    return features, targets


# errors='ignore' makes the drop idempotent: the names are rebound in place,
# so without it a second run of this cell would raise KeyError because
# 'timestamp' has already been removed.
NO3_train = NO3_train.drop(columns='timestamp', errors='ignore')
NO3_val = NO3_val.drop(columns='timestamp', errors='ignore')
NO3_test = NO3_test.drop(columns='timestamp', errors='ignore')

# Separate features and target for the training, validation and test sets
NO3_train_features, NO3_train_targets = _split_features_target(NO3_train)
NO3_val_features, NO3_val_targets = _split_features_target(NO3_val)
NO3_test_features, NO3_test_targets = _split_features_target(NO3_test)

print(f"n NO3_train: {NO3_train.head()}")
print(f"NO3_train.shape: {NO3_train.shape}")

n NO3_train:    consumption  temperature  time_of_day  time_of_week  time_of_year  \
0     0.227701     0.450877     0.000000      0.166667      0.331507   
1     0.222187     0.450877     0.043478      0.166667      0.331507   
2     0.226947     0.443860     0.086957      0.166667      0.331507   
3     0.236823     0.431579     0.130435      0.166667      0.331507   
4     0.287226     0.426316     0.173913      0.166667      0.331507   

   lag_24_hours  lag_1_hour  
0      0.246874    0.236276  
1      0.248087    0.227701  
2      0.248142    0.222187  
3      0.250252    0.226947  
4      0.268047    0.236823  
NO3_train.shape: (40920, 7)


## Feed-forward neural network

In [4]:
# Create a nn with TENSORFLOW to train on the train set, validate on the validation set and test on the test set
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

print(tf.__version__)

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable between runs.
keras.utils.set_random_seed(42)

# Take the input width from the data instead of hard-coding it, so the model
# stays in sync with the feature matrix if columns are added or removed.
n_features = NO3_train_features.shape[1]

# Create the neural network: two hidden ReLU layers of 10 units and a single
# linear output unit for the regression target.
# An explicit keras.Input is used rather than `input_shape=` on the first
# Dense layer, which Keras 3 (shipped with TF 2.16) warns against.
model = keras.Sequential([
    keras.Input(shape=(n_features,)),
    layers.Dense(10, activation='relu'),
    layers.Dense(10, activation='relu'),
    layers.Dense(1)
])

# Compile the model
model.compile(optimizer='sgd', loss='mean_squared_error')

# Train the model, reporting the validation loss after every epoch
model.fit(NO3_train_features, NO3_train_targets, epochs=500, validation_data=(NO3_val_features, NO3_val_targets))

# Validate the model
val_loss = model.evaluate(NO3_val_features, NO3_val_targets)
print(f"Validation loss: {val_loss}")

# Test the model
test_loss = model.evaluate(NO3_test_features, NO3_test_targets)
print(f"Test loss: {test_loss}")

# save the model
model.save('my_model.keras')
print("Model saved")

2.16.0-rc0
Epoch 1/500
[1m1279/1279[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 507us/step - loss: 0.0443 - val_loss: 0.0029
Epoch 2/500
[1m1279/1279[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 446us/step - loss: 0.0024 - val_loss: 0.0011
Epoch 3/500
[1m1279/1279[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 430us/step - loss: 0.0011 - val_loss: 8.2657e-04
Epoch 4/500
[1m1279/1279[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 428us/step - loss: 8.4879e-04 - val_loss: 7.4053e-04
Epoch 5/500
[1m1279/1279[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 436us/step - loss: 7.7439e-04 - val_loss: 6.7397e-04
Epoch 6/500
[1m1279/1279[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 429us/step - loss: 7.1048e-04 - val_loss: 6.4549e-04
Epoch 7/500
[1m1279/1279[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 415us/step - loss: 6.7500e-04 - val_loss: 6.1264e-04
Epoch 8/500
[1m1279/1279[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0