In [1]:
import pandas as pd
import math
import numpy as np
from timeit import default_timer as timer
import tensorflow as tf

# Report which TensorFlow build this notebook is running against
print("tensorflow version:", tf.__version__)

tensorflow version: 2.4.1


In [2]:
# Seed both the TensorFlow and NumPy global random generators with one value;
# the same seed is reused further down for the train/test sampling.
seed = 1920
tf.random.set_seed(seed)
np.random.seed(seed)

In [3]:
# Position in `stations` of the station processed by this run
id_index = 0

# Hard-coded list of station ids; the chosen id is spliced into the
# input and output file names used later in the notebook.
stations = [
    70351, 70217, 65103, 66194, 68192, 75041, 66037, 63291,
    73138, 51049, 62100, 58198, 67113, 61078, 61363, 69148,
    61287, 51161, 74148, 58208, 66161, 62101, 47048, 65068,
    69139, 59007, 58214, 60141, 68262, 66137, 58012, 75019,
    56238, 63292, 49000, 67105, 63303, 58077, 68257, 66212,
    55202, 68242, 74258, 65111, 58212, 70330, 48245, 54038,
    72160, 72162, 72161, 50017, 60139, 61375, 68072, 68239,
    61425, 46012, 64017, 69128, 68228, 67108, 69137, 52088,
    61392, 67119, 55325, 61055, 50137, 69138, 61366, 65070,
    61260, 69147, 68241,
]

In [4]:
# Path of the preprocessed training CSV for the selected station
train_str = f"../BOM/spatial_pre/{stations[id_index]}_ann_train.csv"

# Load it, discard every row that contains a NaN, and renumber the
# remaining rows from 0 (the old index is thrown away).
train_df = pd.read_csv(train_str).dropna().reset_index(drop=True)
train_df.head()

Unnamed: 0,temperature,dew point,RH,Nwind,Ewind,MinTemp
0,15.8,15.0,95.0,8.375461,-4.267511,15.5
1,15.8,15.0,95.0,8.140639,-4.7,15.5
2,15.7,14.9,95.0,7.700029,-5.391619,15.5
3,15.6,14.8,95.0,5.906309,-4.782835,15.5
4,15.5,14.5,94.0,6.148529,-4.467168,15.5


In [5]:
# Draw a seeded random 80% of the rows as the training set
train_dataset = train_df.sample(frac=0.8, random_state=seed)

# The rows of train_df that were not sampled above form the testing set (20%)
test_dataset = train_df.drop(index=train_dataset.index)

train_dataset.head()

Unnamed: 0,temperature,dew point,RH,Nwind,Ewind,MinTemp
284857,3.5,1.5,87.0,3.211899,6.887939,0.9
434485,23.3,10.4,44.0,-21.34655,22.891371,20.5
504814,29.6,16.8,46.0,-8.221425,4.55721,29.0
393705,13.6,2.5,47.0,-18.26563,9.306805,13.4
518265,16.7,12.5,76.0,-3.049371e-15,-16.6,16.1


In [6]:
# Split off the 'MinTemp' column as the expected outputs (labels).
# pop() removes the column from the feature frames in place, so after this
# cell train_dataset / test_dataset hold input features only.
test_labels = test_dataset.pop('MinTemp')
train_labels = train_dataset.pop('MinTemp')

# Preview the remaining feature columns of the training set
train_dataset.head()

Unnamed: 0,temperature,dew point,RH,Nwind,Ewind
284857,3.5,1.5,87.0,3.211899,6.887939
434485,23.3,10.4,44.0,-21.34655,22.891371
504814,29.6,16.8,46.0,-8.221425,4.55721
393705,13.6,2.5,47.0,-18.26563,9.306805
518265,16.7,12.5,76.0,-3.049371e-15,-16.6


In [7]:
# Define a Neural Network
def build_model(n_features=None):
    """Build and compile the feed-forward regression network.

    Architecture: Dense(5, relu) -> Dense(7, relu) -> Dense(1, linear),
    compiled with the Adam optimizer and mean-squared-error loss; MSE is
    additionally reported as a metric.

    Args:
        n_features: Number of input features. When omitted, the column
            count of the notebook-level ``train_dataset`` is used, which
            is what this function did before the parameter existed.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    if n_features is None:
        # Fall back to the notebook global so existing build_model() calls work
        n_features = len(train_dataset.keys())

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(5, activation='relu', input_shape=[n_features]),
        tf.keras.layers.Dense(7, activation='relu'),
        tf.keras.layers.Dense(1, activation='linear')
    ])

    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model

model = build_model()

# View model summary
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                60        
_________________________________________________________________
dense_1 (Dense)              (None, 14)                154       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 15        
Total params: 229
Trainable params: 229
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Set callback to record training time
class TimingCallback(tf.keras.callbacks.Callback):
    """Keras callback that records how long each training epoch takes.

    After training, ``logs`` holds one elapsed-seconds value per completed
    epoch, in the order the epochs finished.
    """

    def __init__(self, logs=None):
        # `logs` is accepted only for backward compatibility; it was never read.
        # Initialise the Keras base class so its own attributes are set up.
        super().__init__()
        self.logs = []

    def on_epoch_begin(self, epoch, logs=None):
        # Remember when this epoch started
        self.starttime = timer()

    def on_epoch_end(self, epoch, logs=None):
        # Store the elapsed time since the matching on_epoch_begin call
        self.logs.append(timer() - self.starttime)
        
# Per-epoch timer handed to fit() below
timetaken = TimingCallback()

# Train for 100 epochs in mini-batches of 64, holding out 20% of the
# supplied training samples for validation.
history = model.fit(
    train_dataset.to_numpy(),
    train_labels.to_numpy(),
    batch_size=64,
    epochs=100,
    validation_split=0.2,
    callbacks=[timetaken],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [9]:
# Collect the per-epoch metrics recorded by fit() into a DataFrame
hist_pd = pd.DataFrame(history.history)

# Add an 'epoch' column holding each recorded epoch index shifted up by one
hist_pd['epoch'] = [epoch_idx + 1 for epoch_idx in history.epoch]
hist_pd

Unnamed: 0,loss,mse,val_loss,val_mse,epoch
0,7.146724,7.146724,0.987434,0.987434,1
1,1.034685,1.034685,0.95932,0.95932,2
2,1.025005,1.025005,0.968314,0.968314,3
3,1.019753,1.019753,0.961295,0.961295,4
4,1.013999,1.013999,1.017846,1.017846,5
5,1.010794,1.010794,0.956702,0.956702,6
6,1.003223,1.003223,0.964999,0.964999,7
7,0.997515,0.997515,0.975472,0.975472,8
8,0.992324,0.992324,1.021294,1.021294,9
9,0.992161,0.992161,0.932591,0.932591,10


In [10]:
# Turn the per-epoch durations gathered by the timing callback into a
# single-column DataFrame labelled 'training seconds'
train_time = pd.DataFrame(timetaken.logs).set_axis(['training seconds'], axis=1)

train_time

Unnamed: 0,training seconds
0,7.402596
1,6.792535
2,6.828007
3,6.899499
4,6.848828
5,6.902111
6,6.852588
7,6.876189
8,6.829552
9,6.838245


In [11]:
# Evaluate the trained model on the held-out test split.
# The model was compiled with loss='mse' and metrics=['mse'], so evaluate()
# yields two values: the loss and the MSE metric.
test_metrics = model.evaluate(test_dataset.to_numpy(), test_labels.to_numpy())
lose, mse = test_metrics



In [12]:
# Save Reports
report_path = "../Reports/Spatial/ex1/"
# Station id as text; it prefixes every file written by this cell
station_id = str(stations[id_index])

# History
hist_pd.to_csv(report_path + station_id + '_ann_history.csv', index = False)

# Training Time
# NOTE(review): this report reuses the '_ann_train.csv' suffix of the input
# training data file; they live in different directories, but confirm the
# name is intentional.
train_time.to_csv(report_path + station_id + '_ann_train.csv', index = False)

# Testing loss -- the context manager closes the file even if write() raises
with open(report_path + station_id + '_ann_loss.txt', "w") as loss_file:
    loss_file.write(str(lose))

# Save Model
model_path = "../Models/Spatial/ex1/"
model.save(model_path + station_id + '_ann.h5')