In [None]:
import pandas as pd
import numpy as np
import pickle
import sys

from tqdm.notebook import tqdm

from matplotlib import pyplot as plt
import matplotlib.image as mpimg


from datetime import datetime

import random
import math

from multiprocessing.pool import Pool

In [None]:
#
#
#
#
################# Data Loading and Preprocessing
#
#
#
#

In [None]:
# Reload the preprocessed dataframe from disk, if it exists.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load pickles that were produced by this project.

try:
    # The context manager closes the file even when unpickling raises.
    with open('extended_dataframe.pickle', "rb") as pickle_out_extended_dataframe:
        data = pickle.load(pickle_out_extended_dataframe)
except IOError:
    # Best effort: `data` stays undefined in this case, so the cells below
    # cannot run until the pickle has been generated.
    print("No previous dataframe is available")


In [None]:
# Display the first three rows of the loaded dataframe
data.head(3)

In [None]:
# Delete every day that contains at least one NaN temperature reading.
# Whole days are removed (rather than single rows) so as not to leave
# strange cut points inside a day.

# Boolean masks are used throughout, so the selection does not depend on the
# index of `data` being a clean 0..n-1 range.
nan_mask = data['temperature'].isna().values
rows_with_nans = np.flatnonzero(nan_mask)
dates_with_nans = np.unique(data.loc[nan_mask, 'only_date'])
discard_mask = data['only_date'].isin(dates_with_nans).values
dates_indexes = np.flatnonzero(discard_mask).tolist()
# Note: the printed value is a fraction in [0, 1], not a percentage
print("% of discarded data:", len(dates_indexes) / len(data))
data = data[~discard_mask].reset_index(drop=True)

In [None]:
# Week identifier "<year>-<week of year>". %U counts weeks starting on Sunday;
# the days of a year that precede its first Sunday fall in week "00".
data['year_week'] = data.datetime.dt.strftime('%Y-%U')

In [None]:
# For each sensor, determine the row indexes of `data` that form its training and test partitions.
# The same split (i.e. the same timestamps) is applied to every sensor.
# Only the 'temporal_wise' split keeps the temporal relationship (test data in the future
# wrt training data); the other split types pick rows, days or weeks at random.

perc_training = 0.8


split_type = 'week_based_random' # temporal_wise, day_based_random, full_random, week_based_random


# Checking that every sensor has the same length
counts_per_sensor = data[['node', 'datetime']].groupby('node').count()
assert np.max(counts_per_sensor['datetime']) == np.min(counts_per_sensor['datetime'])
# The Series is indexed by node name, so the first element is taken positionally with .iloc
sensor_data_length = counts_per_sensor['datetime'].iloc[0]


# Maps that store the training and test indexes for each sensor
sens_map_train_indexes = {}
sens_map_test_indexes = {}

for sens in list(counts_per_sensor.index):
    sens_map_train_indexes[sens] = []
    sens_map_test_indexes[sens] = []



if split_type == 'temporal_wise':
    # the first `perc_training` fraction of each sensor's data is training, the remainder is test

    # Generate training and test indexes

    # I have already sorted the values by node and datetime in the original dataframe
    for i, sens in enumerate(list(counts_per_sensor.index)):
        base_sens = i*sensor_data_length
        last_train = base_sens + int(sensor_data_length*perc_training)
        # The test partition is everything left in this sensor's block. Deriving the end from
        # int(length*(1 - perc_training)) + 1 can overshoot into the next sensor
        # (e.g. perc_training = 0.5), so the block boundary is used directly.
        last_test = base_sens + sensor_data_length
        train_inds = list(range(base_sens, last_train))
        test_inds = list(range(last_train, last_test))
        sens_map_train_indexes[sens] = train_inds
        sens_map_test_indexes[sens] = test_inds

    for sens in list(counts_per_sensor.index):
        first_train = sens_map_train_indexes[sens][0]
        last_train = sens_map_train_indexes[sens][-1]
        first_test = sens_map_test_indexes[sens][0]
        last_test = sens_map_test_indexes[sens][-1]
        # reset_index() exposes the original row index as column 0
        assert first_train == data.query("node == @sens").reset_index().iloc[0,0]
        assert last_test == data.query("node == @sens").reset_index().iloc[-1,0]
        assert last_train > first_train and last_train < first_test
        assert first_test > last_train and first_test < last_test

elif split_type == 'full_random':
    # divide randomly the data of the sensors

    possible_indexes = list(range(sensor_data_length))
    random.seed(42)
    # I make sure to use the same split for each sensor
    # meaning, corresponding training and test indexes of the different sensors refer to the same datetimes
    train_indexes_overall = sorted(random.sample(possible_indexes, int(sensor_data_length * perc_training)))
    test_indexes_overall = sorted(list(set(possible_indexes) - set(train_indexes_overall)))

    for i, sens in enumerate(list(counts_per_sensor.index)):
        base_sens = i * sensor_data_length
        # it is sufficient to add the baseline, since all sensor tracks have the same length
        sens_map_train_indexes[sens] = np.asarray(train_indexes_overall) + base_sens
        sens_map_test_indexes[sens] = np.asarray(test_indexes_overall) + base_sens


    # Sanity checks. They assume 'raspihat01' and 'raspihat03' are the 1st and 3rd nodes
    # in sorted order (hence the offset of two sensor blocks).
    assert np.array_equal(sens_map_train_indexes['raspihat01'], sens_map_train_indexes['raspihat03'] - sensor_data_length*2)
    assert np.array_equal(sens_map_test_indexes['raspihat01'], sens_map_test_indexes['raspihat03'] - sensor_data_length*2)
    assert len(set(sens_map_train_indexes['raspihat01']).intersection(sens_map_test_indexes['raspihat01'])) == 0
    assert len(set(sens_map_train_indexes['raspihat01']).union(sens_map_test_indexes['raspihat01'])) == len(possible_indexes)
    # The training and test indexes are different for each sensor, and refer to the original dataframe

elif split_type == 'day_based_random':

    np.random.seed(42)
    days_list = data.only_date.unique().tolist()
    np.random.shuffle(days_list)
    random_train_days = sorted(days_list[0:int(len(days_list)*perc_training)])
    random_test_days =  sorted(days_list[int(len(days_list)*perc_training):])

    for i, sens in enumerate(list(counts_per_sensor.index)):
        subdata = data[data['node'] == sens] # here the indexes are still the same as in the original frame
        sens_map_train_indexes[sens] = subdata[subdata.only_date.isin(random_train_days)].index.tolist()
        sens_map_test_indexes[sens] = subdata[subdata.only_date.isin(random_test_days)].index.tolist()

    # Sanity checks. The index lists are plain Python lists here, so they are converted to
    # arrays before subtracting the offset (list - int raises TypeError).
    # They assume 'raspihat01' and 'raspihat03' are the 1st and 3rd nodes in sorted order.
    assert np.array_equal(sens_map_train_indexes['raspihat01'], np.asarray(sens_map_train_indexes['raspihat03']) - sensor_data_length*2)
    assert np.array_equal(sens_map_test_indexes['raspihat01'], np.asarray(sens_map_test_indexes['raspihat03']) - sensor_data_length*2)
    assert len(set(sens_map_train_indexes['raspihat01']).intersection(sens_map_test_indexes['raspihat01'])) == 0
    assert len(set(sens_map_train_indexes['raspihat01']).union(sens_map_test_indexes['raspihat01'])) == len(data[data['node'] == 'raspihat01'])
    # The training and test indexes are different for each sensor, and refer to the original dataframe

elif split_type == 'week_based_random':

    np.random.seed(42)
    weeks_list = data.year_week.unique().tolist()
    np.random.shuffle(weeks_list)
    random_train_weeks = sorted(weeks_list[0:int(len(weeks_list)*perc_training)])
    random_test_weeks =  sorted(weeks_list[int(len(weeks_list)*perc_training):])

    for i, sens in enumerate(list(counts_per_sensor.index)):
        subdata = data[data['node'] == sens] # here the indexes are still the same as in the original frame
        sens_map_train_indexes[sens] = subdata[subdata.year_week.isin(random_train_weeks)].index.tolist()
        sens_map_test_indexes[sens] = subdata[subdata.year_week.isin(random_test_weeks)].index.tolist()

    # Sanity checks. The index lists are plain Python lists here, so they are converted to
    # arrays before subtracting the offset (list - int raises TypeError).
    # They assume 'raspihat01' and 'raspihat03' are the 1st and 3rd nodes in sorted order.
    assert np.array_equal(sens_map_train_indexes['raspihat01'], np.asarray(sens_map_train_indexes['raspihat03']) - sensor_data_length*2)
    assert np.array_equal(sens_map_test_indexes['raspihat01'], np.asarray(sens_map_test_indexes['raspihat03']) - sensor_data_length*2)
    assert len(set(sens_map_train_indexes['raspihat01']).intersection(sens_map_test_indexes['raspihat01'])) == 0
    assert len(set(sens_map_train_indexes['raspihat01']).union(sens_map_test_indexes['raspihat01'])) == len(data[data['node'] == 'raspihat01'])
    # The training and test indexes are different for each sensor, and refer to the original dataframe

else:
    # Raised explicitly: a bare `assert False` is stripped when Python runs with -O
    raise ValueError('Unsupported split type')
    

In [None]:
# Some general information

# (x, y) coordinates of the windows: five with y = 0 and five with y = 9
window_coords = [(1, 0), (3, 0), (6, 0), (8, 0), (10, 0),
                 (1, 9), (3, 9), (6, 9), (8, 9), (10, 9)]

# Flatten the per-sensor training indexes into one list, sensor after sensor
all_train_idxs = []
for sens in sens_map_train_indexes:
    all_train_idxs.extend(sens_map_train_indexes[sens])

# Same for the test indexes
all_test_idxs = []
for sens in sens_map_test_indexes:
    all_test_idxs.extend(sens_map_test_indexes[sens])

# Sensor name -> (coord_x, coord_y), read from the first row of each sensor
node_map_coords = {}
for sens in sens_map_test_indexes:
    coord_x, coord_y = data[data['node'] == sens].iloc[0][['coord_x', 'coord_y']].values
    node_map_coords[sens] = (coord_x, coord_y)
    
# Sorted array of the distinct sensor names
all_sensors = np.unique(data['node'])

# The indexes are applied positionally (iloc); presumably they coincide with the
# row labels because `data` has a fresh 0..n-1 index at this point -- TODO confirm
all_training_data = data.iloc[all_train_idxs].reset_index(drop=True)
all_test_data = data.iloc[all_test_idxs].reset_index(drop=True)

def eucl_dist(x_0, y_0, x_1, y_1):
    """Return the Euclidean distance between (x_0, y_0) and (x_1, y_1).

    The coordinates may be scalars or array-likes that support arithmetic;
    in the latter case the distance is computed element-wise.
    """
    delta_x = x_0 - x_1
    delta_y = y_0 - y_1
    squared_norm = delta_x**2 + delta_y**2
    return np.sqrt(squared_norm)

# Number of training rows of sensor 'raspihat01'
training_data_ts = len(all_training_data[all_training_data['node'] == 'raspihat01'])

# Number of test rows of sensor 'raspihat01'
test_data_ts = len(all_test_data[all_test_data['node'] == 'raspihat01'])

# Names of the sensors used as references
ref_sensors = ['raspihat02','raspihat03','raspihat05']

# Points (0, 4), (1, 4), ..., (10, 4)
air_tube_coords = [(x, 4) for x in list(range(11))]

def array_rep(arr, n):
    """Repeat `arr` n times along its first axis.

    1-D input is tiled with np.tile and keeps its dtype. 2-D, 3-D and 4-D
    input is copied n times, block after block, into a new float array of
    shape (arr.shape[0] * n, *arr.shape[1:]). Input with more than four
    dimensions fails the assertion; 0-d input yields None.
    """
    rank = len(arr.shape)
    assert rank <= 4
    if rank == 0:
        return None
    if rank == 1:
        return np.tile(arr, n)

    block_len = arr.shape[0]
    retarr = np.full((block_len * n,) + tuple(arr.shape[1:]), -1.)
    for rep in range(n):
        start = rep * block_len
        retarr[start: start + block_len] = arr
    return retarr

In [None]:
###### Weighted distance utils functions

## Spatial distances 
# Module-level centre point. center_distance and center_dist_similarity in this
# cell define their own local `center`, so they do not read this variable.
center = (5,5)


import numpy as np
import math

## Spatial distances 

# Angular distance between two points
def angular_distance(p1, p2, symmetric=False):
    """Difference, in degrees, between the polar angles of p1 and p2.

    The angle of each point is measured about the origin with arctan2(y, x),
    so when p2 is (0, 0) the result is the angle of p1 wrt the x axis.

    Args:
        p1, p2: a single (x, y) point, or a sequence / array of (x, y)
            points. The two inputs are broadcast against each other.
        symmetric: when False (default) the historical value is returned:
            (angle(p1) - angle(p2)) mod 360, in [0, 360), which depends on
            the argument order. When True the smaller of the two directed
            differences is returned, in [0, 180].

    Returns:
        1-D array with one angle per point pair.
    """
    # atleast_2d also accepts plain lists of points, which cannot be sliced
    # with [:, 0] directly
    p1 = np.atleast_2d(np.asarray(p1))
    p2 = np.atleast_2d(np.asarray(p2))

    ang1 = np.arctan2(p1[:, 1], p1[:, 0])
    ang2 = np.arctan2(p2[:, 1], p2[:, 0])

    forward = np.rad2deg(np.mod(ang1 - ang2, 2 * np.pi))
    if not symmetric:
        # NOTE(review): the previous code took np.minimum of this value with
        # itself, i.e. a no-op; the wrapped difference was probably intended.
        # The default keeps the old values because the coefficients used in
        # this notebook were presumably fitted on them -- confirm before
        # switching the default.
        return forward

    backward = np.rad2deg(np.mod(ang2 - ang1, 2 * np.pi))
    return np.minimum(forward, backward)
    
# Euclidean distance between two points
def euclidean_distance(p1,p2):
    """Euclidean distance between p1 and p2.

    Each argument is a single (x, y) point or a sequence / array of (x, y)
    points; the two inputs are broadcast against each other. Returns a 1-D
    array with one distance per point pair.
    """
    # atleast_2d also accepts plain lists of points, which cannot be sliced
    # with [:, 0] directly
    p1 = np.atleast_2d(np.asarray(p1))
    p2 = np.atleast_2d(np.asarray(p2))
    return np.sqrt(np.add(np.subtract(p1[:,0],p2[:,0])**2, np.subtract(p1[:,1],p2[:,1])**2))

# Manhattan distance between two points
def manhattan_distance(p1,p2):
    """Manhattan (L1) distance between p1 and p2.

    Each argument is a single (x, y) point or a sequence / array of (x, y)
    points; the two inputs are broadcast against each other. Returns a 1-D
    array with one distance per point pair.
    """
    # atleast_2d also accepts plain lists of points, which cannot be sliced
    # with [:, 0] directly
    p1 = np.atleast_2d(np.asarray(p1))
    p2 = np.atleast_2d(np.asarray(p2))
    return np.abs(p1[:, 0] - p2[:,0]) + np.abs(p1[:,1] - p2[:,1])

# Chebyshev distance between two points
def chebyshev_distance(p1,p2):
    """Chebyshev (L-infinity) distance between p1 and p2.

    Each argument is a single (x, y) point or a sequence / array of (x, y)
    points; the two inputs are broadcast against each other. Returns a 1-D
    array with one distance per point pair.
    """
    # atleast_2d also accepts plain lists of points, which cannot be sliced
    # with [:, 0] directly
    p1 = np.atleast_2d(np.asarray(p1))
    p2 = np.atleast_2d(np.asarray(p2))
    return np.maximum(np.abs(p1[:, 0] - p2[:,0]), np.abs(p1[:,1] - p2[:,1]))

# Distance from the nearest window
def min_window_distance(p):
    """Euclidean distance from p to the closest of the ten window positions.

    `p` is a single (x, y) point or a collection of points; a single point is
    wrapped into a one-row array first. Returns a 1-D array with one distance
    per point.
    """
    windows = [
        [1., 0.], [3., 0.], [6., 0.], [8., 0.], [10., 0.],
        [1., 9.], [3., 9.], [6., 9.], [8., 9.], [10., 9.],
    ]
    if np.array(p).ndim == 1:
        p = np.array([p])

    # One row of distances per window, then the minimum over the windows
    per_window = []
    for window in windows:
        per_window.append(euclidean_distance(p, np.array([window])))
    return np.min(per_window, axis=0)

# Center distance
def center_distance(p):
    """Euclidean distance between p and the fixed centre point (5, 5).

    `p` is a single (x, y) point or a collection of points; a single point is
    wrapped into a one-row array first.
    """
    center_point = np.array([[5., 5.]])
    points = np.array([p]) if np.array(p).ndim == 1 else p
    return euclidean_distance(center_point, points)

# Window distance between two points
def window_dist_similarity(p1,p2):
    """Absolute difference between the nearest-window distances of p1 and p2.

    For each input (a single (x, y) point or a collection of points) the
    Euclidean distance to the closest of the ten window positions is computed;
    the result is |d(p1) - d(p2)|.
    """
    windows = [
        [1., 0.], [3., 0.], [6., 0.], [8., 0.], [10., 0.],
        [1., 9.], [3., 9.], [6., 9.], [8., 9.], [10., 9.],
    ]

    def _as_points(p):
        # A single point becomes a one-row array; anything else is left as is
        return np.array([p]) if np.array(p).ndim == 1 else p

    def _nearest_window(points):
        return np.min([euclidean_distance(points, np.array([w])) for w in windows], axis=0)

    first = _nearest_window(_as_points(p1))
    second = _nearest_window(_as_points(p2))
    return np.abs(first - second)

# Center distance between two points
def center_dist_similarity(p1,p2):
    """Absolute difference between the distances of p1 and p2 from (5, 5).

    Each input is a single (x, y) point or a collection of points; a single
    point is wrapped into a one-row array first.
    """
    center_point = np.array([[5., 5.]])

    def _as_points(p):
        # A single point becomes a one-row array; anything else is left as is
        return np.array([p]) if np.array(p).ndim == 1 else p

    first = euclidean_distance(center_point, _as_points(p1))
    second = euclidean_distance(center_point, _as_points(p2))
    return np.abs(first - second)


# Given all distances estimate the weighted distance
def weighted_distance(p1, p2):
    """Weighted sum of six distance features between p1 and p2.

    The features are, in order: angular, euclidean, manhattan, chebyshev,
    window-distance similarity and center-distance similarity. Each one is
    multiplied by a fixed coefficient and the products are summed.

    Returns a scalar when a single value results, otherwise a 1-D array.
    """

    def _as_pairs(points):
        # 2-D input is rebuilt row by row as an (n, 2) array; this unpacking
        # also fails if a row does not hold exactly two values
        if np.array(points).ndim == 2:
            return np.array([[x, y] for (x, y) in points])
        return points

    p1 = _as_pairs(p1)
    p2 = _as_pairs(p2)

    metrics = (angular_distance, euclidean_distance, manhattan_distance,
               chebyshev_distance, window_dist_similarity, center_dist_similarity)
    dists = np.array([metric(p1, p2) for metric in metrics])

    # One coefficient per feature, in the same order as `metrics`
    rank_weights = [0.008478247652264459, 1.3935710976695117, 0.39760921865944,
                    0.8009320148032265, 3.7056463223028255, -0.15417942722471556]

    res = np.dot(dists.T, rank_weights)

    return res[0] if len(res) == 1 else res


# Primitive operations used by gp_func. Each one applies the NumPy operation
# and then passes the result through np.nan_to_num, which replaces NaN (by 0.0
# unless stated otherwise) and +/-inf (by very large finite numbers).

def my_sqrt(arg1):
    """Square root with NaN/inf results replaced by finite numbers."""
    root = np.sqrt(arg1)
    return np.nan_to_num(root)

def my_log(arg1):
    """Natural logarithm with NaN/inf results replaced by finite numbers."""
    logarithm = np.log(arg1)
    return np.nan_to_num(logarithm)

def my_abs(arg1):
    """Absolute value with NaN/inf results replaced by finite numbers."""
    magnitude = np.abs(arg1)
    return np.nan_to_num(magnitude)

def my_neg(arg1):
    """Negation with NaN/inf results replaced by finite numbers."""
    negated = np.negative(arg1)
    return np.nan_to_num(negated)

def my_square(arg1):
    """Square with NaN/inf results replaced by finite numbers."""
    squared = np.square(arg1)
    return np.nan_to_num(squared)

def my_add(arg1, arg2):
    """Sum with NaN/inf results replaced by finite numbers."""
    total = np.add(arg1, arg2)
    return np.nan_to_num(total)

def my_sub(arg1, arg2):
    """Difference with NaN/inf results replaced by finite numbers."""
    difference = np.subtract(arg1, arg2)
    return np.nan_to_num(difference)

def my_mul(arg1, arg2):
    """Product with NaN/inf results replaced by finite numbers."""
    product = np.multiply(arg1, arg2)
    return np.nan_to_num(product)

def my_div(arg1, arg2):
    """Quotient with NaN/inf results replaced by finite numbers."""
    quotient = np.divide(arg1, arg2)
    return np.nan_to_num(quotient)

def my_pow(arg1, arg2):
    """Power with NaN/inf results replaced by finite numbers."""
    power = np.power(arg1, arg2)
    return np.nan_to_num(power)

def my_max(arg1, arg2):
    """Element-wise maximum; NaN results become sys.float_info.min.

    NOTE(review): sys.float_info.min is the smallest positive normalised
    float (about 2.2e-308), not the most negative float -- confirm this is
    the intended substitute.
    """
    largest = np.maximum(arg1, arg2)
    return np.nan_to_num(largest, nan=sys.float_info.min)

def my_min(arg1, arg2):
    """Element-wise minimum; NaN results become sys.float_info.max."""
    smallest = np.minimum(arg1, arg2)
    return np.nan_to_num(smallest, nan=sys.float_info.max)

def gp_func(ARG0,ARG1,ARG2,ARG3):
    """Evaluate a fixed expression built from the my_* primitives.

    The expression is
        max( max(ARG3, log(ARG1)) / (sqrt(c1) - (c2 + ARG1)) + max(c3, ARG3),
             max(c4, c5 / ARG0) )
    where c1..c5 are the numeric constants below. ARG2 is accepted but is not
    used by the expression.
    """
    numerator = my_max(ARG3, my_log(ARG1))
    denominator = my_sub(my_sqrt(0.28819287526174175), my_add(-0.8372387615502006, ARG1))
    ratio_term = my_add(my_div(numerator, denominator), my_max(0.4761407317066777, ARG3))
    inverse_term = my_max(-0.3152168742592161, my_div(0.8792291653581332, ARG0))
    return my_max(ratio_term, inverse_term)

# Given the distances between two points, estimate their combined distance with gp_func
def gp_distance(p1, p2):
    """Combine four distance features of (p1, p2) through gp_func.

    The features passed to gp_func are, in order: angular, euclidean,
    manhattan and chebyshev distance.

    Returns a scalar when a single value results, otherwise a 1-D array.
    """

    def _as_pairs(points):
        # 2-D input is rebuilt row by row as an (n, 2) array; this unpacking
        # also fails if a row does not hold exactly two values
        if np.array(points).ndim == 2:
            return np.array([[x, y] for (x, y) in points])
        return points

    p1 = _as_pairs(p1)
    p2 = _as_pairs(p2)

    features = [metric(p1, p2) for metric in (angular_distance, euclidean_distance,
                                              manhattan_distance, chebyshev_distance)]
    res = gp_func(*features)

    return res[0] if len(res) == 1 else res


In [None]:
# Dataframe that collects all the results
# (presumably one row per method and fold -- confirm against the cells that fill it)
results = pd.DataFrame(columns=['method', 'fold', 'median_error', '95_perc_error'])

In [None]:
# LSTM: Generating the dataset: TRAINING AND VALIDATION

ref_sensors_here = ['raspihat08','raspihat09','raspihat01','raspihat04']

eval_sensors = sorted(list(set(all_sensors) - set(ref_sensors_here)))
dataframe_training = all_training_data[all_training_data['node'].isin(eval_sensors)]

# Atemporal predictors; -111 marks a column that has not been filled yet
X_data = np.full((len(dataframe_training), 7), -111.)
# Temporal information
X_data[:, 0] = dataframe_training['moy_sin']
# X_data[:, 1] = dataframe_training['moy_cos']
X_data[:, 1] = dataframe_training['dow_sin']
# X_data[:, 3] = dataframe_training['dow_cos']
X_data[:, 2] = dataframe_training['seconds_from_midnight_sin']
X_data[:, 3] = dataframe_training['seconds_from_midnight_cos']

# # Distances from the reference sensors
# for i, ref_sensor in enumerate(ref_sensors_here):
#         X_coord_ref, Y_coord_ref = node_map_coords[ref_sensor]
#         X_data[:, 4+i] = eucl_dist(X_coord_ref, Y_coord_ref, dataframe_training['coord_x'], dataframe_training['coord_y'])

# # Mutual center distances
# for i, ref_sensor in enumerate(ref_sensors_here):
#     X_coord_ref, Y_coord_ref = node_map_coords[ref_sensor]
#     center_distance = eucl_dist(dataframe_training['coord_x'], dataframe_training['coord_y'], 5, 5)
#     ref_center_distance = eucl_dist(X_coord_ref, Y_coord_ref, 5, 5)
#     center_distance_similarity = abs(ref_center_distance - center_distance)
#     X_data[:, 9+i] = center_distance_similarity

# Min window distance
# X_data[:, 4] = np.min([eucl_dist(dataframe_training['coord_x'], dataframe_training['coord_y'], w_x, w_y) for (w_x, w_y) in window_coords], axis=0)
# # Center distance
# X_data[:, 10] = eucl_dist(dataframe_training['coord_x'], dataframe_training['coord_y'], 5, 5)

X_data[:, 4] = dataframe_training['coord_x']
X_data[:, 5] = dataframe_training['coord_y']

# Weighted distance (only wrt the first reference sensor)
for i, ref_sensor in enumerate(ref_sensors_here[0:1]):
    X_coord_ref, Y_coord_ref = node_map_coords[ref_sensor]
    ref_points = [(X_coord_ref, Y_coord_ref)]
    X_coord, Y_coord = dataframe_training['coord_x'].values, dataframe_training['coord_y'].values
    sens_points = list(zip(X_coord,Y_coord))
    X_data[:, 6+i] = gp_distance(ref_points,sens_points)


# Every column must have been filled
assert np.min(X_data) > -111.


# Now the temporal data
history_timesteps = 17 # so, a total of 18 values, 17 historical and 1 current

# Temperatures of the reference sensors, one column per sensor, in the order of ref_sensors_here
ref_temps = np.full((training_data_ts, len(ref_sensors_here)), -1.)
for col_idx, ref_sensor in enumerate(ref_sensors_here):
    ref_temps[:, col_idx] = all_training_data[all_training_data['node'] == ref_sensor]['temperature'].values

# For each row, the window made of the current value preceded by its history.
# Missing history (the first rows) is marked with NaN directly: using -1 as a
# padding sentinel and converting it afterwards would also erase genuine
# readings of exactly -1.0.
# NOTE(review): the history is taken from consecutive rows of the training set;
# with a random day/week split those rows are presumably not always consecutive
# in time -- confirm this is acceptable.
X_data_temporal_aux = np.full((training_data_ts, history_timesteps+1, len(ref_sensors_here)), np.nan)

for row in range(training_data_ts):
    history = ref_temps[max(0,row-history_timesteps):row+1, :]
    # Right-align the available values; the leading positions stay NaN
    X_data_temporal_aux[row, history_timesteps+1-history.shape[0]:, :] = history

# The same reference windows are repeated once per evaluated sensor
X_data_temporal = array_rep(X_data_temporal_aux, len(eval_sensors))
assert len(X_data_temporal) == len(dataframe_training)



# Now the y data

y_data = dataframe_training['temperature'].values





# Splitting into training and eval data (week based splitting)
np.random.seed(42)
weeks_list = dataframe_training.year_week.unique().tolist()
np.random.shuffle(weeks_list)
train_weeks = sorted(weeks_list[0:int(len(weeks_list)*perc_training)])
# val_weeks =  sorted(weeks_list[int(len(weeks_list)*perc_training):])



# Different weeks between training and validation...
# The mask is computed once; boolean indexing already returns new arrays
train_mask = dataframe_training['year_week'].isin(train_weeks).values

X_data_training = X_data[train_mask]
X_data_temporal_training = X_data_temporal[train_mask]
y_data_training = y_data[train_mask]

X_data_validation = X_data[~train_mask]
X_data_temporal_validation = X_data_temporal[~train_mask]
y_data_validation = y_data[~train_mask]


print(X_data_training.shape, X_data_validation.shape)
print(X_data_temporal_training.shape, X_data_temporal_validation.shape)



# Normalizing predictors (statistics are taken from the training part only)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_data_training=scaler.fit_transform(X_data_training)
X_data_validation=scaler.transform(X_data_validation)

# Per-feature min/max over samples and time steps, ignoring the NaN padding
X_data_temporal_training_min = np.nanmin(X_data_temporal_training, axis=(0, 1), keepdims=True)
X_data_temporal_training_max = np.nanmax(X_data_temporal_training, axis=(0, 1), keepdims=True)

# After scaling, the NaN padding is encoded as -1
X_data_temporal_training = np.nan_to_num((X_data_temporal_training - X_data_temporal_training_min) / (X_data_temporal_training_max - X_data_temporal_training_min), nan=-1.)
X_data_temporal_validation = np.nan_to_num((X_data_temporal_validation - X_data_temporal_training_min) / (X_data_temporal_training_max - X_data_temporal_training_min), nan=-1.)


In [None]:
# https://gist.github.com/Abhiswain97/b316dfbc0fec644009335f0748ff6a8d#file-pytorch-tpu-efficientnet-b5-tutorial-reference-ipynb
# https://www.kaggle.com/tanlikesmath/the-ultimate-pytorch-tpu-tutorial-jigsaw-xlm-r

# TPU imports

import warnings
import torch_xla
import torch_xla.debug.metrics as met
import torch_xla.distributed.data_parallel as dp
import torch_xla.distributed.parallel_loader as pl
import torch_xla.utils.utils as xu
import torch_xla.core.xla_model as xm
import torch_xla.distributed.xla_multiprocessing as xmp
import torch_xla.test.test_utils as test_utils
import warnings

warnings.filterwarnings("ignore")

# Other imports
import torch
import torchvision
import torchvision.transforms as transforms
import time

# https://www.kaggle.com/tanlikesmath/the-ultimate-pytorch-tpu-tutorial-jigsaw-xlm-r


In [None]:
import math 

def calcola_scarto(len_dataset, batch_s, n_tpu):
    """Return how many instances do not fit into full global batches.

    ("calcola scarto" = "compute the discard".) A global batch holds
    batch_s * n_tpu instances; the result is the remainder of len_dataset
    divided by that size. Integer arithmetic is used so the result is exact
    for any dataset length.
    """
    return len_dataset % (batch_s * n_tpu)

# Try every batch size from 100 to 4999 with 8 cores and record, for each one,
# the leftover instances in training (tr), in validation (val) and their sum
# ("prova_batch" = batch sizes to try, "risultati" = results)
prova_batch = list(range(100, 5000, 1))
risultati = []
for bs in prova_batch:
    tr = calcola_scarto (len(y_data_training), bs, 8)
    val = calcola_scarto (len(y_data_validation), bs, 8)
    risultati.append((bs, tr, val, tr+val))
    
# Show the candidates ordered by total leftover, smallest first
sorted(risultati, key=lambda tup: tup[3])

In [None]:
# Trying to determine how many TPUs to use, and the batch size, so to reduce the number of discarded data
import math


# Batch size; calcola_scarto multiplies it by the number of cores, i.e. it is
# treated there as a per-core batch size
BATCH_SIZE = 1024

# Number of TPU cores
NUM_TPU_CORES = 8


def calcola_scarto(len_dataset, batch_s, n_tpu):
    """Return how many instances do not fit into full global batches.

    ("calcola scarto" = "compute the discard".) A global batch holds
    batch_s * n_tpu instances; the result is the remainder of len_dataset
    divided by that size. Integer arithmetic is used so the result is exact
    for any dataset length.
    """
    return len_dataset % (batch_s * n_tpu)


# Report, for training and validation, how many instances are left over with the
# chosen batch size and number of cores, as a count and as a fraction rounded to
# two decimals (messages: "Instances that will be discarded in ...", "as a fraction")
print("Istanze che verranno scartate nel training:", calcola_scarto(len(y_data_training), BATCH_SIZE, NUM_TPU_CORES), "in frazione:", round(calcola_scarto(len(y_data_training), BATCH_SIZE, NUM_TPU_CORES)/len(y_data_training),2))
print("Istanze che verranno scartate nel validation:", calcola_scarto(len(y_data_validation), BATCH_SIZE, NUM_TPU_CORES), "in frazione:", round(calcola_scarto(len(y_data_validation), BATCH_SIZE, NUM_TPU_CORES)/len(y_data_validation),2))


In [None]:
# From numpy arrays to tensor Datasets


def _to_float_tensor(array):
    """Convert a numpy array to a float32 torch tensor."""
    return torch.from_numpy(array.astype('float32'))


# Each dataset yields (temporal predictors, atemporal predictors, target);
# the target is reshaped into a single column
trainset = torch.utils.data.TensorDataset(
    _to_float_tensor(X_data_temporal_training),
    _to_float_tensor(X_data_training),
    _to_float_tensor(y_data_training.reshape(-1, 1)),
)

validationset = torch.utils.data.TensorDataset(
    _to_float_tensor(X_data_temporal_validation),
    _to_float_tensor(X_data_validation),
    _to_float_tensor(y_data_validation.reshape(-1, 1)),
)


torch.set_printoptions(precision=6)


In [None]:
# Defining the neural network: temporal + atemporal

import torch.nn as nn
import torch.nn.functional as F
import random

import torch
from torch.nn.utils.rnn import PackedSequence
from typing import *



class VariationalDropout(nn.Module):
    """
    Applies the same dropout mask across the temporal dimension
    See https://arxiv.org/abs/1512.05287 for more details.
    Note that this is not applied to the recurrent activations in the LSTM like the above paper.
    Instead, it is applied to the inputs and outputs of the recurrent layer.

    Args:
        dropout: probability of zeroing a feature; values <= 0 disable the layer.
        batch_first: layout of dense inputs: (batch, time, features) when true,
            (time, batch, features) otherwise. Not used for PackedSequence inputs.
    """
    def __init__(self, dropout: float, batch_first: Optional[bool]=False):
        super().__init__()
        self.dropout = dropout
        self.batch_first = batch_first

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply one dropout mask per sequence, shared by all its time steps.

        `x` is a dense 3-D tensor or a PackedSequence; the same kind of object
        is returned. In eval mode, or when dropout <= 0, `x` is returned as is.
        Kept values are rescaled by 1 / (1 - dropout).
        """
        if not self.training or self.dropout <= 0.:
            return x

        keep_prob = 1 - self.dropout

        if isinstance(x, PackedSequence):
            data, batch_sizes = x.data, x.batch_sizes
            max_batch_size = int(batch_sizes[0])
            # One mask row per sequence of the batch
            m = data.new_empty(max_batch_size, data.size(-1), requires_grad=False).bernoulli_(keep_prob)
            # Packed data stores, for each time step, the first batch_sizes[t]
            # sequences one after the other; `slots` maps every packed row back
            # to the sequence (mask row) it belongs to.
            slots = torch.cat([torch.arange(int(n), device=data.device) for n in batch_sizes])
            data = data.masked_fill(m[slots] == 0, 0) / keep_prob
            return PackedSequence(data, batch_sizes, x.sorted_indices, x.unsorted_indices)

        # Drop same mask across entire sequence: the mask has size 1 along the
        # time axis, so it is broadcast over all time steps
        if self.batch_first:
            m = x.new_empty(x.size(0), 1, x.size(2), requires_grad=False).bernoulli_(keep_prob)
        else:
            # Time-major layout: the batch dimension is the second one
            m = x.new_empty(1, x.size(1), x.size(2), requires_grad=False).bernoulli_(keep_prob)
        return x.masked_fill(m == 0, 0) / keep_prob
        


class LSTM(nn.LSTM):
    """nn.LSTM with custom weight initialisation and three optional dropouts.

    Extra keyword arguments (all other arguments are forwarded to nn.LSTM):
        dropouti: VariationalDropout probability applied to the input sequence.
        dropoutw: dropout probability applied to the hidden-to-hidden weights.
        dropouto: VariationalDropout probability applied to the output sequence.
        batch_first: forwarded to nn.LSTM and to both VariationalDropout layers
            (defaults to True here).
        unit_forget_bias: when true, biases are zeroed and the slice
            [hidden_size:2*hidden_size] of each bias is set to 1.
    """
    def __init__(self, *args, dropouti: float=0.,
                 dropoutw: float=0., dropouto: float=0.,
                 batch_first=True, unit_forget_bias=True, **kwargs):
        super().__init__(*args, **kwargs, batch_first=batch_first)
        self.unit_forget_bias = unit_forget_bias
        self.dropoutw = dropoutw
        self.input_drop = VariationalDropout(dropouti,
                                             batch_first=batch_first)
        self.output_drop = VariationalDropout(dropouto,
                                              batch_first=batch_first)
        self._init_weights()

    def _init_weights(self):
        """
        Use orthogonal init for recurrent layers, xavier uniform for input layers
        Bias is 0 except for forget gate

        The bias rule applies only when unit_forget_bias is true; otherwise the
        biases keep the values they had after nn.LSTM's own initialisation.
        """
        for name, param in self.named_parameters():
            if "weight_hh" in name:
                nn.init.orthogonal_(param.data)
            elif "weight_ih" in name:
                nn.init.xavier_uniform_(param.data)
            elif "bias" in name and self.unit_forget_bias:
                nn.init.zeros_(param.data)
                # NOTE(review): this runs for every parameter whose name contains
                # "bias"; if the layer has both an input and a hidden bias, the
                # effective forget-gate bias is presumably 2 -- confirm
                param.data[self.hidden_size:2 * self.hidden_size] = 1

    def _drop_weights(self):
        """Apply dropout (probability dropoutw) to the hidden-to-hidden weights."""
        # NOTE(review): the dropped-out tensor is written back into the
        # parameter's .data, so weights zeroed here are not restored after the
        # forward pass. With dropoutw > 0 this looks destructive -- confirm
        # before enabling weight dropout.
        for name, param in self.named_parameters():
            if "weight_hh" in name:
                getattr(self, name).data = \
                    torch.nn.functional.dropout(param.data, p=self.dropoutw,
                                                training=self.training).contiguous()

    def forward(self, input, hx=None):
        """Run the LSTM with weight, input and output dropout applied.

        Returns (output sequence after output dropout, final state), as
        returned by nn.LSTM.forward.
        """
        self._drop_weights()
        input = self.input_drop(input)
        seq, state = super().forward(input, hx=hx)
        return self.output_drop(seq), state
    
    
    
    
    
class Net(nn.Module):
    """Regression network with a temporal (LSTM) and an atemporal (dense) branch.

    The last LSTM output and the dense features are concatenated and passed
    through two fully connected layers that produce a single value per sample.

    Args (the defaults reproduce the sizes previously hard-coded here):
        temporal_features: number of features per time step of the temporal input.
        atemporal_features: number of features of the atemporal input.
        lstm_hidden: hidden size of the LSTM.
        atemporal_hidden: width of the atemporal dense layer.
        combined_hidden: width of the first layer after concatenation.
        dropout: dropout probability applied to the concatenated features.
    """

    def __init__(self, temporal_features=4, atemporal_features=7,
                 lstm_hidden=128, atemporal_hidden=64, combined_hidden=128,
                 dropout=0.1):
        super().__init__()

        # Width of the concatenation of the two branches
        combined_features = lstm_hidden + atemporal_hidden

        # Layers are created in a fixed order so that a given seed always
        # yields the same initial weights.

        # Temporal part
        self.lstm_1 = LSTM(input_size=temporal_features, hidden_size=lstm_hidden,
                           num_layers=1, bidirectional=False, batch_first=True)
        self.ln_1 = nn.LayerNorm(lstm_hidden)

        # Atemporal part
        self.fc_1 = nn.Linear(atemporal_features, atemporal_hidden)
        self.bn_1 = nn.BatchNorm1d(atemporal_hidden)

        # Combined part
        self.bn_comb_0 = nn.BatchNorm1d(combined_features)
        self.drop_comb_0 = nn.Dropout(dropout)
        self.fc_comb_1 = nn.Linear(combined_features, combined_hidden)
        self.bn_comb_1 = nn.BatchNorm1d(combined_hidden)
        self.fc_comb_2 = nn.Linear(combined_hidden, 1)


    def forward(self, x, y):
        """Return one prediction per sample.

        Args:
            x: temporal input, batch-first (the LSTM is built with batch_first=True).
            y: atemporal input, one row per sample.
        """

        # Temporal part: keep only the output of the last time step
        x, _ = self.lstm_1(x)
        x = x[:, -1, :]
        x = self.ln_1(x)

        # Atemporal part
        y = self.bn_1(F.relu(self.fc_1(y)))

        # Combined part
        z = torch.cat((x, y), 1)
        z = self.drop_comb_0(self.bn_comb_0(z))
        z = self.bn_comb_1(F.relu(self.fc_comb_1(z)))
        z = self.fc_comb_2(z)

        return z

    
# Helper that (re)builds the model from scratch with fixed random seeds
# https://pytorch.org/docs/stable/notes/randomness.html
def reinit_model():
    """Seed torch, numpy and random with 42, then return a new Net instance."""
    for seed_everything in (torch.manual_seed, np.random.seed, random.seed):
        seed_everything(42)
    return Net()



model = reinit_model()

print(model)

In [None]:
# Defining some helper functions for the model that guide the training
import math

def sigmoid(x):
    """Element-wise logistic sigmoid of a tensor.

    torch.sigmoid is used instead of 1 / (1 + exp(-x)): for large negative
    inputs exp(-x) overflows to inf, and back-propagating through the
    hand-written formula then yields NaN gradients.
    """
    return torch.sigmoid(x)


def reduce_fn_avg(vals):
    """Return the arithmetic mean of `vals`.

    Raises ZeroDivisionError when `vals` is empty.
    """
    total = sum(vals)
    return total / len(vals)


def reduce_fn_sum(vals):
    """Return the sum of `vals` (0 for an empty collection)."""
    # take sum
    return sum(vals)




def _run(model, EPOCHS, param, training_data_in, validation_data_in=None):
    """Train ``model`` on the XLA device of the calling process.

    Intended to be executed once per process (see the ``xmp.spawn`` calls in
    the notebook); every process trains on its own shard of the data and the
    per-epoch metrics are averaged across processes with ``xm.mesh_reduce``.

    Args:
        model: network called as ``model(temporal, atemporal)``.
        EPOCHS: number of passes over ``training_data_in``.
        param: tuning parameter of the current run; not used inside this function.
        training_data_in: dataset yielding ``(temporal, atemporal, labels)`` triples.
        validation_data_in: optional dataset with the same layout; when given,
            validation metrics are computed after every epoch.

    Side effects:
        * writes the model weights to ``./nnet_model_physio.pt`` (``xm.save``);
        * writes the metrics history (keys ``loss``/``mae``/``mse`` and their
          ``val_`` counterparts, one entry per epoch) to ``training_history``;
        * uses the file ``xm_seed`` to store/restore the XLA RNG state.
    """

    xm.set_rng_state(42)
    xm.save(xm.get_rng_state(), 'xm_seed')

    def _reduce_epoch_metrics(running_loss, running_mse, running_mae, running_instances):
        # Average over the instances seen by this process, then average those
        # per-process values across all processes.
        loss_reduced = xm.mesh_reduce('loss_reduced', running_loss / running_instances, reduce_fn_avg)
        mse_reduced = xm.mesh_reduce('mse_reduce', running_mse / running_instances, reduce_fn_avg)
        mae_reduced = xm.mesh_reduce('mae_reduce', running_mae / running_instances, reduce_fn_avg)
        return {'loss': loss_reduced,
                'mse': mse_reduced,
                'mae': mae_reduced
               }

    def train_fn(train_dataloader, model, optimizer, criterion, device, lr_scheduler=None):
        """Run one training epoch and return its reduced loss/mse/mae."""

        # Restore the RNG state stored at the end of the previous epoch (or at start-up)
        xm.set_rng_state(torch.load('xm_seed'), device=device)

        xm.master_print(xm.get_rng_state())

        running_loss = 0.
        running_mae = 0.
        running_mse = 0.
        running_instances = 0.

        # train() switches layers such as Dropout and BatchNorm to their
        # training-time behaviour
        model.train()

        for temporal, atemporal, labels in train_dataloader:

            optimizer.zero_grad() # need to zero out the gradients every time, otherwise they accumulate
            temporal = temporal.to(device) # transfer the data to the computing device
            atemporal = atemporal.to(device) # transfer the data to the computing device
            labels = labels.to(device) # transfer the labels to the computing device

            outputs = model(temporal, atemporal)

            loss = criterion(outputs, labels)

            loss.backward() # calculate the gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            xm.optimizer_step(optimizer) # update the network weights

            # The criterion returns a batch mean: weight it by the batch size
            running_loss += loss.item()*len(labels)
            running_instances += len(labels)

            predicted = outputs.detach()

            running_mse += torch.square(labels - predicted).sum().item()
            running_mae += torch.abs(labels - predicted).sum().item()

            if lr_scheduler is not None:
                lr_scheduler.step()

        retval = _reduce_epoch_metrics(running_loss, running_mse, running_mae, running_instances)

        xm.save(xm.get_rng_state(), 'xm_seed')

        return retval

    def valid_fn(valid_dataloader, model, criterion, device):
        """Evaluate on the validation data and return the reduced loss/mse/mae.

        The RNG state is saved on entry and restored on exit.
        """

        xm.save(xm.get_rng_state(), 'xm_seed')

        running_loss = 0.
        running_mae = 0.
        running_mse = 0.
        running_instances = 0.

        # eval() switches layers such as Dropout and BatchNorm to their
        # inference-time behaviour
        model.eval()

        for temporal, atemporal, labels in valid_dataloader:

            temporal = temporal.to(device)
            atemporal = atemporal.to(device)
            labels = labels.to(device)

            outputs = model(temporal, atemporal)

            loss = criterion(outputs, labels)

            running_loss += loss.item()*len(labels)
            running_instances += len(labels)

            predicted = outputs.detach()

            # Only local sums are accumulated here; the single cross-process
            # reduction happens once, after the loop.
            running_mse += torch.square(labels - predicted).sum().item()
            running_mae += torch.abs(labels - predicted).sum().item()

        retval = _reduce_epoch_metrics(running_loss, running_mse, running_mae, running_instances)

        xm.set_rng_state(torch.load('xm_seed'), device=device)
        xm.master_print(xm.get_rng_state())

        return retval

    # Defining distributed samplers and data loaders
    train_sampler = torch.utils.data.distributed.DistributedSampler(training_data_in,
                                                                    num_replicas=xm.xrt_world_size(), #numcores
                                                                    rank=xm.get_ordinal(),
                                                                    shuffle=True)

    train_dataloader = torch.utils.data.DataLoader(training_data_in, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=1) # only for GPUs num_workers=2, pin_memory=True)

    has_validation = validation_data_in is not None

    if has_validation:
        validation_sampler = torch.utils.data.distributed.DistributedSampler(validation_data_in,
                                                                             num_replicas=xm.xrt_world_size(),
                                                                             rank=xm.get_ordinal(),
                                                                             shuffle=True)

        validation_dataloader = torch.utils.data.DataLoader(validation_data_in, batch_size=BATCH_SIZE, sampler=validation_sampler, num_workers=1)

    # Defining the handle to the TPU device
    device = xm.xla_device()
    # Transferring the model to the computing device
    model.to(device)
    # Defining the loss function
    criterion = nn.MSELoss()

    # Defining optimizer
    import torch.optim as optim
    optimizer = optim.Adam(model.parameters(), lr=9e-5, amsgrad=False) #3e-4
    lr_scheduler = None

    # Training code

    metrics_history = {"loss":[], "mae":[], "mse":[], "val_loss":[], "val_mae":[], "val_mse":[]}

    train_begin = time.time()
    for epoch in range(EPOCHS):
        start = time.time()
        para_loader = pl.ParallelLoader(train_dataloader, [device], fixed_batch_size=True) # needed for parallel training

        xm.master_print("EPOCH:", epoch+1)

        train_metrics = train_fn(train_dataloader=para_loader.per_device_loader(device),
                                 model=model,
                                 optimizer=optimizer,
                                 criterion=criterion,
                                 device=device,
                                 lr_scheduler=lr_scheduler)

        metrics_history["loss"].append(train_metrics["loss"])
        metrics_history["mae"].append(train_metrics["mae"])
        metrics_history["mse"].append(train_metrics["mse"])

        if has_validation:
            # Calculate the metrics on the validation data, in the same way as done for training
            with torch.no_grad(): # don't keep track of the info necessary to calculate the gradients
                para_loader = pl.ParallelLoader(validation_dataloader, [device], fixed_batch_size=True)

                val_metrics = valid_fn(valid_dataloader=para_loader.per_device_loader(device),
                                       model=model,
                                       criterion=criterion,
                                       device=device)

                metrics_history["val_loss"].append(val_metrics["loss"])
                metrics_history["val_mae"].append(val_metrics["mae"])
                metrics_history["val_mse"].append(val_metrics["mse"])

            xm.master_print("  > Training/validation loss:", round(train_metrics['loss'], 4), round(val_metrics['loss'], 4))
            xm.master_print("  > Training/validation mae:", round(train_metrics['mae'], 4), round(val_metrics['mae'], 4))
            xm.master_print("  > Training/validation mse:", round(train_metrics['mse'], 4), round(val_metrics['mse'], 4))
        else:
            # Each line is labelled with the metric it actually reports
            xm.master_print("  > Training loss:", round(train_metrics['loss'], 4))
            xm.master_print("  > Training mae:", round(train_metrics['mae'], 4))
            xm.master_print("  > Training mse:", round(train_metrics['mse'], 4))

        xm.master_print("Completed in:", round(time.time() - start, 1), "seconds \n")

    xm.master_print("Training completed in:", round((time.time()- train_begin)/60, 1), "minutes")

    # Save the model weights
    xm.save(
        model.state_dict(), './nnet_model_physio.pt'
    )

    # Save the metrics history
    xm.save(metrics_history, 'training_history')
    
    
    

# Prediction function, it runs on the CPU
def predict_(model, data_in):
    """Predict on ``data_in`` with the weights stored in ``./nnet_model_physio.pt``.

    Args:
        model: network called as ``model(temporal, atemporal)``; its weights are
            overwritten with the ones loaded from the checkpoint file.
        data_in: dataset yielding ``(temporal, atemporal)`` pairs.

    Returns:
        ``np.ndarray`` with one entry per sample, in dataset order (the loader
        does not shuffle).
    """
    import math

    data_dataloader = torch.utils.data.DataLoader(data_in, batch_size=BATCH_SIZE, shuffle=False, num_workers=1)

    # map_location keeps the loaded tensors on the CPU, whatever device they were saved from
    model.load_state_dict(torch.load('./nnet_model_physio.pt', map_location='cpu'))

    model.eval()

    predictions = []
    n_batches = math.ceil(len(data_in)/BATCH_SIZE)
    # The context manager closes the progress bar even if a batch raises
    with torch.no_grad(), tqdm(desc="Minibatches: ", total=n_batches) as pbar:
        for temporal, atemporal in data_dataloader:
            outputs = model(temporal, atemporal)
            predictions.extend(outputs.detach().numpy())
            pbar.update(1)

    return np.asarray(predictions)


In [None]:
# Start the actual training process, WITH the validation data
import pickle

def smooth_curve(points, factor=0.5):
    """Exponentially smooth ``points``.

    The first value is kept as is; every following value becomes
    ``previous_smoothed * factor + value * (1 - factor)``.
    Returns a new list with the same length as ``points``.
    """
    smoothed = []
    for value in points:
        if not smoothed:
            smoothed.append(value)
            continue
        smoothed.append(smoothed[-1]*factor + value*(1-factor))
    return smoothed


# Values of the hyper-parameter under study; one full training run per value
tuning_params = ['dummy']


EPOCHS = 200


print(tuning_params)
tuning_results = []

for param in tuning_params:

    print(param)

    model = reinit_model() # reinitialize the model before training

    def _mp_fn(rank, flags):
        # Entry point executed by every spawned process
        torch.set_default_tensor_type('torch.FloatTensor')
        _run(model, EPOCHS, param, training_data_in=trainset, validation_data_in=validationset)
        if rank == 0:
            time.sleep(2)

    FLAGS = {}
    xmp.spawn(_mp_fn, args=(FLAGS,), nprocs=NUM_TPU_CORES, start_method='fork')
    # _run stores the per-epoch metrics in this file
    training_history = torch.load('training_history')

    # Smooth every curve once and reuse it below
    smoothed = {key: smooth_curve(values) for key, values in training_history.items()}

    print("Parameter", param)
    # loss, MSE and MAE are all errors: the best epoch is the one with the minimum value
    for label, key in (("loss", 'loss'), ("MSE", 'mse'), ("MAE", 'mae')):
        train_curve = smoothed[key]
        val_curve = smoothed['val_' + key]
        print("Minimum " + label + " (smoothed) training/validation:",
              np.round(np.min(train_curve), 4), np.round(np.min(val_curve), 4),
              "at indexes", np.argmin(train_curve), np.argmin(val_curve))
    print("\n")

    # One row per parameter: the best (minimum) smoothed validation value of each metric
    tuning_results.append([param,
                           training_history,
                           np.round(np.min(smoothed['val_loss']), 4),
                           np.round(np.min(smoothed['val_mse']), 4),
                           np.round(np.min(smoothed['val_mae']), 4)])

    # Checkpoint the partial results after every run
    with open('./tuning_results', 'wb') as pickle_file:
        pickle.dump(tuning_results, pickle_file)


tuning_results = pd.DataFrame(tuning_results, columns=['params', 'training_history', 'val_loss', 'val_mse', 'val_mae'])

with open('./tuning_results', 'wb') as pickle_file:
    pickle.dump(tuning_results, pickle_file)

In [None]:
# Plot some statistics regarding the training process

import matplotlib.pyplot as plt


# Number of initial epochs sliced off every curve before plotting
start_epoch = 10


def smooth_curve(points, factor=0.5):
    """Return the exponentially smoothed version of ``points`` as a new list.

    The first point is copied unchanged; each later point is blended with the
    previous smoothed value as ``previous*factor + point*(1-factor)``.
    """
    result = []
    for current in points:
        if result:
            current = result[-1]*factor + current*(1-factor)
        result.append(current)
    return result



plt.figure(figsize=(20, 10))

# One panel per metric: smoothed training and validation curves, skipping the
# first `start_epoch` epochs
for subplot_position, metric, title in ((221, 'loss', "Loss"),
                                        (222, 'mse', "Mean Squared Error"),
                                        (223, 'mae', "Mean Absolute Error")):
    train_curve = training_history[metric][start_epoch:]
    val_curve = training_history['val_' + metric][start_epoch:]

    plt.subplot(subplot_position)
    plt.plot(range(1, len(train_curve)+1), smooth_curve(train_curve), label="training")
    plt.plot(range(1, len(val_curve)+1), smooth_curve(val_curve), label="validation")
    plt.legend()
    plt.grid()
    plt.xlabel("epochs")
    plt.ylabel("value")
    plt.title(title)

plt.show()

# Best (minimum) value of every metric and the epoch index where it occurs;
# each line is labelled with the metric it actually reports
summary_metrics = (("loss", 'loss'), ("MAE", 'mae'), ("MSE", 'mse'))

for label, metric in summary_metrics:
    train_curve = smooth_curve(training_history[metric])
    val_curve = smooth_curve(training_history['val_' + metric])
    print("Minimum " + label + " (smoothed) training/validation:",
          np.round(np.min(train_curve), 4), np.round(np.min(val_curve), 4),
          "at indexes", np.argmin(train_curve), np.argmin(val_curve))
print("\n")
for label, metric in summary_metrics:
    train_curve = training_history[metric]
    val_curve = training_history['val_' + metric]
    print("Minimum " + label + " (NON smoothed) training/validation:",
          np.round(np.min(train_curve), 4), np.round(np.min(val_curve), 4),
          "at indexes", np.argmin(train_curve), np.argmin(val_curve))


In [None]:
# LSTM: final model and predictions

import random as rn
# Fixed seeds for NumPy and the stdlib RNG
np.random.seed(42)
rn.seed(42)


# Remove the rows written by previous LSTM runs, so that re-running this cell
# does not accumulate duplicates in `results`
to_delete = [method for method in results['method'] if 'lstm' in method]
keep_mask = ~results['method'].isin(to_delete)
results = results[keep_mask].reset_index(drop=True)


# Reference sensors: their readings are the model inputs; every other sensor is evaluated
ref_sensors_here = ['raspihat08','raspihat09','raspihat01','raspihat04']

eval_sensors = sorted(set(all_sensors).difference(ref_sensors_here))

# Calendar/time-of-day encodings shared by all sensors, read from the rows of 'raspihat01'
# (the moy_cos and dow_cos encodings are intentionally left out)
time_feature_columns = ['moy_sin', 'dow_sin', 'seconds_from_midnight_sin', 'seconds_from_midnight_cos']

# Data that is common for each fold (training); -111. marks cells not filled yet
common_data_training = np.full((training_data_ts, 4), -111.)
training_rows_raspihat01 = all_training_data[all_training_data['node'] == 'raspihat01']
for col_idx, col_name in enumerate(time_feature_columns):
    common_data_training[:, col_idx] = training_rows_raspihat01[col_name]


# Data that is common for each fold (test)
common_data_test = np.full((test_data_ts, 4), -111.)
test_rows_raspihat01 = all_test_data[all_test_data['node'] == 'raspihat01']
for col_idx, col_name in enumerate(time_feature_columns):
    common_data_test[:, col_idx] = test_rows_raspihat01[col_name]



# Atemporal training matrix: one row per (evaluated sensor, time step)
fold_sensors = sorted(eval_sensors)
fold_data_training = all_training_data[all_training_data['node'].isin(fold_sensors)]

X_train_data_fold = np.full((training_data_ts*len(fold_sensors), 7), -111.)

# Columns 0-3: the common data, replicated for every evaluated sensor
X_train_data_fold[:, 0:4] = array_rep(common_data_training, len(fold_sensors))

# Other spatial features (distances from the reference sensors, mutual center
# distances, minimum window distance) were tried and are currently disabled.

# Columns 4-5: X and Y coord
X_train_data_fold[:, 4] = fold_data_training['coord_x']
X_train_data_fold[:, 5] = fold_data_training['coord_y']

# Column 6: weighted distance (gp_distance) from the first reference sensor only
sens_points = list(zip(fold_data_training['coord_x'].values, fold_data_training['coord_y'].values))
for i, ref_sensor in enumerate(ref_sensors_here[:1]):
    X_coord_ref, Y_coord_ref = node_map_coords[ref_sensor]
    ref_points = [(X_coord_ref, Y_coord_ref)]
    X_train_data_fold[:, 6+i] = gp_distance(ref_points, sens_points)


# Fails if a placeholder value (-111.) is still present
assert X_train_data_fold.min() > -111.




# Now the temporal data (training and test)
history_timesteps = 17 # so, a total of 18 values, 17 historical and 1 current


def _reference_history_windows(sensor_data, n_timesteps, history_timesteps, ref_sensors):
    """Build the sliding windows of reference temperatures.

    For every time step ``t`` the window holds the temperatures of each
    reference sensor at steps ``t - history_timesteps .. t``. Positions that
    would fall before the first time step are NaN. Padding with NaN directly
    (instead of a numeric sentinel converted to NaN afterwards) means a genuine
    reading can never be mistaken for padding.

    Args:
        sensor_data: frame with 'node' and 'temperature' columns.
        n_timesteps: number of rows each sensor has in ``sensor_data``.
        history_timesteps: number of past steps in every window.
        ref_sensors: node names, one per output channel (in this order).

    Returns:
        Array of shape ``(n_timesteps, history_timesteps + 1, len(ref_sensors))``.
    """
    n_refs = len(ref_sensors)

    ref_temps = np.full((n_timesteps, n_refs), np.nan)
    for col_idx, ref_sensor in enumerate(ref_sensors):
        ref_temps[:, col_idx] = sensor_data[sensor_data['node'] == ref_sensor]['temperature'].values

    # Prepend `history_timesteps` NaN rows: window position `offset` of step t
    # is then simply row `t + offset` of the padded array
    padded = np.vstack([np.full((history_timesteps, n_refs), np.nan), ref_temps])

    windows = np.empty((n_timesteps, history_timesteps+1, n_refs))
    for offset in range(history_timesteps+1):
        windows[:, offset, :] = padded[offset:offset+n_timesteps]

    return windows


# Training: the same reference windows are replicated for every evaluated sensor
X_data_temporal_aux = _reference_history_windows(all_training_data, training_data_ts,
                                                 history_timesteps, ref_sensors_here)
X_data_temporal_training = array_rep(X_data_temporal_aux, len(eval_sensors))


# Test: a single copy, reused for every evaluated sensor
X_data_temporal_aux = _reference_history_windows(all_test_data, test_data_ts,
                                                 history_timesteps, ref_sensors_here)
X_data_temporal_test = X_data_temporal_aux





# Normalizing predictors
from sklearn.preprocessing import MinMaxScaler
# Atemporal predictors: min-max scaling fitted on the training matrix
scaler = MinMaxScaler().fit(X_train_data_fold)
X_train_data_fold = scaler.transform(X_train_data_fold)

# Temporal predictors: per-channel min/max taken on the training windows
# (NaN entries ignored) and applied to both training and test windows
X_data_temporal_training_min = np.nanmin(X_data_temporal_training, axis=(0, 1), keepdims=True)
X_data_temporal_training_max = np.nanmax(X_data_temporal_training, axis=(0, 1), keepdims=True)


def _scale_temporal(windows):
    # NaN entries are replaced by -1. after scaling
    scaled = (windows - X_data_temporal_training_min) / (X_data_temporal_training_max - X_data_temporal_training_min)
    return np.nan_to_num(scaled, nan=-1.)


X_data_temporal_training = _scale_temporal(X_data_temporal_training)
X_data_temporal_test = _scale_temporal(X_data_temporal_test)





# Train the model

# Wrap the training arrays as float32 tensors: temporal input, atemporal input, labels
train_temporal = torch.from_numpy(X_data_temporal_training.astype('float32'))
train_atemporal = torch.from_numpy(X_train_data_fold.astype('float32'))
train_labels = torch.from_numpy(fold_data_training['temperature'].values.astype('float32').reshape(-1, 1))

trainset = torch.utils.data.TensorDataset(train_temporal, train_atemporal, train_labels)

import random as rn
import os

# Fixed seeds for NumPy, the stdlib RNG and Python hashing
np.random.seed(42)
rn.seed(42)
os.environ['PYTHONHASHSEED'] = str(42)


EPOCHS = 90

model = reinit_model() # reinitialize the model before training


def _mp_fn(rank, flags):
    # Entry point executed by every spawned process; no validation data for the final fit
    torch.set_default_tensor_type('torch.FloatTensor')
    _run(model, EPOCHS, 'dummy', training_data_in=trainset, validation_data_in=None)
    if rank == 0:
        time.sleep(2)


FLAGS = {}
xmp.spawn(_mp_fn, args=(FLAGS,), nprocs=NUM_TPU_CORES, start_method='fork')



# Now I evaluate on each sensor separately
# One "fold" per evaluated sensor: build its test predictors, predict, store the errors
for sensor in tqdm(eval_sensors, total=len(eval_sensors), desc="FOLDS done"):
    fold_data_test = all_test_data[all_test_data['node'] == sensor]

    # -111. marks cells not filled yet
    X_test_data_fold = np.full((test_data_ts, 7), -111.)

    # Columns 0-3: the common data
    X_test_data_fold[:, 0:4] = common_data_test

    # Other spatial features (distances from the reference sensors, mutual center
    # distances, minimum window distance) were tried and are currently disabled.

    # Columns 4-5: X and Y coord
    X_test_data_fold[:, 4] = fold_data_test['coord_x']
    X_test_data_fold[:, 5] = fold_data_test['coord_y']

    # Column 6: weighted distance (gp_distance) from the first reference sensor only
    for i, ref_sensor in enumerate(ref_sensors_here[0:1]):
        X_coord_ref, Y_coord_ref = node_map_coords[ref_sensor]
        ref_points = [(X_coord_ref, Y_coord_ref)]
        X_coord, Y_coord = fold_data_test['coord_x'].values, fold_data_test['coord_y'].values
        sens_points = list(zip(X_coord, Y_coord))
        X_test_data_fold[:, 6+i] = gp_distance(ref_points, sens_points)

    # Fails if a placeholder value (-111.) is still present
    assert np.min(X_test_data_fold) > -111.

    # Normalizing predictors with the scaler fitted on the training data
    X_test_data_fold = scaler.transform(X_test_data_fold)

    # Predict the values, and evaluate the errors
    testset = torch.utils.data.TensorDataset(torch.from_numpy(X_data_temporal_test.astype('float32')), # temporal
                                             torch.from_numpy(X_test_data_fold.astype('float32'))) # atemporal

    fold_preds = predict_(model, testset).flatten()

    errors = np.abs(fold_data_test['temperature'].values - fold_preds)

    median_error = np.percentile(errors, 50)
    p95_error = np.percentile(errors, 95)
    results.loc[len(results)] = ['lstm_' + str(history_timesteps), sensor, median_error, p95_error]

results.sort_values(by=['method', 'fold'], inplace=True)

# The context manager closes the file even if pickling fails
with open('results_nnets.pickle', "wb") as pickle_out:
    pickle.dump(results, pickle_out, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Percentile (across sensors) of the per-sensor 95th-percentile error to report
perc = 50

print(np.percentile(results['95_perc_error'], perc))


import pickle
# NOTE: pickle.load can execute arbitrary code; only load result files you produced yourself
with open('results.pickle', 'rb') as pickle_file:
    res_other = pickle.load(pickle_file)
print(np.percentile(res_other[res_other['method'] == 'xgboost']['95_perc_error'], perc))

# Results of this notebook together with the ones of the other methods
prova = pd.concat([results, res_other])

# Sorted unique method names, computed once and reused for the data and the tick labels
methods = np.unique(prova['method'])

plt.figure(figsize=(15,10))
res_list = [prova[prova['method'] == method]['95_perc_error'].values for method in methods]
plt.boxplot(res_list)
plt.title("95th percentile error per method")
plt.ylabel("Error (temperature)")
plt.xlabel("Method")
# Box positions start at 1
plt.xticks(np.arange(1, len(methods) + 1), methods, rotation=90)
plt.show()