## Imports and Setup - Data prep


In [32]:
%%writefile "ts_utils.py"

# DO NOT EDIT THIS FILE - GENERATED FROM 02_ts_utils.ipynb

import tensorflow as tf
import tensorflow.keras as keras
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import pickle

mpl.rcParams['figure.figsize'] = (14, 4)
mpl.rcParams['axes.grid'] = True

#--------------------------------------------------------------------------------
'''
    Load the data you prepared - you must have run 01_ts_dataprep 
'''
def load_file(file='../data/jena_climate_2009_2016.csv.zip'):
    """Load the files produced by 01_ts_dataprep for the base path ``file``.

    Four files are read; each name is ``file`` with a suffix appended:
    ``.csv``, ``.trn.csv``, ``.tst.csv`` and ``.scaler.pkl``.

    Args:
        file: Base path (prefix) shared by the four files.

    Returns:
        Tuple ``(df, df_scaled_trn, df_scaled_tst, scaler)`` where ``df`` has
        its 'Date Time' column parsed to datetimes, the two other frames are
        read as-is, and ``scaler`` is the unpickled object.

    Raises:
        FileNotFoundError: If any of the four files is missing.
    """
    df = pd.read_csv(file + ".csv")
    df['Date Time'] = pd.to_datetime(df['Date Time'], format='%Y-%m-%d %H:%M:%S')
    df_scaled_trn = pd.read_csv(file + ".trn.csv")
    df_scaled_tst = pd.read_csv(file + ".tst.csv")

    # NOTE(security): pickle can execute arbitrary code while loading; only
    # load scaler files that you generated yourself.
    # The context manager guarantees the file handle is closed.
    with open(f'{file}.scaler.pkl', 'rb') as scaler_file:
        scaler = pickle.load(scaler_file)

    # You can inverse transform predicted value to get original value
    # pd.DataFrame(scaler.inverse_transform(scaler.transform(df_train)))

    return df, df_scaled_trn, df_scaled_tst, scaler

#--------------------------------------------------------------------------------
'''
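    dataset:        must be a tf.data.Dataset (e.g. from tf.data.Dataset.from_tensor_slices)
    window_len:     Length of the input window (# of steps fed to the model)
    output_length:  Length of the labels (# of steps to predict)
    label_slice:    label columns (indices or slice(start, end, step))
    batch_size:     Number of (features, labels) pairs per batch
    skip:           Number of steps to skip between the input window and the labels

Usage:
    df = pd.read_csv(file) or [[0,1,2,3], [0,1,2,3], [0,1,2,3], [0,1,2,3], [0,1,2,3]]
    ds = window(tf.data.Dataset.from_tensor_slices(df), 2, 2, slice(0, 2))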
    #print_dataset(ds)

'''
def window(dataset, window_len, output_length, label_slice=slice(0,1), batch_size=1, skip = 0 ):
    """Turn a dataset of rows into batched (features, labels) sliding windows.

    Each window spans ``window_len + skip + output_length`` consecutive
    elements and windows advance by one element. Features are the first
    ``window_len`` rows; labels are the rows after the skipped steps,
    restricted to the columns selected by ``label_slice``.

    Args:
        dataset: Object providing the tf.data ``window``/``flat_map``/
            ``batch``/``map`` methods.
        window_len: Number of rows in the feature part.
        output_length: Number of rows in the label part.
        label_slice: Column selector applied to the label rows.
        batch_size: Number of (features, labels) pairs per output batch.
        skip: Rows dropped between the feature part and the label part.

    Returns:
        The mapped dataset batched by ``batch_size``.
    """
    total_len = window_len + skip + output_length
    label_start = window_len + skip

    def to_features_and_labels(chunk):
        features = chunk[:window_len]
        labels = chunk[label_start:, label_slice]
        return features, labels

    # Incomplete trailing windows are dropped, so every window holds exactly
    # total_len elements; flattening and re-batching by total_len therefore
    # yields one tensor per window.
    windows = dataset.window(total_len, shift=1, drop_remainder=True)
    flattened = windows.flat_map(lambda x: x)
    chunks = flattened.batch(total_len)
    pairs = chunks.map(to_features_and_labels)

    return pairs.batch(batch_size)

#--------------------------------------------------------------------------------
# Compute the Average of the training output and we will use this as default predictions
# Also for computing R-squared value
def compute_avg(window):
    """Return the element-wise average of the labels (item 1) over ``window``.

    Args:
        window: Iterable whose elements are indexable, with the labels at
            index 1 (e.g. ``(features, labels)`` pairs). Note that this
            parameter name shadows the module-level ``window`` function
            inside this body.

    Returns:
        Sum of all ``w[1]`` divided by the number of elements.

    Raises:
        ValueError: If ``window`` yields no elements.
    """
    total, count = None, 0
    for w in window:
        labels = w[1]
        # Out-of-place addition on purpose: `total += labels` would modify the
        # first element's label array in place when the labels are NumPy arrays.
        total = labels if total is None else total + labels
        count += 1

    if count == 0:
        raise ValueError("compute_avg: 'window' is empty, cannot compute an average")

    return total / count

#--------------------------------------------------------------------------------
'''
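    Run the model on each batch of the windowed dataset and collect the last time step,
    y:      expected values collected so far (None to start a new collection)
    yhat:   predicted values collected so far (None to start a new collection)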
'''
def model_predict(model, window, y=None, yhat=None, howmany=1024*1024):
    """Predict over up to ``howmany`` batches and collect last-step values.

    For every batch ``w`` the model is run on ``w[0]``; the last step along
    axis 1 of both the expected values ``w[1]`` and the prediction is kept.

    Args:
        model: Object with ``predict(x, verbose=0)``.
        window: Object with ``take(n)`` yielding indexable batches with
            features at index 0 and labels at index 1.
        y: Previously collected expected values to extend, or None.
        yhat: Previously collected predictions to extend; only used when
            ``y`` is not None, in which case it must be provided too.
        howmany: Maximum number of batches to consume.

    Returns:
        Tuple ``(y, yhat)`` of NumPy arrays stacked along axis 0. If no batch
        was consumed the inputs ``y`` and ``yhat`` are returned unchanged.
    """
    y_chunks, yhat_chunks = [], []
    for w in window.take(howmany):
        xc, yc = w[0], w[1]
        yp = model.predict(xc, verbose=0)

        # Keep only the last step of each sequence
        y_chunks.append(yc[:, -1, :])
        yhat_chunks.append(yp[:, -1, :])

    if not y_chunks:
        return y, yhat

    if y is not None:
        y_chunks.insert(0, y)
        yhat_chunks.insert(0, yhat)

    # Concatenate once at the end; concatenating inside the loop copies the
    # growing arrays on every batch.
    return np.concatenate(y_chunks), np.concatenate(yhat_chunks)


#--------------------------------------------------------------------------------
# Define inv_transform functions - Note: yh is 2-D: [samples, number of label columns]
def inverse_transform(yh, scaler, label_slice, df=None):
    """Undo the scaling of label values that cover only some scaler columns.

    ``yh`` is placed into the ``label_slice`` columns of a NaN-filled array
    with ``scaler.n_features_in_`` columns so that the scaler, which expects
    every column, can be applied.

    Args:
        yh: 2-D values, one row per sample and one column per label column.
        scaler: Object with ``n_features_in_`` and ``inverse_transform``.
        label_slice: Columns of the full feature set that ``yh`` belongs to.
        df: Optional frame whose column names label the result.

    Returns:
        Without ``df``: the full inverse-transformed array (all columns).
        With ``df``: a DataFrame holding only the ``label_slice`` columns,
        named after ``df.columns[label_slice]``.
    """
    n_rows = yh.shape[0]
    padded = np.full([n_rows, scaler.n_features_in_], np.nan)
    padded[:, label_slice] = yh

    restored = scaler.inverse_transform(padded)
    if df is None:
        return restored

    return pd.DataFrame(restored[:, label_slice], columns=df.columns[label_slice])


Overwriting ts_utils.py


In [38]:
%%writefile "ts_plot_utils.py"

# DO NOT EDIT THIS FILE - GENERATED FROM 02_ts_utils.ipynb

import tensorflow as tf
import tensorflow.keras as keras
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import pickle
import ts_utils

#--------------------------------------------------------------------------------
def plot(y, yh, x=None, title="", scaler=None):
    """Scatter-plot expected values against predicted values.

    Args:
        y: Expected values.
        yh: Predicted values.
        x: Optional x coordinates; when omitted or empty, positions
            0..n-1 are used.
        title: Plot title.
        scaler: Optional object with ``inverse_transform``; when given, both
            ``y`` and ``yh`` are inverse-transformed before plotting.
    """
    if (scaler):
        y1 = scaler.inverse_transform(y)
        yh1 = scaler.inverse_transform(yh)
    else:
        y1, yh1 = y, yh

    # An explicit check instead of `x or range(...)`: truth-testing a NumPy
    # array or pandas object with more than one element raises ValueError.
    if x is None or len(x) == 0:
        x = range(max(len(y1), len(yh1)))

    plt.scatter(x, y1,  marker='.', s=64, edgecolor='k', label="Y")
    # Raw string: "\h" is not a valid escape sequence in a normal literal
    plt.scatter(x, yh1, marker='x', s=64, edgecolor='k', label=r"$\hat{y}$")
    plt.title(title)
    plt.grid(1)
    plt.legend()

#--------------------------------------------------------------------------------
def plotFeatureImportance(weights, labels):
    """Draw a bar chart of ``weights`` and optionally label the bars.

    Args:
        weights: Sequence of bar heights, one per feature.
        labels: Optional tick labels (list, NumPy array, pandas Index, ...).
            Pass None or an empty sequence to keep the default ticks.
    """
    plt.bar(x=range(len(weights)), height=weights)

    # An explicit check instead of `if labels`: truth-testing a NumPy array or
    # pandas Index (e.g. df.columns) with more than one element raises
    # ValueError.
    if labels is not None and len(labels) > 0:
        print(labels)
        axis = plt.gca()
        axis.set_xticks(range(len(labels)))
        axis.tick_params(axis='both', which='major', labelsize=15)

        _ = axis.set_xticklabels(labels, rotation=90)


#--------------------------------------------------------------------------------
def eval_performance(model, trn_dataset, tst_dataset=None, metric_name="loss"):
    """Evaluate ``model`` and return one metric for training and test data.

    Args:
        model: Object with ``evaluate(dataset)`` and ``metrics_names``.
        trn_dataset: Dataset passed to ``model.evaluate`` for the first value.
        tst_dataset: Optional dataset for the second value; when falsy the
            second value is 0.
        metric_name: Name looked up in ``model.metrics_names``; if it is not
            present, the first evaluation result is used.

    Returns:
        Tuple ``(training_value, test_value)``.
    """
    # evaluate() may return a bare scalar instead of a list; normalise both
    # results to 1-D arrays so they can be sized and indexed the same way.
    en = np.atleast_1d(model.evaluate(trn_dataset))
    if (tst_dataset):
        et = np.atleast_1d(model.evaluate(tst_dataset))
    else:
        et = np.zeros_like(en)

    # list.index raises ValueError for a missing name (it never returns -1),
    # so the fall-back to the first entry has to be explicit.
    try:
        mi = model.metrics_names.index(metric_name)
    except ValueError:
        mi = 0

    return en.flat[mi], et.flat[mi]

performance = {}
# Store used by plot_performance when the caller does not pass its own dict.
# The alias is needed because the function's `performance` parameter shadows
# the module-level name inside the function body.
_PERFORMANCE_CACHE = performance


def plot_performance(models, trn_dataset, tst_dataset=None, metric_name="loss", performance=None, reeval=0):
    """Evaluate ``models`` and draw a grouped bar chart of one metric.

    Results are cached in ``performance`` under ``model.name``; models that
    already have an entry are skipped unless ``reeval`` is truthy. The cache
    key does not include ``metric_name``, so pass a fresh dict (or set
    ``reeval``) when switching metrics.

    Args:
        models: Iterable of models; each needs a ``name`` and whatever
            ``eval_performance`` requires.
        trn_dataset: Training dataset forwarded to ``eval_performance``.
        tst_dataset: Optional test dataset forwarded to ``eval_performance``.
        metric_name: Metric to evaluate and show in the labels.
        performance: Dict mapping model name to ``(training, test)`` values;
            it is updated in place. When omitted, the module-level
            ``performance`` dict is used so results persist across calls.
        reeval: Truthy to re-evaluate models that already have an entry.

    Returns:
        The (updated) ``performance`` dict.
    """
    if performance is None:
        performance = _PERFORMANCE_CACHE

    for m in models:
        if (not reeval and performance.get(m.name, None)):
            print(f"Performance for {m.name} exists")
            continue  # Dont evaluate if performance is already computed

        performance[m.name] = eval_performance(m, trn_dataset, tst_dataset, metric_name)

    if not performance:
        print("No models to plot?")
        return performance  # nothing to draw

    x = np.arange(len(performance))
    width = 0.3
    trn_values = [v[0] for v in performance.values()]
    tst_values = [v[1] for v in performance.values()]

    plt.title(f"Comparisons of '{metric_name}' : ")
    plt.ylabel('Metrics')
    plt.bar(x - 0.17, trn_values, width, label=f'Training {metric_name}')
    plt.bar(x + 0.17, tst_values, width, label=f'Test {metric_name}')
    plt.xticks(ticks=x, labels=list(performance.keys()), rotation=45)
    _ = plt.legend()

    return performance

#--------------------------------------------------------------------------------
def plot_predictions(ydf, yhatdf, start=0, end=1024*1024, title=""):
    """Plot expected vs. predicted values, one figure per column of ``ydf``.

    Args:
        ydf: DataFrame of expected values.
        yhatdf: DataFrame of predicted values with the same column names.
        start: First row (slice start) to plot.
        end: Slice end of the rows to plot.
        title: Title used for every figure.
    """
    for c in ydf.columns:
        # One figure per column: show() is called for each column, so the
        # figure has to be created inside the loop to apply to all of them.
        plt.figure(figsize=(14, 4))

        y1, p1 = ydf[c][start:end], yhatdf[c][start:end]
        plt.scatter(y1.index, y1, edgecolors='k', marker='o', label=f'{c}: y',    c='#2ca02c')
        plt.scatter(p1.index, p1, edgecolors='k', marker='X', label=f'{c}: yhat', c='#ff7f0e')

        # Call plt.title; assigning to it (`plt.title = title`) replaces the
        # pyplot function with a string and breaks every later plt.title().
        plt.title(title)
        plt.legend()
        plt.show()


#--------------------------------------------------------------------------------
def predict_and_plot( model, window_trn, window_tst, howmany=1024* 1024,
                        plot_start=0, plot_end=1024*1024, df=None, scaler=None, label_slice=None):
    """Predict over the training (and optional test) windows, then plot.

    Args:
        model: Model forwarded to ``ts_utils.model_predict``; its ``name`` is
            used as the plot title.
        window_trn: Training windows, predicted first.
        window_tst: Test windows appended after the training results, or None.
        howmany: Maximum number of batches taken from each windowed dataset.
        plot_start: First row to plot.
        plot_end: Slice end of the rows to plot.
        df: When given, results go through ``ts_utils.inverse_transform`` with
            ``scaler`` and ``label_slice`` and take their column names from
            ``df``; otherwise the raw values are wrapped in DataFrames.
        scaler: Scaler for the inverse transform (used only with ``df``).
        label_slice: Label columns for the inverse transform (used only with
            ``df``).

    Returns:
        Tuple ``(ydf, pdf)`` of expected and predicted DataFrames.
    """
    actual, predicted = ts_utils.model_predict(model, window_trn, None, None, howmany)
    if window_tst is not None:
        actual, predicted = ts_utils.model_predict(model, window_tst, actual, predicted, howmany)

    if df is None:
        ydf = pd.DataFrame(actual)
        pdf = pd.DataFrame(predicted)
    else:
        ydf = ts_utils.inverse_transform(actual, scaler, label_slice, df)
        pdf = ts_utils.inverse_transform(predicted, scaler, label_slice, df)

    plot_predictions(ydf, pdf, plot_start, plot_end, title=f"{model.name}")

    return ydf, pdf

Overwriting ts_plot_utils.py


## Test the utilities

In [30]:
# Test window function

# Build a dataset with one element per row, using only the first four columns
ds = tf.data.Dataset.from_tensor_slices(df_scaled_trn[df_scaled_trn.columns[:4]])
# window_len=3, output_length=2, label columns 1..2, batch_size=1, skip=1
wd = window(ds, 3, 2, slice(1,3), 1,1)
# Print the first three batches: features (w[0]) followed by labels (w[1])
for w in wd.take(3):
    print(f"{w[0].numpy()}\n=>:\n{w[1].numpy()} \n")

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
[[[ 0.91819914 -2.01847258 -2.07559971 -1.95168275]
  [ 0.93257019 -2.11680321 -2.17421487 -2.0966161 ]
  [ 0.95891712 -2.10851026 -2.16834492 -2.0805124 ]]]
=>:
[[[-2.20565618 -2.27048205]
  [-2.21039501 -2.27752599]]] 

[[[ 0.93257019 -2.11680321 -2.17421487 -2.0966161 ]
  [ 0.95891712 -2.10851026 -2.16834492 -2.0805124 ]
  [ 0.97688093 -2.13694321 -2.19769467 -2.13321544]]]
=>:
[[[-2.21039501 -2.27752599]
  [-2.15115969 -2.22352245]]] 

[[[ 0.95891712 -2.10851026 -2.16834492 -2.0805124 ]
  [ 0.97688093 -2.13694321 -2.19769467 -2.13321544]
  [ 1.03316755 -2.20565618 -2.27048205 -2.22544575]]]
=>:
[[[-2.15115969 -2.22352245]
  [-2.02439611 -2.10612345]]] 



2023-01-27 13:30:51.495701: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
