In [None]:
import socket
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras as Keras
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Introducing the keras [timeseries data set from array function](https://keras.io/api/preprocessing/timeseries)

## A simple example that fits on-screen
To illustrate this let's make a simple data set with four features and 10 time steps.
We'll assume:
- the first feature is 0, 1, ..., 9,
- the second is 0, 10, 20, ... and so on (each feature is the time step multiplied by a further factor of 10)

In [None]:
# Toy data: 10 time steps x 4 features. Feature k holds the time step
# multiplied by 10**k, so every value shows which row and column it came from.
time_steps = np.arange(10).reshape(-1, 1)
feature_scales = np.array([1, 10, 100, 1000])
dataset = (time_steps * feature_scales).astype('int32')
dataset

Now, to make it easy to visualise on screen, let's say:
- we are using a window of size 3,
- we are trying to predict the first column, one day in advance
- i.e. at time _t_ we give the model x[t], x[t-1], and x[t-2] and we want to predict x[t+1][0]

and use a Keras built in function to give us a tensorflow dataset object

In [None]:
window_size = 3  # number of consecutive rows in each input window
timeseries = Keras.utils.timeseries_dataset_from_array(
    dataset,
    sequence_length=window_size, # let's take sequences of length 3
    targets=dataset[window_size:,0],# we want to predict feature[0] on the next input after our sequence
    sequence_stride=1,
    sampling_rate=1,
    batch_size=1,  # one (window, target) pair per batch, for ease of viewing
    shuffle=False,  # no shuffling
    seed=None,
    start_index=None,
    end_index=None,
)

## What's in a dataset?
Lots of stuff and lots of functionality!

Datasets are designed to be used as part of a production pipeline.  
So instead of getting access via indexes (like you would for pandas or numpy), they
provide access via iterators  such as _batch()_ or _take()_

For now, it's easiest to look at what this dataset object contains if we convert it to a list

In [None]:
# Materialise every element of the dataset into a list so the notebook displays them all.
list(timeseries)

### Yuk!
We can just about see, if we squint a lot, that this contains 7 pairs of items.
- there are 7 because, although 10 rows contain 8 windows of length 3, we only supplied 7 targets (rows 3 to 9), so only the first 7 windows have a "next row" to predict

Each item  has:
- a tensor of shape (batchsize, sequence length, num_features)
- a scalar value (again wrapped up inside a tensor) 
  - because we only asked to predict 1 feature (the one at index 0)
  

### For you to experiment
Try asking for different length sequences (`window_size`, line 1 of the cell), different size batches (`batch_size`, line 8), or more than one feature as a label (`targets`, line 5), then re-run the two code cells above and make sure you understand what you are getting.

## But a tensorflow dataset does have some advantages
- for example we can ask it to batch up the data
- and if we pass it to tensorflow preprocessing layers or a model's fit() method they will do that

So let's ask our timeseries to give us a load of batches

Note that batch() and take() return new dataset objects (e.g. a BatchDataset), so we have to iterate over their contents using
```
for item in timeseries.batch(batch_size=1):
```

instead of using slices like we would for numpy arrays or pandas dataframes

In [None]:
print('First batches of size 1')
# Each element is a tuple whose last entry is the label; everything before it is input.
for batch in timeseries.batch(batch_size=1):
    inputs, label = batch[:-1], batch[-1]
    print(f'{inputs} : {label}')
    

In [None]:
print('Now batches of size 2')
x2 = timeseries.batch(batch_size=2)
# Split every element into its inputs (all but the last entry) and its label (last entry).
for pair in x2:
    inputs, label = pair[:-1], pair[-1]
    print(f'x= {inputs} \n y= {label}')
print('The last batch only has one thing in, because there are only 7 sequences of length 3 in 0...9')

## Finally, this is one way to flatten one of these items using a small helper function

Let's take the first batch as an example and turn it into a 'windowed' row of size (sequence length * number of features) and a single-element label.


In [None]:
def sequence_to_flat(item):
    """Flatten one (window, label) element into a 1-D row and a 1-element label.

    Parameters
    ----------
    item : sequence
        ``item[0]`` is the input window and ``item[1]`` is its label. The
        window must hold exactly (sequence length * number of features)
        values, i.e. a batch of size 1.

    Returns
    -------
    tuple
        ``(X, y)``: ``X`` has length sequence_length * num_features and ``y``
        has length 1.
    """
    window, label = item[0], item[1]
    # The last two axes of the window are (time steps, features).
    steps, features = window.shape[-2], window.shape[-1]
    X = tf.reshape(window, [steps * features])
    y = tf.reshape(label, [1])
    return (X, y)

# Walk the dataset by hand: every get_next() call returns the following element.
my_iterator = iter(timeseries)
first_item = my_iterator.get_next()
X,y = sequence_to_flat(first_item) 
print(f'when flattened the first item is:\n {X} : {y}')
# Pull the second element straight from the iterator and flatten it as well.
X,y = sequence_to_flat(my_iterator.get_next())
print(f'when flattened the next item is;\n {X} : {y}')


# Comparing three different neural architectures for a time-series prediction problem

## Data set description and characteristics
Delhi data from [here](https://www.kaggle.com/datasets/sumanthvrao/daily-climate-time-series-data)

4 variables: temp, humidity, windspeed, pressure
Training set: 

In [None]:
# Choose the dataset directory according to the machine the notebook runs on.
hostname = socket.gethostname()
if hostname == 'csctcloud':  # on csctcloud
    datapath = "/home/common/datasets"
elif hostname.startswith('jupyter'):  # any host whose name begins with 'jupyter'
    datapath = "~/shared/datasets"
else:  # machine specific - this is for jim's development
    datapath = "../datasets"
reldirname = datapath + "/delhi/"
train_raw = pd.read_csv(reldirname + "DailyDelhiClimateTrain.csv")
test_raw = pd.read_csv(reldirname + "DailyDelhiClimateTest.csv")

# Summarise both splits. The null count of the test set is taken from
# test_raw (it was previously, by mistake, computed from train_raw).
print(f'training data set has {train_raw.shape[0]} '
      f'rows and {train_raw.shape[1]} features\n'
      f' it has {train_raw.isna().sum().sum()} nulls\n'
      f'    test data set has  {test_raw.shape[0]} '
      f'rows and {test_raw.shape[1]} features\n'
      f' it has {test_raw.isna().sum().sum()} nulls\n'
      )
print(f'column names are {train_raw.columns}')
train_raw.describe()

## Fixing the outliers
What stands out immediately is that there are some weird outliers in the meanpressure column.
If it turns out that these are isolated odd readings we will replace them by the 50th centile value (i.e. the median)

In [None]:
# Treat any pressure reading more than 200 away from 1000 as an outlier.
median = train_raw['meanpressure'].median()
outliers = abs(train_raw['meanpressure'] - 1000) > 200
# Write through .loc on the frame itself: chained indexing
# (train_raw['meanpressure'][outliers] = ...) may act on a temporary copy
# and leave train_raw unchanged.
train_raw.loc[outliers, 'meanpressure'] = np.nan
# Assign the filled column back rather than using inplace=True on a column
# selection, for the same reason. Pre-existing NaNs are filled as well,
# exactly as before.
train_raw['meanpressure'] = train_raw['meanpressure'].fillna(median)
train_raw.describe()

## For now just predict the next day's temperature. 

In [None]:
print(train_raw.head())

# The label for each day is the *following* day's mean temperature.
# Keep the date alongside it and use the date as the index.
train_y = train_raw[['date', 'meantemp']].set_index('date')
# A negative shift moves each value up one row, so row t now holds the
# temperature of row t+1 (a positive shift would give yesterday's value).
train_y = train_y.shift(periods=-1)
# The final row has no "next day"; fill its NaN with something sensible.
train_y.iloc[-1] = train_raw['meantemp'].mean()
print(train_y.head())




## Getting the data ready for ML with Keras 

Start by making a time series dataset from the train and test data we loaded
with the aim of predicting the next day's temperature.   
- for simplicity we'll drop the date columns and change everything to numpy arrays
- we will leave out all the parameters where the default settings are fine
- we'll take a window size of 7 days in case there are weekly effects



In [None]:
# Remove the date column when present (errors='ignore' makes this a no-op
# if the cell is re-run after the column has already gone), then convert
# both frames to plain numpy arrays.
train_raw = train_raw.drop(columns=['date'], errors='ignore')
test_raw = test_raw.drop(columns=['date'], errors='ignore')
train, test = train_raw.to_numpy(), test_raw.to_numpy()

print(f'train and test shape {train.shape} , {test.shape}')

### next we'll apply a standard scale to transform values 

In [None]:
# Fit the scaler on the training data only, then apply that same
# transformation to both the training and the test arrays.
scaler = StandardScaler().fit(train)
train = scaler.transform(train)
test = scaler.transform(test)

## Some common values to use across all experiments
- some e.g. epochs might want tuning - or adapting if you put in early stopping

In [None]:
# Settings shared by all three experiments.
num_nodes = 32          # width of the hidden / convolutional / recurrent layer
larger_batch_size = 32  # batch size used when re-batching for faster fitting
epochs = 15             # training epochs for every model
window_size = 7         # time steps (days) in each input sequence

## Finally lets make the basic timeseries datasets
and afterwards print out the shape of each batch

In [None]:
# Build the train and test datasets with identical settings: each element is
# a window of `window_size` rows paired with column 0 of the row that follows
# the window, delivered one window per batch.
train_series, test_series = (
    Keras.utils.timeseries_dataset_from_array(
        split,
        sequence_length=window_size,
        targets=split[window_size:, 0],
        batch_size=1,
    )
    for split in (train, test)
)

In [None]:
# Fetch the first (inputs, target) element and report the shape of its inputs.
iterator = iter(test_series)
first_element = list(iterator.get_next())
print(f'shape of batches from basic datasets is {first_element[0].shape}')

# Algorithm 1: a MLP with a time window.

## Preprocess data
For this case we can 'flatten' each X item of the timeseries dataset from a window_size × 4 array (here 7 × 4) into a single row of window_size × 4 = 28 values

- I've been a bit lazy and worked out the flat size in advance
- and i've just printed out the first item from the dataset   
  to illustrate the dataset.take() method

In [None]:
# Number of values in one flattened window: time steps x features.
n_features = train.shape[1]
flatsize = window_size * n_features

def sequence_to_flat2(X, y):
    """Reshape one window ``X`` into a single row of ``flatsize`` values.

    The label ``y`` is passed through untouched so the function can be used
    directly with ``dataset.map``.
    """
    flat_X = tf.reshape(X, [1, flatsize])
    return (flat_X, y)

In [None]:
# Apply the flattening function to every element of both datasets.
flattened_train, flattened_test = (
    series.map(sequence_to_flat2) for series in (train_series, test_series)
)

In [None]:
# take(1) returns a dataset holding just the first element; list() materialises it.
first_item = flattened_train.take(1)
first_pair = list(first_item)[0]
print( f'shape of batches in flattened version is now {first_pair[0].shape}')

### Now lets build a sequential model for our MLP

In [None]:
# One hidden ReLU layer followed by a single linear output for regression.
mlp = Keras.Sequential()
mlp.add(Keras.layers.Dense(num_nodes, activation='relu'))
mlp.add(Keras.layers.Dense(1, activation='linear'))
mlp.compile(loss='mse', optimizer='adam')


### Note that we can dynamically change the batch size of the dataset
- from 1: which we used for ease of visualisation
- to 32 for speed when fitting


In [None]:
# Re-batch the single-window elements into groups of `larger_batch_size`.
flattened_train32 = flattened_train.batch(batch_size=larger_batch_size)
flattened_test32 = flattened_test.batch(batch_size=larger_batch_size)
# The dataset already yields batches, so batch_size is not passed to fit():
# Keras documents that it must not be specified for dataset inputs.
history = mlp.fit(flattened_train32, epochs=epochs)

### Let's see how well it did on the training and test data
using a neat bit of code from [stackoverflow](https://stackoverflow.com/questions/56226621/how-to-extract-data-labels-back-from-tensorflow-dataset) to get the y labels back

In [None]:
def _targets_and_predictions(model, ds):
    '''Return (targets, predictions) for every sample in ds, in iteration order.'''
    # Iterating the dataset yields (inputs, targets) batches; stack the targets.
    targets = np.concatenate([y for _, y in ds], axis=0)
    # One prediction per sample, flattened to line up with the targets.
    predictions = model.predict(ds).reshape(targets.shape[0])
    return targets, predictions


def evaluate_and_report(model, name, train_ds, test_ds):
    ''' gets the train and test mse error
        for a given model and train/test datasets
        and makes a plot of predicted against actual values

        Parameters:
        ==========
        model: trained instance of Keras Sequential or Model class
        name: string used to label the plot
        train_ds: iterable of (inputs, targets) batches, e.g. a tf.data
                  dataset. It is passed to model.evaluate() and
                  model.predict() and is also iterated directly to recover
                  the targets, so it must yield its elements in the same
                  order on every pass (i.e. not be shuffled).
        test_ds: same structure as train_ds, holding the test data

        Returns:
        =======
        None; the figure is drawn as a side effect.
        '''
    trainres = f'Training MSE= {model.evaluate(train_ds)}'
    testres = f'Test MSE= {model.evaluate(test_ds)}'

    y_train, y_train_pred = _targets_and_predictions(model, train_ds)
    y_test, y_test_pred = _targets_and_predictions(model, test_ds)

    # Lay the test period directly after the training period on one axis.
    actual = np.concatenate((y_train, y_test))
    predicted = np.concatenate((y_train_pred, y_test_pred))

    fig, ax = plt.subplots(figsize=(15, 5))
    ax.set_ylim((-2.5, 2.5))
    ax.plot(predicted, label='predicted')
    ax.plot(actual, label="actual")
    # Vertical marker at the first test sample.
    ax.axvline(x=y_train.shape[0], color='red')
    # Use the caller-supplied name so each model's plot is labelled correctly.
    ax.set_title(f'{name} results, red line denotes switch from train to test\n{trainres}\n{testres}')
    ax.set_xlabel('sample index (train followed by test)')
    ax.set_ylabel('target value')
    ax.legend()

In [None]:
# Report and plot the MLP's fit on the re-batched, flattened train and test datasets.
evaluate_and_report(mlp,"MLP", flattened_train32, flattened_test32)

# Algorithm 2: A 1-D CNN

## Preprocess the data
The 2D CNN needs to know the height and width of images in order to optimise its inner loops
- look at my code from week 2 for an example
- or [Keras.layers.Conv2d api](https://keras.io/api/layers/convolution_layers/convolution2d/)

Similarly the 1D CNN layer needs to have a fixed size number of timesteps (sequences) to work with
- not necessarily the same as the size of the filters (usually bigger)
- but it needs to know the size of the loop to run its filters over


So we can re-use the code we wrote to create the datasets 
- the cell below re-uses the `window_size`-step sequences built earlier; a longer sequence (e.g. 20 steps) would give the convolution more to work with
- so we should consider the number of timesteps in a sequence as a hyper-parameter to optimise


In [None]:


# Group the single-window elements into fixed-size batches. drop_remainder=True
# discards a final short batch so every batch has exactly the same shape.
cnn_train, cnn_test = (
    series.batch(larger_batch_size, drop_remainder=True)
    for series in (train_series, test_series)
)

In [None]:
# Look at the inputs of the first test batch to find the full batch shape.
first_cnn_batch = list(cnn_test.take(1))[0]
batch_shape = first_cnn_batch[0].shape
print(f'shape of batches is {batch_shape}')

## Now specify the 1D CNN architecture
- Let's see how we get on with kernel size 5 (days) : another hyper-parameter to be tuned
- we also need to specify the input shape which is (batch_size,1,sequence_length, num_features)  
  i.e the shape of the batches we just found

   

In [None]:
# 1-D convolution over the time axis, flattened into a single linear output.
# The fixed batch shape is declared with an explicit Input: the
# `batch_input_shape` layer keyword is not accepted by Keras 3, whereas
# Keras.Input(batch_shape=...) works in both Keras 2 and Keras 3.
# NOTE: `batch_shape` must be the value computed from cnn_test in the
# previous cell (the same name is re-assigned later for the LSTM).
oneD_cnn = Keras.Sequential(
    [
        Keras.Input(batch_shape=tuple(batch_shape)),
        Keras.layers.Conv1D(
            filters=num_nodes,
            kernel_size=5,
            activation='relu',
        ),
        Keras.layers.Flatten(),
        Keras.layers.Dense(1, activation='linear'),
    ]
)
oneD_cnn.compile(optimizer='adam', loss='mse')
oneD_cnn.summary()

In [None]:
# Train the 1-D CNN on the fixed-size batches; note this overwrites the earlier `history`.
history= oneD_cnn.fit(cnn_train, epochs=epochs)

In [None]:
# Report and plot the CNN's fit on the batched train and test datasets.
evaluate_and_report(oneD_cnn,"1-D ConvNet",cnn_train,cnn_test)

# Algorithm 3 LSTM Network
- For the LSTMs we will simply use our original dataset, 
- noting that we only have batchsize 1 which is slower, 
  but makes sense if we also preserve state between timesteps
- getting the sequence length right would be a good start for experimentation

### Warning: this is considerably (batch_size X) slower than MLP or CNN  

## Preprocess data

In [None]:
# No extra preprocessing: the LSTM uses the original batch-size-1 datasets as they are.
lstm_train=  train_series
lstm_test=  test_series

In [None]:
# Inspect the inputs of the first test element to get the batch shape the LSTM will see.
first_lstm_batch = list(lstm_test.take(1))[0]
batch_shape = first_lstm_batch[0].shape
print(f' for the lstm the batch shape is {batch_shape}' )

## Define and train model

In [None]:
# Stateful LSTM followed by a single output unit. A stateful layer needs a
# fully specified batch shape; it is declared with an explicit Input because
# the `batch_input_shape` layer keyword is not accepted by Keras 3, whereas
# Keras.Input(batch_shape=...) works in both Keras 2 and Keras 3.
# NOTE: `batch_shape` must be the value computed from lstm_test in the
# previous cell, not the one left over from the CNN section.
lstmnet = Keras.Sequential(
    [
        Keras.Input(batch_shape=tuple(batch_shape)),
        Keras.layers.LSTM(units=num_nodes, stateful=True),
        Keras.layers.Dense(1),
    ]
)

lstmnet.compile(optimizer='adam', loss='mse')
history = lstmnet.fit(lstm_train, epochs=epochs)


## Evaluate and show results

In [None]:
# Report and plot the LSTM's fit on the batch-size-1 train and test datasets.
evaluate_and_report(lstmnet,"LSTM", lstm_train,lstm_test)
