<a href="https://colab.research.google.com/github/davide-gurrieri/timeseries-forecasting/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Settings

In [1]:
COLAB = False  # True when running on Google Colab: enables the clone / Drive-mount setup cell
FIRST_RUN = True  # on Colab: True clones the repo and fetches the data, False only enters the repo folder
PLOT = False  # enables the optional plotting cells
MODEL_NAME = "ConvLSTMDense"  # name handed to the model constructor

VALIDATION_SET = True  # hold out a validation split before building the sequences
VAL_SPLIT = 0.1  # passed as test_size to train_test_split when VALIDATION_SET is True

CUT = False # cut the initial timestamps of each time series in order to maintain only the last N_TIME_STAMPS
# NOTE(review): the note below says "WINDOW + TELESCOPE", but the recorded sequence
# shapes (window 100, target 9) would give 109, not 209 — confirm the intended value.
N_TIME_STAMPS = 209 # WINDOW + TELESCOPE # number of time stamps to maintain

### Colab

In [2]:
if COLAB:
    if FIRST_RUN:
        ## Clone the private repository in Colab
        TOKEN = "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"
        REPO_URL= "github.com/davide-gurrieri/timeseries-forecasting.git"
        USER_NAME = "davide-gurrieri"
        USER_EMAIL = "gurrieri99@gmail.com"

        !git clone --branch main https://oauth2:$TOKEN@$REPO_URL
        %cd timeseries-forecasting/
        !git remote set-url origin  https://oauth2:$TOKEN@$REPO_URL
        !git config user.name $USER_NAME
        !git config user.email $USER_EMAIL
        %cd ..
        
        # Import the data from the drive
        from google.colab import drive
        drive.mount('/content/drive')
        # Copy the data from the drive to the local repository folder
        %cp "drive/MyDrive/[2023-2024] AN2DL/Homework 2/training_dataset.zip" "timeseries-forecasting/data/"
        # Unzip the data
        !unzip timeseries-forecasting/data/training_dataset.zip -d timeseries-forecasting/data/
        # Remove the zip file
        !rm timeseries-forecasting/data/training_dataset.zip
        %cd timeseries-forecasting/
        
        # Install the requirements
        # !pip install keras-cv
    else:
        %cd timeseries-forecasting/

### Import libraries

In [3]:
import models.ConvLSTMDense as MyModel
from models.ConvLSTMDense import ConvLSTMDense as Constructor

2.14.0
Using TensorFlow backend


In [4]:
from imports import *
from preprocessing_params import *
import utils

### Data processing

In [5]:
# Load the training matrix from disk; the trailing expression displays its shape as the cell output
data = np.load("data/training_data.npy")
data.shape

(48000, 2776)

In [6]:
# Cast the matrix to float32 (rebinds `data`, so re-running the cell is harmless)
data = data.astype(np.float32)

In [7]:
# Load the category labels and list the distinct values
categories = np.load("data/categories.npy")
# A bare `categories.shape` that is not the last expression of the cell is
# evaluated and discarded, so print it explicitly.
print("Shape:", categories.shape)
print("Unique categories:")
print(np.unique(categories))

Unique categories:
['A' 'B' 'C' 'D' 'E' 'F']


In [8]:
# Load the valid periods; columns 0 and 1 are reported below as start and end times
valid_periods = np.load("data/valid_periods.npy")
# A bare `.shape` in the middle of a cell is never displayed: print it explicitly
print("Shape:", valid_periods.shape)
print(valid_periods[0:4,])
# Vectorised NumPy reductions instead of the Python built-ins min()/max(),
# which iterate over the array element by element
start_times = valid_periods[:, 0]
end_times = valid_periods[:, 1]
print("Min and max start time: ", start_times.min(), start_times.max())
print("Min and max end time: ", end_times.min(), end_times.max())

[[2325 2776]
 [2325 2776]
 [2325 2776]
 [2712 2776]]
Min and max start time:  0 2752
Min and max end time:  2776 2776


In [9]:
# Plot the whole data matrix (only when PLOT is enabled).
# It is better to save the image and open the PDF to see all the details.
if PLOT:
    utils.plot_matrix(data, save=True, show=True)

In [10]:
# One matrix plot per category (only when PLOT is enabled).
# The saved PDFs are easier to inspect in detail than the inline figures.
if PLOT:
    for label in np.unique(categories):
        rows_of_label = data[categories == label]
        utils.plot_matrix(rows_of_label, save=True, show=True, name=label)

In [11]:
# Report how many rows belong to each category (single pass over the labels)
print("Number of rows for each category:")
labels, label_counts = np.unique(categories, return_counts=True)
for label, n_rows in zip(labels, label_counts):
    print(label, n_rows)

Number of rows for each category:
A 5728
B 10987
C 10017
D 10016
E 10975
F 277


In [12]:
# Optional visual check: plot time series of category "A" (n=5 is forwarded to the helper)
if PLOT:
    utils.plot_time_series(data, categories, category="A", n=5)

In [13]:
# Cut the data: keep only the last N_TIME_STAMPS columns of every series and
# shift the valid periods so they index into the shortened matrix.
if CUT:
    # Clamp at 0: if N_TIME_STAMPS exceeds the number of columns, a negative
    # start index would make the slice wrap around (dropping data) and would
    # shift valid_periods in the wrong direction.
    start_time_index = max(data.shape[1] - N_TIME_STAMPS, 0)
    data = data[:, start_time_index:]
    valid_periods = valid_periods - start_time_index
    # A series that started before the cut now starts at the first kept column
    valid_periods[:, 0] = np.maximum(valid_periods[:, 0], 0)
    if PLOT:
        utils.plot_matrix(data)
    print(data.shape)

In [14]:
# Build the model sequences, optionally holding out a validation split stratified by category
if not VALIDATION_SET:
    # No hold-out: every series contributes to the training sequences
    X_train, y_train = utils.build_sequences(data, valid_periods, WINDOW, STRIDE, TELESCOPE)
    print(X_train.shape, y_train.shape)
else:
    # Split whole series (not windows) so the three arrays stay row-aligned
    (
        X_train_raw, X_val_raw,
        categories_train, categories_val,
        valid_periods_train, valid_periods_val,
    ) = train_test_split(
        data, categories, valid_periods,
        stratify=categories, test_size=VAL_SPLIT, random_state=SEED,
    )
    X_train, y_train = utils.build_sequences(X_train_raw, valid_periods_train, WINDOW, STRIDE, TELESCOPE)
    X_val, y_val = utils.build_sequences(X_val_raw, valid_periods_val, WINDOW, STRIDE, TELESCOPE)
    print(X_train.shape, y_train.shape, X_val.shape, y_val.shape)


(609170, 100, 1) (609170, 9, 1) (68320, 100, 1) (68320, 9, 1)


In [15]:
# inspect_multivariate(X_train, y_train, TELESCOPE, idx=0, n=5)

In [16]:
# Instantiate the model wrapper with the first parameter set of each stage
model_obj = Constructor(
    MODEL_NAME,
    MyModel.build_param_1,
    MyModel.compile_param_1,
    MyModel.fit_param_1,
)

# Build and compile the network, then print its layer summary
model_obj.build()
model_obj.compile()
model_obj.model.summary()

Model: "ConvLSTMDense"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input (InputLayer)          [(None, 100, 1)]          0         
                                                                 
 bidirectional_lstm (Bidire  (None, 100, 128)          33792     
 ctional)                                                        
                                                                 
 conv (Conv1D)               (None, 100, 32)           12320     
                                                                 
 flatten (Flatten)           (None, 3200)              0         
                                                                 
 Output (Dense)              (None, 9)                 28809     
                                                                 
Total params: 74921 (292.66 KB)
Trainable params: 74921 (292.66 KB)
Non-trainable params: 0 (0.00 Byte)
_______________

In [None]:
# Train the model; the validation sequences are passed only when a validation split was created
if VALIDATION_SET:
    model_obj.train_val(X_train, y_train, X_val, y_val)
else:
    model_obj.train(X_train, y_train)

Save the model under `saved_models/` (in a folder named after `MODEL_NAME`) and, when running on Colab, also copy it to Google Drive.

In [17]:
model_obj.save_model()
if COLAB:
    %cd ..
    %cp -r "/content/timeseries-forecasting/saved_models/MODEL_NAME" "drive/MyDrive/"
    %cd plants-classifier/

### Performance evaluation

Plot the training history

In [None]:
# The history plot is only requested when a validation split was used
if VALIDATION_SET:
    model_obj.plot_history()

Evaluate the model on the validation set

In [None]:
# X_val / y_val exist only when VALIDATION_SET is True, hence the guard
if VALIDATION_SET:
    model_obj.evaluate(X_val, y_val)