# 9. System-wide tests

*Made by David Serrano for his Final Bachelor Thesis*

This file tests the entire system with unseen sequences.

## 9.1. Environment preparation

In [None]:
# Useful general imports
from google.colab import drive

In [None]:
# Mount the Google Drive folder at /content/drive so the project files stored
# there can be accessed; force_remount=True requests a fresh mount even if the
# drive is already mounted in this session.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Make the custom modules (.py files in the utils folder) importable.
import sys

UTILS_DIR = '/content/drive/MyDrive/TFG/utils'

# Only add the folder once, so re-running this cell in the same kernel does not
# pile up duplicate entries in sys.path.
if UTILS_DIR not in sys.path:
    sys.path.append(UTILS_DIR)

In [None]:
from paths import PROJECT_ROOT, HDF5_FOLDER, JSON_FOLDER, USABLE_SEQUENCES, MAX_SQUARE_LEN

In [None]:
# Name of the sequence used as the test example in this notebook, written
# without the '.h5' extension (the extension is appended where the file is opened).
EXAMPLE_FILE = '20181002.047_AEQ31'

In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## 9.2. Obtaining the tracks

In [None]:
from hdf5Loader import hdf5Loader

# Announce the sequence under test, then open its HDF5 file and report how many
# frames the loader finds in it.
print('The entire system is going to be tested with a unlabeled sequence')
print('\tFilename: {}'.format(EXAMPLE_FILE))

sequence_path = HDF5_FOLDER + EXAMPLE_FILE + '.h5'
loader = hdf5Loader(sequence_path)
frame_count = loader.nFrames()
print('\tNum of frames: {}'.format(frame_count))

The entire system is going to be tested with a unlabeled sequence
	Filename: 20181002.047_AEQ31
	Num of frames: 593


In [None]:
# custom imports
from archive import getTimestamps
from sequenceAnalyzer import sequenceAnalyzer
from os import walk
from image_tools import showImage

import os
import time

# Folder handed to processSequence as both jsonPath and modelPath.
# NOTE(review): presumably the per-frame JSON output ends up here (the next
# section reads '<this folder>/Frames') -- confirm against sequenceAnalyzer.
EXPERIMENT_DIR = PROJECT_ROOT + '/data/experiment/'

target_name = EXAMPLE_FILE + '.h5'
sequence_found = False

# Walk the HDF5 folder and process only the selected example sequence.
for (dirpath, dirnames, filenames) in walk(HDF5_FOLDER):
    for file in filenames:
        if file != target_name:
            continue
        sequence_found = True

        t = time.time()

        # Build the path from the directory currently being visited, so a match
        # located in a sub-folder of HDF5_FOLDER is opened correctly.
        sourceFile = os.path.join(dirpath, file)

        # Get the timestamps
        [T1, T4, T4e] = getTimestamps(file)

        # Create sequenceAnalyzer object
        sequenceProcessor = sequenceAnalyzer(sourceFile, T1, T4, T4e)

        print('Analyzing video sequence...')
        json_path = EXPERIMENT_DIR
        trackerObject_path = EXPERIMENT_DIR
        # Classification is switched off here; it is done later in the notebook.
        classify = False
        image = sequenceProcessor.processSequence(jsonPath=json_path,
                                                  modelPath=trackerObject_path,
                                                  showBinarized=True,
                                                  trackDetections=True,
                                                  classifyDetections=classify)

        # Report whole minutes and seconds instead of raw floats.
        elapsed = time.time() - t
        minutes, seconds = divmod(int(round(elapsed)), 60)
        print('Elapsed time: {}m {}s'.format(minutes, seconds))

# Make a missing file visible instead of finishing silently.
if not sequence_found:
    print('Sequence {} was not found under {}'.format(target_name, HDF5_FOLDER))



Output hidden; open in https://colab.research.google.com to view.

## 9.3. Obtaining the features

In [None]:
import os

from JSONLoader import loadData, saveData

# Folder whose JSON files are loaded one by one and merged into a single list.
FRAMES_DIR = PROJECT_ROOT + '/data/experiment/Frames'

all_detections = []
for (dirpath, dirnames, filenames) in os.walk(FRAMES_DIR):
    for filename in filenames:
        # Join with the directory being visited so files in sub-folders resolve too.
        all_detections.extend(loadData(os.path.join(dirpath, filename)))

# Fail with a clear message rather than the opaque error max() gives on an
# empty sequence.
if not all_detections:
    raise ValueError('No detections were loaded from {}'.format(FRAMES_DIR))

# ------------------------------------------------------------------------------

# d[1] is the track ID of a detection; d[0] is the key used to order detections
# inside a track (presumably the frame index -- TODO confirm).
maxTrackID = max(d[1] for d in all_detections)

# Group the detections by track ID and order each group by d[0]. A single
# stable sort on (track ID, d[0]) yields the same ordering as scanning the
# whole list once per track ID. Only IDs in 0..maxTrackID are kept, which is
# exactly the set of IDs the per-track scan would have visited.
valid_ids = range(maxTrackID + 1)
sortedDetections = sorted(
    (d for d in all_detections if d[1] in valid_ids),
    key=lambda d: (d[1], d[0]),
)

# ------------------------------------------------------------------------------

saveData(PROJECT_ROOT + '/data/experiment/' + EXAMPLE_FILE + '.h5.json', sortedDetections)

## 9.4. Training

In [None]:
# File name of the saved ResNet-50 weights; it is looked up under
# PROJECT_ROOT + '/models/' when the model is loaded.
modelName = 'ResNet50_epoch5_dataAugPixRange5.pt'

In [None]:
from torchvision.models import resnet50
import torch.nn as nn
import torch
from torch.utils.data import DataLoader
import time
from tqdm.notebook import tqdm

# Custom Imports
from dataPrep import detectionLoader, hdf5Initializer
from dataStructures import Dataset
from hdf5Loader import hdf5Loader

# Rebuild the architecture the checkpoint expects: a ResNet-50 whose final
# layer is replaced by Linear(num_ftrs, 128) -> ReLU -> Linear(128, 3).
model = resnet50(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 128),
    nn.ReLU(),
    nn.Linear(128,3)
)

# Load the weights onto the CPU first; the model is moved to the selected
# device further down.
model.load_state_dict(torch.load(PROJECT_ROOT + '/models/' + modelName,
                                    map_location=torch.device('cpu')))

# Delete the final two layers of the model (ReLU and Linear(128,3)) so the
# network outputs the 128 activations of the first linear layer as features.
model.fc = nn.Sequential(*[model.fc[0]])
print('The fully connected layer of the model is:\n', model.fc)

# Initialize the hdf5Loader and create a DataLoader for the sequence

print('Creating a DataLoader for the sequence file...')

loader = hdf5Loader(HDF5_FOLDER + EXAMPLE_FILE + '.h5')
detections, labels = detectionLoader([EXAMPLE_FILE], tracks=False)
# NOTE(review): n is forwarded to Dataset; its meaning is defined in
# dataStructures -- confirm there before changing it.
n=5

dataset = Dataset([loader], labels, detections, n, dataAug=False)
dataloader = DataLoader(dataset=dataset,
                        batch_size=1,
                        num_workers=1)

# Extract the features
print('Extracting the features of all the sequence...')
since = time.time()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# The inputs are sent to `device`, so the model must live there as well;
# otherwise the forward pass fails with a device mismatch on a GPU runtime.
model = model.to(device)
model.eval()    # Set model to evaluation mode

X = []  # Features matrix: one flattened feature vector per batch
y = []  # Label list (left empty: nothing is appended for this sequence)

print('\tLoading {} with {} batches...'.format(EXAMPLE_FILE,
                                                len(dataloader)))

# Iterate over data
loop = tqdm(dataloader, unit=' batches', leave=False)

# Inference only: disabling gradient tracking avoids building the autograd
# graph for every batch.
with torch.no_grad():
    # The per-batch labels are not needed here; a separate name keeps the
    # `labels` returned by detectionLoader intact.
    for inputs, _batch_labels in loop:

        # Drop the leading DataLoader batch dimension (batch_size=1).
        inputs = inputs[0,:,:,:,:]
        inputs = inputs.to(device)

        outputs = model(inputs)

        # Flatten the outputs of this batch into a single feature vector.
        X.append(torch.flatten(outputs).tolist())

# Report whole minutes and seconds.
elapsed = time.time() - since
minutes, seconds = divmod(int(round(elapsed)), 60)
print('Elapsed time extracting the features: {:.0f}m {:.0f}s'.format(minutes,
                                                                        seconds))

The fully connected layer of the model is:
 Sequential(
  (0): Linear(in_features=2048, out_features=128, bias=True)
)
Creating a DataLoader for the sequence file...
Extracting the features of all the sequence...
	Loading 20181002.047_AEQ31 with 84 batches...


HBox(children=(FloatProgress(value=0.0, max=84.0), HTML(value='')))

Elapsed time extracting the features: 1m 9s


## 9.5. Classification

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler

def normalizeData(X, scaler='Standard', range=(0,1)):
    """
    Function to scale features with different parameters
    :param X: feature matrix
    :param scaler: type of scaler ('Standard', 'MaxAbs', 'MinMax')
    :param range: (min, max) target interval; only used by the 'MinMax' scaler
    :return X_norm: feature matrix normalized
    :raises ValueError: if scaler is not one of the supported names
    """

    # The scaler object gets its own name: rebinding the `scaler` parameter
    # would make the later name checks compare an estimator against a string.
    if scaler == 'Standard':
        print('Scaling each feature by removing the mean and scaling to unit variance')
        scaler_obj = StandardScaler()
    elif scaler == 'MaxAbs':
        print('Scaling each feature by its maximum absoulute value.')
        scaler_obj = MaxAbsScaler()
    elif scaler == 'MinMax':
        print('Normalizing the input data such that the min and max value are', range)
        scaler_obj = MinMaxScaler(feature_range=range)
    else:
        # An unknown name used to return an empty list silently, which only
        # failed later and far away from the actual mistake.
        raise ValueError(
            "Unknown scaler {!r}; expected 'Standard', 'MaxAbs' or 'MinMax'".format(scaler))

    # The scaler is fitted on X itself and then applied to the same X.
    scaler_obj.fit(X)
    X_norm = scaler_obj.transform(X)

    return X_norm

In [None]:
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23. Prefer the standalone joblib package and fall back to the bundled copy
# on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
from JSONLoader import loadData, saveData
import numpy as np

# NOTE: joblib.load unpickles the file, so only load model files you trust.
# NOTE: this rebinds `model`, which previously held the ResNet feature extractor.
model = joblib.load(PROJECT_ROOT + '/models/XGBoostClass.pkl')

# NOTE(review): the scaler is fitted on this sequence's own features, not on
# the statistics of the data the classifier was trained with -- confirm this
# matches how the classifier was trained.
X = normalizeData(X, scaler='Standard', range=(0,1))
pred = model.predict(X)

print('\nFrom the sequence {}, the following events have been predicted:'.format(EXAMPLE_FILE))
print('NHS: ', np.count_nonzero(pred==0))
print('HS: ', np.count_nonzero(pred==1))
print('AN: ', np.count_nonzero(pred==2))

print('\nSaving the predicted labels...')

unlabeledDetections = loadData(PROJECT_ROOT + '/data/experiment/' + EXAMPLE_FILE + '.h5.json')

# Append the predicted class to every detection of the sequence.
# NOTE(review): pred is indexed by track ID, which assumes row i of X belongs to
# track i -- verify against the ordering produced by Dataset/detectionLoader.
predictedDetections = []
for uD in unlabeledDetections:
    trackID = uD[1]
    predictedDetections.append([uD[0], uD[1], uD[2], uD[3], uD[4], uD[5], int(pred[trackID])])

saveData(PROJECT_ROOT + '/data/experiment/' + EXAMPLE_FILE + 'pred.h5.json', predictedDetections)

Scaling each feature by removing the mean and scaling to unit variance

From the sequence 20181002.047_AEQ31, the following events have been predicted:
NHS:  79
HS:  4
AN:  1

Saving the predicted labels...
