# 6. Feature Extraction

*Made by David Serrano for his Final Bachelor Thesis*

This notebook extracts features from the fully trained ResNet50.

## 6.1. Environment Preparation


In [None]:
# Useful general imports
from google.colab import drive 

In [None]:
# Mount Google Drive inside the Colab runtime so the project files stored
# there can be read and written (force_remount refreshes an existing mount)
drive.mount(
    mountpoint='/content/drive',
    force_remount=True,
)

Mounted at /content/drive


In [None]:
# Add the folder called utils that has all the custom modules in .py files.
# The membership check keeps sys.path free of duplicate entries when this
# cell is executed more than once in the same session.
import sys
utils_path = '/content/drive/MyDrive/TFG/utils'
if utils_path not in sys.path:
    sys.path.append(utils_path)

In [None]:
from paths import PROJECT_ROOT, HDF5_FOLDER, JSON_FOLDER, USABLE_SEQUENCES, MAX_SQUARE_LEN



---




## 6.2. Load the Feature Extractor

In [None]:
from os import walk
from torchvision.models import resnet50
import torch
import torch.nn as nn

# Take the model with the best accuracy
# bestAcc = 0
# MODELS_FOLDER = PROJECT_ROOT + '/models/'
# for (dirpath, dirnames, filenames) in walk(MODELS_FOLDER):
#     for filename in filenames:
#         if filename[-3:] == '.pt' and float(filename[-9:-3]) > bestAcc:
#             bestAcc = float(filename[-9:-3])
#             modelName = filename

MODELS_FOLDER = PROJECT_ROOT + '/models/'
modelName = 'ResNet50_epoch5_noDataAug.pt'

# Load the model
# Build a ResNet50 without pretrained weights: every parameter is overwritten
# by the checkpoint below. Calling resnet50() with no arguments avoids the
# deprecated `pretrained` keyword and still yields an uninitialised-from-hub
# network on both old and new torchvision releases.
model = resnet50()
num_ftrs = model.fc.in_features
# Replace the classification head so its parameter names and shapes match the
# ones stored in the checkpoint (Linear -> ReLU -> Linear with 3 outputs).
model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 128),
    nn.ReLU(),
    nn.Linear(128, 3)
)
# map_location places the stored tensors on the CPU, so the checkpoint can be
# loaded on a runtime without a GPU.
model.load_state_dict(torch.load(MODELS_FOLDER + modelName,
                                 map_location=torch.device('cpu')))

<All keys matched successfully>

In [None]:
# Drop the last two layers of the head (ReLU and Linear(128, 3)) so that the
# network ends at the first Linear layer of the original head
first_fc_layer = model.fc[0]
model.fc = nn.Sequential(first_fc_layer)
model.fc

Sequential(
  (0): Linear(in_features=2048, out_features=128, bias=True)
)

In [None]:
from torch.utils.data import DataLoader
# Custom imports
from dataPrep import detectionLoader, hdf5Initializer
from dataStructures import Dataset


# Build one DataLoader per usable sequence, so that each sequence can be
# processed on its own later on.
n = 5  # value handed to Dataset; section 6.3 refers to it as the n frames of a track
dataloaders = []
total_sequences = len(USABLE_SEQUENCES)
for position, sequence in enumerate(USABLE_SEQUENCES):
    # Dataset receives a list with one slot per usable sequence; only the
    # slot that belongs to the current sequence is filled in.
    sequence_loaders = [None] * total_sequences
    sequence_loaders[position] = hdf5Initializer([sequence])[0]
    detections, labels = detectionLoader([sequence], tracks=False)

    dataset = Dataset(sequence_loaders, labels, detections, n)
    dataloaders.append(
        DataLoader(dataset=dataset, batch_size=1, num_workers=1)
    )

## 6.3. Extract the Features

In the following block, all the detections are passed through the loaded CNN and the 128 outputs of its last remaining layer are extracted. Then, there exist 2 possibilities:

* Concatenate the features. The features of the *n* frames of the same track are concatenated obtaining a total number of *n*x128 features per track, aside from the label. (if *n*=5: 640 features)

* Average the features. The features of the *n* frames of the same track are averaged obtaining a total number of 128 features per track, aside from the label.

In [None]:
import time
from tqdm.notebook import tqdm

from JSONLoader import saveData

since = time.time()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

X = []
y = []

# The batches are moved to `device` inside the loop, so the model weights
# have to live on the same device; otherwise the forward pass fails as soon
# as a GPU is available.
model.to(device)

# Set model to evaluation mode
model.eval()

# Only forward passes are needed here, so gradient tracking is switched off
# to avoid building an autograd graph for every track.
with torch.no_grad():
    for idx, dataloader in enumerate(dataloaders):
        # One row per track; both lists are written to disk once the
        # sequence has been fully processed.
        features_con = []
        features_avg = []
        print('Loading {} with {} tracks'.format(USABLE_SEQUENCES[idx],
                                                len(dataloader)))

        # Iterate over data
        for inputs, labels in dataloader:

            # Index 0 removes the leading dimension of the 5-D batch
            # (presumably the batch dimension of size 1 added by the
            # DataLoader, leaving one image per frame - TODO confirm).
            inputs = inputs[0, :, :, :, :]
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)

            # Create the concatenated features: every output value of the
            # track flattened into one vector, followed by the label
            features_con_i = torch.cat((torch.flatten(outputs), labels))
            features_con_i = features_con_i.tolist()
            features_con.append(features_con_i)

            # Create the averaged features: mean of the outputs along the
            # first dimension, followed by the label
            features_avg_i = torch.cat((torch.mean(outputs, 0), labels))
            features_avg_i = features_avg_i.tolist()
            features_avg.append(features_avg_i)

        path = PROJECT_ROOT + '/data/Features/concatenatedFeatures/con/' + USABLE_SEQUENCES[idx] +'.h5.json'
        saveData(path, features_con)

        path = PROJECT_ROOT + '/data/Features/averageFeatures/avg/' + USABLE_SEQUENCES[idx] +'.h5.json'
        saveData(path, features_avg)

elapsed = time.time() - since
print('Elapsed time extracting the features {:.0f}m {:.0f}s'.format(elapsed // 60,
                                                        elapsed % 60))

Loading 20171207.039_AEQ11 with 52 tracks
Loading 20171207.043_AEQ11 with 77 tracks
Loading 20180918.036_AEQ50 with 67 tracks
Loading 20180918.038_AEQ50 with 70 tracks
Loading 20180918.040_AEQ50 with 88 tracks
Loading 20180919.007_AEQ40 with 63 tracks
Loading 20180920.034_AEQ11 with 70 tracks
Loading 20181002.028_AEQ20 with 45 tracks
Loading 20181004.038_AEQ10 with 78 tracks
Loading 20181004.038_AEQ20 with 69 tracks
Loading 20181004.038_AEQ40 with 79 tracks
Loading 20181004.046_AEQ20 with 66 tracks
Loading 20181004.046_AEQ40 with 62 tracks
Loading 20181004.046_AEQ50 with 65 tracks
Elapsed time extracting the features 28m 25s
