In [3]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

import os
import sys
sys.path.insert(0, os.path.dirname('../'))

from data_utils import video_to_frames
from data_utils import metadata_loader
from data_utils.kth_dataset_builder import DatasetBuilder

from models.IMAGENET import Imagenet, Video_Feature_Extractor 
from models.IMAGENET import AVG_Video_Classifier, LSTM_Video_Classifier

# Load Dataset

In [5]:
# Setup builder
video_path = '../data/kth-actions/video'
frame_path = '../data/kth-actions/frame'
builder = DatasetBuilder(video_path, frame_path, img_width=84, img_height=84, ms_per_frame=1000, max_frames=16)

# Convert videos and generate metadata
#builder.convert_videos_to_frames()
metadata = builder.generate_metadata()

# Build datasets
train_ds = builder.make_video_dataset(metadata=metadata['train'])
valid_ds = builder.make_video_dataset(metadata=metadata['valid'])

# Preprocess dataset
IMG_SIZE = 160 # All images will be resized to 160x160
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

def format_example(image, label):
    image = tf.repeat(image,3,axis=3)   
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    return image, label

train_ds = train_ds.map(format_example)
valid_ds = train_ds.map(format_example)

# Print
for x, lab in train_dataset.take(1):
    print(x.shape, lab.shape)
train_dataset

NameError: name 'train_dataset' is not defined

# Transfer learning 
### For videos
Below we show to ways how to do transfer learning based on a pretrained base model.
The only part that should be changed is the one comming after video_fature_extractor. Below we show to ways how one can use an RNN(LSTM) or a simple MLP to do the job.

### For images
If we want to train with frames as input there is no feature_extractor necessary. We can put a classifier directly on top of the base model.
In order to see how we do fine tuning chacke the **Transfer_learning.ipynb**

## 1)RNN(LSTM) based classifier with Inception backbone

In [70]:
# Base model (returns pretrained frozen base model trained on Imagenet)
inception = IMAGENET.Imagenet(input_shape=IMG_SHAPE, name='inception')

# Feature Extractor (Has output (NR_FRAME x D) where D is feature dimension)
featuer_ex = IMAGENET.Video_Feature_Extractor(inception)

# LSTM Clasifier
model = IMAGENET.LSTM_Video_Classifier(features=featuer_ex1, class_nr=6, optimizer=RMSprop(lr=0.0001))

Model: "sequential_26"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_13 (TimeDis (None, None, 1280)        2257984   
_________________________________________________________________
dense_35 (Dense)             (None, None, 128)         163968    
_________________________________________________________________
lstm_13 (LSTM)               (None, 128)               131584    
_________________________________________________________________
dense_37 (Dense)             (None, 6)                 774       
Total params: 2,554,310
Trainable params: 296,326
Non-trainable params: 2,257,984
_________________________________________________________________
(5, 6)


In [None]:
model.fit(train_ds.shuffle(100).batch(5), validation_data=valid_ds.batch(5), epochs=5)

In [62]:
model.evaluate(train_ds.batch(5))



[1.8776769638061523, 0.1666666716337204]

## 2)MLP classifier with Inception backbone

In [15]:
# Base model (returns pretrained frozen base model trained on Imagenet)
inception = IMAGENET.Imagenet(name='inception')

# Feature Extractor (Has output (NR_FRAME x D) where D is feature dimension)
featuer_ex = IMAGENET.Video_Feature_Extractor(inception)

# MLP Clasifier
model = IMAGENET.AVG_Video_Classifier(features=featuer_ex1, class_nr=6, optimizer=RMSprop(lr=0.0001))

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_4 (TimeDist (None, None, 1280)        2257984   
_________________________________________________________________
global_average_pooling1d_4 ( (None, 1280)              0         
_________________________________________________________________
dense_12 (Dense)             (None, 128)               163968    
_________________________________________________________________
dense_14 (Dense)             (None, 6)                 774       
Total params: 2,422,726
Trainable params: 164,742
Non-trainable params: 2,257,984
_________________________________________________________________
(5, 6)


In [None]:
model.fit(train_batches,validation_data=valid_batches, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

In [9]:
model.evaluate(valid_batches)



[1.8728103637695312, 0.1666666716337204]