# Setting environment (necessary after restarting runtime)

I had to install version 2.0 because version 2.1 has problems with the GPU.

In [1]:
#@title Selecting tensorflow version 2
#%tensorflow_version 2.x
!pip install -q tensorflow==2.0
import tensorflow as tf
tf.__version__

'2.0.0'

In [2]:
#@title Mounting Drive
# Mount the user's Google Drive at /content/drive; the dataset, cache, weights and
# TensorBoard paths used further down all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
#@title Importing things and defining functions
import sys
from tensorflow import keras
import numpy as np
import cv2 as cv
import tensorflow_datasets as tfds
import datetime
import time
import tensorflow as tf
import os

### Functions

In [0]:
#@title To display videos
import imageio
from IPython import display

def animate(images):
  """Show a sequence of frames inline as an animated GIF.

  The frames are clipped to [0, 255], converted to uint8, written to
  './animation.gif' at 25 fps and then displayed with a height of 300.

  Args:
    images: array-like of frames accepted by ``np.clip``.
  """
  gif_path = './animation.gif'
  frames_u8 = np.clip(images, 0, 255).astype(np.uint8)
  imageio.mimsave(gif_path, frames_u8, fps=25)
  # Read the encoded GIF back so it can be embedded in the notebook output.
  with open(gif_path,'rb') as gif_file:
    gif_bytes = gif_file.read()
  display.display(display.Image(data=gif_bytes, height=300))

In [0]:
#@title Pipeline functions
# function graph-enabled
def sampling(video, number_of_frames=20):
    """Copy the first ``number_of_frames`` frames of ``video`` into a float16 variable.

    The destination buffer is fixed at (number_of_frames, 256, 256, 3), so each
    ``video[i]`` must be assignable to a 256x256x3 slice.

    Args:
        video: indexable sequence of frames.
        number_of_frames: how many leading frames to copy.

    Returns:
        The ``tf.Variable`` holding the copied frames (also printed).
    """
    buffer_shape = (number_of_frames, 256, 256, 3)
    zeros = np.zeros(buffer_shape, dtype=np.float16)
    result = tf.Variable(zeros)
    for frame_index in range(number_of_frames):
        frame = video[frame_index]
        result[frame_index].assign(frame)
    print(result)
    return result

# function that creates packets of frames
def pack_resize_frames(video, frames_per_packet=5, sampling_factor=3, size=(64, 64)):
    """Split a video into packets of randomly sub-sampled, resized frames.

    The video is cut into consecutive groups of ``sampling_factor`` frames and one
    frame is drawn at random from each group. ``frames_per_packet`` drawn frames
    form a packet, which is resized to ``size``. Trailing frames that do not fill
    a whole packet are dropped.

    Args:
        video: frame tensor indexable along axis 0 with a known ``shape[0]``.
        frames_per_packet: number of frames in each packet.
        sampling_factor: size of the group each frame is drawn from.
        size: (height, width) passed to ``tf.image.resize``.

    Returns:
        A tensor stacking all packets along axis 0. When the video is shorter than
        one packet the result is the conversion of an empty list.
    """
    frames_per_group = frames_per_packet * sampling_factor
    num_packets = int(video.shape[0] // frames_per_group)
    packets = []
    for j in range(num_packets):
        # One random offset in [0, sampling_factor) per frame of this packet.
        offset = tf.random.uniform([frames_per_packet], 0, sampling_factor, tf.dtypes.int32)
        frames = [video[(j * frames_per_packet + i) * sampling_factor + offset[i]]
                  for i in range(frames_per_packet)]
        packets.append(tf.image.resize(tf.convert_to_tensor(frames), size=size))
    return tf.convert_to_tensor(packets)

# function that uses standard python libraries (deprecated)
def py_sampling(video, seed_in, number_of_frames=20):
    """Pick ``number_of_frames`` frames at indices drawn from a truncated normal.

    Indices are sampled with mean ``size / 2`` and standard deviation ``size / 4``
    (``size`` being the first dimension of ``video``), sorted ascending, cast to
    uint16 and used to index the video through NumPy, so ``video`` must support
    ``.numpy()``.

    NOTE(review): nothing clamps the indices below ``size``; presumably the
    truncation keeps them in range in practice - confirm.

    Args:
        video: tensor of frames.
        seed_in: integer used for both components of the stateless seed.
        number_of_frames: how many indices to draw.

    Returns:
        A tensor containing the selected frames.
    """
    total_frames = video.get_shape()[0]
    drawn = tf.random.stateless_truncated_normal(shape=[number_of_frames],
                                                 seed=[seed_in, seed_in],
                                                 mean=total_frames/2,
                                                 stddev=total_frames/4)
    ordered = tf.sort(drawn, axis=0, direction='ASCENDING')
    frame_ids = tf.cast(ordered, tf.uint16).numpy()
    selected = video.numpy()[frame_ids]
    return tf.convert_to_tensor(selected)

# casting and normalizing
def custom_cast(feature):
    """Return a copy of ``feature`` whose 'video' is cast to float16 and divided by 255.

    Args:
        feature: mapping with 'video' and 'label' entries.

    Returns:
        A dict with the untouched 'label' and the rescaled 'video'.
    """
    scaled_video = tf.cast(feature['video'], tf.float16) / 255.
    return dict(label=feature['label'], video=scaled_video)

# Custom layers

In [0]:
#@title Custom3DConv
class CustomConv3D(keras.layers.Layer):
    """Factorised 3D convolution: a (k, k, 1) Conv3D followed by a (1, 1, k) Conv3D.

    Both convolutions use stride 1 and 'same' padding, so only the channel
    dimension changes (to ``filters``).

    NOTE(review): with a channels-last input the first kernel spans the first two
    non-batch axes and the second kernel spans the third one. MyModel uses
    (1, 3, 3) kernels for its own convolutions - confirm which axes are meant to
    be "spatial" and "temporal" here.

    Args:
        filters: number of output channels of both convolutions.
        kernel_size: integer extent ``k`` used in the two factorised kernels.
        **kwargs: forwarded to both ``keras.layers.Conv3D`` constructors
            (e.g. ``activation``); not forwarded to the base ``Layer``.
    """

    def __init__(self, filters, kernel_size, **kwargs):
        super(CustomConv3D, self).__init__()
        self.spatial = keras.layers.Conv3D(filters=filters,
                                            kernel_size=(kernel_size, kernel_size, 1),
                                            strides=1,
                                            padding='same',
                                            **kwargs)
        self.temporal = keras.layers.Conv3D(filters=filters,
                                            kernel_size=(1, 1, kernel_size),
                                            strides=1,
                                            padding='same',
                                            **kwargs)

    # Implement `call`, not `__call__`: Keras' own `Layer.__call__` must stay in
    # charge so that building, input casting and layer bookkeeping still happen.
    def call(self, inputs):
        """Apply the (k, k, 1) convolution and then the (1, 1, k) convolution."""
        x = self.spatial(inputs)
        outputs = self.temporal(x)
        return outputs

In [0]:
#@title InceptionBlock
class InceptionBlock(keras.layers.Layer):
    """Three parallel convolution branches, concatenated and max-pooled.

    Branches, all applied to the same input:
      * short:  one 1x1x1 ``Conv3D``;
      * medium: one ``CustomConv3D``;
      * tall:   two ``CustomConv3D`` in sequence.
    The branch outputs are concatenated on the last axis and passed through a
    ``MaxPool3D`` with ``pool_size=(1, 1, 3)`` and ``data_format='channels_first'``.

    NOTE(review): presumably 'channels_first' is used on a channels-last tensor so
    that the size-3 window runs over the concatenated channel axis and shrinks
    ``3 * filters`` back to ``filters`` - confirm.

    Args:
        filters: number of output channels of every branch.
        kernel_size: kernel extent forwarded to the ``CustomConv3D`` branches.
        **kwargs: forwarded to every branch constructor (e.g. ``activation``).
    """

    def __init__(self, filters, kernel_size, **kwargs):
        super(InceptionBlock, self).__init__()
        self.short = keras.layers.Conv3D(filters=filters,
                                         kernel_size=1,
                                         strides=1,
                                         **kwargs)
        self.medium = CustomConv3D(filters=filters,
                                   kernel_size=kernel_size,
                                   **kwargs)
        self.tall = [CustomConv3D(filters=filters,
                                   kernel_size=kernel_size,
                                   **kwargs),
                     CustomConv3D(filters=filters,
                                   kernel_size=kernel_size,
                                   **kwargs)]
        self.concat = keras.layers.Concatenate(axis=-1)
        self.pool = keras.layers.MaxPool3D(pool_size=(1,1,3),
                                           padding='same',
                                           data_format='channels_first')

    # Implement `call`, not `__call__`, so Keras' `Layer.__call__` machinery
    # (building, input casting, bookkeeping) is not bypassed.
    def call(self, inputs):
        """Run the three branches, concatenate them and pool the result."""
        x_1 = self.short(inputs)
        x_2 = self.medium(inputs)
        x_3 = inputs
        for layer in self.tall:
            x_3 = layer(x_3)
        x = self.concat([x_1, x_2, x_3])
        outputs = self.pool(x)
        return outputs

In [0]:
#@title ResnetLayer
class ResnetLayer(keras.layers.Layer):
    """Residual unit: ``inputs + BatchNormalization(InceptionBlock(inputs))``.

    The addition requires the block output to have the same shape as the input.

    Args:
        filters: forwarded to the ``InceptionBlock``.
        kernel_size: forwarded to the ``InceptionBlock``.
        **kwargs: forwarded to the ``InceptionBlock`` together with
            ``trainable=True``.
    """

    def __init__(self, filters, kernel_size, **kwargs):
        super(ResnetLayer, self).__init__()
        self.block = InceptionBlock(filters=filters,
                                        kernel_size=kernel_size,
                                        trainable=True,
                                        **kwargs)
        self.batch = keras.layers.BatchNormalization(axis=-1)

    # Implement `call`, not `__call__`, so Keras' `Layer.__call__` machinery
    # (building, input casting, bookkeeping) is not bypassed.
    def call(self, inputs):
        """Return the input plus the batch-normalised inception block output."""
        x = self.block(inputs)
        x = self.batch(x)
        return inputs + x

In [0]:
#@title ResnetBlock
class ResnetBlock(keras.layers.Layer):
    """A chain of ``blocks`` identically configured ``ResnetLayer`` instances.

    Args:
        blocks: number of ``ResnetLayer`` instances to stack.
        filters: forwarded to every ``ResnetLayer``.
        kernel_size: forwarded to every ``ResnetLayer``.
        **kwargs: forwarded to every ``ResnetLayer`` (e.g. ``activation``).
    """

    def __init__(self, blocks, filters, kernel_size, **kwargs):
        super(ResnetBlock, self).__init__()
        self.blocks = [ResnetLayer(filters=filters,
                                   kernel_size=kernel_size,
                                   **kwargs)
                       for _ in range(blocks)]

    # Implement `call`, not `__call__`, so Keras' `Layer.__call__` machinery
    # (building, input casting, bookkeeping) is not bypassed.
    def call(self, inputs):
        """Feed ``inputs`` through every residual layer in order."""
        x = inputs
        for layer in self.blocks:
            x = layer(x)
        return x

# Loading Dataset and writing to disk

## Useful functions

In [0]:
#@title Define functions for value conversion
def _float_feature(value):
  """Wrap a single float / double in a tf.train.Feature carrying a FloatList."""
  float_list = tf.train.FloatList(value=[value])
  return tf.train.Feature(float_list=float_list)

def _bytes_feature(value):
  """Wrap a single string / bytes value in a tf.train.Feature carrying a BytesList."""
  eager_tensor_type = type(tf.constant(0))
  # BytesList cannot unpack a string held by an EagerTensor, so extract it first.
  raw = value.numpy() if isinstance(value, eager_tensor_type) else value
  bytes_list = tf.train.BytesList(value=[raw])
  return tf.train.Feature(bytes_list=bytes_list)

def _int64_feature(value):
  """Wrap a single bool / enum / int / uint in a tf.train.Feature carrying an Int64List."""
  int64_list = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=int64_list)

In [0]:
#@title Serializing functions
def serialize_example(label, video):
    """Encode one (label, video) pair as a serialized ``tf.train.Example``.

    Args:
        label: value stored as an int64 feature under the key 'label'.
        video: tensor stored under the key 'video' as the bytes produced by
            ``tf.io.serialize_tensor``.

    Returns:
        The serialized Example as returned by ``SerializeToString``.
    """
    label_feature = _int64_feature(label)
    video_feature = _bytes_feature(tf.io.serialize_tensor(video))
    features = tf.train.Features(feature={'label': label_feature,
                                          'video': video_feature})
    return tf.train.Example(features=features).SerializeToString()

def tf_serialize_example(example):
    """Serialize a dataset element through ``serialize_example`` via ``tf.py_function``.

    Args:
        example: mapping with 'label' and 'video' tensors.

    Returns:
        The serialized example reshaped to a scalar ``tf.string`` tensor.
    """
    serialized = tf.py_function(
        func=serialize_example,
        inp=[example['label'], example['video']],
        Tout=tf.string)
    # py_function loses the static shape; reshape the result to a scalar.
    scalar_shape = ()
    return tf.reshape(serialized, scalar_shape)

In [0]:
#@title Writing function
def write_dataset(dataset, curr, prec,
                  base_dir="/content/drive/My Drive/splitted_dataset/random_2"):
    """Write the examples whose packet count lies in (prec, curr] to a TFRecord file.

    The examples are filtered on the size of the first axis of ``x['video']``,
    serialized with ``tf_serialize_example`` and written to
    ``<base_dir>/pck_<prec+1>_<curr>/tfrecord``.

    Args:
        dataset: ``tf.data.Dataset`` of dicts with 'video' and 'label' entries.
        curr: inclusive upper bound on the first dimension of 'video'.
        prec: exclusive lower bound on the first dimension of 'video'.
        base_dir: directory under which the bucket directory is created.
    """
    def filter_fn(x):
        frames = tf.shape(x['video'])[0]
        # Explicit tensor op instead of Python `and`, so the predicate does not
        # depend on AutoGraph rewriting boolean operators.
        return tf.logical_and(frames <= curr, frames > prec)

    directory = os.path.join(base_dir, "pck_{}_{}".format(prec+1, curr))
    filename = os.path.join(directory, "tfrecord")
    # Only report "already exists" when that is really the case; any other
    # failure (permissions, unmounted drive, ...) now surfaces as an exception.
    if os.path.isdir(directory):
        print("--- Exception: directory already exists")
    else:
        os.makedirs(directory)
    serialized_features_dataset = dataset.filter(filter_fn).map(tf_serialize_example)
    writer = tf.data.experimental.TFRecordWriter(filename)
    writer.write(serialized_features_dataset)

## Data reading, pipelining and writing

In [0]:
#@title Get the initial dataset and cache it
# Load the 'train' split of UCF101 from the copy already stored on Drive
# (download=False), together with its metadata.
dataset, info = tfds.load('ucf101',
                            with_info=True,
                            data_dir="/content/drive/My Drive/",
                            download=False,
                            split='train')

# Replace every video with its packets of resized frames. pack_resize_frames is
# wrapped in tf.py_function, which is why the 'video' spec has an unknown static
# shape afterwards; the label is passed through unchanged.
dataset = dataset.map(lambda x: {'video':tf.py_function(func=pack_resize_frames,
                                                                        inp=[x['video']],
                                                                        Tout=tf.float32),
                                                'label':x['label']},
                                    num_parallel_calls=4)

def filter_whole(x):
    """Predicate: true when the first dimension of ``x['video']`` is greater than 16."""
    first_dim = tf.shape(x['video'])[0]
    return first_dim > 16

#dataset = dataset.filter(filter_whole)

# Cache the mapped dataset to a file on Drive instead of keeping it in memory.
dataset = dataset.cache(filename="/content/drive/My Drive/temp/whole")

In [9]:
#@title Get the number of examples having a certain dimension (already computed and written to a file)
# count.txt holds one "<value>\t<value>" pair of integers per line; the cells
# below read the first as a packet count and the second as the number of
# examples having that many packets.
indexes = []
# The context manager closes the file even if a line fails to parse.
with open("drive/My Drive/utils/count.txt", mode='r') as count_file:
    for line in count_file:
        if not line.strip():
            continue  # tolerate blank / trailing empty lines
        fields = line.split("\t")
        indexes.append([int(fields[0]), int(fields[1])])
print(indexes)

[[1, 1], [2, 4], [3, 100], [4, 382], [5, 622], [6, 807], [7, 703], [8, 741], [9, 593], [10, 622], [11, 635], [12, 465], [13, 551], [14, 339], [15, 291], [16, 811], [17, 357], [18, 187], [19, 170], [20, 385], [21, 99], [22, 109], [23, 73], [24, 64], [25, 42], [26, 63], [27, 57], [28, 44], [29, 40], [30, 27], [31, 21], [32, 34], [33, 23], [34, 11], [35, 13], [36, 4], [37, 4], [38, 6], [39, 13], [40, 4], [41, 5], [42, 4], [50, 2], [51, 1], [53, 1], [54, 2], [55, 1], [56, 3], [118, 1]]


In [0]:
#@title Equally divide the training according to its dimension
# Walk the [packets, examples] rows in order and accumulate the example count.
# A bucket is written as soon as it holds at least 1000 examples, or when the
# last row is reached; the next bucket then starts above the one just written.
count = 0
prec = 0
last_row = len(indexes) - 1
for row, (packets, examples) in enumerate(indexes):
    count += examples
    if count < 1000 and row != last_row:
        continue
    print("Writing dataset having number of packets higher than {} and lower or equal to {} ...".format(prec, packets))
    write_dataset(dataset, packets, prec)
    print("Done!")
    prec = packets
    count = 0

Writing dataset having number of packets higher than 0 and lower or equal to 5 ...
--- Exception: directory already exists


# Custom models

### Model built with the subclassing API

Model built using blocks of resnet layers

In [0]:
#@title Conv3D and LSTM
class MyModel(keras.Model):
    """Per-packet 3D-convolutional feature extractor followed by an LSTM.

    Every packet ``inputs[:, p]`` goes through the same stack of layers
    (``convLayers``, so the weights are shared between packets). The per-packet
    outputs are stacked on axis 1, flattened to one vector per packet and fed as
    a sequence to an LSTM with 101 units and ``activation='softmax'``, whose last
    output is returned.

    NOTE(review): 101 presumably matches the number of UCF101 classes, and the
    softmax is set as the LSTM's own activation rather than on a separate
    classification layer - confirm this is intended.

    Args:
        numPackets: optional number of packets per example. It can also be set
            later with ``setPackets``; it must be set before the model is called.
    """

    def __init__(self, numPackets=None):
        super(MyModel, self).__init__()
        # Initialised here so that a missing setPackets() call yields a clear
        # error in call() instead of an AttributeError.
        self.numPackets = numPackets

        self.convLayers = [
            keras.layers.Conv3D(filters=8,
                                kernel_size=(1,3,3),
                                strides=(1,2,2),
                                padding='same',
                                activation='relu'),
            keras.layers.MaxPool3D(pool_size=(1,2,2)),
            ResnetBlock(blocks=2,
                        filters=8,
                        kernel_size=3,
                        activation='relu'),
            keras.layers.BatchNormalization(axis=-1),
            keras.layers.Conv3D(filters=24,
                                kernel_size=(1,3,3),
                                strides=2,
                                padding='same',
                                activation='relu'),
            ResnetBlock(blocks=5,
                        filters=24,
                        kernel_size=3,
                        activation='relu'),
            keras.layers.BatchNormalization(axis=-1),
            keras.layers.Conv3D(filters=128,
                                kernel_size=(1,3,3),
                                strides=(1,2,2),
                                padding='same',
                                activation='relu'),
            keras.layers.MaxPool3D(pool_size=(2,2,2)),
        ]

        # The LSTM is kept in float32 regardless of the global dtype policy.
        self.lstmLayer = keras.layers.LSTM(units=101,
                                           return_sequences=False,
                                           activation='softmax',
                                           dtype=tf.float32)
        # Disabled alternative head kept from the original notebook:
        # self.denseLayer = keras.layers.Dense(101,
        #                                      activation='softmax',
        #                                      dtype=tf.float32)

    def call(self, inputs):
        """Run the shared conv stack on every packet, then the LSTM.

        Args:
            inputs: tensor indexed as ``inputs[:, packet]`` for each of the
                ``numPackets`` packets.

        Returns:
            The output of the LSTM layer.

        Raises:
            ValueError: if the number of packets has not been set.
        """
        if self.numPackets is None:
            raise ValueError("numPackets is not set: call setPackets() before using the model")
        convResult = []
        for packet in range(self.numPackets):
            x = inputs[:, packet]
            for layer in self.convLayers:
                x = layer(x)
            convResult.append(x)
        x = tf.stack(convResult, axis=1)
        print("Output of the conv3D layers {}".format(x.shape))
        # Flatten everything after the packet axis into one feature vector.
        new_shape = x.shape[2]*x.shape[3]*x.shape[4]*x.shape[5]
        # tf.reshape replaces the Reshape layer that used to be instantiated on
        # every call; -1 keeps the batch dimension.
        x = tf.reshape(x, (-1, self.numPackets, new_shape))
        print("Input of the lstm layer {}".format(x.shape))
        x = self.lstmLayer(x)
        return x

    def setPackets(self, pck):
        """Set the number of packets per example expected by ``call``."""
        self.numPackets = pck


# Training

In [0]:
#@title Setting half precision for GPU
# Make float16 the default Keras float type.
# NOTE(review): the Keras documentation for set_floatx advises against 'float16'
# for training and recommends a mixed precision policy instead - confirm this
# line is still wanted next to the policy installed below.
tf.keras.backend.set_floatx('float16')
loss_scale = 'dynamic'
# Create a "mixed_float16" policy with dynamic loss scaling and install it as
# the global Keras policy.
policy = tf.keras.mixed_precision.experimental.Policy(
    "mixed_float16", loss_scale=loss_scale)
tf.keras.mixed_precision.experimental.set_policy(policy)

In [0]:
# Inspect the structure (shape and dtype of 'label' and 'video') of the dataset elements.
dataset.element_spec

{'label': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'video': TensorSpec(shape=<unknown>, dtype=tf.float32, name=None)}

In [0]:
# Write one TFRecord file per bucket of four packet counts: bucket `i` holds the
# examples whose first 'video' dimension lies in (i-4, i].
# NOTE(review): the upper limit is len(indexes), i.e. the number of rows of the
# histogram, not the largest packet count - examples above the last multiple of
# four below that limit are never written. Confirm this is intended.
for i in range(4, len(indexes), 4):
    def filter_fn(x):
        frames = tf.shape(x['video'])[0]
        # Explicit tensor op instead of Python `and`, so the predicate does not
        # depend on AutoGraph rewriting boolean operators.
        return tf.logical_and(frames <= i, frames > i-4)

    dataset_temp = dataset.filter(filter_fn)
    print(dataset_temp.element_spec)
    directory = "/content/drive/My Drive/splitted_dataset/pck={}".format(i)
    filename = directory + "/tfrecord"
    # Only report "already exists" when that is really the case; any other
    # failure (permissions, unmounted drive, ...) now surfaces as an exception.
    if os.path.isdir(directory):
        print("Directory already exists")
    else:
        os.makedirs(directory)
    serialized_features_dataset = dataset_temp.map(tf_serialize_example)

    writer = tf.data.experimental.TFRecordWriter(filename)
    writer.write(serialized_features_dataset)

{'video': TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None)}
Directory already exists
{'video': TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None)}
{'video': TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None)}
{'video': TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None)}
{'video': TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None)}
{'video': TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None)}
{'video': TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None)}


In [0]:
#@title Loading tensorboard
# TensorBoard callback logging to a directory on Drive. The options request
# histograms every epoch (histogram_freq=1), metric updates after every batch
# (update_freq='batch') and profiling of batch 3 (profile_batch=3).
board = "drive/My Drive/tensorboard/tensorboard_logs/conv_lstm"
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=board,
                                                      histogram_freq=1,
                                                      update_freq='batch',
                                                      profile_batch=3)

In [0]:
#@title Training
# if uncommented uses xla (don't change much)
# tf.config.optimizer.set_jit(False)
# Train one freshly built model per packet count, carrying the weights from one
# run to the next through a checkpoint on Drive.
# NOTE(review): get_dataset is not defined in the visible part of this notebook;
# it must be defined before this cell runs.
# NOTE(review): model.fit is called once per element of the dataset, so every
# call is a separate one-epoch fit - confirm this is preferred over a single
# fit on the whole dataset.
weights_path = "/content/drive/My Drive/weights/final_model/weights"
for entry in indexes:
    # The rows of `indexes` are [packets, examples] pairs; use the packet count.
    # Plain integers are accepted too, for lists that only hold packet counts.
    numPackets = entry[0] if isinstance(entry, (list, tuple)) else entry
    print("\nTraining with samples having {} packets of frames".format(numPackets))
    model = MyModel()
    model.compile(optimizer='SGD',
                loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
    # Every run except the one with a single packet starts from the saved weights.
    if numPackets != 1:
        model.load_weights(weights_path)
    dataset_temp = get_dataset(numPackets)
    model.setPackets(numPackets)
    for features in dataset_temp:
        inputs, targets = features['video'], features['label']
        model.fit(x=inputs,
                y=targets,
                verbose=2,
                callbacks=[tensorboard_callback])
    model.save_weights(weights_path)


Training with samples having 1 packets of frames
