In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!ls
import os
os.chdir("drive/My Drive/Diplomski rad/slow-fast/")
!ls

drive  sample_data
 chpt				  rs18-24frames
 chpt2				  rs18-v2arc
'Copy of ngrok'			  rs50
 fuse.py			  rs50-but-mob
 logs				  saved.hd5
 models.py			  slowfastv2
 ngrok				  slow-only
 ngrok-stable-linux-amd64.zip	  slow-only.ipynb
 ngrok-stable-linux-amd64.zip.1   stems.py
 __pycache__			  test.py
 resnet.py			  train.ipynb
 rs18				  Untitled.ipynb


In [0]:
import tensorflow
frames = 32

In [8]:

from tensorflow.keras import Model, Sequential

from tensorflow.keras.layers import Dense, GlobalAveragePooling3D, Input, Lambda, Concatenate, Conv3D, Dropout
from tensorflow.keras.optimizers import Adam
import numpy as np

from stems import InitialStem, FinalStem
from fuse import FuseFastConv
from resnet import ResStage, Bottleneck, res_stage, Basic
tensorflow.keras.backend.set_image_data_format('channels_last')

backbone = 18
data_dim = 224
dropout = 0.25
_RESNET_MODEL_DEPTH = {18:(2,1,1,1), 50: (3, 4, 6, 3), 101: (3, 4, 23, 3)}

_RESNET_STRIDES = [1, 2, 2, 2]

_RESNET_DILATIONS = [1, 1, 1, 1]

_RESNET_T_FUNC = Bottleneck

_CLASSES = 101

_TEMPORAL_KERNEL_SIZES = {"slowfast": [
        [[1], [5]],  # conv1 temporal kernel for slow and fast pathway.
        [[1], [3]],  # res2 temporal kernel for slow and fast pathway.
        [[1], [3]],  # res3 temporal kernel for slow and fast pathway.
        [[3], [3]],  # res4 temporal kernel for slow and fast pathway.
        [[3], [3]],  # res5 temporal kernel for slow and fast pathway.
    ],}

_RESNET_KERNEL_LIMIT = [2, 1, 1, 1]

_POOLS = {
    "slowfast": [[1, 1, 1], [1, 1, 1]],
}



alpha = 8

beta_inv = 8
fusion_channel_ratio = 2
print("FCR",fusion_channel_ratio)
fusion_kernel_size= 5
pool_size = _POOLS["slowfast"]

def prepare_input(alpha):
    def func(x):
        b = tensorflow.identity(x[:,::alpha])
        return b
    return Lambda(func)

data_channels = [3,3]
inp_layer = Input((frames,224,224,3))
p_i = prepare_input(alpha)(inp_layer)

(b1,b2,b3,b4) = _RESNET_MODEL_DEPTH[backbone]
        
        # we will just use config for RESNET, see ResNE(x)T
width_per_group = 64
number_of_groups = 1

dim_inner = width_per_group * number_of_groups

out_ratio = (beta_inv//fusion_channel_ratio)

temp_kernel = _TEMPORAL_KERNEL_SIZES["slowfast"]


d_out = [width_per_group, width_per_group // beta_inv]
k = [temp_kernel[0][0] + [7, 7], temp_kernel[0][1] + [7, 7]]
s = [(1, 2, 2), (1, 2, 2)]
pad = [
[temp_kernel[0][0][0] // 2, 3, 3],
[temp_kernel[0][1][0] // 2, 3, 3]
]
"""
p1_s = tensorflow.keras.layers.ZeroPadding3D(padding=pad[0])(p_i)
p1_f = tensorflow.keras.layers.ZeroPadding3D(padding=pad[1])(inp_layer)
conv_1_s = Conv3D(64, kernel_size=[1,7,7], strides=[1,2,2], padding='valid', dilation_rate=1, activation=None, use_bias=False)(p1_s)
conv_1_f = Conv3D(8, kernel_size=[5,7,7], strides=[1,2,2], padding='valid', dilation_rate=1, activation=None, use_bias=False)(p1_f)
bn1_s = tensorflow.keras.layers.BatchNormalization(axis=-1, epsilon=1e-5, momentum=0.1)(conv_1_s)
r1_s = tensorflow.keras.layers.ReLU()(bn1_s)
bn1_f = tensorflow.keras.layers.BatchNormalization(axis=-1, epsilon=1e-5, momentum=0.1)(conv_1_f)
r1_f = tensorflow.keras.layers.ReLU()(bn1_f)

pool1_slow = tensorflow.keras.layers.MaxPooling3D(pool_size=[1,3,3],strides=[1,2,2])(r1_s)
pool1_fast = tensorflow.keras.layers.MaxPooling3D(pool_size=[1,3,3],strides=[1,2,2])(r1_f)
"""
i_s1 = InitialStem(data_channels[0], d_out[0], k[0], s[0], pad[0] )(p_i)
i_s2 = InitialStem(data_channels[1], d_out[1], k[1], s[1], pad[1] )(inp_layer)
#r1 = SlowFastModel(50, alpha, 8, 2, 5, 32, 0.1, 224)([k,inp_layer])
# i_s2 is fast!
fused_slow_1 = FuseFastConv(width_per_group//beta_inv,fusion_channel_ratio,fusion_kernel_size, alpha)([i_s1,i_s2])

stem2_slow = res_stage(dim_in=width_per_group+width_per_group//out_ratio,
            dim_out=width_per_group*4,
            dim_inner=dim_inner,
            temp_kernel_size=temp_kernel[1][0],
            stride=_RESNET_STRIDES[0],
            number_of_blocks=b1,
            num_groups=number_of_groups,
            num_block_temp_kernel=_RESNET_KERNEL_LIMIT[0],
            t_func=_RESNET_T_FUNC,
            dilation=_RESNET_DILATIONS[0],
)(fused_slow_1)
stem2_fast = res_stage(
    dim_in = width_per_group//beta_inv,
    dim_out = width_per_group*4//beta_inv,
    dim_inner = dim_inner//beta_inv,
    temp_kernel_size=temp_kernel[1][1],
    stride=_RESNET_STRIDES[0],
    number_of_blocks=b1,
    num_groups=number_of_groups,
    num_block_temp_kernel=_RESNET_KERNEL_LIMIT[0],
    t_func=_RESNET_T_FUNC,
    dilation=_RESNET_DILATIONS[0]
)(i_s2)

fused_slow_2 = FuseFastConv(width_per_group*4//beta_inv,fusion_channel_ratio,fusion_kernel_size, alpha)([stem2_slow,stem2_fast])

stem3_slow = res_stage(dim_in=width_per_group*4+width_per_group*4//out_ratio,
            dim_out=width_per_group*8,
            dim_inner=dim_inner*2,
            temp_kernel_size=temp_kernel[2][0],
            stride=_RESNET_STRIDES[1],
            number_of_blocks=b2,
            num_groups=number_of_groups,
            num_block_temp_kernel=_RESNET_KERNEL_LIMIT[1],
            t_func=_RESNET_T_FUNC,
            dilation=_RESNET_DILATIONS[1],
)(fused_slow_2)
stem3_fast = res_stage(
    dim_in = width_per_group*4//beta_inv,
    dim_out = width_per_group*8//beta_inv,
    dim_inner = dim_inner*2//beta_inv,
    temp_kernel_size=temp_kernel[2][1],
    stride=_RESNET_STRIDES[1],
    number_of_blocks=b2,
    num_groups=number_of_groups,
    num_block_temp_kernel=_RESNET_KERNEL_LIMIT[1],
    t_func=_RESNET_T_FUNC,
    dilation=_RESNET_DILATIONS[1]
)(stem2_fast)

fused_slow_3 = FuseFastConv(width_per_group*8//beta_inv,fusion_channel_ratio,fusion_kernel_size, alpha)([stem3_slow,stem3_fast])

stem4_slow = res_stage(dim_in=width_per_group*8+width_per_group*8//out_ratio,
            dim_out=width_per_group*16,
            dim_inner=dim_inner*4,
            temp_kernel_size=temp_kernel[3][0],
            stride=_RESNET_STRIDES[2],
            number_of_blocks=b3,
            num_groups=number_of_groups,
            num_block_temp_kernel=_RESNET_KERNEL_LIMIT[2],
            t_func=_RESNET_T_FUNC,
            dilation=_RESNET_DILATIONS[2],
)(fused_slow_3)
stem4_fast = res_stage(
    dim_in = width_per_group*8//beta_inv,
    dim_out = width_per_group*16//beta_inv,
    dim_inner = dim_inner*4//beta_inv,
    temp_kernel_size=temp_kernel[3][1],
    stride=_RESNET_STRIDES[2],
    number_of_blocks=b3,
    num_groups=number_of_groups,
    num_block_temp_kernel=_RESNET_KERNEL_LIMIT[2],
    t_func=_RESNET_T_FUNC,
    dilation=_RESNET_DILATIONS[2]
)(stem3_fast)

fused_slow_4 = FuseFastConv(width_per_group*16//beta_inv,fusion_channel_ratio,fusion_kernel_size, alpha)([stem4_slow,stem4_fast])

stem5_slow = res_stage(dim_in=width_per_group*16+width_per_group*16//out_ratio,
            dim_out=width_per_group*32,
            dim_inner=dim_inner*8,
            temp_kernel_size=temp_kernel[4][0],
            stride=_RESNET_STRIDES[3],
            number_of_blocks=b4,
            num_groups=number_of_groups,
            num_block_temp_kernel=_RESNET_KERNEL_LIMIT[3],
            t_func=_RESNET_T_FUNC,
            dilation=_RESNET_DILATIONS[3],
)(fused_slow_4)
stem5_fast = res_stage(
    dim_in = width_per_group*16//beta_inv,
    dim_out = width_per_group*32//beta_inv,
    dim_inner = dim_inner*8//beta_inv,
    temp_kernel_size=temp_kernel[4][1],
    stride=_RESNET_STRIDES[3],
    number_of_blocks=b4,
    num_groups=number_of_groups,
    num_block_temp_kernel=_RESNET_KERNEL_LIMIT[3],
    t_func=_RESNET_T_FUNC,
    dilation=_RESNET_DILATIONS[3]
)(stem4_fast)

glob_av_s = GlobalAveragePooling3D()(stem5_slow)
glob_av_f = GlobalAveragePooling3D()(stem5_fast)

conc = Concatenate(axis=-1)([glob_av_s,glob_av_f])
do = Dropout(dropout)(conc)
fc = Dense(_CLASSES, use_bias=True, activation="softmax")(do)
"""stem_final = FinalStem(dim_in=[width_per_group*32, width_per_group*32//beta_inv],
        classes=_CLASSES,
        pool=[[frames//alpha//pool_size[0][0], data_dim//32//pool_size[0][1], data_dim//32//pool_size[0][2]],
        [frames//pool_size[1][0], data_dim//32//pool_size[1][1], data_dim//32//pool_size[1][2]]],
        dropout=dropout,
        pathways=2)([stem5_slow,stem5_fast])
"""
model = Model(inp_layer, fc)


model.summary(line_length=150)

data = np.array([np.random.rand(frames,data_dim,data_dim,3),np.random.rand(frames,data_dim,data_dim,3)])
print(model.predict(data).shape)

FCR 2
di 8
fk  5
do for fuse  16
TEMPORAL KERNEL SIZE [1, 1]
resnet_block_40 dim_in:80 dim_out:256
dimout-->256
BOTTLENECK []
RESBLOCK []
resnet_block_41 dim_in:256 dim_out:256
BOTTLENECK []
RESBLOCK []
TEMPORAL KERNEL SIZE [3, 3]
resnet_block_42 dim_in:8 dim_out:32
dimout-->32
BOTTLENECK []
RESBLOCK []
resnet_block_43 dim_in:32 dim_out:32
BOTTLENECK []
RESBLOCK []
di 32
fk  5
do for fuse  64
TEMPORAL KERNEL SIZE [1]
resnet_block_44 dim_in:320 dim_out:512
dimout-->512
BOTTLENECK []
RESBLOCK []
TEMPORAL KERNEL SIZE [3]
resnet_block_45 dim_in:32 dim_out:64
dimout-->64
BOTTLENECK []
RESBLOCK []
di 64
fk  5
do for fuse  128
TEMPORAL KERNEL SIZE [3]
resnet_block_46 dim_in:640 dim_out:1024
dimout-->1024
BOTTLENECK []
RESBLOCK []
TEMPORAL KERNEL SIZE [3]
resnet_block_47 dim_in:64 dim_out:128
dimout-->128
BOTTLENECK []
RESBLOCK []
di 128
fk  5
do for fuse  256
TEMPORAL KERNEL SIZE [3]
resnet_block_48 dim_in:1280 dim_out:2048
dimout-->2048
BOTTLENECK []
RESBLOCK []
TEMPORAL KERNEL SIZE [3]
resn

In [0]:
os.chdir("../data/")

In [0]:
#pip install tensorflow.keras-video-generators

In [0]:
from new_generator import FrameGenerator
import glob
import os
os.chdir("./Videos/")
classes = [i.split(os.path.sep)[1] for i in glob.glob('train/*')]
classes.sort()
print(len(classes))
SIZE = (224, 224)
CHANNELS = 3
NBFRAME = frames
BS = 4
EPOCHS = 20


glob_pattern='train/{classname}/*.avi'

data_aug = tensorflow.keras.preprocessing.image.ImageDataGenerator(
    zoom_range=.2,
    horizontal_flip=True,
    rotation_range=6,
    width_shift_range=.15,
    height_shift_range=.15)

train = FrameGenerator(
    BS,
    NBFRAME,
    classes, 
    SIZE,
    .25,
    CHANNELS,
    glob_pattern,
    data_aug,
    typ="train"
)


valid = train.get_valid()

train.write_files('trainlist2')
valid.write_files('validlist2')

Using TensorFlow backend.


101
Loaded 76 train 25 valid for class ApplyEyeMakeup .
Loaded 62 train 20 valid for class ApplyLipstick .
Loaded 78 train 26 valid for class Archery .
Loaded 73 train 24 valid for class BabyCrawling .
Loaded 58 train 19 valid for class BalanceBeam .
Loaded 84 train 28 valid for class BandMarching .
Loaded 81 train 26 valid for class BaseballPitch .
Loaded 75 train 24 valid for class Basketball .
Loaded 71 train 23 valid for class BasketballDunk .
Loaded 84 train 28 valid for class BenchPress .
Loaded 72 train 24 valid for class Biking .
Loaded 83 train 27 valid for class Billiards .
Loaded 70 train 23 valid for class BlowDryHair .
Loaded 57 train 19 valid for class BlowingCandles .
Loaded 62 train 20 valid for class BodyWeightSquats .
Loaded 84 train 28 valid for class Bowling .
Loaded 86 train 28 valid for class BoxingPunchingBag .
Loaded 73 train 24 valid for class BoxingSpeedBag .
Loaded 55 train 18 valid for class BreastStroke .
Loaded 72 train 23 valid for class BrushingTeeth .
L

In [0]:
from new_generator import FrameGenerator
import glob
import os
import pickle

os.chdir("./Videos/")
classes = [i.split(os.path.sep)[1] for i in glob.glob('train/*')]
classes.sort()
print(len(classes))
SIZE = (224, 224)
CHANNELS = 3
NBFRAME = frames
BS = 8
EPOCHS = 20


glob_pattern='train/{classname}/*.avi'

data_aug = tensorflow.keras.preprocessing.image.ImageDataGenerator(
    zoom_range=.2,
    horizontal_flip=True,
    rotation_range=6,
    width_shift_range=.15,
    height_shift_range=.15)

t_list=None
v_list=None

with open("trainlist2","rb") as fp:
    t_list=pickle.load(fp)

with open("validlist2","rb") as fp:
    v_list=pickle.load(fp)

train = FrameGenerator(
    BS,
    NBFRAME,
    classes, 
    SIZE,
    None,
    CHANNELS,
    glob_pattern,
    data_aug,
    typ="train",
    prepared_data=t_list
)

valid = FrameGenerator(
    BS,
    NBFRAME,
    classes, 
    SIZE,
    None,
    CHANNELS,
    glob_pattern,
    None,
    typ="valid",
    prepared_data=v_list
)

Using TensorFlow backend.


101
Loaded 7185 files for train generator.
Loaded 2352 files for valid generator.


In [0]:
import tensorflow
from tensorflow.keras.models import load_model

model = load_model("../../slow-fast/rs18-v2arc/")

In [0]:
from tensorflow.keras.callbacks import ModelCheckpoint

checkpoint = ModelCheckpoint("../../slow-fast/rs18-v2arc/", monitor='val_loss', verbose=1, save_best_only=True, mode='auto', save_freq="epoch")

#opt = Adam(0.001)#0.0001
#model.compile(loss=tensorflow.keras.losses.categorical_crossentropy, optimizer=opt, metrics=["accuracy"])
print(len(valid)//valid.batch_size)
model.fit(
    train,
    steps_per_epoch=len(train),
    validation_data=valid,
    validation_steps=len(valid),
    verbose=1,
    epochs=EPOCHS,
    callbacks=[checkpoint],
)

36
Epoch 1/20
191/898 [=====>........................] - ETA: 3:35:04 - loss: 0.8206 - accuracy: 0.7585Invalid frame count for video train/FrontCrawl/v_FrontCrawl_g23_c06.avi . Requested: 32 Obtained: 0
198/898 [=====>........................] - ETA: 3:34:25 - loss: 0.8175 - accuracy: 0.7601Invalid frame count for video train/FrisbeeCatch/v_FrisbeeCatch_g23_c03.avi . Requested: 32 Obtained: 0
No other option for train/PushUps/v_PushUps_g16_c04.avi
Could not get frames for train/PushUps/v_PushUps_g16_c04.avi

Epoch 00001: val_loss improved from inf to 1.17819, saving model to ../../slow-fast/rs18-v2arc/
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: ../../slow-fast/rs18-v2arc/assets
Epoch 2/20
No other option for train/PushUps/v_PushUps_g16_c04.avi
Could not get frames for train/PushUps/v_PushUps_g16_c04.avi

Epoch 00002: val_loss improved from 1.17819 to 1.08678, saving model to ../../slow-fast/rs18-v2arc/
INFO:tensor