<a href="https://colab.research.google.com/github/gibranfp/CursoAprendizajeProfundo/blob/master/notebooks/3a_rnn_class_mean_step" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Reconocimiento de acciones con CNN + RNN

#### <div style="text-align: right"> Berenice Montalvo Lezama </div>

En este ejemplo veremos cómo realizar reconocimiento de acciones sobre [UCF11](https://www.crcv.ucf.edu/data/UCF_YouTube_Action.php), un conjunto de referencia en análisis de video recolectado por el [Center for Research in Computer Vision](https://www.crcv.ucf.edu/) de la Universidad de Florida Central. UCF11 es un conjunto multiclase con 1600 videos en 11 categorías diferentes de acciones humanas.

La arquitectura que se presenta en este ejemplo está compuesta por una red neuronal convolucional y una recurrente. Para otorgar la clasificación de un video se toman las salidas de los pasos de la red recurrente y se promedian.


![UCF11](../figs/UCF11.jpg)


## 1 Carga de datos

### 1.1 Importando bibliotecas

In [1]:
try:
  %tensorflow_version 2.x
except Exception:
  pass

import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, GlobalAveragePooling1D, GRU, LSTM
from tensorflow.keras import Model

tf.random.set_seed(2019)

### 1.2 Descarga de los datos

In [2]:
# Download the TFRecords file with the precomputed frame representations
# into the 'datasets/ucf11' subdirectory of the Keras cache.
tf.keras.utils.get_file('ucf11_frames_reps_rn50v2_pca_std.tfrecords',
    'https://cloud.xibalba.com.mx/s/PFZWERd9MdHoLpz/download',
    cache_subdir='datasets/ucf11')
# List the cache directory to confirm the file is there.
!ls ~/.keras/datasets/ucf11

ucf11_frames_reps_rn50v2_pca_std.tfrecords


### 1.3 Tubería de datos

In [3]:
# Feature description used to parse each serialized example.
_feat_desc = {
  # variable-length list of float32 values (parsed as a sparse tensor)
  'x': tf.io.VarLenFeature(tf.float32),
  # scalar int64 label
  'y': tf.io.FixedLenFeature([], tf.int64),
  # scalar string with the example's name
  'name': tf.io.FixedLenFeature([], tf.string)
}

def _parse_function(example_proto):
  """Decode one serialized example into an ``(x, y, name)`` triple.

  Args:
    example_proto: a serialized example matching ``_feat_desc``.

  Returns:
    x: float32 tensor reshaped to 50 rows; the number of columns is
      inferred from the amount of stored data.
    y: int64 label with a trailing axis added, i.e. shape ``(1,)``.
    name: scalar string tensor with the example's name.
  """
  parsed = tf.io.parse_single_example(example_proto, _feat_desc)
  # 'x' is parsed as a sparse tensor: densify it, then fold it into 50 rows
  flat_x = tf.sparse.to_dense(parsed['x'])
  x = tf.reshape(flat_x, (50, -1))
  # turn the scalar label into a length-1 vector
  y = tf.expand_dims(parsed['y'], axis=-1)
  return x, y, parsed['name']

# Location of the TFRecords file holding the spatial representations.
ds_path = os.path.expanduser(
    '~/.keras/datasets/ucf11/ucf11_frames_reps_rn50v2_pca_std.tfrecords')
shuffle_size = 1593
batch_size = 64

# Input pipeline: read the serialized records, parse each one into
# (x, y, name), shuffle the examples and finally group them into batches.
train_ds = (
    tf.data.TFRecordDataset(ds_path)
    .map(_parse_function)
    .shuffle(shuffle_size)
    .batch(batch_size)
)

# Peek at the first batch to check shapes and sample values.
for x, y_true, name in train_ds.take(1):
    print(f'x.shape={x.shape}')
    print('x[0, :5, :5]=')
    print(x[0, :5, :5].numpy().flatten())
    print(f'y_true.shape={y_true.shape}')
    print(f'y_true[0]={y_true[0].numpy()}')
    print(f'name.shape={name.shape}')
    print(f'name[0]={name[0].numpy()}')

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
x.shape=(64, 50, 512)
x[0, :5, :5]=
[-0.36228782 -1.3095968   1.1650468   0.59512883  1.4917995  -0.36076042
 -1.3810619   1.2208297   0.62461144  1.6224496  -0.35992855 -1.377344
  1.2155812   0.618533    1.6204273  -0.3592346  -1.3868256   1.2387475
  0.624294    1.6373067  -0.36634874 -1.3755231   1.238343    0.60989845
  1.6211106 ]
y_true.shape=(64, 1)
y_true[0]=[3]
name.shape=(64,)
name[0]=b'golf_swing/v_golf_03/v_golf_03_04'


## 2 Definición del modelo

![Arquitectura](../figs/rnn_mean.png)

In [4]:
class CRNNMS(tf.keras.Model):
  """Recurrent classifier that averages the outputs of all time steps.

  A GRU produces one 32-dimensional output per step, the outputs are
  averaged over the step axis and a dense softmax layer maps the average
  to 11 class probabilities.
  """

  def __init__(self):
    """Create the recurrent, pooling and classification layers."""
    super().__init__()
    # return_sequences=True keeps the output of every step, not just the last
    self.rl = GRU(32, return_sequences=True, name='rl')
    self.gap = GlobalAveragePooling1D(name='gap')
    self.fc = Dense(11, activation='softmax', name='fc')

  def call(self, x):
    """Run the forward pass.

    Args:
      x: batch of sequences, (N, 50, 512) for the data used in this file.

    Returns:
      Tensor of shape (N, 11) with the softmax output per example.
    """
    # (N, 50, 512) => (N, 50, 32): one GRU output per step
    step_outputs = self.rl(x)
    # (N, 50, 32) => (N, 32): mean over the step axis
    pooled = self.gap(step_outputs)
    # (N, 32) => (N, 11): class probabilities
    return self.fc(pooled)

## 3 Entrenamiento

### 3.1 Función de pérdida y optimizador

In [5]:
# sparse categorical cross-entropy: multiclass loss that takes integer labels
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
# Adam optimizer with a learning rate of 1e-4
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

### 3.2 Métricas: pérdida y exactitud

In [6]:
# epoch-level accumulators: running mean of the loss and running accuracy
loss_epoch = tf.keras.metrics.Mean()
acc_epoch = tf.keras.metrics.SparseCategoricalAccuracy()

# training-level histories (lists meant to hold one value per epoch)
loss_history = []
acc_history = []

### 3.3 Ciclo de entrenamiento

In [7]:
model = CRNNMS()

# Timestamped run name so every run writes its summaries to its own directory.
model_name = datetime.now().strftime('%y%m%d-%H%M%S')
train_dir = f'logs/{model_name}/train'
train_writer = tf.summary.create_file_writer(train_dir)
print(f"Trainig model {model_name}")


for epoch in range(50):
    for step, (x, y_true, _) in enumerate(train_ds):

        # Record the forward pass so gradients can be computed afterwards.
        with tf.GradientTape() as tape:
            y_pred = model(x)
            loss = loss_fn(y_true, y_pred)

        # Backward pass and weight update.
        gradients = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(gradients, model.trainable_weights))

        # Accumulate the batch results into the epoch-level metrics.
        loss_epoch(loss)
        acc_epoch(y_true, y_pred)

    # Epoch summary; both values are scaled by 100 for logging and printing.
    loss_val = loss_epoch.result().numpy() * 100
    acc_val = acc_epoch.result().numpy() * 100
    # Clear the accumulators so the next epoch starts from zero.
    loss_epoch.reset_states()
    acc_epoch.reset_states()

    # Keep the per-epoch values; without this the training-level
    # histories would stay empty for the whole run.
    loss_history.append(loss_val)
    acc_history.append(acc_val)

    # Write the epoch summary for TensorBoard, using the epoch as the step.
    with train_writer.as_default():
        tf.summary.scalar('loss', loss_val, epoch)
        tf.summary.scalar('acc', acc_val, epoch)

    print(f'{epoch:3d} loss={loss_val:6.2f}, acc={acc_val:6.2f}')

Trainig model 190923-212342
  0 loss=253.66, acc=  9.28
  1 loss=248.13, acc= 10.47
  2 loss=243.11, acc= 12.23
  3 loss=238.34, acc= 14.23
  4 loss=233.54, acc= 17.05
  5 loss=228.91, acc= 18.68
  6 loss=224.42, acc= 20.69
  7 loss=219.86, acc= 23.45
  8 loss=215.45, acc= 25.77
  9 loss=211.12, acc= 28.59
 10 loss=206.82, acc= 31.41
 11 loss=202.58, acc= 34.55
 12 loss=198.35, acc= 37.18
 13 loss=194.16, acc= 39.87
 14 loss=190.05, acc= 42.32
 15 loss=185.98, acc= 45.27
 16 loss=181.95, acc= 48.15
 17 loss=177.87, acc= 50.22
 18 loss=173.95, acc= 52.66
 19 loss=169.96, acc= 54.98
 20 loss=166.11, acc= 57.24
 21 loss=162.26, acc= 59.87
 22 loss=158.45, acc= 62.32
 23 loss=154.66, acc= 63.95
 24 loss=150.97, acc= 66.39
 25 loss=147.26, acc= 68.84
 26 loss=143.71, acc= 70.47
 27 loss=140.09, acc= 72.04
 28 loss=136.58, acc= 73.29
 29 loss=133.12, acc= 75.11
 30 loss=129.79, acc= 76.43
 31 loss=126.39, acc= 77.62
 32 loss=123.16, acc= 78.50
 33 loss=119.93, acc= 79.50
 34 loss=116.79, acc