# Módulo criado para codificar os vídeos em mp4 para os arquivos .tfrecords, que serão utilizados para alimentar e treinar o algoritmo generativo

In [1]:
import tensorflow as tf
import skvideo.io
import cv2
import os
import numpy as np

In [4]:
def get_frames(fname):
    """ 
    Função que lê um video em um numpy array de shape:
        (número de frames, altura, comprimento, 3)
    """
    return skvideo.io.vread(fname)

In [5]:
def _int64_feature(value):
    """ Função que cria uma Feature do tipo int64 a ser inserida em um Example. """
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

def _bytes_feature(value):
    """ Função que cria uma Feature do tipo Bytes a ser inserida em um Example. """
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

In [1]:
def get_final_size(dim):
    """ Função que retorna a maior potência de 2 menor que uma """
    final_size = 1024
    while dim < final_size:
        final_size /= 2
    return final_size

def crop(image):
    """ Recorta o centro de uma imagem """
    height = image.shape[0]
    width = image.shape[1]
    smaller_dim = height if height < width else width
    if smaller_dim < 128:
        raise ValueError("Smallest dim of frames should be at least greater than 128px")
    final_size = get_final_size(smaller_dim)
    y = int((height - final_size)/2)
    x = int((width - final_size)/2)
    return image[y: height-y, x:width-x]

def resize(img, final_size=(128, 128)):
    return cv2.resize(img, final_size, interpolation=cv2.INTER_LINEAR)

def preprocess(img):
    return resize(crop(img))

In [7]:
def video_to_tfrecord_example(video_path):
    """ Codifica um mp4 em um Example de tfrecord """
    if not os.path.exists(video_path):
        print("{0} path not found")
        return
    
    frames = [preprocess(frame) for frame in get_frames(video_path)]
    
    encoded_frames = [tf.compat.as_bytes(cv2.imencode(".jpg", frame)[1].tobytes())
                      for frame in frames]
    frames = tf.stack(frames)
    
    features = {}
    features['num_frames']  = _int64_feature(frames.shape[0])
    features['height']      = _int64_feature(frames.shape[1])
    features['width']       = _int64_feature(frames.shape[2])
    
    for i, bytes_frame in enumerate(encoded_frames):
        features['frames/{:04d}'.format(i)] = _bytes_feature(bytes_frame)
        
    tfrecord_example = tf.train.Example(features=tf.train.Features(feature=features))
    
    print("filename: {}, num_frames: {}, height: {}, width: {}".format(
            video_path.split("/")[1], frames.shape[0], frames.shape[1], frames.shape[2]))
    return tfrecord_example

In [8]:
# tfrec_example = video_to_tfrecord_example(teaser_names[0])

filename: Ilumno Uva Vert Azul_05.mp4, num_frames: 320, height: 128, width: 128


In [10]:
def write_video_batch_to_tfrecords(video_paths, output_path):
    """ Acessa n videos e os codificae em arquivos .tfrecord """
    with tf.python_io.TFRecordWriter(output_path) as writer:
        for path in video_paths:
            try:
                tfrec_example = video_to_tfrecord_example(path)
                writer.write(tfrec_example.SerializeToString())
            except Exception as e:
                print("video path: {}, exception: {}".format(path, str(e)))

In [15]:
EXAMPLES_PER_TFRECORD = 20
def get_slice_indexes(ith, num_teasers):
    start_index = ith*EXAMPLES_PER_TFRECORD
    end_index = ith*EXAMPLES_PER_TFRECORD + EXAMPLES_PER_TFRECORD - 1
    if end_index > num_teasers:
        end_index = num_teasers
    return start_index, end_index

def create_tfrecord_fname(i):
    return '_'.join(['teasers', str(i)]) + ".tfrecords"

def teaser_dataset_to_tfrecords():
    """ Codifica o dataset todo para arquivos tfrecords """
    output_dir = '/../../../../newvolume/samples_v4.0'
    if not os.path.exists(output_dir): 
        os.mkdir(output_dir)
        
    teaser_paths = tf.gfile.Glob("teasers/*.mp4")
    num_teasers = len(teaser_paths)
    num_tfrecords = int(np.ceil(num_teasers/EXAMPLES_PER_TFRECORD))
    
    for i in range(num_tfrecords):
        if i == 0:
            batch_paths = teaser_paths[i: EXAMPLES_PER_TFRECORD-1]
        else:
            start, end = get_slice_indexes(i, num_teasers)
            batch_paths = teaser_paths[start: end]
        
        tfrecord_fname = create_tfrecord_fname(i)
        print("writing tfrecord: {}".format(tfrecord_fname))
        output_path = os.path.join(output_dir, tfrecord_fname)
        
        write_video_batch_to_tfrecords(batch_paths, output_path)

In [12]:
# for i in range(1, 4):
#     print(get_slice_indexes(i, 75))
#     print(create_tfrecord_fname(i))

(20, 39)
teasers_1.tfrecords
(40, 59)
teasers_2.tfrecords
(60, 75)
teasers_3.tfrecords


In [16]:
# teaser_dataset_to_tfrecords()

writing tfrecord: teasers_0.tfrecords
filename: Ilumno Uva Vert Azul_05.mp4, num_frames: 320, height: 128, width: 128
filename: v1_180216_Multishow_BBBMelhoresMomentos_BrigaTodoDia.mp4, num_frames: 900, height: 128, width: 128
filename: Visa_30_180919_Visa l Cross Border l Promocao Mundo Sem Fronteiras_1L.mp4, num_frames: 751, height: 128, width: 128
filename: bornlogic_15_180126_ilumnouva horizontal verde_04.mp4, num_frames: 320, height: 128, width: 128
filename: cidadania ja _30_180927_Video Institucional_Home _2C.mp4, num_frames: 900, height: 128, width: 128
filename: carro aluguel_30_181001_e possivel alugar no cpf_1b.mp4, num_frames: 900, height: 128, width: 128
filename: Ilumno Uva Hori Verde_05.mp4, num_frames: 320, height: 128, width: 128
filename: alfa men_30_180517_seu desempenho caiu.mp4, num_frames: 661, height: 128, width: 128
filename: alfamen_30_180817_dialogo homem x mulher de_2.mp4, num_frames: 719, height: 128, width: 128
filename: alfamen_30_180416_pode nao ter mais 

filename: facebook_15_180612_araujo_1.mp4, num_frames: 330, height: 128, width: 128
filename: alfamen_30_180712_video quanto tempo deve durar uma relacao.mp4, num_frames: 720, height: 128, width: 128
filename: conquista_15_180705_conquista j amaralina - horizontal_1b.mp4, num_frames: 330, height: 128, width: 128
filename: biz capital_30_180420_youtube_06.mp4, num_frames: 450, height: 128, width: 128
filename: cidadania ja _30_181009_Video resultado do formulario_2b.mp4, num_frames: 900, height: 128, width: 128
filename: Visa_30_180918_Visa l Cross Border l Promocao Mundo Sem Fronteiras_1c.mp4, num_frames: 800, height: 128, width: 128
filename: beer_30_180801_Video Teste  - Clube de Assinaturas_1b.mp4, num_frames: 901, height: 128, width: 128
filename: cidadania ja _30_181016_video de inauguracao da loja niteroi_1b.mp4, num_frames: 360, height: 128, width: 128
filename: gyramais_30_180719_Como Funciona_1g.mp4, num_frames: 900, height: 128, width: 128
filename: alfamen_30_180615_como est

filename: alfa men_30_181019_gostaria de aumentar seus momentos de prazer.mp4, num_frames: 900, height: 128, width: 128
filename: alfaman_30_180613_ quanto tempo dura o seu jogo debaixo dos lencois_1.mp4, num_frames: 661, height: 128, width: 128
video path: teasers/eventos_15_170412_warehouse_1.mp4, exception: 
filename: bit.one_30_180529_Video 01 - Passo a passo _1e.mp4, num_frames: 900, height: 128, width: 128
filename: beer_30_180808_video teste clube de assinaturas_1f.mp4, num_frames: 901, height: 128, width: 128
filename: google unlock_30_180809_cr trueview_3.mp4, num_frames: 661, height: 128, width: 128
filename: crowd mobile_15_170825_ande seguro_4.mp4, num_frames: 661, height: 128, width: 128
filename: facebook_15_180614_araujo_1d.mp4, num_frames: 450, height: 128, width: 128
filename: plataforma 10_30_180619_vacaciones de invierno_1b.mp4, num_frames: 900, height: 128, width: 128
filename: Visa_30_180919_Visa l Cross Border l Promocao Mundo Sem Fronteiras_1M.mp4, num_frames: 75

filename: alfa men_30_180515_qual a sua velocidade_02.mp4, num_frames: 662, height: 128, width: 128
filename: Visa_15_180919_Visa l Cross Border l Promocao Mundo Sem Fronteiras_15seg_HORIZONTAL_2d.mp4, num_frames: 376, height: 128, width: 128
video path: teasers/norte digital_30_180912_Verdhia Cajicá.mp4, exception: 'ascii' codec can't encode character '\u0301' in position 46: ordinal not in range(128)
video path: teasers/flapper_30_180510_segurança.mp4, exception: 'ascii' codec can't encode character '\u0327' in position 34: ordinal not in range(128)
filename: colgate_30_180507_slim soft advanced_3.mp4, num_frames: 450, height: 128, width: 128
video path: teasers/flamengo_30_170307_libertadores_2b.mp4, exception: 
filename: eventos_15_170411_reveillon gavea_2.mp4, num_frames: 476, height: 128, width: 128
filename: bitone_30_180824_video vantagens lojista_1_Vertical.mp4, num_frames: 900, height: 128, width: 128
video path: teasers/denise macedo_30_170410_pascoa_02.mp4, exception: 
file

filename: V1_180112_Ilumno - video 2 horizontal_jan_02.mp4, num_frames: 330, height: 128, width: 128
filename: beer_30_180920_eisenbahn_1f.mp4, num_frames: 450, height: 128, width: 128
filename: b ferraz_30_171219_compartilhar.mp4, num_frames: 451, height: 128, width: 128
video path: teasers/facebook_15_180622_Unboxing Clube Jorge _ Mateus_1b.mp4, exception: 
filename: facebook_15_180525_Tegra SP - Institucional v1_1.mp4, num_frames: 450, height: 128, width: 128
filename: easynvest_30_180213_trio do sobe e desce.mp4, num_frames: 360, height: 128, width: 128
filename: arcor_30_180731_Big Big Campanha_1e.mp4, num_frames: 661, height: 128, width: 128
filename: eco challenge_30_171027_eco desafios_01.mp4, num_frames: 720, height: 128, width: 128
filename: facebook_30_181016_Creative for Whatsapp_1c.mp4, num_frames: 900, height: 128, width: 128
filename: digible_30_171206_digible audio_2.mp4, num_frames: 749, height: 128, width: 128
filename: alfa men_30_180518_perdeu sua forca_03.mp4, num_

filename: easynvest_30_180207_bloco dos piratas_03.mp4, num_frames: 330, height: 128, width: 128
filename: Leadaki__30_180911_ F5 Centro de Estudios_1b.mp4, num_frames: 901, height: 128, width: 128
filename: carro aluguel_30_180814_video campanha institucional_1a.mp4, num_frames: 901, height: 128, width: 128
filename: alfa men_30_180417_mulheres tem dificuldade de alcancar o prazer.mp4, num_frames: 900, height: 128, width: 128
filename: conquista_15_180419_conquista jardim amaralina segunda fase_02.mp4, num_frames: 330, height: 128, width: 128
video path: teasers/BBB Melhores Momentos - Primeiro Lider_02.mp4, exception: 
filename: facebook_15_180622_Unboxing Clube Jorge _ Mateus_1c.mp4, num_frames: 330, height: 128, width: 128
filename: alfamen_30_180220_46_02.mp4, num_frames: 900, height: 128, width: 128
filename: V1_180112_Ilumno - video 2 horizontal_jan.mp4, num_frames: 330, height: 128, width: 128
filename: colgate_15_1800809_Emex nossa historia_1b.mp4, num_frames: 450, height: 128