In [None]:
!                                                                                                                                                                                      qapt-get install ffmpeg
!command -v ffmpeg >/dev/null || (apt update && apt install -y ffmpeg)
!pip install -q mediapy

/bin/bash: qapt-get: command not found


In [None]:
import os
from time import sleep
import glob
import pandas as pd
import uuid
import mediapy as media
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from sklearn.model_selection import train_test_split

import os
from pathlib import Path

In [None]:
# Use Progress Bars in Python
tqdm.pandas()

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Global Variables

In [None]:
# Paths base
PATH_BASE = '/content/drive/MyDrive/ucf_experiments/'
PATH_CICLE = PATH_BASE + 'ciclo_experimental_1/'

# Path base de armazenamento dos augmentations
PATH_DATA = PATH_CICLE + 'data/'

# Dados de treino augmentados
PATH_AUG_BASE = PATH_DATA + 'augmented_train/'
# Dados de teste redimensionados (visto que não popdem ser augmentados)
PATH_TEST_RESIZED = PATH_DATA + 'resized_test/'

# Paths Configuration

In [None]:
# Create target directory & all intermediate directories if don't exists
def create_dir(dirName):
  try:
      os.makedirs(dirName)    
      print("Directory " , dirName ,  " Created ")
  except FileExistsError:
      print("Directory " , dirName ,  " already exists")  

In [None]:
create_dir(PATH_DATA)
create_dir(PATH_AUG_BASE)
create_dir(PATH_TEST_RESIZED)

Directory  /content/drive/MyDrive/ucf_experiments/ciclo_experimental_1/data/  already exists
Directory  /content/drive/MyDrive/ucf_experiments/ciclo_experimental_1/data/augmented_train/  already exists
Directory  /content/drive/MyDrive/ucf_experiments/ciclo_experimental_1/data/resized_test/  already exists


## Data Loading

In [None]:
def load_ucf_crime_splits(path):
  # Load files
  ucfcrime_paths = glob.glob(path)
  ucfcrime_annotated = pd.concat([pd.read_csv(data_path, index_col=0) for data_path in ucfcrime_paths])

  # Remove Classes
  EXCLUDE = ['RoadAccidents', 'Burglary', 'Shooting', 'Stealing', 'Vandalism']
  ucfcrime_annotated = ucfcrime_annotated.loc[~ucfcrime_annotated.video_class.isin(EXCLUDE)]

  # Select columns
  ucfcrime_annotated = ucfcrime_annotated[['new_label', 'segment_path','video_class', 'video_name']].reset_index()
  ucfcrime_annotated = ucfcrime_annotated.drop(columns=['Unnamed: 0'])

  # Correção das anotações manuais
  ucfcrime_annotated.loc[ucfcrime_annotated.new_label >= 2, 'new_label'] = 1

  # Set class dataset
  ucfcrime_annotated['dataset'] = 'ucfcrime'
  ucfcrime_annotated['video_name'] = ucfcrime_annotated['video_name'].str.replace('.txt', '')

  return ucfcrime_annotated

def get_name_video(video, ext):
  return video.split('/')[-1].replace(ext, '')


def load_rwf2000(path):
  # Load files
  rwf2000_paths = glob.glob(path)

  rwf_df = pd.DataFrame()

  # Create dataframe with videos
  for video in rwf2000_paths:
    temp_row = {
        'segment_path': video,
        'video_class': 'Fighting',
        'new_label': 0 if 'NonFight' in video else 1,
        'video_name': get_name_video(video, '.avi')
    }

    rwf_df = rwf_df.append(temp_row, ignore_index=True)

  # Set class dataset
  rwf_df['dataset'] = 'rwf2000'
    
  return rwf_df

### Load UCF-Crime Splits (annotated videos)

In [None]:
path_ucf_crime = '/content/drive/MyDrive/ucf_experiments/data/annotations/anomaly/*.csv'
ucfcrime_annotated = load_ucf_crime_splits(path_ucf_crime)
print(ucfcrime_annotated.video_class.value_counts())
ucfcrime_annotated.sample(2, random_state=46)

Robbery     1623
Assault     1057
Fighting    1048
Abuse        940
Arrest       845
Name: video_class, dtype: int64




Unnamed: 0,new_label,segment_path,video_class,video_name,dataset
122,0.0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse,Abuse006_x264,ucfcrime
668,0.0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse,Abuse036_x264,ucfcrime


### Load RWF-2000

In [None]:
path_rwf2000_train = '/content/drive/MyDrive/ucf_experiments/data/rwf/RWF-2000/train/**/*.avi'
rwf_df_train = load_rwf2000(path_rwf2000_train)
print(rwf_df_train.new_label.value_counts())
rwf_df_train.sample(2, random_state=46)

1.0    800
0.0    800
Name: new_label, dtype: int64


Unnamed: 0,segment_path,video_class,new_label,video_name,dataset
876,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,0.0,4HH7yMU8y9A_3,rwf2000
1385,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,0.0,Qqfn1urQ_0,rwf2000


In [None]:
print(rwf_df_train.segment_path.str.split('/')[0])

['', 'content', 'drive', 'MyDrive', 'ucf_experiments', 'data', 'rwf', 'RWF-2000', 'train', 'Fight', '-1l5631l3fg_0.avi']


In [None]:
path_rwf2000_test = '/content/drive/MyDrive/ucf_experiments/data/rwf/RWF-2000/val/**/*.avi'

rwf_df_test = load_rwf2000(path_rwf2000_test)
print(rwf_df_test.new_label.value_counts())
rwf_df_test.sample(2, random_state=46)

1.0    200
0.0    200
Name: new_label, dtype: int64


Unnamed: 0,segment_path,video_class,new_label,video_name,dataset
342,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,0.0,SJRnWfHAx_E_1,rwf2000
153,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,1.0,SJRnWfHAx_E_2,rwf2000


## Preprocessing




In [None]:
def df_preprocessing(df):
  # Concatenação de dataframes
  # full_dataframe = pd.concat([ucfcrime_annotated, rwf_df]) 

  # Tratamento de dados
  df = df.rename(columns={'new_label': 'label'})
  df = df.dropna()
  df = df.drop_duplicates()
  df['label'] = df.label.astype(int)

  return df

In [None]:
rwf_df_train = df_preprocessing(rwf_df_train)
rwf_df_test = df_preprocessing(rwf_df_test)

ucfcrime_annotated = df_preprocessing(ucfcrime_annotated)

In [None]:
display(rwf_df_train.sample(2))
print(rwf_df_train.label.value_counts())

display(rwf_df_test.sample(2))
print(rwf_df_test.label.value_counts())

display(ucfcrime_annotated.sample(2))
print(ucfcrime_annotated.label.value_counts())

Unnamed: 0,segment_path,video_class,label,video_name,dataset
1597,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,0,_q5Nwh4Z6ao_6,rwf2000
1450,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,0,tILABXoHE7s_2,rwf2000


1    800
0    800
Name: label, dtype: int64


Unnamed: 0,segment_path,video_class,label,video_name,dataset
292,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,0,MrEU15Ug_0,rwf2000
287,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,0,l82NDOvk_0,rwf2000


1    200
0    200
Name: label, dtype: int64


Unnamed: 0,label,segment_path,video_class,video_name,dataset
3197,1,/content/drive/MyDrive/ucf_experiments/data/uc...,Fighting,Fighting024_x264,ucfcrime
22,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse,Abuse003_x264,ucfcrime


0    3572
1    1642
Name: label, dtype: int64


In [None]:
#full_dataframe.groupby(['video_class', 'label'], group_keys=True).count()

## Data Balanced UCF-Crime

In [None]:
def get_balanced_sample(part):
  # Balanceando conforme a classe com a menor quantidade de amostras
  negative_label = part.loc[part.label == 0]
  positive_label = part.loc[part.label == 1]

  sample_size = min(len(negative_label), len(positive_label))

  df = pd.concat([negative_label.sample(sample_size, random_state=42), positive_label.sample(sample_size, random_state=42)])
  return df


In [None]:
ucf_crime_balanced = ucfcrime_annotated.groupby(['video_class']).apply(get_balanced_sample)

In [None]:
ucf_crime_balanced = ucf_crime_balanced.reset_index(drop=True)
ucf_crime_balanced.head(2)

Unnamed: 0,label,segment_path,video_class,video_name,dataset
0,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse,Abuse049_x264,ucfcrime
1,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse,Abuse018_x264,ucfcrime


In [None]:
ucf_crime_balanced.label.value_counts()

0    1642
1    1642
Name: label, dtype: int64

## Spliting UCF-Crime

In [None]:
train_ratio = 0.8

# train is now 80% of the entire data set
# the _junk suffix means that we drop that variable completely
x_train_ucfcrime, x_test_ucfcrime = train_test_split(ucf_crime_balanced, test_size=1 - train_ratio, random_state=42)

In [None]:
print(len(x_train_ucfcrime))
print(x_train_ucfcrime.label.value_counts())
x_train_ucfcrime.sample(3, random_state=2)

2627
1    1335
0    1292
Name: label, dtype: int64


Unnamed: 0,label,segment_path,video_class,video_name,dataset
248,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse,Abuse008_x264,ucfcrime
3126,1,/content/drive/MyDrive/ucf_experiments/data/uc...,Robbery,Robbery081_x264,ucfcrime
882,1,/content/drive/MyDrive/ucf_experiments/data/uc...,Arrest,Arrest023_x264,ucfcrime


## Concatenate Datasets (Train and Test)

In [None]:
x_train = pd.concat([x_train_ucfcrime[['label', 'segment_path', 'video_class', 'video_name', 'dataset']], rwf_df_train[['label', 'segment_path', 'video_class', 'video_name', 'dataset']]]) 
x_train = x_train.reset_index()
print(len(x_train))
print(x_train.label.value_counts())
x_train.sample(3, random_state=2)

4227
1    2135
0    2092
Name: label, dtype: int64


Unnamed: 0,index,label,segment_path,video_class,video_name,dataset
765,607,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Arrest,Arrest008_x264,ucfcrime
862,2350,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Robbery,Robbery057_x264,ucfcrime
749,529,1,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse,Abuse005_x264,ucfcrime


In [None]:
x_train.dataset.value_counts()

ucfcrime    2627
rwf2000     1600
Name: dataset, dtype: int64

In [None]:
x_test = pd.concat([x_test_ucfcrime[['label', 'segment_path', 'video_class', 'video_name', 'dataset']], rwf_df_test[['label', 'segment_path', 'video_class', 'video_name', 'dataset']]]) 
x_test = x_test.reset_index()
print(len(x_test))
print(x_test.label.value_counts())
x_test.sample(3, random_state=2)

1057
0    550
1    507
Name: label, dtype: int64


Unnamed: 0,index,label,segment_path,video_class,video_name,dataset
691,34,1,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,7gLKFV5voOg_0,rwf2000
66,203,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse,Abuse007_x264,ucfcrime
397,3089,1,/content/drive/MyDrive/ucf_experiments/data/uc...,Robbery,Robbery019_x264,ucfcrime


In [None]:
x_test.dataset.value_counts()

ucfcrime    657
rwf2000     400
Name: dataset, dtype: int64

In [None]:
## Save unaugmented data
x_train.to_csv(
    os.path.join(PATH_DATA, 'x_train_unaugmented.csv'),
)
x_test.to_csv(
    os.path.join(PATH_DATA, 'x_test_unaugmented.csv'),
)

# Data Augmentation

In [None]:
# Normalização do dimensionamento do vídeo
def test_video_normalize(video_numpy):
  with tf.device('/gpu:0'):
    augs = ['resized']
    video_tensor = tf.constant(video_numpy)
    resized = tf.image.resize(video_numpy, size=(224, 224))
    stacked  = tf.stack([resized])

    return augs, stacked


def test_normalization(row):
  video_name = str(row['video_name']) + '-' + uuid.uuid4().hex[:10] + '_' + str(row['label'])
  raw_video = media.read_video(row['segment_path'])
  raw_video = media.to_float01(raw_video)

  augs, videos = test_video_normalize(raw_video)
  
  for idx, name in enumerate(augs):
    aug_video_name = video_name + '_' + name + '.mp4'
    aug_path = os.path.join(PATH_TEST_RESIZED, aug_video_name)

    media.write_video(aug_path, videos[idx].numpy())

    row[name] = aug_path

  return row

# Pipeline de Augmentação de Dados
def video_augmentation(video_numpy):
  with tf.device('/gpu:0'):
    augs = ['resized', 'bright', 'contrast', 'flip_lr', 'hue_adjust']
    video_tensor = tf.constant(video_numpy)
    resized = tf.image.resize(video_numpy, size=(224, 224))

    bright   = tf.image.adjust_brightness(resized, delta=0.4)
    contrast = tf.image.adjust_contrast(resized, contrast_factor=0.6)
    flip_lr  = tf.image.flip_left_right(resized)
    hue_adjust   = tf.image.adjust_hue(resized, delta=1.5)

    stacked  = tf.stack([resized, bright, contrast, flip_lr, hue_adjust])

    return augs, stacked


def data_augmentation(row):
  video_name = str(row['video_name']) + '-' + uuid.uuid4().hex[:10] + '_' + str(row['label'])

  raw_video = media.read_video(row['segment_path'])
  raw_video = media.to_float01(raw_video)
  
  augs, videos = video_augmentation(raw_video)
  
  for idx, name in enumerate(augs):
    aug_video_name = video_name + '_' + name + '.mp4'
    aug_path = os.path.join(PATH_AUG_BASE, aug_video_name)

    media.write_video(aug_path, videos[idx].numpy())
    row[name] = aug_path

  return row

### Treino

In [None]:
len(x_train)

4227

In [None]:
x_train

Unnamed: 0,index,label,segment_path,video_class,video_name,dataset
0,2766,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Robbery,Robbery097_x264,ucfcrime
1,2984,1,/content/drive/MyDrive/ucf_experiments/data/uc...,Robbery,Robbery035_x264,ucfcrime
2,2069,1,/content/drive/MyDrive/ucf_experiments/data/uc...,Fighting,Fighting028_x264,ucfcrime
3,208,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse,Abuse022_x264,ucfcrime
4,997,1,/content/drive/MyDrive/ucf_experiments/data/uc...,Arrest,Arrest044_x264,ucfcrime
...,...,...,...,...,...,...
4222,1595,0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,_q5Nwh4Z6ao_4,rwf2000
4223,1596,0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,_q5Nwh4Z6ao_5,rwf2000
4224,1597,0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,_q5Nwh4Z6ao_6,rwf2000
4225,1598,0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,_q5Nwh4Z6ao_7,rwf2000


In [None]:
x_train[2:]

Unnamed: 0,index,label,segment_path,video_class,video_name,dataset
2,2069,1,/content/drive/MyDrive/ucf_experiments/data/uc...,Fighting,Fighting028_x264,ucfcrime
3,208,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse,Abuse022_x264,ucfcrime
4,997,1,/content/drive/MyDrive/ucf_experiments/data/uc...,Arrest,Arrest044_x264,ucfcrime
5,565,1,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse,Abuse017_x264,ucfcrime
6,313,1,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse,Abuse017_x264,ucfcrime
...,...,...,...,...,...,...
4222,1595,0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,_q5Nwh4Z6ao_4,rwf2000
4223,1596,0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,_q5Nwh4Z6ao_5,rwf2000
4224,1597,0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,_q5Nwh4Z6ao_6,rwf2000
4225,1598,0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting,_q5Nwh4Z6ao_7,rwf2000


In [None]:
def generate_augmentations_videos(x_train, initial_step=0, incremento_step=0):

  # Iremos quebrar esse processo em steps menores, por conta do tempo de processamento requerido (salvaremos os dados por partes)

  #files = glob.glob(PATH_AUG_BASE + '*.mp4')
  #for f in files:
  #    os.remove(f)

  # Para a primeira vez de execução, initial_step = 0. Caso não, use o último indice do step salvo
  #initial_step = 2532

  if(initial_step == 0):

    step_size = int(len(x_train) / 10)
    final_step = step_size
    create_dir(PATH_AUG_BASE +'steps/')

    # Step 0 a 9
    for step in range(10):
      print(initial_step,final_step)
      x_train_augmented = x_train.iloc[initial_step:final_step].progress_apply(data_augmentation, axis=1)

      initial_step += step_size
      final_step += step_size
      x_train_augmented.to_csv(
          PATH_AUG_BASE + 'steps/train_aug_step_{}.csv'.format(str(step))
      )

      sleep(3)

  else:
    print('else')

    #x_train = x_train[initial_step:]

    step_size = int(len(x_train) / 10)
    final_step = initial_step + step_size

    create_dir(PATH_AUG_BASE +'steps/')

    for step in range(10-incremento_step):
      print(initial_step, final_step, 'steps/train_aug_step_{}.csv'.format(str(step+incremento_step)))
      x_train_augmented = x_train.iloc[initial_step:final_step].progress_apply(data_augmentation, axis=1)

      initial_step += step_size
      final_step += step_size
      x_train_augmented.to_csv(
          PATH_AUG_BASE + 'steps/train_aug_step_{}.csv'.format(str(step+incremento_step))
      )

      sleep(3)

  return 1


def v2_generate_augmentations_videos(x_train, start_index=0,  base_path=PATH_AUG_BASE):

  # Iremos quebrar esse processo em steps menores, por conta do tempo de processamento requerido (salvaremos os dados por partes)

  chunks = np.array_split(x_train, 10)

  for iternal_index, chunk in enumerate(chunks[start_index:]):
      final_index = iternal_index + start_index
      print('index: ' + str(final_index))
      x_train_augmented = chunk.progress_apply(data_augmentation, axis=1)

      x_train_augmented.to_csv(
          base_path + 'steps/train_aug_step_{}.csv'.format(str(final_index))
      )

      sleep(3)


In [None]:
## generate_augmentations_videos(x_train, 0, 0) DEPRECATED, use a função v2 abaixo

# Gerador de augmentações corrigido 
# DESCOMEEENTEEEEE abaixo - OFICIAL

#v2_generate_augmentations_videos(x_train, start_index=0)

Directory  /content/drive/MyDrive/ucf_experiments/ciclo_experimental_1/data/augmented_train/steps/  Created 
0 422


100%|██████████| 422/422 [31:34<00:00,  4.49s/it]


422 844


100%|██████████| 422/422 [33:45<00:00,  4.80s/it]


844 1266


100%|██████████| 422/422 [39:08<00:00,  5.57s/it]


1266 1688


100%|██████████| 422/422 [39:40<00:00,  5.64s/it]


1688 2110


100%|██████████| 422/422 [44:46<00:00,  6.37s/it]


2110 2532


100%|██████████| 422/422 [41:31<00:00,  5.90s/it]


2532 2954


100%|██████████| 422/422 [58:51<00:00,  8.37s/it]


2954 3376


100%|██████████| 422/422 [1:02:02<00:00,  8.82s/it]


3376 3798


100%|██████████| 422/422 [1:00:30<00:00,  8.60s/it]


3798 4220


100%|██████████| 422/422 [57:42<00:00,  8.21s/it]


1

In [None]:
paths = sorted(Path(PATH_AUG_BASE).iterdir(), key=os.path.getmtime, reverse=True)
len(paths)

21167

In [None]:
def preprocessing_augmentations(size_train, steps=[]):
  
  # Leia todos os files .csv salvos no passo anterior
  x_train_aug = pd.concat([pd.read_csv(step_path) for step_path in steps])

  # Categorizando o dataset
  x_train_aug['dataset'] = x_train_aug.segment_path.apply(lambda s_path : 'rwf' if 'RWF-2000' in s_path else 'ucfCrime' )

  # Salvando o arquivo unificado apenas para validação
  x_train_aug.to_csv(PATH_AUG_BASE + 'x_train_augmented.csv')

  # Preprocessing files augmentations (transformando os valores das colunas de augmentations em linhas)
  resized = x_train_aug[['resized', 'label']].rename(columns={'resized': 'path'})
  bright = x_train_aug[['bright', 'label']].rename(columns={'bright': 'path'})
  contrast = x_train_aug[['contrast', 'label']].rename(columns={'contrast': 'path'})
  flip_lr = x_train_aug[['flip_lr', 'label']].rename(columns={'flip_lr': 'path'})
  hue_adjust = x_train_aug[['hue_adjust', 'label']].rename(columns={'hue_adjust': 'path'})
  augmentations = [resized, bright, contrast, flip_lr, hue_adjust]
  print(resized.shape, bright.shape, contrast.shape, flip_lr.shape, hue_adjust.shape)

  
  final_binary = pd.concat(augmentations)
  print(len(augmentations), size_train, len(final_binary))
  assert len(final_binary) == size_train*len(augmentations)

  return final_binary

In [None]:
#v2_generate_augmentations_videos(non_agumented_videos, start_index=4, base_path='/content/drive/MyDrive/ucf_experiments/ciclo_experimental_1/non_augmented_videos/')

#unaug_steps = glob.glob('/content/drive/MyDrive/ucf_experiments/ciclo_experimental_1/non_augmented_videos/steps/train_aug_step_*.csv')
#aug_steps = glob.glob(PATH_AUG_BASE + 'steps/train_aug_step_*.csv')
#steps = unaug_steps + aug_steps

## No experimento oficial, na pasta dos steps terão mais arquivos csvs, do que a quantidade 10.
#Os arquivos que começam com 0 antes do dígito do step, corresponderam aos 7 arquivos que sobraram e tive que fazer uma adaptação para incluí-load_ucf_crime_splits

#Na função generate_augmentations_videos(), na linha step_size = int(len(x_train) / 10), resulta no seguinte erro: int(422,7) = 422, logo resta 0,7 de vídeo...
#Se pergarmos esse 0,70 vídeos * 10 steps, teremos 7 vídeos sobrando, esse é o erro do asset de dimensões.

#Usando a função nova v2_generate_augmentations_videos(), esse problema de vídeos que sobram, não deve acontecer.


In [None]:
steps = glob.glob(PATH_AUG_BASE + 'steps/train_aug_step_*.csv')
x_train_aug = pd.concat([pd.read_csv(step_path) for step_path in steps])

In [None]:
final_binary = preprocessing_augmentations(len(x_train), steps=steps)

print(final_binary.label.value_counts())
print(len(final_binary))

(4227, 2) (4227, 2) (4227, 2) (4227, 2) (4227, 2)
5 4227 21135
1    10675
0    10460
Name: label, dtype: int64
21135


In [None]:
4227*5

21135

In [None]:
final_binary.head(2)

Unnamed: 0,path,label
0,/content/drive/MyDrive/ucf_experiments/ciclo_e...,0
1,/content/drive/MyDrive/ucf_experiments/ciclo_e...,1


In [None]:
final_binary.to_csv(
    PATH_AUG_BASE + 'train_for_network.txt',
    header=None,
    index=False,
    sep=' '
)

In [None]:
print((glob.glob(PATH_AUG_BASE + '*.txt')))

['/content/drive/MyDrive/ucf_experiments/ciclo_experimental_1/data/augmented_train/train_for_network.txt']


### Teste

In [None]:
for f in glob.glob(PATH_TEST_RESIZED + '*.mp4'):
    os.remove(f)

In [None]:
# DESCOMENTEEEEEEEE a linha para gerar os augmentations
x_test_resized = x_test.progress_apply(test_normalization, axis=1)

100%|██████████| 1057/1057 [39:43<00:00,  2.25s/it]


In [None]:
x_test_resized.to_csv(
    PATH_TEST_RESIZED + 'x_test_resized.csv'
)

In [None]:
paths = sorted(Path(PATH_TEST_RESIZED).iterdir(), key=os.path.getmtime, reverse=True)
len(paths)

1058

In [None]:
print(len(glob.glob(PATH_TEST_RESIZED + '*.mp4')))

1057


In [None]:
test_data = pd.read_csv(PATH_TEST_RESIZED+'x_test_resized.csv')

In [None]:
test_data = test_data[['resized', 'label']]

In [None]:
test_data.sample(1)

Unnamed: 0,resized,label
446,/content/drive/MyDrive/ucf_experiments/ciclo_e...,1


In [None]:
test_data.to_csv(
    PATH_TEST_RESIZED + 'test_for_network.txt',
    header=None,
    index=False,
    sep=' '
)

In [None]:
print((glob.glob(PATH_TEST_RESIZED + '*.txt')))

['/content/drive/MyDrive/ucf_experiments/ciclo_experimental_1/data/resized_test/test_for_network.txt']


In [None]:
pd.read_csv(PATH_TEST_RESIZED+'test_for_network.txt').shape

(1056, 1)

In [None]:
print((glob.glob(PATH_TEST_RESIZED + '*.csv')))

['/content/drive/MyDrive/ucf_experiments/ciclo_experimental_1/data/resized_test/x_test_resized.csv']


In [None]:
pd.read_csv(PATH_TEST_RESIZED+'x_test_resized.csv').shape

(1057, 8)

In [None]:
FIM

# FIM

## Load CSV Steps

In [None]:
steps = glob.glob('/content/drive/MyDrive/ucf_experiments/experiment_a/data/steps/train_aug_step_*.csv')

In [None]:
x_train_aug = pd.concat([pd.read_csv(step_path) for step_path in steps])

In [None]:
x_train_aug['dataset'] = x_train_aug.segment_path.apply(lambda s_path : 'rwf' if 'RWF-2000' in s_path else 'ucfCrime' )

In [None]:
x_train_aug.to_csv('/content/drive/MyDrive/ucf_experiments/experiment_a/data/x_train_augmented.csv')

In [None]:
resized = x_train_aug[['resized', 'label']].rename(columns={'resized': 'path'})
bright = x_train_aug[['bright', 'label']].rename(columns={'bright': 'path'})
contrast = x_train_aug[['contrast', 'label']].rename(columns={'contrast': 'path'})
flip_lr = x_train_aug[['flip_lr', 'label']].rename(columns={'flip_lr': 'path'})
hue_adjust = x_train_aug[['hue_adjust', 'label']].rename(columns={'hue_adjust': 'path'})
augmentations = [resized, bright, contrast, flip_lr, hue_adjust]

final_binary = pd.concat(augmentations)

In [None]:
assert len(final_binary) == 3900*len(augmentations)

In [None]:
final_binary.label.value_counts()

In [None]:
final_binary.to_csv(
    '/content/drive/MyDrive/ucf_experiments/experiment_2/data/train_for_network.txt',
    header=None,
    index=False,
    sep=' '
)

 ## Saving test
 

In [None]:
test_data = pd.read_csv('/content/drive/MyDrive/ucf_experiments/experiment_2/data/x_test_resized.csv')

In [None]:
test_data = test_data[['resized', 'label']]

In [None]:
test_data.sample(1)

In [None]:
test_data.to_csv(
    '/content/drive/MyDrive/ucf_experiments/experiment_2/data/test_for_network.txt',
    header=None,
    index=False,
    sep=' '
)