In [None]:
# mediapy's video read/write needs the ffmpeg binary; install it only when it is missing.
!command -v ffmpeg >/dev/null || (apt update && apt install -y ffmpeg)
!pip install -q mediapy

/bin/bash: qapt-get: command not found


In [None]:
import os
from time import sleep
import glob
import pandas as pd
import uuid
import mediapy as media
import tensorflow as tf
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Data Loading

### Load ucf-crime annotated data

In [None]:
# Sort the glob result: directory listing order is arbitrary, and the row order of the
# concatenated frame feeds the seeded sampling done later in this notebook.
ucfcrime_paths = sorted(glob.glob('/content/drive/MyDrive/ucf_experiments/data/annotations/anomaly/*.csv'))
if not ucfcrime_paths:
  # pd.concat([]) would only report "No objects to concatenate"; say what is actually missing.
  raise FileNotFoundError('No UCF-Crime annotation CSVs found - is Google Drive mounted?')
ucfcrime_annotated = pd.concat([pd.read_csv(data_path, index_col=0) for data_path in ucfcrime_paths])

ValueError: ignored

Filter classes 

In [None]:
ucfcrime_annotated.video_class.unique()

In [None]:
## Remove Classes
EXCLUDE = ['RoadAccidents', 'Burglary', 'Shooting', 'Stealing', 'Vandalism']

In [None]:
ucfcrime_annotated = ucfcrime_annotated.loc[~ucfcrime_annotated.video_class.isin(EXCLUDE)]

In [None]:
ucfcrime_annotated.video_class.unique()

array(['Abuse', 'Arrest', 'Assault', 'Fighting', 'Robbery'], dtype=object)

In [None]:
ucfcrime_annotated.video_class.value_counts()

Robbery     1623
Assault     1057
Fighting    1048
Abuse        940
Arrest       845
Name: video_class, dtype: int64

In [None]:
# Keep only the columns used downstream and renumber the rows 0..n-1.
# drop=True discards the old index directly, so this no longer depends on the
# index being named exactly 'Unnamed: 0'.
ucfcrime_annotated = ucfcrime_annotated[['new_label', 'segment_path','video_class']].reset_index(drop=True)

In [None]:
ucfcrime_annotated

Unnamed: 0,new_label,segment_path,video_class
0,0.0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse
1,0.0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse
2,0.0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse
3,0.0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse
4,0.0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse
...,...,...,...
5508,0.0,/content/drive/MyDrive/ucf_experiments/data/uc...,Robbery
5509,0.0,/content/drive/MyDrive/ucf_experiments/data/uc...,Robbery
5510,0.0,/content/drive/MyDrive/ucf_experiments/data/uc...,Robbery
5511,0.0,/content/drive/MyDrive/ucf_experiments/data/uc...,Robbery


In [None]:
ucfcrime_annotated.sample(2, random_state=46)

Unnamed: 0,new_label,segment_path,video_class
122,0.0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse
668,0.0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse


### Load RWF-2000 annotated data

In [None]:
# Sorted so the resulting frame has the same row order on every run (glob order is arbitrary).
rwf2000_paths = sorted(glob.glob('/content/drive/MyDrive/ucf_experiments/data/rwf/RWF-2000/train/**/*.avi'))

In [None]:
# Build every record first and create the frame once: DataFrame.append copied the
# whole frame on each iteration and no longer exists in pandas 2.x.
rwf_records = [
    {
        # Clips whose path contains 'NonFight' are the negative class.
        'new_label': 0 if 'NonFight' in video else 1,
        'segment_path': video,
        'video_class': 'Fighting',
    }
    for video in rwf2000_paths
]

# Explicit columns keep the schema stable even when no videos were found.
rwf_df = pd.DataFrame(rwf_records, columns=['new_label', 'segment_path', 'video_class'])

In [None]:
rwf_df

Unnamed: 0,new_label,segment_path,video_class
0,1.0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting
1,1.0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting
2,1.0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting
3,1.0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting
4,1.0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting
...,...,...,...
1595,0.0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting
1596,0.0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting
1597,0.0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting
1598,0.0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting


### Concatenating datasets

In [None]:
full_dataframe = pd.concat([ucfcrime_annotated, rwf_df])

In [None]:
# Normalise the merged frame in one pass:
#   - 'new_label' becomes 'label'
#   - incomplete rows and exact duplicates are removed
#   - labels are cast to int and every value of 2 or more is collapsed to 1
full_dataframe = (
    full_dataframe
    .rename(columns={'new_label': 'label'})
    .dropna()
    .drop_duplicates()
    .assign(label=lambda frame: frame.label.astype(int).clip(upper=1))
)

In [None]:
full_dataframe

Unnamed: 0,label,segment_path,video_class
0,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse
1,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse
2,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse
3,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse
4,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Abuse
...,...,...,...
1595,0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting
1596,0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting
1597,0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting
1598,0,/content/drive/MyDrive/ucf_experiments/data/rw...,Fighting


In [None]:
len(full_dataframe)

6814

In [None]:
full_dataframe.label.value_counts()

0    4372
1    2442
Name: label, dtype: int64

In [None]:
full_dataframe.groupby(['video_class', 'label'], group_keys=True).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,segment_path
video_class,label,Unnamed: 2_level_1
Abuse,0,648
Abuse,1,284
Arrest,0,614
Arrest,1,221
Assault,0,733
Assault,1,307
Fighting,0,1280
Fighting,1,1122
Robbery,0,1097
Robbery,1,508


In [None]:
def get_balanced_sample(part):
  """Return an equal number of label-0 and label-1 rows drawn from `part`.

  The larger group is down-sampled (seeded with 42) to the size of the smaller
  one. Label-0 rows come first in the result, followed by label-1 rows.
  """
  negatives = part[part.label == 0]
  positives = part[part.label == 1]

  # Both groups are cut to the size of the rarer label.
  n_per_label = min(len(negatives), len(positives))

  picked = [group.sample(n_per_label, random_state=42) for group in (negatives, positives)]
  return pd.concat(picked)


In [None]:
balanced_dataset = full_dataframe.groupby(['video_class']).apply(get_balanced_sample)

In [None]:
balanced_dataset.label.value_counts()

1    2442
0    2442
Name: label, dtype: int64

### Splitting

In [None]:
train_ratio = 0.80

# Hold out 20% of the balanced dataset for testing; the remaining 80% is the training set.
# random_state is fixed so the split is reproducible.
x_train, x_test = train_test_split(balanced_dataset, test_size=1 - train_ratio, random_state=12)

In [None]:
len(x_train)

3907

In [None]:
x_train.label.value_counts()

0    1963
1    1944
Name: label, dtype: int64

In [None]:
len(x_test)

977

In [None]:
## Save unaugmented data
output_path = '/content/drive/MyDrive/ucf_experiments/experiment_a/data/'

# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs(output_path, exist_ok=True)

x_train.to_csv(
    os.path.join(output_path, 'x_train_unaugmented.csv'),
)
x_test.to_csv(
    os.path.join(output_path, 'x_test_unaugmented.csv'),
)

## Data Augmentation

In [None]:
def test_video_normalize(video_numpy):
  """Resize every frame of a video to 224x224 without applying any augmentation.

  Args:
    video_numpy: video frames in a layout accepted by `tf.image.resize`
      (presumably (frames, height, width, channels) - TODO confirm with callers).

  Returns:
    A tuple `(augs, stacked)`: the list of variant names (`['resized']`) and a
    tensor holding the variants stacked along a new leading axis, in that order.
  """
  with tf.device('/gpu:0'):
    augs = ['resized']
    # tf.image.resize takes the array as-is; no separate tensor copy is needed.
    resized = tf.image.resize(video_numpy, size=(224, 224))
    stacked  = tf.stack([resized])

    return augs, stacked

def video_augmentation(video_numpy):
  """Resize a video to 224x224 and derive four augmented variants of it.

  Args:
    video_numpy: video frames in a layout accepted by `tf.image.resize`
      (presumably (frames, height, width, channels) - TODO confirm with callers).

  Returns:
    A tuple `(augs, stacked)`: the variant names and a tensor holding the
    variants stacked along a new leading axis, in the same order as `augs`.
  """
  with tf.device('/gpu:0'):
    augs = ['resized', 'bright', 'contrast', 'flip_lr', 'hue_adjust']
    # tf.image.resize takes the array as-is; no separate tensor copy is needed.
    resized = tf.image.resize(video_numpy, size=(224, 224))

    # Every variant is derived from the resized clip, not from another variant.
    bright   = tf.image.adjust_brightness(resized, delta=0.4)
    contrast = tf.image.adjust_contrast(resized, contrast_factor=0.6)
    flip_lr  = tf.image.flip_left_right(resized)
    # NOTE(review): TensorFlow documents adjust_hue's delta as lying in [-1, 1];
    # 1.5 is outside that range - confirm the intended hue rotation.
    hue_adjust   = tf.image.adjust_hue(resized, delta=1.5)

    # The stacking order must match `augs`: callers index the result by position.
    stacked  = tf.stack([resized, bright, contrast, flip_lr, hue_adjust])

    return augs, stacked

In [None]:
AUG_BASE_PATH = '/content/drive/MyDrive/ucf_experiments/experiment_a/data/augmented_train'
TEST_RESIZED_PATH = '/content/drive/MyDrive/ucf_experiments/experiment_a/data/resized_test'

def _export_variants(row, transform, output_dir):
  """Read the clip at row['segment_path'], apply `transform`, and write one .mp4 per variant.

  Args:
    row: a record with at least 'segment_path' and 'label' entries; it is
      updated in place with one new entry per variant name.
    transform: callable taking the float video and returning `(names, videos)`,
      where `videos[i]` is the clip for `names[i]`.
    output_dir: folder that receives the written files (created if missing).

  Returns:
    The same `row`, where each variant name now maps to the path of its file.
  """
  # write_video cannot create the folder itself, so make sure it exists.
  os.makedirs(output_dir, exist_ok=True)

  # A random prefix keeps file names unique; the label is embedded for convenience.
  video_name = uuid.uuid4().hex[:10] + '_' + str(row['label'])

  raw_video = media.read_video(row['segment_path'])
  raw_video = media.to_float01(raw_video)

  augs, videos = transform(raw_video)

  for idx, name in enumerate(augs):
    aug_path = os.path.join(output_dir, video_name + '_' + name + '.mp4')
    media.write_video(aug_path, videos[idx].numpy())
    row[name] = aug_path

  return row

def test_normalization(row):
  """Write the resized-only version of a test clip and record its path under row['resized']."""
  return _export_variants(row, test_video_normalize, TEST_RESIZED_PATH)

def data_augmentation(row):
  """Write the resized clip plus all augmented variants of a training clip and record their paths in `row`."""
  return _export_variants(row, video_augmentation, AUG_BASE_PATH)

In [None]:
tqdm.pandas()

In [None]:
x_train.sample(3, random_state=2)

Unnamed: 0_level_0,Unnamed: 1_level_0,label,segment_path,video_class
video_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Robbery,5005,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Robbery
Assault,2725,0,/content/drive/MyDrive/ucf_experiments/data/uc...,Assault
Robbery,4158,1,/content/drive/MyDrive/ucf_experiments/data/uc...,Robbery


In [None]:
# Start from a clean output folder: delete clips left over from earlier runs.
for stale_clip in glob.glob('/content/drive/MyDrive/ucf_experiments/experiment_a/data/augmented_train/*.mp4'):
    os.remove(stale_clip)

# The training set is processed in 10 equally sized chunks, with one CSV per chunk,
# so an interrupted run does not lose the chunks that already finished.
# NOTE(review): the chunk size is rounded down, so the last len(x_train) % 10 rows
# are never augmented - confirm this is intended.
step_size = int(len(x_train) / 10)

for step in range(10):
  initial_step = step * step_size
  final_step = initial_step + step_size
  print(initial_step,final_step)

  x_train_augmented = x_train.iloc[initial_step:final_step].progress_apply(data_augmentation, axis=1)
  x_train_augmented.to_csv(
      '/content/drive/MyDrive/ucf_experiments/experiment_a/data/steps/train_aug_step_{}.csv'.format(str(step))
  )

  # Short pause between chunks.
  sleep(3)


0 390


100%|██████████| 390/390 [24:48<00:00,  3.82s/it]


390 780


100%|██████████| 390/390 [24:52<00:00,  3.83s/it]


780 1170


100%|██████████| 390/390 [25:15<00:00,  3.88s/it]


1170 1560


100%|██████████| 390/390 [27:43<00:00,  4.27s/it]


1560 1950


100%|██████████| 390/390 [30:20<00:00,  4.67s/it]


1950 2340


100%|██████████| 390/390 [33:49<00:00,  5.21s/it]


2340 2730


100%|██████████| 390/390 [34:51<00:00,  5.36s/it]


2730 3120


100%|██████████| 390/390 [35:34<00:00,  5.47s/it]


3120 3510


100%|██████████| 390/390 [34:55<00:00,  5.37s/it]


3510 3900


100%|██████████| 390/390 [36:35<00:00,  5.63s/it]


In [None]:
# Clear clips from earlier runs so the folder only holds this run's output.
for stale_clip in glob.glob('/content/drive/MyDrive/ucf_experiments/experiment_a/data/resized_test/*.mp4'):
    os.remove(stale_clip)

# Resize every test clip; test_normalization adds a 'resized' entry holding the written file's path.
x_test_resized = x_test.progress_apply(test_normalization, axis=1)

100%|██████████| 977/977 [28:33<00:00,  1.75s/it]


In [None]:
x_test_resized['dataset'] = x_test_resized.segment_path.apply(lambda s_path : 'rwf' if 'RWF-2000' in s_path else 'ucfCrime' )

In [None]:
x_test_resized.to_csv(
    '/content/drive/MyDrive/ucf_experiments/experiment_2/data/x_test_resized.csv'
)

## Load CSV Steps

In [None]:
# Sorted so the per-step CSVs are always concatenated in the same order (glob order is arbitrary).
steps = sorted(glob.glob('/content/drive/MyDrive/ucf_experiments/experiment_a/data/steps/train_aug_step_*.csv'))

In [None]:
x_train_aug = pd.concat([pd.read_csv(step_path) for step_path in steps])

In [None]:
x_train_aug['dataset'] = x_train_aug.segment_path.apply(lambda s_path : 'rwf' if 'RWF-2000' in s_path else 'ucfCrime' )

In [None]:
x_train_aug.to_csv('/content/drive/MyDrive/ucf_experiments/experiment_a/data/x_train_augmented.csv')

In [None]:
# Go from wide to long format: one (path, label) frame per variant column,
# then stack them so every written clip becomes its own training row.
variant_columns = ['resized', 'bright', 'contrast', 'flip_lr', 'hue_adjust']
augmentations = [
    x_train_aug[[column, 'label']].rename(columns={column: 'path'})
    for column in variant_columns
]

final_binary = pd.concat(augmentations)

In [None]:
# Every source row must contribute exactly one row per variant. The expected size is
# derived from the data rather than hard-coded, so the check survives dataset changes.
assert len(final_binary) == len(x_train_aug) * len(augmentations), 'unexpected number of training rows'

In [None]:
final_binary.label.value_counts()

0    9800
1    9700
Name: label, dtype: int64

In [None]:
final_binary.to_csv(
    '/content/drive/MyDrive/ucf_experiments/experiment_2/data/train_for_network.txt',
    header=None,
    index=False,
    sep=' '
)

## Saving test set
 

In [None]:
test_data = pd.read_csv('/content/drive/MyDrive/ucf_experiments/experiment_2/data/x_test_resized.csv')

In [None]:
test_data = test_data[['resized', 'label']]

In [None]:
test_data.sample(1)

Unnamed: 0,resized,label
619,/content/drive/MyDrive/ucf_experiments/experim...,1


In [None]:
test_data.to_csv(
    '/content/drive/MyDrive/ucf_experiments/experiment_2/data/test_for_network.txt',
    header=None,
    index=False,
    sep=' '
)