In [5]:
import os
import glob
import pandas as pd
import numpy as np
from moviepy import editor
from tqdm.notebook import tqdm_notebook

In [2]:
def get_time(timestamp, e=0, max_t=None):
    if e>0 and max_t is None:
        print("Please provide max_t")
        return
    if ":" in timestamp:
        minute = int(timestamp.split(":")[0])*60
        second = float(timestamp.split(":")[1])
        t = minute + second
        return max(t+e, 0) if e<0 else min(t+e, max_t)
    else:
        try:
            return max(float(timestamp)+e, 0) if e<0 else min(float(timestamp)+e, max_t)
        except Exception as e:
            print(e)

In [3]:
def get_filename(file, n):
    ext = "."+file.split('.')[-1]
    new_name = file[:-len(ext)] + "_" + str(n) + ext
    return new_name

In [4]:
def extract_clips(data_dir, root="Data", output_root="Processed-Videos", gap=2, skip_classes=[]):
    base_path = os.path.join(root, data_dir)
    if not os.path.exists(base_path):
        print(f"{base_path} does not exists.")
        return
    subdirs = []
    for d in os.listdir(base_path):
        if os.path.isdir(os.path.join(base_path, d)):
            subdirs.append(d)
    for d in subdirs:
        if d in skip_classes:
            continue
        os.mkdir(os.path.join(root, output_root, d))
        file = open(os.path.join(base_path, f"{d}_Labels.txt"), mode='r', encoding='utf-8')
        lines = file.readlines()
        print(f"Processing {d} category videos:")
        for line in tqdm_notebook(lines):
            l = line.strip().split()
            vid_name = l[0]
            vid_path = os.path.join(base_path, d, vid_name)
            if os.path.exists(vid_path):
                video = editor.VideoFileClip(vid_path)
                if len(l[1:])%2 != 0:
                    print(f"Timestamps are not in complete pairs. Ignoring last timestamp for video {os.path.join(base_path, d, vid_name)}")
                anomaly_timestamps = []
                # Anamoly part extraction
                n_pairs = len(l[1:])//2
                for n in range(n_pairs):
                    new_vid = f"{os.path.join(root, output_root, d, get_filename(vid_name, n))}"
                    t1, t2 = get_time(l[n*2+1], e=-0.7), get_time(l[n*2+2], e=0.7, max_t=video.duration)
                    if t1 is not None and t2 is not None:
                        clip = video.subclip(t1, t2)
                        clip.write_videofile(new_vid, logger=None)
                        anomaly_timestamps.append((t1, t2))
                    else:
                        print(f"Skipping timestamps pairs {l[n*2+1]} and {l[n*2+2]} for {vid_path}")
                # Normal part extraction
                normal_output_dir = os.path.join(root, output_root, "Normal")
                if not os.path.exists(normal_output_dir):
                    os.mkdir(normal_output_dir)
                normal_timestamps = []
                if len(anomaly_timestamps) > 0:
                    if anomaly_timestamps[0][0]-gap > 0:   # gap b/w normal & anomaly
                        normal_timestamps.append((0, anomaly_timestamps[0][0]-gap))
                    for i in range(len(anomaly_timestamps)-1):
                        start = anomaly_timestamps[i][1]+gap
                        end = anomaly_timestamps[i+1][0]-gap
                        if end-start > 0:
                            normal_timestamps.append((start, end))
                    if anomaly_timestamps[-1][1]+gap < video.duration:
                        normal_timestamps.append((anomaly_timestamps[-1][1]+gap, video.duration))
                for i, t in enumerate(normal_timestamps):
                    if t[1]-t[0]>=1.3:
                        clip = video.subclip(t[0], t[1])
                        new_vid = f"{os.path.join(normal_output_dir, get_filename('Normal_'+vid_name, i))}"
                        clip.write_videofile(new_vid, logger=None)
            else:
                print(f"{vid_path} doesnot exists. Skipping...")

In [9]:
extract_clips("Anomaly-Videos-Part-3")

Processing RoadAccidents category videos:


  0%|          | 0/149 [00:00<?, ?it/s]

Processing Robbery category videos:


  0%|          | 0/150 [00:00<?, ?it/s]

Processing Shooting category videos:


  0%|          | 0/50 [00:00<?, ?it/s]

In [10]:
extract_clips("Anomaly-Videos-Part-2")

Processing Burglary category videos:


  0%|          | 0/106 [00:00<?, ?it/s]

Processing Explosion category videos:


  0%|          | 0/51 [00:00<?, ?it/s]

IndexError: list index out of range

In [12]:
extract_clips("Anomaly-Videos-Part-2", skip_classes=["Burglary", "Explosion"])

Processing Fighting category videos:


  0%|          | 0/58 [00:00<?, ?it/s]

In [13]:
extract_clips("Anomaly-Videos-Part-1")

Processing Abuse category videos:


  0%|          | 0/50 [00:00<?, ?it/s]

Processing Arson category videos:


  0%|          | 0/48 [00:00<?, ?it/s]

Processing Assault category videos:


  0%|          | 0/50 [00:00<?, ?it/s]

## Splitting Videos into 64 frames clips

In [19]:
def trim_clips(MAX_SEQ_LENGTH=40, output_dir="Trimmed-Videos", input_dir="Processed-Videos", root="Data"):
    if not os.path.exists(os.path.join(root, output_dir)):
        os.mkdir(os.path.join(root, output_dir))
    for cat in os.listdir(f"{root}/{input_dir}/"):
        os.mkdir(os.path.join(root, output_dir, cat))
        print(f"Processing {cat} category videos:")
        for file in tqdm_notebook(glob.glob(f"{root}/{input_dir}/{cat}/*")):
            video = editor.VideoFileClip(file)
            vid_name = file.split("\\")[-1].strip()
            fps = video.fps
            interval = (1/fps)*MAX_SEQ_LENGTH
            t = 0
            counter = 0
            while (video.duration-t)>= interval:
                clip = video.subclip(t, t+interval)
                clip.write_videofile(f"{root}/{output_dir}/{cat}/{get_filename(vid_name, counter)}", logger=None)
                counter += 1
                t += interval
            if (video.duration-t) > 1.4:
                clip = video.subclip(video.duration-interval, video.duration)
                clip.write_videofile(f"{root}/{output_dir}/{cat}/{get_filename(vid_name, counter)}", logger=None)
            video.close()

In [20]:
trim_clips(MAX_SEQ_LENGTH=64)

Processing Abuse category videos:


  0%|          | 0/77 [00:00<?, ?it/s]

Processing Arson category videos:


  0%|          | 0/75 [00:00<?, ?it/s]

Processing Assault category videos:


  0%|          | 0/61 [00:00<?, ?it/s]

Processing Burglary category videos:


  0%|          | 0/108 [00:00<?, ?it/s]

Processing Explosion category videos:


  0%|          | 0/50 [00:00<?, ?it/s]

Processing Fighting category videos:


  0%|          | 0/68 [00:00<?, ?it/s]

Processing Normal category videos:


  0%|          | 0/1032 [00:00<?, ?it/s]

Processing RoadAccidents category videos:


  0%|          | 0/161 [00:00<?, ?it/s]

Processing Robbery category videos:


  0%|          | 0/151 [00:00<?, ?it/s]

Processing Shooting category videos:


  0%|          | 0/55 [00:00<?, ?it/s]

## Creating dataframe of filenames and their labels

In [2]:
rooms = []
for item in os.listdir('Data/Trimmed-Videos'):
     files = os.listdir(f'Data/Trimmed-Videos/{item}')

     # Add them to the list
     for file in files:
            rooms.append((item, f'Data/Trimmed-Videos/{item}/{file}'))
    
# Build a dataframe        
df = pd.DataFrame(data=rooms, columns=['label', 'filepath'])
df

Unnamed: 0,label,filepath
0,Abuse,Data/Trimmed-Videos/Abuse/Abuse001_x264_0_0.mp4
1,Abuse,Data/Trimmed-Videos/Abuse/Abuse001_x264_0_1.mp4
2,Abuse,Data/Trimmed-Videos/Abuse/Abuse001_x264_0_2.mp4
3,Abuse,Data/Trimmed-Videos/Abuse/Abuse001_x264_1_0.mp4
4,Abuse,Data/Trimmed-Videos/Abuse/Abuse001_x264_1_1.mp4
...,...,...
32701,Shooting,Data/Trimmed-Videos/Shooting/Shooting053_x264_...
32702,Shooting,Data/Trimmed-Videos/Shooting/Shooting054_x264_...
32703,Shooting,Data/Trimmed-Videos/Shooting/Shooting054_x264_...
32704,Shooting,Data/Trimmed-Videos/Shooting/Shooting054_x264_...


In [3]:
df_normal = df[df['label']=='Normal']
df_normal

Unnamed: 0,label,filepath
11951,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
11952,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
11953,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
11954,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
11955,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
...,...,...
27960,Normal,Data/Trimmed-Videos/Normal/Normal_Shooting054_...
27961,Normal,Data/Trimmed-Videos/Normal/Normal_Shooting054_...
27962,Normal,Data/Trimmed-Videos/Normal/Normal_Shooting054_...
27963,Normal,Data/Trimmed-Videos/Normal/Normal_Shooting054_...


In [4]:
df_anomaly = df[df['label']!='Normal']
df_anomaly

Unnamed: 0,label,filepath
0,Abuse,Data/Trimmed-Videos/Abuse/Abuse001_x264_0_0.mp4
1,Abuse,Data/Trimmed-Videos/Abuse/Abuse001_x264_0_1.mp4
2,Abuse,Data/Trimmed-Videos/Abuse/Abuse001_x264_0_2.mp4
3,Abuse,Data/Trimmed-Videos/Abuse/Abuse001_x264_1_0.mp4
4,Abuse,Data/Trimmed-Videos/Abuse/Abuse001_x264_1_1.mp4
...,...,...
32701,Shooting,Data/Trimmed-Videos/Shooting/Shooting053_x264_...
32702,Shooting,Data/Trimmed-Videos/Shooting/Shooting054_x264_...
32703,Shooting,Data/Trimmed-Videos/Shooting/Shooting054_x264_...
32704,Shooting,Data/Trimmed-Videos/Shooting/Shooting054_x264_...


In [6]:
# Downsampling to balance dataset
drop_indices = np.random.choice(df_normal.index, 5000, replace=False)
df_normal_subset = df_normal.drop(drop_indices)

In [7]:
df_normal_subset

Unnamed: 0,label,filepath
11951,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
11954,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
11955,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
11957,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
11958,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
...,...,...
27958,Normal,Data/Trimmed-Videos/Normal/Normal_Shooting052_...
27961,Normal,Data/Trimmed-Videos/Normal/Normal_Shooting054_...
27962,Normal,Data/Trimmed-Videos/Normal/Normal_Shooting054_...
27963,Normal,Data/Trimmed-Videos/Normal/Normal_Shooting054_...


In [12]:
df_final = pd.concat([df_normal_subset, df_anomaly])
df_final

Unnamed: 0,label,filepath
11951,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
11954,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
11955,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
11957,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
11958,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
...,...,...
32701,Shooting,Data/Trimmed-Videos/Shooting/Shooting053_x264_...
32702,Shooting,Data/Trimmed-Videos/Shooting/Shooting054_x264_...
32703,Shooting,Data/Trimmed-Videos/Shooting/Shooting054_x264_...
32704,Shooting,Data/Trimmed-Videos/Shooting/Shooting054_x264_...


In [13]:
df_final = df_final.reset_index(drop=True)
df_final

Unnamed: 0,label,filepath
0,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
1,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
2,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
3,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
4,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse001_x26...
...,...,...
27701,Shooting,Data/Trimmed-Videos/Shooting/Shooting053_x264_...
27702,Shooting,Data/Trimmed-Videos/Shooting/Shooting054_x264_...
27703,Shooting,Data/Trimmed-Videos/Shooting/Shooting054_x264_...
27704,Shooting,Data/Trimmed-Videos/Shooting/Shooting054_x264_...


In [15]:
# shuffle the DataFrame rows
df_final = df_final.sample(frac = 1)
df_final

Unnamed: 0,label,filepath
4174,Normal,Data/Trimmed-Videos/Normal/Normal_Explosion040...
27540,Shooting,Data/Trimmed-Videos/Shooting/Shooting046_x264_...
18735,Burglary,Data/Trimmed-Videos/Burglary/Burglary095_x264_...
20713,Explosion,Data/Trimmed-Videos/Explosion/Explosion046_x26...
5863,Normal,Data/Trimmed-Videos/Normal/Normal_Fighting041_...
...,...,...
9706,Normal,Data/Trimmed-Videos/Normal/Normal_Robbery112_x...
13310,Assault,Data/Trimmed-Videos/Assault/Assault008_x264_0_...
1796,Normal,Data/Trimmed-Videos/Normal/Normal_Arson019_x26...
12275,Arson,Data/Trimmed-Videos/Arson/Arson019_x264_0_20.mp4


In [16]:
df_final = df_final.reset_index(drop=True)
df_final

Unnamed: 0,label,filepath
0,Normal,Data/Trimmed-Videos/Normal/Normal_Explosion040...
1,Shooting,Data/Trimmed-Videos/Shooting/Shooting046_x264_...
2,Burglary,Data/Trimmed-Videos/Burglary/Burglary095_x264_...
3,Explosion,Data/Trimmed-Videos/Explosion/Explosion046_x26...
4,Normal,Data/Trimmed-Videos/Normal/Normal_Fighting041_...
...,...,...
27701,Normal,Data/Trimmed-Videos/Normal/Normal_Robbery112_x...
27702,Assault,Data/Trimmed-Videos/Assault/Assault008_x264_0_...
27703,Normal,Data/Trimmed-Videos/Normal/Normal_Arson019_x26...
27704,Arson,Data/Trimmed-Videos/Arson/Arson019_x264_0_20.mp4


In [17]:
test_size = 5000

In [32]:
test_df = df_final.iloc[:5000].reset_index(drop=True)
test_df

Unnamed: 0,label,filepath
0,Normal,Data/Trimmed-Videos/Normal/Normal_Explosion040...
1,Shooting,Data/Trimmed-Videos/Shooting/Shooting046_x264_...
2,Burglary,Data/Trimmed-Videos/Burglary/Burglary095_x264_...
3,Explosion,Data/Trimmed-Videos/Explosion/Explosion046_x26...
4,Normal,Data/Trimmed-Videos/Normal/Normal_Fighting041_...
...,...,...
4995,Normal,Data/Trimmed-Videos/Normal/Normal_Robbery032_x...
4996,Normal,Data/Trimmed-Videos/Normal/Normal_Fighting048_...
4997,Normal,Data/Trimmed-Videos/Normal/Normal_Abuse039_x26...
4998,Normal,Data/Trimmed-Videos/Normal/Normal_Burglary083_...


In [33]:
train_df = df_final.iloc[5000:].reset_index(drop=True)
train_df

Unnamed: 0,label,filepath
0,Explosion,Data/Trimmed-Videos/Explosion/Explosion051_x26...
1,Normal,Data/Trimmed-Videos/Normal/Normal_Assault010_x...
2,Normal,Data/Trimmed-Videos/Normal/Normal_Arson010_x26...
3,Arson,Data/Trimmed-Videos/Arson/Arson051_x264_0_13.mp4
4,Normal,Data/Trimmed-Videos/Normal/Normal_Explosion046...
...,...,...
22701,Normal,Data/Trimmed-Videos/Normal/Normal_Robbery112_x...
22702,Assault,Data/Trimmed-Videos/Assault/Assault008_x264_0_...
22703,Normal,Data/Trimmed-Videos/Normal/Normal_Arson019_x26...
22704,Arson,Data/Trimmed-Videos/Arson/Arson019_x264_0_20.mp4


In [34]:
train_df.to_csv("Data/train_df.csv", index=False)

In [35]:
test_df.to_csv("Data/test_df.csv", index=False)