# 1. Data Preprocessing

전처리 준비를 위한 디렉토리 세팅
- 현재 실행중인 ipynb 파일과 zip 폴더가 같은 경로에 있으면 됨
- FOLDER(폴더명 상관 없음)
    - .git
    - FILENAME.ipynb(현재 파일)
    - zip/
        - Abuse/
        - Arrest/
        - Arson/
        - Assault/
        - Burglary/
        - Explosion/
        - Fighting/
        - Normal_Videos-Part-1/
    - (NP_224_224)

## 1.1 Transform Video into Numpy Array
    mp4 file -> NumPy array (.npy)
    saved as
     './NP_224_224/Abuse',
     './NP_224_224/Normal_Videos-Part-1',
     './NP_224_224/Assault',
     './NP_224_224/Arson',
     './NP_224_224/Burglary',
     './NP_224_224/Fighting',
     './NP_224_224/Explosion',
     './NP_224_224/Arrest'

In [8]:
import cv2  

import numpy as np
import matplotlib.pyplot as plt

import os 
from tqdm import tqdm

In [9]:
# Conversion code handed to cv2.cvtColor to turn BGR frames into RGB.
SCALE_RGB = cv2.COLOR_BGR2RGB
# Number of channels in an RGB frame.
CHANNEL_RGB = 3
# Target frame size in pixels used when the videos are converted.
IMG_WIDTH = 224
IMG_HEIGHT = 224

In [10]:
def Video2Npy(file_path, resize=(128,128)):
    """Decode a video file into an array of resized RGB frames.

    Args:
        file_path: Path of the video file to read.
        resize: Output frame size as ``(width, height)``, the order
            ``cv2.resize`` expects for its ``dsize`` argument.

    Returns:
        ``np.ndarray`` built from the decoded frames, each of shape
        ``(height, width, 3)``. Decoding is best-effort: when the file cannot
        be opened or a frame fails to decode, a message is printed and the
        frames collected so far are returned (possibly an empty array).
    """
    cap = cv2.VideoCapture(file_path)
    frames = []

    try:
        if not cap.isOpened():
            print("Error: cannot open", file_path)
            return np.array(frames)

        len_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # The loop bound (reported count - 1) is kept from the original
        # implementation so that already converted data stays reproducible.
        for i in range(len_frames - 1):
            ok, frame = cap.read()
            if not ok or frame is None:
                # The reported frame count can exceed what is decodable.
                print("Error: ", file_path, len_frames, i)
                break

            try:
                # INTER_AREA is the interpolation used for down-scaling.
                frame = cv2.resize(frame, resize, interpolation=cv2.INTER_AREA)
                frame = cv2.cvtColor(frame, SCALE_RGB)
            except cv2.error:
                print("Error: ", file_path, len_frames, i)
                break

            # No reshape is needed: the frame already has the requested size,
            # whatever value ``resize`` holds.
            frames.append(frame)
    finally:
        cap.release()

    return np.array(frames)

In [11]:
def Save2Npy(file_dir, save_dir):
    """Convert every video file in ``file_dir`` to a ``.npy`` file in ``save_dir``.

    Each video is decoded with ``Video2Npy`` at ``IMG_WIDTH`` x ``IMG_HEIGHT``,
    cast to ``uint8`` and stored as ``<video name>.npy``.

    Args:
        file_dir: Directory containing the source videos.
        save_dir: Directory that receives the ``.npy`` files; created if missing.

    Returns:
        None.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    for v in tqdm(os.listdir(file_dir)):
        video_path = os.path.join(file_dir, v)
        if not os.path.isfile(video_path):
            # Sub-directories are not videos.
            continue

        # splitext drops only the final extension, so names that contain
        # additional dots do not collapse onto the same output file.
        video_name = os.path.splitext(v)[0]
        save_path = os.path.join(save_dir, video_name + '.npy')
        print(video_name)

        # cv2.resize takes its target size as (width, height).
        data = Video2Npy(file_path=video_path, resize=(IMG_WIDTH, IMG_HEIGHT))
        if len(data) == 0:
            # An empty array cannot be sampled later, so do not store it.
            print("Skipped (no frames decoded): ", video_path)
            continue

        np.save(save_path, np.uint8(data))

    return None

In [12]:
import os

# Collect './zip' itself followed by every directory found beneath it.
dirlist = [root for root, _subdirs, _files in os.walk('./zip')]

dirlist

# os.walk
# For the path it is given, os.walk() yields tuples holding three values:
# root  : the path that contains dirs and files
# dirs  : the folders directly under root
# files : the files directly under root

['./zip',
 './zip/Abuse',
 './zip/Assault',
 './zip/Arson',
 './zip/Burglary',
 './zip/Normal_Videos-Part-1',
 './zip/Fighting',
 './zip/Explosion',
 './zip/Arrest']

In [13]:
# Convert every class folder directly under './zip' into './NP_224_224/<class>'.
# NOTE: the guard only checks whether the output directory exists, so a run
# that stopped half-way is also reported as 'Already done'; delete
# './NP_224_224' to run the conversion again.
if not os.path.exists('./NP_224_224'):
    for source_path in dirlist[1:]:  # dirlist[0] is './zip' itself
        # Only direct children of './zip' are class folders; deeper
        # directories returned by os.walk are skipped.
        if os.path.dirname(source_path) != dirlist[0]:
            continue

        # basename works with any path separator, unlike split('/')[2].
        target_path = os.path.join('./NP_224_224', os.path.basename(source_path))

        Save2Npy(file_dir=source_path, save_dir=target_path)
else:
    print('Already done')

  0%|          | 0/50 [00:00<?, ?it/s]

Abuse048_x264


  2%|▏         | 1/50 [00:10<08:19, 10.19s/it]

Abuse041_x264


  4%|▍         | 2/50 [00:14<05:12,  6.52s/it]

Abuse035_x264


  6%|▌         | 3/50 [00:22<05:35,  7.13s/it]

Abuse032_x264


  8%|▊         | 4/50 [00:35<07:27,  9.73s/it]

Abuse027_x264


 10%|█         | 5/50 [00:48<08:05, 10.78s/it]

Abuse042_x264


 12%|█▏        | 6/50 [02:14<26:38, 36.33s/it]

Abuse044_x264


 14%|█▍        | 7/50 [02:26<20:21, 28.40s/it]

Abuse033_x264


 16%|█▌        | 8/50 [02:34<15:26, 22.06s/it]

Abuse007_x264


 18%|█▊        | 9/50 [02:37<11:01, 16.14s/it]

Abuse039_x264


 20%|██        | 10/50 [03:21<16:20, 24.50s/it]

Abuse019_x264


 22%|██▏       | 11/50 [03:33<13:25, 20.65s/it]

Abuse029_x264


 24%|██▍       | 12/50 [03:34<09:16, 14.64s/it]

Abuse012_x264


 26%|██▌       | 13/50 [03:49<09:11, 14.90s/it]

Abuse028_x264


 28%|██▊       | 14/50 [03:53<06:59, 11.64s/it]

Abuse013_x264


 30%|███       | 15/50 [04:08<07:20, 12.59s/it]

Abuse008_x264


 32%|███▏      | 16/50 [04:32<09:08, 16.13s/it]

Abuse021_x264


 34%|███▍      | 17/50 [04:37<07:02, 12.80s/it]

Abuse004_x264


 36%|███▌      | 18/50 [05:26<12:31, 23.50s/it]

Abuse009_x264


 38%|███▊      | 19/50 [05:29<08:55, 17.28s/it]

Abuse030_x264


 40%|████      | 20/50 [05:33<06:45, 13.52s/it]

Abuse024_x264


 42%|████▏     | 21/50 [05:36<05:00, 10.35s/it]

Abuse036_x264


 44%|████▍     | 22/50 [05:51<05:27, 11.69s/it]

Abuse006_x264


 46%|████▌     | 23/50 [06:05<05:32, 12.33s/it]

Abuse010_x264


 48%|████▊     | 24/50 [06:08<04:11,  9.66s/it]

Abuse005_x264


 50%|█████     | 25/50 [06:11<03:10,  7.64s/it]

Abuse017_x264


 52%|█████▏    | 26/50 [06:17<02:48,  7.03s/it]

Abuse049_x264


 54%|█████▍    | 27/50 [06:23<02:36,  6.79s/it]

Abuse011_x264


 56%|█████▌    | 28/50 [06:34<02:57,  8.07s/it]

Abuse040_x264


 58%|█████▊    | 29/50 [06:45<03:09,  9.02s/it]

Abuse034_x264


 60%|██████    | 30/50 [06:50<02:33,  7.67s/it]

Abuse047_x264


 62%|██████▏   | 31/50 [06:54<02:06,  6.66s/it]

Abuse020_x264


 64%|██████▍   | 32/50 [07:04<02:15,  7.52s/it]

Abuse037_x264


 66%|██████▌   | 33/50 [07:09<01:55,  6.78s/it]

Abuse038_x264


 68%|██████▊   | 34/50 [07:11<01:28,  5.54s/it]

Abuse045_x264


 70%|███████   | 35/50 [07:26<02:05,  8.35s/it]

Abuse018_x264


 72%|███████▏  | 36/50 [07:33<01:50,  7.89s/it]

Abuse014_x264


 74%|███████▍  | 37/50 [07:40<01:40,  7.72s/it]

Abuse002_x264


 76%|███████▌  | 38/50 [07:43<01:15,  6.29s/it]

Abuse003_x264


 78%|███████▊  | 39/50 [07:55<01:25,  7.81s/it]

Abuse001_x264


 80%|████████  | 40/50 [08:03<01:18,  7.88s/it]

Abuse023_x264


 82%|████████▏ | 41/50 [08:06<00:57,  6.43s/it]

Abuse026_x264


 84%|████████▍ | 42/50 [08:12<00:51,  6.49s/it]

Abuse031_x264


 86%|████████▌ | 43/50 [08:25<00:58,  8.35s/it]

Abuse022_x264


 88%|████████▊ | 44/50 [08:34<00:50,  8.45s/it]

Abuse025_x264


 90%|█████████ | 45/50 [08:39<00:37,  7.59s/it]

Abuse015_x264


 92%|█████████▏| 46/50 [09:05<00:51, 12.96s/it]

Abuse046_x264


 94%|█████████▍| 47/50 [09:08<00:30, 10.09s/it]

Abuse016_x264


 96%|█████████▌| 48/50 [09:12<00:16,  8.31s/it]

Abuse043_x264


 98%|█████████▊| 49/50 [09:25<00:09,  9.53s/it]

Abuse050_x264


100%|██████████| 50/50 [09:41<00:00, 11.63s/it]
  0%|          | 0/50 [00:00<?, ?it/s]

Assault029_x264


  2%|▏         | 1/50 [00:01<01:32,  1.88s/it]

Assault051_x264


  4%|▍         | 2/50 [00:03<01:25,  1.79s/it]

Assault050_x264


  6%|▌         | 3/50 [00:05<01:22,  1.76s/it]

Assault038_x264


  8%|▊         | 4/50 [00:05<00:57,  1.25s/it]

Assault004_x264


 10%|█         | 5/50 [00:15<03:08,  4.20s/it]

Assault033_x264


 12%|█▏        | 6/50 [00:19<02:58,  4.07s/it]

Assault040_x264


 14%|█▍        | 7/50 [00:30<04:39,  6.51s/it]

Assault007_x264


 16%|█▌        | 8/50 [00:36<04:24,  6.30s/it]

Assault006_x264


 18%|█▊        | 9/50 [01:04<08:51, 12.97s/it]

Assault001_x264


 20%|██        | 10/50 [01:11<07:27, 11.18s/it]

Assault003_x264


 22%|██▏       | 11/50 [01:24<07:40, 11.80s/it]

Assault044_x264


 24%|██▍       | 12/50 [01:27<05:46,  9.11s/it]

Assault024_x264


 26%|██▌       | 13/50 [01:28<04:08,  6.71s/it]

Assault048_x264


 28%|██▊       | 14/50 [01:34<03:52,  6.45s/it]

Assault025_x264


 30%|███       | 15/50 [01:38<03:15,  5.58s/it]

Assault026_x264


 32%|███▏      | 16/50 [01:40<02:34,  4.54s/it]

Assault005_x264


 34%|███▍      | 17/50 [01:43<02:19,  4.23s/it]

Assault011_x264


 36%|███▌      | 18/50 [01:50<02:39,  5.00s/it]

Assault027_x264


 38%|███▊      | 19/50 [02:14<05:34, 10.78s/it]

Assault032_x264


 40%|████      | 20/50 [02:16<04:02,  8.10s/it]

Assault037_x264


 42%|████▏     | 21/50 [02:25<04:05,  8.46s/it]

Assault018_x264


 44%|████▍     | 22/50 [02:26<02:55,  6.25s/it]

Assault002_x264


 46%|████▌     | 23/50 [02:34<02:58,  6.62s/it]

Assault034_x264


 48%|████▊     | 24/50 [02:41<02:52,  6.64s/it]

Assault039_x264


 50%|█████     | 25/50 [02:42<02:04,  4.97s/it]

Assault015_x264


 52%|█████▏    | 26/50 [02:46<01:55,  4.79s/it]

Assault023_x264


 54%|█████▍    | 27/50 [02:49<01:39,  4.32s/it]

Assault012_x264


 56%|█████▌    | 28/50 [02:51<01:20,  3.67s/it]

Assault049_x264


 58%|█████▊    | 29/50 [02:55<01:16,  3.66s/it]

Assault017_x264


 60%|██████    | 30/50 [03:02<01:35,  4.76s/it]

Assault052_x264


 62%|██████▏   | 31/50 [03:05<01:15,  3.99s/it]

Assault035_x264


 64%|██████▍   | 32/50 [03:08<01:06,  3.70s/it]

Assault030_x264


 66%|██████▌   | 33/50 [03:12<01:08,  4.04s/it]

Assault031_x264


 68%|██████▊   | 34/50 [03:15<00:58,  3.65s/it]

Assault045_x264


 70%|███████   | 35/50 [03:19<00:53,  3.55s/it]

Assault020_x264


 72%|███████▏  | 36/50 [03:20<00:39,  2.79s/it]

Assault019_x264


 74%|███████▍  | 37/50 [03:27<00:55,  4.24s/it]

Assault046_x264


 76%|███████▌  | 38/50 [03:30<00:47,  3.95s/it]

Assault028_x264


 78%|███████▊  | 39/50 [03:40<01:01,  5.58s/it]

Assault016_x264


 80%|████████  | 40/50 [04:16<02:27, 14.76s/it]

Assault047_x264


 82%|████████▏ | 41/50 [04:33<02:18, 15.41s/it]

Assault036_x264


 84%|████████▍ | 42/50 [04:37<01:36, 12.09s/it]

Assault008_x264


 86%|████████▌ | 43/50 [05:19<02:25, 20.84s/it]

Assault013_x264


 88%|████████▊ | 44/50 [05:27<01:42, 17.12s/it]

Assault010_x264


 90%|█████████ | 45/50 [06:15<02:12, 26.51s/it]

Assault042_x264


 92%|█████████▏| 46/50 [06:43<01:46, 26.70s/it]

Assault009_x264


 94%|█████████▍| 47/50 [06:53<01:05, 21.80s/it]

Assault041_x264


 96%|█████████▌| 48/50 [07:08<00:39, 19.81s/it]

Assault022_x264


 98%|█████████▊| 49/50 [07:15<00:16, 16.08s/it]

Assault014_x264


100%|██████████| 50/50 [07:21<00:00,  8.83s/it]
  0%|          | 0/50 [00:00<?, ?it/s]

Arson009_x264


  2%|▏         | 1/50 [00:02<01:48,  2.22s/it]

Arson022_x264


  4%|▍         | 2/50 [00:28<13:15, 16.58s/it]

Arson025_x264


  6%|▌         | 3/50 [00:44<12:37, 16.11s/it]

Arson013_x264


  8%|▊         | 4/50 [01:02<13:06, 17.09s/it]

Arson049_x264


 10%|█         | 5/50 [01:04<08:28, 11.29s/it]

Arson011_x264


 12%|█▏        | 6/50 [01:07<06:23,  8.71s/it]

Arson038_x264


 14%|█▍        | 7/50 [01:10<04:46,  6.66s/it]

Arson050_x264


 16%|█▌        | 8/50 [01:14<04:11,  5.99s/it]

Arson006_x264


 18%|█▊        | 9/50 [01:22<04:31,  6.62s/it]

Arson029_x264


 20%|██        | 10/50 [01:26<03:52,  5.82s/it]

Arson041_x264


 22%|██▏       | 11/50 [01:37<04:49,  7.42s/it]

Arson007_x264


 24%|██▍       | 12/50 [01:56<06:56, 10.97s/it]

Arson045_x264


 26%|██▌       | 13/50 [02:08<06:48, 11.04s/it]

Arson023_x264


 28%|██▊       | 14/50 [02:11<05:20,  8.89s/it]

Arson044_x264


 30%|███       | 15/50 [02:15<04:13,  7.23s/it]

Arson015_x264


 32%|███▏      | 16/50 [02:55<09:38, 17.01s/it]

Arson005_x264


 34%|███▍      | 17/50 [03:02<07:48, 14.20s/it]

Arson031_x264


 36%|███▌      | 18/50 [03:14<07:13, 13.56s/it]

Arson014_x264


 38%|███▊      | 19/50 [03:26<06:41, 12.94s/it]

Arson021_x264


 40%|████      | 20/50 [03:33<05:34, 11.15s/it]

Arson051_x264


 42%|████▏     | 21/50 [03:38<04:30,  9.33s/it]

Arson019_x264


 44%|████▍     | 22/50 [13:36<1:26:51, 186.14s/it]

Arson046_x264


 46%|████▌     | 23/50 [13:43<59:30, 132.26s/it]  

Arson037_x264


 48%|████▊     | 24/50 [13:46<40:29, 93.43s/it] 

Arson030_x264


 50%|█████     | 25/50 [13:59<28:52, 69.28s/it]

Arson048_x264


 52%|█████▏    | 26/50 [14:00<19:31, 48.83s/it]

Arson042_x264


 54%|█████▍    | 27/50 [14:16<14:54, 38.88s/it]

Arson028_x264


 56%|█████▌    | 28/50 [14:22<10:43, 29.23s/it]

Arson039_x264


 58%|█████▊    | 29/50 [14:32<08:10, 23.34s/it]

Arson001_x264


 60%|██████    | 30/50 [14:43<06:35, 19.79s/it]

Arson003_x264


 62%|██████▏   | 31/50 [14:54<05:26, 17.19s/it]

Arson052_x264


 64%|██████▍   | 32/50 [15:08<04:49, 16.08s/it]

Arson035_x264


 66%|██████▌   | 33/50 [15:12<03:32, 12.51s/it]

Arson027_x264


 68%|██████▊   | 34/50 [15:20<02:55, 10.98s/it]

Arson036_x264


 70%|███████   | 35/50 [15:24<02:15,  9.07s/it]

Arson034_x264


 72%|███████▏  | 36/50 [15:31<01:56,  8.31s/it]

Arson047_x264


 74%|███████▍  | 37/50 [15:35<01:31,  7.07s/it]

Arson016_x264


 76%|███████▌  | 38/50 [15:40<01:16,  6.39s/it]

Arson010_x264


 78%|███████▊  | 39/50 [15:48<01:18,  7.09s/it]

Arson053_x264


 80%|████████  | 40/50 [16:07<01:46, 10.66s/it]

Arson017_x264


 82%|████████▏ | 41/50 [16:11<01:17,  8.65s/it]

Arson008_x264


 84%|████████▍ | 42/50 [16:15<00:56,  7.05s/it]

Arson020_x264


 86%|████████▌ | 43/50 [16:22<00:49,  7.03s/it]

Arson024_x264


 88%|████████▊ | 44/50 [16:31<00:46,  7.82s/it]

Arson012_x264


 90%|█████████ | 45/50 [16:34<00:31,  6.20s/it]

Arson002_x264


 92%|█████████▏| 46/50 [16:46<00:32,  8.12s/it]

Arson026_x264


 94%|█████████▍| 47/50 [17:03<00:32, 10.75s/it]

Arson018_x264


 96%|█████████▌| 48/50 [17:06<00:16,  8.34s/it]

Arson040_x264


 98%|█████████▊| 49/50 [17:14<00:08,  8.12s/it]

Arson032_x264


100%|██████████| 50/50 [17:22<00:00, 20.84s/it]
  0%|          | 0/100 [00:00<?, ?it/s]

Burglary008_x264


  1%|          | 1/100 [00:20<33:57, 20.58s/it]

Burglary094_x264


  2%|▏         | 2/100 [00:27<20:04, 12.29s/it]

Burglary054_x264


  3%|▎         | 3/100 [00:28<12:11,  7.54s/it]

Burglary058_x264


  4%|▍         | 4/100 [01:15<36:54, 23.07s/it]

Burglary044_x264


  5%|▌         | 5/100 [01:47<41:16, 26.07s/it]

Burglary046_x264


  6%|▌         | 6/100 [01:56<31:51, 20.34s/it]

Burglary039_x264


  7%|▋         | 7/100 [02:25<35:43, 23.05s/it]

Burglary100_x264


  8%|▊         | 8/100 [02:45<34:13, 22.32s/it]

Burglary081_x264


  9%|▉         | 9/100 [02:47<23:54, 15.76s/it]

Burglary061_x264


 10%|█         | 10/100 [03:23<33:14, 22.16s/it]

Burglary032_x264


 11%|█         | 11/100 [04:35<55:20, 37.31s/it]

Burglary091_x264


 12%|█▏        | 12/100 [04:49<44:24, 30.28s/it]

Burglary099_x264


 13%|█▎        | 13/100 [04:55<33:10, 22.88s/it]

Burglary053_x264


 14%|█▍        | 14/100 [05:09<29:11, 20.37s/it]

Burglary037_x264


 15%|█▌        | 15/100 [05:17<23:35, 16.66s/it]

Burglary086_x264


 16%|█▌        | 16/100 [05:32<22:23, 16.00s/it]

Burglary093_x264


 17%|█▋        | 17/100 [05:35<16:42, 12.08s/it]

Burglary043_x264


 18%|█▊        | 18/100 [05:37<12:34,  9.21s/it]

Burglary068_x264


 19%|█▉        | 19/100 [05:40<09:53,  7.33s/it]

Burglary031_x264


 20%|██        | 20/100 [06:06<17:03, 12.80s/it]

Burglary051_x264


 21%|██        | 21/100 [06:11<13:53, 10.56s/it]

Burglary033_x264


 22%|██▏       | 22/100 [06:17<11:55,  9.17s/it]

Burglary014_x264


 23%|██▎       | 23/100 [06:31<13:26, 10.48s/it]

Burglary003_x264


 24%|██▍       | 24/100 [06:39<12:27,  9.84s/it]

Burglary038_x264


 25%|██▌       | 25/100 [06:45<10:50,  8.67s/it]

Burglary013_x264


 26%|██▌       | 26/100 [07:00<12:58, 10.52s/it]

Burglary036_x264


 27%|██▋       | 27/100 [07:07<11:23,  9.36s/it]

Burglary030_x264


 28%|██▊       | 28/100 [07:13<10:09,  8.47s/it]

Burglary002_x264


 29%|██▉       | 29/100 [07:25<11:28,  9.70s/it]

Burglary016_x264


 30%|███       | 30/100 [07:48<15:51, 13.59s/it]

Burglary023_x264


 31%|███       | 31/100 [08:33<26:20, 22.91s/it]

Burglary004_x264


 32%|███▏      | 32/100 [08:41<20:55, 18.46s/it]

Burglary074_x264


 33%|███▎      | 33/100 [08:50<17:27, 15.64s/it]

Burglary075_x264


 34%|███▍      | 34/100 [09:57<34:04, 30.97s/it]

Burglary065_x264


 35%|███▌      | 35/100 [10:36<36:21, 33.56s/it]

Burglary080_x264


 36%|███▌      | 36/100 [10:41<26:37, 24.96s/it]

Burglary034_x264


 37%|███▋      | 37/100 [10:47<20:10, 19.21s/it]

Burglary083_x264


 38%|███▊      | 38/100 [11:13<22:05, 21.38s/it]

Burglary041_x264


 39%|███▉      | 39/100 [11:21<17:27, 17.18s/it]

Burglary092_x264


 40%|████      | 40/100 [11:24<13:02, 13.05s/it]

Burglary057_x264


 41%|████      | 41/100 [11:54<17:44, 18.04s/it]

Burglary067_x264


 41%|████      | 41/100 [12:03<17:20, 17.64s/it]


OSError: Not enough free space to write 197342208 bytes

In [None]:
# Check the number of converted .npy files per class.
# The counts are taken from the output folders under './NP_224_224', not from
# the './zip' source folders, so they reflect what was actually converted.
for i in dirlist[1:]:
    class_name = os.path.basename(i)
    npy_dir = os.path.join('./NP_224_224', class_name)
    # A class that has not been converted yet has no output folder.
    n_files = len(os.listdir(npy_dir)) if os.path.isdir(npy_dir) else 0
    print(class_name, n_files)

## 1.2 Numpy Array Split with Uniform Sampling


#### 시드 설정

In [None]:
# Seed NumPy's global random generator so the np.random calls that follow are repeatable.
np.random.seed(2045)

### 1.2.1 Numpy Array Split

In [None]:
def NpArraySplit(path, train_rate, val_rate, test_rate):
    """Split the files of every directory under ``path`` into train/val/test.

    Each directory visited by ``os.walk`` is split on its own, so every
    directory contributes the same proportions. Directories and files are
    visited in sorted order to make the split reproducible (``os.walk`` itself
    returns entries in arbitrary order).

    Example: ``NpArraySplit('./NP', 60, 20, 20)``

    Args:
        path: Root directory to walk.
        train_rate: Percentage of each directory's files assigned to train.
        val_rate: Percentage of each directory's files assigned to validation.
        test_rate: Kept for interface compatibility; every file that is not
            assigned to train or validation goes to test.

    Returns:
        Tuple ``(train_videos, train_path, val_videos, val_path, test_videos,
        test_path)`` where the ``*_videos`` lists hold file names and the
        ``*_path`` lists hold the matching paths joined with their directory.
    """
    train_videos, train_path = [], []
    val_videos, val_path = [], []
    test_videos, test_path = [], []

    for dirname, dirnames, filenames in os.walk(path):
        # Sorting in place also fixes the order in which os.walk descends.
        dirnames.sort()
        filenames = sorted(filenames)

        n = len(filenames)
        train_end = n * train_rate / 100
        val_end = n * (train_rate + val_rate) / 100

        for i, filename in enumerate(filenames):
            full_path = os.path.join(dirname, filename)
            if i < train_end:
                train_videos.append(filename)
                train_path.append(full_path)
            elif i < val_end:
                val_videos.append(filename)
                val_path.append(full_path)
            else:
                test_videos.append(filename)
                test_path.append(full_path)

    return train_videos, train_path, val_videos, val_path, test_videos, test_path

In [None]:
# Split the files under './NP_224_224' with rates 60/20/20 into name and path lists per split.
train_videos, train_path, val_videos, val_path, test_videos, test_path = NpArraySplit('./NP_224_224', 60,20,20)

In [None]:
# Number of files that ended up in each split.
len(train_videos), len(val_videos), len(test_videos)

In [None]:
import pandas as pd
import numpy as np
import re

# Matches the leading run of ASCII letters of a string (possibly empty).
pattern = re.compile('[a-zA-Z]*')

def mkDataFrame(videos, path):
    """Build a DataFrame describing one data split.

    Args:
        videos: File names of the videos.
        path: Paths of the same videos, in the same order.

    Returns:
        DataFrame with the columns ``video_name``, ``path`` and ``class``;
        ``class`` is the alphabetic prefix of ``video_name`` as matched by
        ``pattern``.
    """
    frame = pd.DataFrame()
    frame['video_name'] = videos
    frame['path'] = path

    # The class tag is whatever letters the file name starts with.
    frame['class'] = [pattern.match(name).group() for name in frame['video_name']]

    return frame

In [None]:
# Build one DataFrame (video_name, path, class) per split.
train = mkDataFrame(train_videos, train_path)
val = mkDataFrame(val_videos, val_path)
test = mkDataFrame(test_videos, test_path)

# Shuffle the rows of each split with NumPy's global random generator and
# renumber the index from 0 afterwards.
train = train.iloc[np.random.permutation(train.index)].reset_index(drop=True)
val = val.iloc[np.random.permutation(val.index)].reset_index(drop=True)
test = test.iloc[np.random.permutation(test.index)].reset_index(drop=True)

In [None]:
# Show the first rows of the shuffled training frame.
train.head()

In [None]:
# (rows, columns) of each split's DataFrame.
print(train.shape)
print(val.shape)
print(test.shape)

In [None]:
# Number of training videos per class.
train['class'].value_counts()

In [None]:
# Number of validation videos per class.
val['class'].value_counts()

In [None]:
# Number of test videos per class.
test['class'].value_counts()

### 1.2.2 Numpy Array Load -> Uniform Sampling

In [None]:
 def uniform_sampling(video, target_frames):
     """Reduce or extend a clip to exactly ``target_frames`` frames.

     Frames are taken at a fixed stride of ``ceil(len(video) / target_frames)``.
     When that yields fewer than ``target_frames`` frames, the result is padded
     with frames taken from the end of the clip; for clips shorter than the
     padding that is needed, the indices wrap around so the clip is repeated.

     Args:
         video: Indexable sequence of frames (e.g. an array whose first axis
             is the frame index).
         target_frames: Number of frames in the result.

     Returns:
         ``np.ndarray`` of dtype ``float32`` holding ``target_frames`` frames.

     Raises:
         ValueError: If ``video`` contains no frames.
     """
     # get total frames of input video and calculate sampling interval
     len_frames = int(len(video))
     if len_frames == 0:
         # Without this check range() below fails with an unrelated message.
         raise ValueError("cannot sample frames from an empty video")
     interval = int(np.ceil(len_frames/target_frames))

     # take every ``interval``-th frame
     sampled_video = [video[i] for i in range(0, len_frames, interval)]

     # calculate number of padded frames and append them
     num_pad = target_frames - len(sampled_video)
     if num_pad > 0:
         # ``i % len_frames`` equals the plain negative index while
         # num_pad <= len_frames and wraps around for shorter clips, which
         # previously raised IndexError.
         sampled_video += [video[i % len_frames] for i in range(-num_pad, 0)]

     # get sampled video
     return np.array(sampled_video, dtype=np.float32)

In [None]:
def Load_NP(data, target_frames):
    """Load and sample every clip listed in ``data``.

    Args:
        data: DataFrame with a ``path`` column (``.npy`` file per clip) and a
            ``class`` column (label per clip), indexed ``0..n-1``.
        target_frames: Number of frames each clip is sampled to by
            ``uniform_sampling``.

    Returns:
        Tuple ``(clips, labels)`` of NumPy arrays built from the sampled clips
        and their labels, in row order.
    """
    clips = []
    tags = []

    for row in tqdm(range(data.shape[0])):
        # Read the stored frames and bring the clip to the common length.
        sampled = uniform_sampling(np.load(data['path'][row]), target_frames)

        clips.append(sampled)
        tags.append(data['class'][row])

    return np.array(clips), np.array(tags)

In [None]:
def mk_train_val_test(train, val, test, target_frames):
    """Load the train, validation and test splits and report their shapes.

    Args:
        train: DataFrame describing the training clips.
        val: DataFrame describing the validation clips.
        test: DataFrame describing the test clips.
        target_frames: Number of frames per clip, forwarded to ``Load_NP``.

    Returns:
        ``X_train, y_train, X_val, y_val, X_test, y_test``.
    """
    # Load all three splits first, in train/val/test order.
    loaded = [Load_NP(split, target_frames) for split in (train, val, test)]

    # Then print the (features, labels) shapes of each split.
    for features, labels in loaded:
        print(features.shape, labels.shape)

    (X_train, y_train), (X_val, y_val), (X_test, y_test) = loaded
    return X_train, y_train, X_val, y_val, X_test, y_test

In [None]:
# Load every split with each clip sampled to 64 frames.
X_train, y_train, X_val, y_val, X_test, y_test = mk_train_val_test(train, val, test, 64)

### 1.2.3 y -> One-Hot Encoding

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder once on the training labels and reuse that mapping for the
# validation and test labels. Calling fit_transform on every split refits the
# encoder each time, so the same class could receive different integers in
# different splits. transform() raises ValueError for a label that was not
# present in y_train.
encoder = LabelEncoder()

y_train_LE = encoder.fit_transform(y_train)
y_val_LE = encoder.transform(y_val)
y_test_LE = encoder.transform(y_test)

In [None]:
from keras.utils import to_categorical

# Use one common width for all splits. Without num_classes, to_categorical
# infers the width from the largest label of each array, so a split that lacks
# the highest class would get a narrower one-hot matrix than the others.
num_classes = int(max(y_train_LE.max(), y_val_LE.max(), y_test_LE.max())) + 1

y_train_OHE = to_categorical(y_train_LE, num_classes=num_classes)
y_val_OHE = to_categorical(y_val_LE, num_classes=num_classes)
y_test_OHE = to_categorical(y_test_LE, num_classes=num_classes)

In [None]:
# Display the one-hot encoded training labels.
y_train_OHE

### 1.2.4 Train, Val, Test -> Numpy Array Save

In [None]:
save_dir = 'Data_224_224'

# Arrays to persist, keyed by the file name (without extension) they are
# stored under; saved in this order.
outputs = {
    'X_train': X_train,
    'y_train': y_train_OHE,
    'X_val': X_val,
    'y_val': y_val_OHE,
    'X_test': X_test,
    'y_test': y_test_OHE,
}

os.makedirs(save_dir, exist_ok=True)

# Decide per file rather than per directory, so a save that was interrupted
# half-way is completed on the next run instead of being reported as done.
missing = [
    name for name in outputs
    if not os.path.exists(os.path.join(save_dir, name + '.npy'))
]

if missing:
    for name in missing:
        final_path = os.path.join(save_dir, name + '.npy')
        tmp_path = final_path + '.tmp'
        # Write to a temporary file first: a failed write then never leaves a
        # truncated file under the final name.
        with open(tmp_path, 'wb') as fh:
            np.save(fh, outputs[name])
        os.replace(tmp_path, final_path)
else:
    print('Already done')