# Data Exploration

## Default imports

In [6]:
import os
import sys
import cv2
import numpy as np
import pandas as pd
import sklearn

## Video Image Split

In [4]:
!ls ./example_dataset/images

VIRAT_S_000002_fr_0.2


In [18]:
# Raw listing of everything inside the example dataset folder.
# NOTE(review): `z` is not referenced by any other cell visible here.
z = os.listdir('./example_dataset')

In [33]:
def filter_file_extension(filename, file_extension='mp4'):
    '''
        Tell whether filename carries the given file extension.

        Only the real extension (the text after the last dot) is compared, so names
        that merely contain the letters somewhere else, such as 'mp4_notes.txt' or
        'clip.mp4.part', are rejected. The comparison ignores letter case.

        filename: name or path of the file to check.
        file_extension: extension to look for, with or without the leading dot.

        Returns True if the extension matches, False otherwise.
    '''
    # Accept both 'mp4' and '.mp4' as the requested extension.
    wanted_extension = '.' + file_extension.lower().lstrip('.')
    actual_extension = os.path.splitext(filename)[1].lower()

    return actual_extension == wanted_extension

def get_videos_from_folder(path):
    '''
        Collect the video files located directly inside path.

        Entries are selected with filter_file_extension and returned in sorted
        order, so that indexing into the result gives the same video on every run.

        path: folder to scan (not recursive).

        Returns a tuple (videos_file_paths, video_filenames) of two lists aligned
        by index: the path of every video (path joined with the entry name) and
        the entry name without its extension.
    '''
    # os.listdir makes no promise about ordering; sort to keep the result reproducible.
    only_videos = sorted(filter(filter_file_extension, os.listdir(path)))

    # splitext strips only the final extension, so 'cam.1.mp4' keeps the stem 'cam.1'.
    video_filenames = [os.path.splitext(video_name)[0] for video_name in only_videos]
    videos_file_paths = [os.path.join(path, video_name) for video_name in only_videos]

    return videos_file_paths, video_filenames

In [34]:
# Collect the paths and the extension-less names of the videos in ./example_dataset.
video_file_paths, video_filenames = get_videos_from_folder('./example_dataset')

In [55]:
def get_frame(sec, video_object):
    '''
        Seek video_object to the position sec (given in seconds) and read one frame there.

        Returns the pair (success, image) exactly as reported by video_object.read().
    '''
    # The capture position property is expressed in milliseconds.
    position_msec = sec*1000
    video_object.set(cv2.CAP_PROP_POS_MSEC, position_msec)

    grabbed, frame = video_object.read()
    return grabbed, frame
    

def write_images_from_video(video_path, video_filename, frame_rate, output_root='./example_dataset/images'):
    '''
        Samples frame_rate frames per second of footage from the video at video_path
        and writes each sampled frame as a PNG image.

        The images go into the folder '<output_root>/<video_filename>_fr_<interval>',
        where interval = 1 / frame_rate is the time in seconds between two samples.
        The folder is created if it does not exist. Images are named
        '<video_filename>_frame_<index>.png', with index counting from 0.

        video_path: path of the video file to read.
        video_filename: name used for the output folder and the image files.
        frame_rate: number of frames to extract per second of video; must be positive.
        output_root: folder under which the per-video image folder is created.

        Raises ValueError if frame_rate is not positive.
    '''
    if frame_rate <= 0:
        raise ValueError(f'frame_rate must be positive, got {frame_rate}')

    # Time in seconds between two consecutive samples.
    interval = 1 / frame_rate

    # The target folder does not change between frames, so create it once up front.
    folder_path = os.path.join(output_root, f'{video_filename}_fr_{interval}')
    os.makedirs(folder_path, exist_ok=True)

    video_object = cv2.VideoCapture(video_path)
    count = 0
    try:
        while True:
            # Derive the timestamp from the index instead of accumulating floats,
            # so rounding errors do not build up over long videos.
            success, image = get_frame(count * interval, video_object)
            if not success:
                # No frame at this position (end of the video): nothing to write.
                break

            cv2.imwrite(os.path.join(folder_path, f'{video_filename}_frame_{count}.png'), image)
            count += 1
    finally:
        # Always give the video handle back, even if reading or writing failed.
        video_object.release()

    print(f'File {video_filename} processed.')

In [56]:
# Split the first video found into images, sampling 5 frames per second of footage.
write_images_from_video(video_file_paths[0], video_filenames[0], 5)

File VIRAT_S_000002 processed.


/home/fury/Code/projects/dsml-projects/4_End-to-End_Project/People_Counting_From_Video_Feed/PoC


## People Counting with Object Recognition Methods

[Reference](https://machinelearningmastery.com/object-recognition-with-deep-learning/)

### 1. Faster R-CNN
* [Reference of Faster R-CNN](https://arxiv.org/pdf/1506.01497.pdf)  
* [Reference of implementation](https://towardsdatascience.com/faster-r-cnn-object-detection-implemented-by-keras-for-custom-data-from-googles-open-images-125f62b9141a)
* [Reference Jupyter Notebook for the implementation](https://github.com/RockyXu66/Faster_RCNN_for_Open_Images_Dataset_Keras/blob/master/frcnn_train_vgg.ipynb)

#### 1.1 Getting The Data:


In [None]:
!wget https://datasets.figure-eight.com/figure_eight_datasets/open-images/train-annotations-bbox.csv

--2019-11-26 21:53:32--  https://datasets.figure-eight.com/figure_eight_datasets/open-images/train-annotations-bbox.csv
Resolving datasets.figure-eight.com (datasets.figure-eight.com)... 52.200.149.96, 3.227.119.162, 3.227.227.96
Connecting to datasets.figure-eight.com (datasets.figure-eight.com)|52.200.149.96|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1194033454 (1,1G) [text/csv]
Saving to: ‘train-annotations-bbox.csv’

tions-bbox.csv       27%[====>               ] 311,04M   537KB/s    eta 51m 27s

In [None]:
!wget https://datasets.figure-eight.com/figure_eight_datasets/open-images/train-images-boxable.csv