<a href="https://colab.research.google.com/github/doubleblindreview2/jbr_video_mining/blob/master/video_mining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **1. Download All Models**
You need a Google account to verify that your use is legitimate.


In [0]:
### Import required libraries
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [0]:
### Verify you are a legitimate user
# Run the Colab authentication flow for the current user first; the credential
# lookup below relies on it having completed.
auth.authenticate_user()
# Create the PyDrive auth object and hand it the application-default credentials.
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the PyDrive client that the download cell uses to list and fetch files.
drive = GoogleDrive(gauth)

In [3]:
### Download models from public Drive
# Every file of the shared folder is saved into ./downloads. Files are written through
# an explicit target path instead of os.chdir(), so the working directory is never left
# inside ./downloads when a download fails and the cell stays safe to re-run.
download_dir = 'downloads'
os.makedirs(download_dir, exist_ok=True)

folder_id = '1e-UQc-ylzVOOvW2ZiOCpnP-EEziHA4cQ'
# List all non-trashed files whose parent is the shared folder.
file_list = drive.ListFile({'q': "'{}' in parents and trashed=false".format(folder_id)}).GetList()
# Download in title order so the progress messages are stable between runs.
for i, file in enumerate(sorted(file_list, key = lambda x: x['title']), start=1):
    print('Downloading {} from Public GDrive ({}/{})'.format(file['title'], i, len(file_list)))
    file.GetContentFile(os.path.join(download_dir, file['title']))

Downloading PySceneDetect-master.zip from Public GDrive (1/4)
Downloading rgb2colors.csv from Public GDrive (2/4)
Downloading video_mining_dependencies.py from Public GDrive (3/4)
Downloading video_mining_functions.py from Public GDrive (4/4)


# **2. Mount Your Drive and Provide Input**
You need to provide a Google Drive account that contains:
- Video files for video mining
- Folder for video mining results _(can be empty)_
- Folder for log file _(can be empty)_


In [4]:
### Mount Your Google Drive
# Import under an alias: a plain `from google.colab import drive` would overwrite the
# PyDrive client named `drive` created in the authentication cell, and re-running the
# model download cell afterwards would then fail on `drive.ListFile`.
from google.colab import drive as colab_drive
colab_drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
### Provide data input for analysis - change required information here
in_folder  = '/content/drive/My Drive/trailer/vids/'                   # folder with all videos; the file names are used to derive the video IDs
out_folder = '/content/drive/My Drive/trailer/preds/'                  # folder to store extracted features (one sub-folder per video ID is created)
log_name   = '2020-05-05_logfile.csv'                                      # name of logfile, including the ending .csv
log_folder = '/content/drive/My Drive/trailer/logs/'                   # folder for logfile; keep the trailing '/', it is concatenated with log_name
yolo_folder= 'D:/JBR_video_mining/yolov3/'                      # TO BE REPLACED directory for yolov3 folder (placeholder looks like a local Windows path)

### Select features to be extracted
extract_length           = True                                # get length of video
extract_cuts             = True                                # get scene cuts
extract_colors           = False                                # get brightness and color information
extract_faces            = False                                # get faces
extract_emotions         = False                                # get 8 different emotions per face
extract_objects          = False                                # get 80 objects

# **3. Install Requirements**
Run All Cells - No additional input required

In [0]:
### Load additionally required modules
from IPython.utils import io
from downloads.video_mining_dependencies import*

# Install the scene-cut dependencies only when that feature is selected.
# Everything printed during the installation is captured instead of displayed.
with io.capture_output() as captured:
  if extract_cuts:
    install_dependencies_cuts()

# **4. Run Video Mining**
Run All Cells - No additional input required

In [0]:
import time
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from downloads.video_mining_functions import*

In [0]:
### Create logfile
# Writes a header-only CSV; the extraction loop appends one row per video to it.
# NOTE: an existing logfile with the same name is overwritten.
# The log folder is created when missing, matching how output folders are created
# on demand; the guard keeps an empty log_folder (current directory) working.
if log_folder:
  os.makedirs(log_folder, exist_ok=True)

log_columns = ['time','vid_id','log.length','log.cuts','log.colors','log.faces','log.emotions','log.objects']
logfile = pd.DataFrame(columns=log_columns) # create logfile for video analysis
# log_folder is expected to end with '/', so plain concatenation yields the file path
logfile.to_csv(log_folder+log_name,index_label=False)

In [12]:
### Loop through folder with videos to extract selected features
# max_videos makes the former hidden `[:1]` limit explicit; set it to None to process
# every file in in_folder. The listing is sorted because os.listdir returns entries in
# arbitrary order, which made the selected video unpredictable.
max_videos = 1
video_files = sorted(os.listdir(in_folder))[:max_videos]

for filename in tqdm(video_files):

  ### Provide name, input- and output_path
  # splitext removes only the extension, so names containing dots keep a unique ID
  vid_id = os.path.splitext(filename)[0]
  input_path = os.path.join(in_folder, filename)
  # the empty last component keeps the trailing separator the original path had
  output_path = os.path.join(out_folder, vid_id, '')
  os.makedirs(output_path, exist_ok=True)

  ### Extract selected features
  # Each log_* holds the extractor's return value, or False when the feature is
  # switched off. Conditional expressions keep the calls lazy: an extractor is only
  # looked up and called when its flag is set.
  log_length = vid_length(input_path,output_path,vid_id) if extract_length else False          # get length of video
  log_cuts = get_cuts(input_path,output_path,vid_id) if extract_cuts else False                # get scene cuts
  log_colors = color_loop(input_path,output_path,vid_id) if extract_colors else False          # get brightness and color information
  log_faces = video_loop_faces(input_path,output_path,vid_id) if extract_faces else False      # get faces
  # NOTE(review): the emotions step calls the same function as the faces step - confirm this is intended
  log_emotions = video_loop_faces(input_path,output_path,vid_id) if extract_emotions else False
  log_coco = coco_loop(input_path,output_path,vid_id) if extract_objects else False            # get objects

  ### write logfile
  # One CSV line per video: every value is stringified and wrapped in double quotes.
  timestamp = time.strftime("%d %b %Y %H:%M:%S", time.localtime())
  log_values = [timestamp, vid_id, log_length, log_cuts, log_colors, log_faces, log_emotions, log_coco]
  row = ','.join('"' + str(value) + '"' for value in log_values) + '\n'
  write_csv(row,log_folder,log_name,mode = 'a+')


  0%|          | 0/1 [00:00<?, ?it/s]

scenedetect --input "/content/drive/My Drive/trailer/vids/pzysZI-1LN8_tt1724597.mp4" --output "/content/drive/My Drive/trailer/preds/pzysZI-1LN8_tt1724597/" detect-content list-scenes -f pzysZI-1LN8_tt1724597_FrameLevel_Scenes.csv -q 


100%|██████████| 1/1 [00:21<00:00, 21.20s/it]


In [13]:
### Inspect the logfile written by the extraction loop
# Build the path from the configuration (same concatenation used when the logfile is
# created) instead of a hard-coded, cwd-relative copy, so this cell keeps working
# when log_folder or log_name are changed.
watch_log = pd.read_csv(log_folder+log_name)
watch_log.head()

Unnamed: 0,time,vid_id,log.length,log.cuts,log.colors,log.faces,log.emotions,log.objects
0,05 May 2020 16:10:02,pzysZI-1LN8_tt1724597,False,True,False,False,False,False
