In [1]:
# import shutil
# import subprocess
# import matplotlib
# from matplotlib.pyplot import figure
# from datetime import datetime
# from PIL import Image
# import random
# from tqdm import tqdm
# import matplotlib.pyplot as plt
# import telegram

import os
import pickle
import subprocess
import time as tm
from collections import Counter

import cv2
import face_recognition
import numpy as np
# import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.neighbors import KNeighborsClassifier

## utils.py

In [2]:
def loadPickle(filename):
    """Load and return the object stored in the pickle file *filename*.

    :param filename: path of the pickle file to read
    :return: the unpickled object

    The file is opened in a ``with`` block so the handle is closed even if
    unpickling fails. ``encoding='latin1'`` is forwarded to ``pickle.load``;
    it controls how 8-bit strings written by Python 2 are decoded.
    """
    # NOTE(review): pickle.load can execute arbitrary code -- only use this
    # on files from a trusted source.
    with open(filename, 'rb') as infile:
        return pickle.load(infile, encoding='latin1')

def storePickle(filename, data):
    """Pickle *data* into the file *filename*, overwriting any existing file.

    :param filename: path of the file to write
    :param data: any picklable object

    The ``with`` block guarantees the file is flushed and closed even when
    ``pickle.dump`` raises (e.g. on an unpicklable object).
    """
    with open(filename, 'wb') as outfile:
        pickle.dump(data, outfile)

In [3]:
def sec2HMS(seconds):
    """Format a number of seconds as an 'HH:MM:SS' string.

    The value is converted with ``time.gmtime``, so the hour field is an
    hour of the day: inputs of 24 hours or more wrap around to '00:..'.
    """
    broken_down = tm.gmtime(seconds)
    return tm.strftime('%H:%M:%S', broken_down)

def HMS2sec(time_str):
    """Convert an 'H:M:S' string into a total number of seconds (int).

    Raises ValueError when the string does not consist of exactly three
    ':'-separated integer fields.
    """
    hours, minutes, secs = (int(field) for field in time_str.split(':'))
    return hours * 3600 + minutes * 60 + secs

## celebDetect.py

In [26]:
# Labelled reference data: celebs[i] is the label for the group of encodings
# in celeb_encodings[i] (that is how the KNN-population cell pairs them).
celebs, celeb_encodings = loadPickle('final_celeb_detection/final_pickles/anchors-with-TV-encodings.pickle')
# Keep one array per celebrity in a plain list. Wrapping groups of different
# lengths in a single np.array() is a "ragged" array, which recent NumPy
# releases reject with a ValueError; a list indexes and iterates the same way.
celeb_encodings = [np.array(x) for x in celeb_encodings]

In [269]:
# Build the KNN training set: every single encoding becomes one sample in X,
# labelled in Y with the name of the celebrity group it came from.
X = [encoding
     for group in celeb_encodings
     for encoding in group]
Y = [celebs[group_idx]
     for group_idx, group in enumerate(celeb_encodings)
     for _ in group]

# The classifier votes among the 30 nearest labelled encodings.
neigh = KNeighborsClassifier(n_neighbors=30)
neigh.fit(X, Y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=30, p=2,
           weights='uniform')

In [28]:
def encoding2name(f_encodings):
    """Return the label predicted by the module-level KNN classifier
    ``neigh`` for each encoding in *f_encodings* (forwarded unchanged to
    ``neigh.predict``)."""
    predicted_names = neigh.predict(f_encodings)
    return predicted_names

## main.py

In [7]:
# Source video to segment. It is passed to getEncodings(), re-opened for the
# metadata cell, and used as the ffmpeg input when cutting shows.
# NOTE(review): the txt3/metadata cell splits the file name on '_' and reads
# fields 0 and 3 (pulldate, barcode); this name contains no '_' -- confirm the
# intended input naming.
vid_path = '../data/2006-2Hours.mp4'

In [6]:
#TODO - take input from the user - next cell as well
# Directory that receives the cut videos and their .txt files.
output_path = '../data/output_cuts'

# makedirs(exist_ok=True) also creates missing parent directories and does not
# fail if the directory already exists (no check-then-create race).
os.makedirs(output_path, exist_ok=True)

In [282]:
class Show:
    """Plain record describing one detected show segment.

    Attributes are stored exactly as given:
    hosts      -- host description supplied by the caller
    start_time -- start of the segment
    end_time   -- end of the segment
    """

    def __init__(self, hosts, start_time, end_time):
        self.hosts, self.start_time, self.end_time = hosts, start_time, end_time

In [4]:
def getEncodings(vid_path, skip_seconds=1):
    """Sample the video at regular intervals and encode every face found.

    :param vid_path: path of the video, handed to ``cv2.VideoCapture``
    :param skip_seconds: gap, in seconds, between two sampled frames
    :return: list with one dict per detected face, in sampling order:
             ``'time'``     position of the sampled frame in seconds
                            (frame index / fps),
             ``'loc'``      bounding box as returned by
                            ``face_recognition.face_locations``,
             ``'encoding'`` the matching entry of
                            ``face_recognition.face_encodings``.

    Faces are located with the 'hog' model. Prints the fps of the video.
    """
    vidcap = cv2.VideoCapture(vid_path)
    allEncodings = []
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        # Number of frames to advance per sample. Rounding fps*skip_seconds
        # keeps samples `skip_seconds` apart; the previous int(fps+1) stepped
        # one frame too far for integer frame rates, so timestamps drifted.
        interval = max(1, int(round(fps * skip_seconds)))
        print("FPS of the video: {}".format(fps))
        n_frame = 0  # index of the frame being processed

        success, frame = vidcap.read()
        while success:
            # OpenCV decodes to BGR; face_recognition is given RGB.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            bboxes = face_recognition.face_locations(rgb, model='hog')
            encodings = face_recognition.face_encodings(rgb, bboxes)
            for bbox, encoding in zip(bboxes, encodings):  # each face in the frame
                allEncodings.append({'time': (n_frame/fps), 'loc': bbox, 'encoding': encoding})

            # Jump straight to the next frame to sample.
            n_frame += interval
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, n_frame)
            success, frame = vidcap.read()
    finally:
        # Always free the capture, even if detection raised.
        cv2.destroyAllWindows()
        vidcap.release()
    return allEncodings

In [5]:
def clusterFaces(allEncodings, n_jobs=-1, max_clusters = 100):
    """Group face encodings into identities with DBSCAN.

    :param allEncodings: list of dicts with at least the keys ``'encoding'``
                         and ``'time'`` (the format produced by getEncodings)
    :param n_jobs: forwarded to ``DBSCAN``
    :param max_clusters: currently unused; kept for interface compatibility
    :return: ``(face_times, face_encodings)`` where
             ``face_times`` is a list of ``(int(time), str(label))`` tuples in
             input order, and ``face_encodings`` maps each cluster label (as
             found in ``DBSCAN.labels_``) to the list of encodings assigned to
             it. Samples labelled -1 (noise) are left out of both.

    An empty input yields ``([], {})`` instead of an error from DBSCAN.
    """
    if len(allEncodings) == 0:
        return [], {}

    encodings = [d['encoding'] for d in allEncodings]
    times = [d['time'] for d in allEncodings]
    clt = DBSCAN(metric='euclidean', n_jobs=n_jobs, min_samples=5, eps=0.37)
    clt.fit(encodings)

    face_times = []      # (time of occurrence, face class) per kept sample
    face_encodings = {}  # face class -> all encodings of that face

    for label, time, encoding in zip(clt.labels_, times, encodings):
        if label == -1:  # not assigned to any cluster
            continue
        face_times.append((int(time), str(label)))
        face_encodings.setdefault(label, []).append(encoding)
    return face_times, face_encodings

In [9]:
def addEmptyFaces(faces, skip_seconds):
    """Return a new timeline with one entry per time step, filling gaps.

    :param faces: list of ``(time, face_class)`` tuples ordered by time
    :param skip_seconds: step, in seconds, used to walk the timeline
    :return: new list (``faces`` is not modified) with one entry per step
             from the first time up to, but not including, the last time.
             A step with no pending face gets ``(step_time, '-1')``.
    :raises ValueError: if ``skip_seconds`` is not positive (the walk would
                        never reach the end)

    An empty ``faces`` list yields ``[]``.
    """
    if not faces:
        return []
    if skip_seconds <= 0:
        raise ValueError('skip_seconds must be positive, got {!r}'.format(skip_seconds))

    min_time = (faces[0][0])
    max_time = (faces[-1][0])
    curr_time = min_time
    faces_empty = []
    counter = 0  # index of the next entry of `faces` not yet emitted

    # NOTE(review): the loop stops before max_time, so the final timestamp is
    # never emitted, and exactly one entry is consumed per step -- when
    # several faces share a timestamp the later ones are emitted on following
    # steps. Downstream code counts entries as time steps, so this is kept.
    while (curr_time < max_time):
        if((faces[counter][0]) > curr_time): #No face pending at this time
            faces_empty.append(((curr_time), '-1'))
        else:                              #A face is pending at or before this time
            faces_empty.append(faces[counter])
            counter+=1
        curr_time += skip_seconds
    return faces_empty

In [10]:
def faceTrendsDuration(faces, interval = 900, overlapping = False, join_consecutive = False,n_top=10):
    """Find, per bucket of the timeline, the faces spanning the longest time.

    :param faces: list of ``(time, face_class)`` entries; ``'-1'`` entries
                  (no face) are ignored when ranking
    :param interval: bucket length, counted in *entries* of ``faces``
    :param overlapping: if True buckets start at every entry (sliding window),
                        otherwise every ``interval`` entries
    :param join_consecutive: if True a bucket is only recorded when its
                             ranking differs from the last recorded one
    :param n_top: maximum number of faces kept per bucket
    :return: dict mapping the bucket's start time (``sec2HMS`` of its first
             entry) to the list of up to ``n_top`` face classes, ordered by
             decreasing span between their first and last appearance in the
             bucket. A bucket without faces maps to ``['-1']``. Buckets whose
             start times format to the same string overwrite one another.

    An empty ``faces`` list yields ``{}``.
    """
    if not faces:
        return {}

    trending_face = faces[0][1]  # last recorded ranking (initially the first face's class)
    trendy_faces = {}
    skip = 1 if overlapping else interval

    for x in range(0, len(faces), skip):
        interval_string = sec2HMS(faces[x][0])
        face_span = {}  # face class -> (first time seen, last time seen) in this bucket
        for face in faces[x:x+interval]:
            curr_time, curr_face = face[0], face[1]
            if curr_face == '-1':
                continue
            first_seen = face_span[curr_face][0] if curr_face in face_span else curr_time
            face_span[curr_face] = (first_seen, curr_time)

        if face_span:
            # Stable sort: ties keep their order of first appearance.
            max_face_in_interval = sorted(
                face_span,
                key=lambda key: face_span[key][1] - face_span[key][0],
                reverse=True)[:n_top]
        else:
            max_face_in_interval = ['-1']

        if not join_consecutive or max_face_in_interval != trending_face:
            trending_face = max_face_in_interval
            trendy_faces[interval_string] = trending_face
    return trendy_faces

In [None]:
# Step, in seconds, between sampled frames; the same value is later given to
# addEmptyFaces() so both walk the timeline with the same step.
SKIP_SECONDS = 1 
# One dict ('time', 'loc', 'encoding') per face detected in the sampled frames.
allEncodings = getEncodings(vid_path, SKIP_SECONDS)
# face_list: (time, cluster label) per clustered face;
# face_encodings: cluster label -> encodings belonging to that cluster.
face_list, face_encodings = clusterFaces(allEncodings)

In [11]:
# Timeline with one entry per step, using '-1' where no face is pending.
faces_empty = addEmptyFaces(face_list, SKIP_SECONDS)
# Per bucket (default: 900 entries) the faces with the longest on-screen span,
# keyed by the bucket's start time as 'HH:MM:SS'.
trends = faceTrendsDuration(faces_empty)

In [12]:
# face_dict: face class -> every timestamp at which that face was seen,
# in the order the sightings appear in face_list.
face_dict = {}
for seen_at, face_class in face_list:
    face_dict.setdefault(face_class, []).append(seen_at)

In [13]:
#Getting consecutives
# cons_dict: face class -> list of runs, each run being the start times (in
# seconds) of consecutive trend buckets in which that face was ranked.
cons_dict = {}
prev_time = None  # start of the previously visited bucket; None before the first one
for bucket_key, vals in trends.items():
    bucket_start = HMS2sec(bucket_key)
    for val in vals:
        runs = cons_dict.setdefault(val, [])
        if runs and runs[-1][-1] == prev_time:
            # The face was also ranked in the bucket just before: extend the run.
            runs[-1].append(bucket_start)
        else:
            # First appearance, or a gap since its last bucket: start a new run.
            runs.append([bucket_start])
    prev_time = bucket_start

In [219]:
# face_intervals: face class -> list of timestamp lists, one per run of
# consecutive trend buckets in cons_dict, holding every sighting of that face
# between the first sighting in the run and the last sighting before the end
# of the run's final bucket.
TREND_BUCKET_SECONDS = 900  # length added to the last bucket's start; mirrors faceTrendsDuration's default interval

face_intervals = {}
for face, intervals in cons_dict.items():
    # faceTrendsDuration reports '-1' for buckets without any face; that
    # placeholder has no sightings in face_dict, so it is skipped rather than
    # raising a KeyError.
    if face not in face_dict:
        continue
    sightings = face_dict[face]
    face_intervals[face] = []
    for times in intervals:
        after_start = [x for x in sightings if x >= times[0]]
        before_end = [x for x in sightings if x <= times[-1] + TREND_BUCKET_SECONDS]
        if not after_start or not before_end:
            continue  # no sighting falls inside this run
        lb = min(after_start)  # lower bound
        ub = max(before_end)   # upper bound
        face_intervals[face].append([x for x in sightings if lb <= x <= ub])

In [242]:
# Flatten face_intervals into [face, timestamps] pairs, one per interval,
# ordered by the interval's last timestamp (i.e. by ending time).
shows = [[face, times]
         for face, face_runs in face_intervals.items()
         for times in face_runs]
shows.sort(key=lambda show: show[1][-1])

In [243]:
# Drop candidate shows whose first-to-last sighting spans less than min_len.
min_len = 0.5*60 #In seconds #CHANGE
shows = [show for show in shows
         if show[1][-1] - show[1][0] >= min_len]
# Filtering keeps the relative order, so shows stay sorted by ending time.

In [244]:
# Removing intervals within intervals:
# a show is dropped when some interval of the original list starts strictly
# before it and ends strictly after it. The comparison always uses the list
# as it was before any removal.
show_intervals = [show[1] for show in shows]
shows[:] = [
    show for show in shows
    if not any(show[1][0] > other[0] and show[1][-1] < other[-1]
               for other in show_intervals)
]

In [245]:
# Combining consecutive shows with very high overlap:
# neighbours are merged when the time between the start of the later show and
# the end of the earlier one exceeds overlap_threshold times the span of the
# shorter of the two. After a merge the same index is examined again.
overlap_threshold = 0.75
i = 0
while i < len(shows)-1:
    first, second = shows[i], shows[i+1]
    first_start, first_end = first[1][0], first[1][-1]
    second_start, second_end = second[1][0], second[1][-1]

    diff = first_end - second_start
    short_show = min(first_end - first_start, second_end - second_start)
    overlap = diff/short_show

    if overlap <= overlap_threshold:
        i += 1
        continue

    print('Hosts: {} & {}'.format(first[0], second[0]))
    print('Original durations: {} to {} and {} to {}'.format(sec2HMS(first_start), sec2HMS(first_end), sec2HMS(second_start), sec2HMS(second_end)))
    print('Total duration: '+str(diff))
    print('Overlap: '+str(overlap))
    # Merge the later show into the earlier one: join the host labels with
    # '&' and pool the timestamps, kept sorted.
    first[0] = first[0]+'&'+second[0]
    first[1].extend(second[1])
    first[1] = sorted(first[1])
    print('Merging show {} from {} to {}'.format(second[0], second_start, second_end))
    print()
    del shows[i+1]

Hosts: 0 & 32
Original durations: 00:00:35 to 01:57:10 and 01:44:06 to 01:57:10
Total duration: 784
Overlap: 1.0
Merging show 32 from 6246 to 7030



In [246]:
#Removing intervals which are overlapping between two shows.
#Example: A - 01:00 to 10:00
#         B - 09:00 to 12:00
#         C - 10:00 to 20:00
# A show is removed when the fraction of its timestamps lying inside the
# previous show's range plus the fraction lying inside the next show's range
# exceeds DOUBLE_OVERLAP_THRESHOLD. The first and last shows are never tested.
DOUBLE_OVERLAP_THRESHOLD = 0.85
i = 1
while i < len(shows)-1:
    left_show, mid_show, right_show = shows[i-1], shows[i], shows[i+1]
    curr_show = len(mid_show[1]) #Number of timestamps in the current show

    left_span = range(left_show[1][0], left_show[1][-1])
    right_span = range(right_show[1][0], right_show[1][-1])
    diff1 = sum(1 for x in mid_show[1] if x in left_span) #Left side overlapping
    overlap1 = diff1/curr_show
    diff2 = sum(1 for x in mid_show[1] if x in right_span) #Right side overlapping
    overlap2 = diff2/curr_show
    net_overlap = overlap1 + overlap2

    if net_overlap <= DOUBLE_OVERLAP_THRESHOLD:
        i += 1
        continue

    print('Hosts: {} and {} and {}'.format(left_show[0], mid_show[0], right_show[0]))
    print('Original durations: {} to {} and {} to {} and {} to {}'.format(sec2HMS(left_show[1][0]), sec2HMS(left_show[1][-1]), sec2HMS(mid_show[1][0]), sec2HMS(mid_show[1][-1]), sec2HMS(right_show[1][0]), sec2HMS(right_show[1][-1])))
    print('Left overlap: {}'.format(overlap1))
    print('Right overlap: {}'.format(overlap2))
    print('Net overlap: {}'.format(net_overlap))
    print()
    # Same index now holds the following show; re-test without advancing.
    del shows[i]

In [247]:
# #july 4
# #The Kagan Problem solution - TODO - probably not feasible - make it so that it considers every case as TEASER case but not the Kagan problem case
# shows_refined = [[shows[-1][0],[shows[-1][1]]]]
# for curr in range(len(shows)-2,-1,-1):
#     nxt = curr+1
# #     if not (shows[curr][1]): #If current boundary was completely removed(in the previous iteration), we can skip it.
# #         continue
    
# #     if not(shows[nxt][1]) or not(shows[curr][1]):
# #         continue
        
#     nxt_lb   = shows[nxt][1][0]
#     curr_ub  = shows[curr][1][-1]
#     if(curr_ub>=nxt_lb): 
#         nxt_lb2  = max([x for x in shows[nxt][1] if x<=curr_ub])
#         curr_ub0 = min([x for x in shows[curr][1] if x>=nxt_lb])
#         #TODO: Double sided
#         shows_refined[-1][1] = [[x for x in shows[nxt][1] if x<=nxt_lb2],[x for x in shows[nxt][1] if x>nxt_lb2]]
#         shows_refined.append([shows[curr][0],[[x for x in shows[curr][1] if x<=curr_ub0],[x for x in shows[curr][1] if x>curr_ub0]]]) #Only taking values not overlapping with the next show
#     else:
#         shows_refined.append([shows[curr][0],[shows[curr][1]]])

In [248]:
# Trim every show so it only keeps timestamps at or after the end of the show
# before it (ends are taken from the untrimmed list). The first show is kept
# as is. Slicing and zip make an empty `shows` produce an empty result instead
# of raising IndexError.
shows_refined = shows[:1]
for prev_show, show in zip(shows, shows[1:]):
    prev_end = prev_show[1][-1]
    shows_refined.append([show[0], [x for x in show[1] if x >= prev_end]])

In [289]:
# Turn each [host label, timestamps] pair into a Show spanning its first to
# last timestamp. Note: `shows` is rebound from a list of pairs to a list of
# Show objects, so this cell cannot be re-run on its own output.
shows = [Show(hosts=str(host_label), start_time=timestamps[0], end_time=timestamps[-1])
         for host_label, timestamps in shows_refined]

### Shows are done, now finding the hosts' names of each show

In [290]:
#TODO:
# 1. Do the celeb recognition thing and replace hosts of every Show with that celeb
#  * If face is a good majority -> make it host name, leave the top 5 predictions in INF
#FIX KNN's N or the epsilon value
#Remove unconfident faces - https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html#sklearn.neighbors.KNeighborsClassifier.kneighbors https://medium.com/@mohtedibf/in-depth-parameter-tuning-for-knn-4c0de485baf6
# 2. Test on a few videos
# 3. Cut out videos from this information
# 4. Use .txt3 to make .txt for each of these cuts(3&4 in the same for loop)
# 5. clear TODOs and prints, make python scripts with input format
# 6. put on github and clone on the singularity, make video input format on singularity, check output formats with Steen and go back to 3.
# 7. documentation/ blogs from slides

In [291]:
for show in shows:
    # show.hosts is an '&'-joined string of cluster labels; order them so the
    # cluster with the most encodings (the main anchor) comes first.
    cluster_ids = sorted(show.hosts.split('&'),
                         key=lambda cid: len(face_encodings[int(cid)]),
                         reverse=True)

    ranked_predictions = []
    for cluster_id in cluster_ids:
        host_encodings = face_encodings[int(cluster_id)]   # all encodings of this host's face
        name_votes = Counter(list(encoding2name(host_encodings)))  # predicted name -> number of encodings
        # (name, share of this cluster's encodings), most frequent name first
        ranked_predictions.append([(name, votes/len(host_encodings))
                                   for name, votes in name_votes.most_common()])

    show.hosts = ranked_predictions

In [252]:
# shows is a list of all 'Show's. Each Show's hosts attribute is a list of all predictions for each celeb.
# Each celeb has its own list having the predictions sorted according to frequency
# TODO: check if output shows are sorted 

In [32]:
#Extracting video metadata
vcap = cv2.VideoCapture(vid_path)
# Named property ids instead of the bare numbers 3 and 4.
vid_width = int(vcap.get(cv2.CAP_PROP_FRAME_WIDTH))
vid_height = int(vcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
vid_fps = vcap.get(cv2.CAP_PROP_FPS)
vid_frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT)

# Primary method: seek to the end of the file and read the position in ms.
vcap.set(cv2.CAP_PROP_POS_AVI_RATIO, 1)
vid_duration = int(vcap.get(cv2.CAP_PROP_POS_MSEC)/1000)
# Fallback: if that probe yields no duration, estimate it from the frame
# count and frame rate so the last cut does not get an end time of 0.
if vid_duration <= 0 and vid_fps > 0:
    vid_duration = int(vid_frame_count / vid_fps)

cv2.destroyAllWindows()
vcap.release()

In [129]:
# The video file name is split on '_'; field 0 is used as the pull date and
# field 3 as the barcode.
filename = os.path.splitext(os.path.basename(vid_path))[0]
attributes = filename.split('_')
pulldate = attributes[0]
# File names with fewer than four fields would otherwise raise IndexError.
barcode = attributes[3] if len(attributes) > 3 else 'unknown-barcode'
vid_txt3_path = os.path.splitext(vid_path)[0]+'.txt3'
txt3_headers = []      # header lines of the .txt3 file (empty if there is none)
txt3_subtitles = None  # subtitle lines of the .txt3 file (None if there are none)

#common headers for all cuts - default values (to be used if this column is not present in the txt3)
OVD = 'OVD|'+filename+'.mp4'  
OID = 'OID|'
COL = 'COL|Communication Studies Archive, UCLA'
SRC = 'SRC|Rosenthal Collection, UCLA'
LAN = 'LAN|ENG'
LBT = 'LBT|'

if os.path.exists(vid_txt3_path):
    with open(vid_txt3_path, 'r') as txt3_file:
        txt3_lines = txt3_file.read().splitlines()

    # Header lines have '|' as their 4th character ('TAG|value'). The first
    # line that does not (including lines too short to have one) starts the
    # subtitles. If no such line exists every line is treated as a header.
    txt3_headers = txt3_lines
    for i, line in enumerate(txt3_lines):
        if len(line) < 4 or line[3] != '|': #Header lines end
            txt3_headers = txt3_lines[:i]
            txt3_subtitles = txt3_lines[i:] #Subtitles' lines start here
            break

    # Values from the source file override the defaults above. The source's
    # TOP / UID values are stored under OVD / OID for the cuts.
    for header in txt3_headers:
        tag = header[:3]
        if tag == 'TOP':
            OVD = 'OVD|'+header[4:]
        elif tag == 'UID':
            OID = 'OID|'+header[4:]
        elif tag == 'COL':
            COL = header
        elif tag == 'SRC':
            SRC = header
        elif tag == 'LAN':
            LAN = header

In [305]:
#Cutting shows from the main video + making a .txt file for each
for n_show, show in enumerate(shows):

    channel = 'unknown-channel' #until the work with IMDb is done
    channel = channel.replace(' ', '_')

    # show.hosts[0] is the main host's prediction list; its first entry is the
    # most frequent (name, share of encodings) pair.
    main_host = show.hosts[0][0]
    if main_host[1] > 0.45: #If majority predictions are of the same person
        host_name = main_host[0]
    else:
        host_name = 'unknown-host'
    host_name = host_name.replace(' ', '_')
    cut_filename = '_'.join((pulldate, barcode, '-'.join((str(n_show+1), str(len(shows)))), channel, host_name))
    cut_path = os.path.join(output_path, cut_filename)
    cut_start_sec = int(max(0, show.start_time - 60)) #using a buffer of 1 minute

    if n_show == len(shows)-1: #last show
        cut_endtime = int(min(show.end_time + 60, vid_duration))
    else:
        cut_endtime = int(shows[n_show+1].start_time) #till the start of next show

    cut_duration = sec2HMS(cut_endtime - cut_start_sec)
    cut_starttime = sec2HMS(cut_start_sec)

    # Arguments are passed as a list (no shell), so paths containing spaces or
    # shell metacharacters are handled safely. Failures are reported but, as
    # before, do not stop the remaining cuts.
    ffmpeg_command = ['ffmpeg', '-ss', cut_starttime, '-t', cut_duration, '-i', vid_path,
                      '-vcodec', 'copy', '-acodec', 'copy', cut_path + '.mp4']
    try:
        ffmpeg_status = subprocess.call(ffmpeg_command)
    except OSError as err:  # e.g. ffmpeg is not installed
        ffmpeg_status = None
        print('Could not run ffmpeg for {}: {}'.format(cut_path, err))
    if ffmpeg_status not in (0, None):
        print('ffmpeg exited with status {} for {}'.format(ffmpeg_status, cut_path))

    TOP = 'TOP|'+cut_filename+'.mp4'
    UID = 'UID|' #TODO: Generate UUID in the cluster
    TTL = 'TTL|'
    PID = 'PID|'
    CMT = 'CMT|'
    # Up to five predicted names per host, spaces replaced by '-'. Joining the
    # parts keeps the 'INF|' prefix intact even when there are no hosts.
    INF = 'INF|' + '_'.join(
        'probable_host'+str(i+1)+':'+'_'.join([pred[0].replace(' ','-') for pred in host][:5])
        for i, host in enumerate(show.hosts))
    DUR = 'DUR|'+cut_duration
    TMS = 'TMS|'+cut_starttime+'-'+sec2HMS(cut_endtime)
    VID = 'VID|{}x{}'.format(vid_width, vid_height)

    #initializing with headers
    cut_txt_lines = [TOP, COL, UID, SRC, TTL, PID, CMT, DUR, VID, LAN, LBT, OVD, OID, TMS, INF]

    # Subtitle lines are compared on their first 14 characters against
    # <pulldate without '-'><HHMMSS> strings built from the cut boundaries.
    # NOTE(review): the boundaries are offsets into the video -- confirm that
    # the txt3 timestamps use the same reference.
    sub_starttime = pulldate.replace('-','') + cut_starttime.replace(':','')
    sub_endtime = pulldate.replace('-','') + sec2HMS(cut_endtime).replace(':','')

    if txt3_subtitles:
        for curr_sub in txt3_subtitles: #TODO: maybe make the starting time 0 for each?
            if curr_sub[:14] >= sub_starttime:
                if curr_sub[:14] <= sub_endtime:
                    cut_txt_lines.append(curr_sub)
                else:
                    break  # lines after the cut's end are not scanned

    with open(cut_path+'.txt', 'w') as f:
        for line in cut_txt_lines:
            f.write("%s\n" % line)

In [None]:
!cat ../data/output_cuts/2006-01-02_00001057_1-1_unknown-channel_Daryn_Kagan.txt