In [1]:
import random
import time
import numpy as np
import matplotlib.pyplot as plt
import cv2
import glob
from Levenshtein import distance
import easyocr
import pickle

reader = easyocr.Reader(['en'])

CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.


In [2]:
FILE_NAME = 'inpoopment.mp4'

In [3]:
def analyze_video(video, start_frame=0, end_frame=None, sampling_rate=1):
    w, h, fps, num_frames = get_detail(video)
    print(w, h, fps, num_frames)
    start_frame = start_frame
    end_frame = end_frame if end_frame else num_frames
    step = int(round(sampling_rate * fps))
    start = time.time()
    cap = cv2.VideoCapture(video)
    i = start_frame
    output = []
    all_res = []
    while cap.isOpened() and i < end_frame:
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
#         print(i)
        ret, frame = cap.read()
#         print(frame)
#         try:
        temp_res = run_inference(frame)
#         except:
#             return frame
        all_res.append(temp_res)
        i += step

    cap.release()
    end = time.time()

    return {
        'start_frame' : start_frame,
        'end_frame' : end_frame,
        'result_feature': all_res,
        'step':step,
        'fps':fps
    }

def get_detail(vid_file):
    cap = cv2.VideoCapture(vid_file)

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    cap.release()

    return width, height, fps, total_frames

def run_inference(frame):
    all_text_here = reader.readtext(frame)
    
    return {
        'text':all_text_here
    }

In [4]:
all_data = glob.glob(FILE_NAME)

In [5]:
result = analyze_video(all_data[0],
                       #start_frame=10000,
                       #end_frame=15000, 
                       sampling_rate = 10)

1280 720 25.0 646


In [6]:
all_meta = {all_data[0]:result}

In [7]:
all_meta.keys()

dict_keys(['inpoopment.mp4'])

In [8]:
all_meta[FILE_NAME].keys()

dict_keys(['start_frame', 'end_frame', 'result_feature', 'step', 'fps'])

In [9]:
all_meta[FILE_NAME]

{'start_frame': 0,
 'end_frame': 646,
 'result_feature': [{'text': []},
  {'text': [([[46, 259], [1243, 259], [1243, 516], [46, 516]],
     'NOGIRLS',
     0.9705299773171104)]},
  {'text': [([[43, 298], [1256, 298], [1256, 541], [43, 541]],
     'InpoopmeGts',
     0.7111144294015315)]}],
 'step': 250,
 'fps': 25.0}

In [10]:
all_meta[FILE_NAME]['result_feature'][1]

{'text': [([[46, 259], [1243, 259], [1243, 516], [46, 516]],
   'NOGIRLS',
   0.9705299773171104)]}

In [11]:
distance('halo data','hala kcdata')

3

In [12]:
def distance_text(a,b):
    if max(len(a),len(b))==0:
        return 1
    return distance(a,b)/max(len(a),len(b))
def reshape_text(data1, data2, idx):
    for word in data2:
        used_word = word[1].lower()
        found = False
        for key in data1.keys():
            dist = distance_text(key,used_word)
            if dist<0.1:
                
                if data1[key][-1]['end']+1 == idx:
                    data1[key][-1]['end']+=1
                else:
                    data1[key].append({'start':idx, 'end':idx})
                found = True
                break
        if not found:
#             print('word : ',word[1].lower())
#             print(data1)
            data1[used_word] = [{'start':idx, 'end':idx}]
    return data1
    
def aggregate_text(all_text):
    aggregated_text = {}
    for idx,txt in enumerate(all_text):
        aggregated_text = reshape_text(aggregated_text,txt,idx)
    return aggregated_text

In [13]:
for key in all_meta.keys():
    print(key)
    res_text = aggregate_text([i['text'] for i in all_meta[key]['result_feature']])
    all_meta[key]['agg_text'] = res_text

inpoopment.mp4


In [14]:
all_meta[FILE_NAME]

{'start_frame': 0,
 'end_frame': 646,
 'result_feature': [{'text': []},
  {'text': [([[46, 259], [1243, 259], [1243, 516], [46, 516]],
     'NOGIRLS',
     0.9705299773171104)]},
  {'text': [([[43, 298], [1256, 298], [1256, 541], [43, 541]],
     'InpoopmeGts',
     0.7111144294015315)]}],
 'step': 250,
 'fps': 25.0,
 'agg_text': {'nogirls': [{'start': 1, 'end': 1}],
  'inpoopmegts': [{'start': 2, 'end': 2}]}}

In [15]:
all_meta[FILE_NAME]['agg_text']

{'nogirls': [{'start': 1, 'end': 1}], 'inpoopmegts': [{'start': 2, 'end': 2}]}

In [16]:
all_meta[FILE_NAME]['agg_text']['nogirls']

[{'start': 1, 'end': 1}]

In [17]:
pickle.dump(all_meta,open('features','wb'))

In [18]:
loaded_meta = pickle.load(open('features', 'rb'))

In [19]:
loaded_meta

{'inpoopment.mp4': {'start_frame': 0,
  'end_frame': 646,
  'result_feature': [{'text': []},
   {'text': [([[46, 259], [1243, 259], [1243, 516], [46, 516]],
      'NOGIRLS',
      0.9705299773171104)]},
   {'text': [([[43, 298], [1256, 298], [1256, 541], [43, 541]],
      'InpoopmeGts',
      0.7111144294015315)]}],
  'step': 250,
  'fps': 25.0,
  'agg_text': {'nogirls': [{'start': 1, 'end': 1}],
   'inpoopmegts': [{'start': 2, 'end': 2}]}}}

In [20]:
res_text

{'nogirls': [{'start': 1, 'end': 1}], 'inpoopmegts': [{'start': 2, 'end': 2}]}

In [21]:
all_meta

{'inpoopment.mp4': {'start_frame': 0,
  'end_frame': 646,
  'result_feature': [{'text': []},
   {'text': [([[46, 259], [1243, 259], [1243, 516], [46, 516]],
      'NOGIRLS',
      0.9705299773171104)]},
   {'text': [([[43, 298], [1256, 298], [1256, 541], [43, 541]],
      'InpoopmeGts',
      0.7111144294015315)]}],
  'step': 250,
  'fps': 25.0,
  'agg_text': {'nogirls': [{'start': 1, 'end': 1}],
   'inpoopmegts': [{'start': 2, 'end': 2}]}}}

In [23]:
for key in result.keys():
    print(key)
    res_text = aggregate_text([i['text'] for i in result[key]['result_feature']])

start_frame


TypeError: 'int' object is not subscriptable