In [1]:
# Basic import
import ast
import json
import os
import shutil
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Helper function
def writeProgress(msg, count, total):
    """Print an in-place progress indicator on stdout.

    Writes ``msg`` followed by ``count / total`` rendered as a percentage
    with two decimals. The line ends with a carriage return (not a newline)
    so the next call overwrites it.

    Args:
        msg: Text shown in front of the percentage.
        count: Number of items processed so far.
        total: Total number of items; a value of 0 raises ZeroDivisionError.
    """
    ratio = count / total
    stream = sys.stdout
    stream.write(msg + "{:.2%}\r".format(ratio))
    # Flush so the update is visible immediately, even without a newline.
    stream.flush()
    
def newPath(path):
    """Make sure the directory ``path`` exists.

    Missing parent directories are created as well, and calling this for a
    directory that already exists is a no-op.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True avoids the race between checking for the directory and
    # creating it; makedirs (unlike mkdir) also handles nested paths.
    os.makedirs(path, exist_ok=True)

def read_json(src_path):
    """Load and return the JSON document stored at ``src_path``.

    Args:
        src_path: Path of the JSON file to read.

    Returns:
        The decoded Python object (dict, list, str, number, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8; do not rely on the platform's default encoding,
    # which differs between systems (e.g. cp1252 / cp950 on Windows).
    with open(src_path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)

def write_json(data,dst_path):
    """Serialise ``data`` as JSON into the file ``dst_path``.

    An existing file is overwritten. ``data`` must consist of types the
    standard ``json`` module can encode.

    Args:
        data: Object to serialise.
        dst_path: Destination file path.
    """
    with open(dst_path, 'w') as sink:
        json.dump(data, sink)

In [3]:
# Root folder of the split trailers; later cells list it and treat each entry as one movie.
ROOT = './2018SplitTrailers/'
# Displayed as the cell result: number of entries found directly under ROOT.
len(os.listdir(ROOT))

1405

In [4]:
import cv2
import imutils
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import gaussian_kde

In [5]:
def first_nonzero(arr, axis, invalid_val=-1, rev = False):
    """Find the position of the first non-zero entry along ``axis``.

    Args:
        arr: Array-like of numbers.
        axis: Axis along which to search.
        invalid_val: Value reported for slices that contain no non-zero entry.
        rev: If True, search from the end of ``axis``; the reported position
            is then the offset counted from the last element (0 means the
            last element itself is non-zero).

    Returns:
        An array with ``axis`` removed, holding one position (or
        ``invalid_val``) per slice, in the same order as the input slices.
    """
    mask = np.asarray(arr) != 0
    if rev:
        # Flip ONLY the searched axis. A bare np.flip() reverses every axis,
        # which would hand the results back in reversed order along the
        # remaining axes (e.g. the last column's offset first).
        mask = np.flip(mask, axis=axis)

    # argmax returns the first True per slice; it also returns 0 for an
    # all-False slice, hence the explicit any() guard.
    return np.where(mask.any(axis=axis), mask.argmax(axis=axis), invalid_val)

def nonzero(arr):
    """Collect, along axis 0, the offsets of the first non-zero entries.

    For every slice along axis 0 two offsets are computed with
    ``first_nonzero``: one searching from the start and one searching from
    the end. Slices without any non-zero entry contribute 0.

    Horizontal borders are measured along axis 0 (as done here); the author's
    note says vertical borders would use axis 1.

    Args:
        arr: Array to inspect.

    Returns:
        A flat 1-D array: all offsets found from the start followed by all
        offsets found from the end.
    """
    from_start = first_nonzero(arr, axis=0, invalid_val=0)
    from_end = first_nonzero(arr, axis=0, invalid_val=0, rev=True)
    return np.concatenate((np.ravel(from_start), np.ravel(from_end)))

def count_mode(arr):
    """Return the most frequent value of a 1-D sequence of non-negative ints.

    When several values share the highest count, the smallest one is returned.

    Args:
        arr: 1-D array-like of non-negative integers.

    Returns:
        The mode as a NumPy integer.
    """
    # np.bincount tallies occurrences of each non-negative integer (floats are
    # not accepted); slot i holds how often i occurs, so the index of the
    # largest tally is the mode.
    return np.bincount(arr).argmax()

In [6]:
# Detect, for every movie under ROOT, the height of the horizontal black
# border (letterbox) and store it in crop_dict as {movie: rows to crop}.

# Tunable settings for the detection below.
MAX_SCENES = 20          # only the first MAX_SCENES listed scenes of a movie are sampled
CONTENT_THRESHOLD = 50   # normalised brightness (0-255) above which a pixel counts as content

crop_dict = {}
for count, movie in enumerate(os.listdir(ROOT)):
    print(count, 'Running:', movie)

    # init
    li = []
    scaler = MinMaxScaler(feature_range=(0, 255), copy=True)
    movie_dir = os.path.join(ROOT, movie)

    # collect the middle frame of each sampled scene
    for scene in os.listdir(movie_dir)[:MAX_SCENES]:
        framePath = os.path.join(movie_dir, scene, 'frames')
        # File names start with the frame number; sort numerically so that
        # e.g. 10 comes after 9.
        frameList = sorted(int(name.split('.')[0]) for name in os.listdir(framePath))
        if not frameList:
            print('No frames in', framePath, '- scene skipped')
            continue
        keyframe = os.path.join(framePath, str(frameList[len(frameList) // 2]) + '.jpg')

        # Read the image. cv2.imread returns None (it does not raise) when
        # the file is missing or cannot be decoded.
        frame = cv2.imread(keyframe)
        if frame is None:
            print('Cannot read', keyframe, '- scene skipped')
            continue
        img = frame  # kept only for the optional visual check below

        # Convert to HSV colour space and keep the value (brightness) channel.
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        li.append(hsv[:, :, 2])

    if not li:
        print('No readable key frame for', movie, '- movie skipped')
        continue

    # Sum the brightness planes over all sampled frames: pixels that are
    # black in every frame stay at zero.
    arr_sum = np.sum(np.asarray(li), axis=0)

    # Mapping between 0 and 255.
    # NOTE(review): MinMaxScaler rescales each column independently, so the
    # darkest pixel of every column maps to 0 and the brightest to 255 --
    # this is not a single global normalisation. Confirm this is intended.
    scaler.fit(arr_sum)
    arr_norm = scaler.transform(arr_sum)

    h = arr_norm.shape[0]

    # Binarise: after this call `threshold` contains only 0 and 255.
    _, threshold = cv2.threshold(arr_norm, CONTENT_THRESHOLD, 255, cv2.THRESH_BINARY)

    # The most common border offset over all columns (top and bottom pooled),
    # once on the raw normalised sum and once on the thresholded image.
    mode = count_mode(nonzero(arr_norm))
    mode_thresh = count_mode(nonzero(threshold))

    print('Crop range:', mode, h - mode)
    print('Crop range:', mode_thresh, h - mode_thresh)

    # Optional visual check (uncomment to inspect a movie):
    # plt.figure(figsize=(18,6))
    # plt.subplot(131).set_title("Cropped"), plt.imshow(img[mode: h - mode, :])
    # plt.subplot(132).set_title("Cropped_thresh"), plt.imshow(img[mode_thresh: h - mode_thresh, :])
    # plt.subplot(133).set_title("Original"), plt.imshow(arr_norm)
    # plt.show()

    # Save the thresholded result. Cast the NumPy integer to a plain int so
    # the dict can be serialised by json and has a stable repr.
    crop_dict[movie] = int(mode_thresh)
    print('====================================================================================================')

0 Running: tt8718192
True
False
Crop range: 0 720
Crop range: 14 706
1 Running: tt7640194
True
True
Crop range: 0 720
Crop range: 0 720
2 Running: tt8478602
True
True
Crop range: 0 720
Crop range: 0 720
3 Running: tt8580242
True
True
Crop range: 0 720
Crop range: 0 720
4 Running: tt8887766
True
True
Crop range: 80 640
Crop range: 92 628
5 Running: tt8320292
True
True
Crop range: 0 720
Crop range: 0 720
6 Running: tt5433276
True
True
Crop range: 0 360
Crop range: 0 360
7 Running: tt9776082
True
True
Crop range: 0 360
Crop range: 0 360
8 Running: tt9464032
True
False
Crop range: 0 720
Crop range: 13 707
9 Running: tt1571234
True
True
Crop range: 80 640
Crop range: 92 628
10 Running: tt9616622
True
True
Crop range: 0 720
Crop range: 0 720
11 Running: tt6966692
True
False
Crop range: 28 692
Crop range: 40 680
12 Running: tt3369286
True
True
Crop range: 0 720
Crop range: 0 720
13 Running: tt9818154
True
True
Crop range: 0 270
Crop range: 0 270
14 Running: tt8974572
True
True
Crop range: 0 7

### Checking and saving `crop_dict` (dict serialization issue)

In [7]:
# Number of movies that received a crop value.
len(crop_dict)

1405

In [8]:
# Check the container type before writing it to disk.
type(crop_dict)

dict

In [9]:
# Display the whole mapping: movie folder name -> detected crop offset (rows).
crop_dict

{'tt8718192': 14,
 'tt7640194': 0,
 'tt8478602': 0,
 'tt8580242': 0,
 'tt8887766': 92,
 'tt8320292': 0,
 'tt5433276': 0,
 'tt9776082': 0,
 'tt9464032': 13,
 'tt1571234': 92,
 'tt9616622': 0,
 'tt6966692': 40,
 'tt3369286': 0,
 'tt9818154': 0,
 'tt8974572': 0,
 'tt7334528': 14,
 'tt9013842': 0,
 'tt7473390': 0,
 'tt4418730': 26,
 'tt7533486': 93,
 'tt6334884': 92,
 'tt8149240': 14,
 'tt4860490': 88,
 'tt9252028': 0,
 'tt6142146': 0,
 'tt9378944': 93,
 'tt6200656': 87,
 'tt8129892': 0,
 'tt7419412': 93,
 'tt9183176': 0,
 'tt4865022': 40,
 'tt8377564': 93,
 'tt6250554': 86,
 'tt8929946': 86,
 'tt4883336': 76,
 'tt9178978': 0,
 'tt9125468': 0,
 'tt8997090': 0,
 'tt7999950': 0,
 'tt9085540': 46,
 'tt7806572': 0,
 'tt5433398': 0,
 'tt2777020': 0,
 'tt9646364': 0,
 'tt5822154': 92,
 'tt8956976': 0,
 'tt6229218': 0,
 'tt5884348': 92,
 'tt8880064': 93,
 'tt9094782': 92,
 'tt8901940': 0,
 'tt7699348': 90,
 'tt7836694': 87,
 'tt9828698': 0,
 'tt10005184': 0,
 'tt8947034': 0,
 'tt8016572': 86,
 't

In [11]:
# Persist the crop offsets. The file keeps its existing format: the dict's
# Python repr wrapped in a JSON string, which readers parse back into a dict.
# Values are cast to plain int first so the repr stays "{'tt...': 14}" on
# every NumPy version (NumPy 2 would otherwise write "np.int64(14)").
newPath('./json/')  # open() does not create missing directories
write_json(str({movie: int(offset) for movie, offset in crop_dict.items()}),
           './json/crop_dict4Trailers_2018.json')

In [13]:
# Reload test: read the saved file back and turn it into a dict again.
dic = read_json('./json/crop_dict4Trailers_2018.json')
print(type(dic))
# The file holds the dict's repr as a string. ast.literal_eval parses Python
# literals only, so (unlike eval) it cannot execute code that might have been
# placed in the file.
dic = ast.literal_eval(dic)
print(type(dic))
print(dic)

<class 'str'>
<class 'dict'>
{'tt8718192': 14, 'tt7640194': 0, 'tt8478602': 0, 'tt8580242': 0, 'tt8887766': 92, 'tt8320292': 0, 'tt5433276': 0, 'tt9776082': 0, 'tt9464032': 13, 'tt1571234': 92, 'tt9616622': 0, 'tt6966692': 40, 'tt3369286': 0, 'tt9818154': 0, 'tt8974572': 0, 'tt7334528': 14, 'tt9013842': 0, 'tt7473390': 0, 'tt4418730': 26, 'tt7533486': 93, 'tt6334884': 92, 'tt8149240': 14, 'tt4860490': 88, 'tt9252028': 0, 'tt6142146': 0, 'tt9378944': 93, 'tt6200656': 87, 'tt8129892': 0, 'tt7419412': 93, 'tt9183176': 0, 'tt4865022': 40, 'tt8377564': 93, 'tt6250554': 86, 'tt8929946': 86, 'tt4883336': 76, 'tt9178978': 0, 'tt9125468': 0, 'tt8997090': 0, 'tt7999950': 0, 'tt9085540': 46, 'tt7806572': 0, 'tt5433398': 0, 'tt2777020': 0, 'tt9646364': 0, 'tt5822154': 92, 'tt8956976': 0, 'tt6229218': 0, 'tt5884348': 92, 'tt8880064': 93, 'tt9094782': 92, 'tt8901940': 0, 'tt7699348': 90, 'tt7836694': 87, 'tt9828698': 0, 'tt10005184': 0, 'tt8947034': 0, 'tt8016572': 86, 'tt4177018': 93, 'tt9291892': 

In [None]:
# # Movies for which the automatic crop detection failed
# failed = ['childsplaymovie', 'httydragon', 'apollo11movie', 'hotelmumbaifilm']
# # Movies with vertical (left/right) borders instead of horizontal ones
# stripe = ['highlifemovie']

In [None]:
# Hand-entered crop value for the one trailer listed with a vertical border.
# Note: this file is written as a real JSON object, whereas
# crop_dict4Trailers_2018.json stores the dict as a string.
write_json({"highlifemovie": 28}, './json/crop_dict4Trailers_vertical.json')