In [1]:
# Basic import
import os
import sys
import json
import shutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import cv2

In [2]:
# Helper function
def writeProgress(msg, count, total):
    sys.stdout.write(msg + "{:.2%}\r".format(count/total))
    sys.stdout.flush()
    
def newPath(path):
    if not os.path.isdir(path):
        os.mkdir(path)

def read_json(src_path):
    with open(src_path, 'r') as json_file:
        data = json.load(json_file)
    return data

def write_json(data,dst_path):
    with open(dst_path, 'w') as outfile:
        json.dump(data, outfile)

## 2019 movies

In [3]:
PATH = './SplitTrailers_crop/'
order = os.listdir(PATH)
order.sort()
len(order)

97

## 2018 movies

In [None]:
df_onehot = pd.read_csv('./csv/filename2genreMat_2018.csv')
df_onehot

In [None]:
trailers = [filename.split('.')[0] for filename in df_onehot['filename'].tolist()]
trailers[:10]

# A_AverageShotLength

In [61]:
A_li = []
for movie in order:
    frameCount = []
    print('Running:', movie)
    
    for scene in os.listdir(PATH + movie):
        framePath = PATH + movie + '/' +scene + '/frames/'
        frameCount.append(len(os.listdir(framePath)))
    
#     print(frameCount)
    avg = np.mean(frameCount)
    std = np.std(frameCount)
    print(avg, std)
    
    A_li.append([avg, std])

Running: 47metersdown
34.80487804878049 23.842183882819985
Running: adogsjourneymovie
34.63247863247863 29.109753495301806
Running: aftermathmovie
35.69230769230769 26.611910622543594
Running: aftermovie
58.276595744680854 54.90405640467416
Running: alitamovie
35.78431372549019 29.192272497163277
Running: angelhasfallen
36.42156862745098 27.8171326366979
Running: angrybirdsmovie
34.65 30.910529489695467
Running: annabellemovie
52.36363636363637 45.78825983035401
Running: annamovie
37.91011235955056 33.61267833617541
Running: apollo11movie
65.6829268292683 58.86505403510322
Running: arcticmovie
72.625 94.80177863385717
Running: artofracingmovie
32.21212121212121 25.5755062460723
Running: avengers
63.70909090909091 71.98678482945773
Running: beachbummovie
33.64485981308411 20.565487983716434
Running: bernadettefilm
35.05555555555556 24.79838455511185
Running: blindedbythelightmovie
34.0990099009901 15.943902245739093
Running: booksmart
65.2 92.35521100620149
Running: breakthroughmovie
42

In [62]:
A_arr = np.asarray(A_li)
np.save('./npy/A_arr.npy', A_arr)
len(A_arr)

97

# B_ColorVariance
* https://docs.opencv.org/3.4/de/d25/imgproc_color_conversions.html#color_convert_rgb_luv
* https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.det.html

In [63]:
B_li = []
for movie in order:
    rho = []
    print('Running:', movie)
    
    for scene in os.listdir(PATH + movie):
        framePath = PATH + movie + '/' + scene + '/frames/'
        frameList = [int(name.split('.')[0]) for name in os.listdir(framePath) if not name.startswith('.')]
        frameList.sort()
        keyframe = framePath + '/' + str(frameList[len(frameList)//2]) + '.jpg'
        img = cv2.imread(keyframe)
        luv = cv2.cvtColor(img, cv2.COLOR_BGR2Luv)
        l = luv[:,:,0].ravel()
        u = luv[:,:,1].ravel()
        v = luv[:,:,2].ravel()
        x = np.asarray((l, u, v))
        cov = np.cov(x)
        det = np.linalg.det(cov)
        rho.append(det)
    
    avg = np.mean(rho)
    std = np.std(rho)
    print(avg, std)
    
    B_li.append([avg, std])

Running: 47metersdown
11914482.596505234 30591489.6252802
Running: adogsjourneymovie
2695759.4157513804 4626122.574262091
Running: aftermathmovie
742848.7792286681 1455065.0246480522
Running: aftermovie
5679151.22885397 14193762.10662248
Running: alitamovie
2569163.918159883 6429859.991392611
Running: angelhasfallen
1209756.4450619791 4667335.491197369
Running: angrybirdsmovie
369915511.078566 751228830.3413738
Running: annabellemovie
4562518.128207735 33908580.36551343
Running: annamovie
10070550.647303257 30749152.77130637
Running: apollo11movie
6470591.37401755 16186620.130694835
Running: arcticmovie
419908.79301523365 1445080.8470512065
Running: artofracingmovie
10200353.302957105 23324681.093846396
Running: avengers
1692430.0381357437 7383778.495521684
Running: beachbummovie
10716507.199904121 13329822.277450798
Running: bernadettefilm
8233539.458952443 12388000.619370837
Running: blindedbythelightmovie
9413197.331716841 46368938.631773196
Running: booksmart
3016540.562397561 4491

In [64]:
B_arr = np.asarray(B_li)
np.save('./npy/B_arr.npy', B_arr)
len(B_arr)

97

# C_MotionContent
* https://blog.gtwang.org/programming/opencv-motion-detection-and-tracking-tutorial/

In [65]:
C_li = []
for movie in order: # for 2019
    moving_li = []
    print('Running:', movie)
    
    for scene in os.listdir(PATH + movie):
    
        video  = PATH + movie + '/' + scene + '/clip.avi'

        cap = cv2.VideoCapture(video)

        w = int(cap.get(3))
        h = int(cap.get(4))

        # 計算畫面面積
        area = w * h

        # 初始化平均影像
        ret, frame = cap.read()

        avg = cv2.blur(frame,(4, 4))
        avg_float = np.float32(avg)
#         plt.imshow(frame)
#         plt.show()

        while(cap.isOpened()):
            # 讀取一幅影格
            ret, frame = cap.read()

            # 若讀取至影片結尾，則跳出
            if ret == False:
                break

            # 模糊處理
            blur = cv2.blur(frame, (4, 4))

            # 計算目前影格與平均影像的差異值
            diff = cv2.absdiff(frame, avg)

            # 將圖片轉為灰階
            gray = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)

            # 篩選出變動程度大於門檻值的區域
            ret, thresh = cv2.threshold(gray, 25, 255, cv2.THRESH_BINARY)

            # 使用型態轉換函數去除雜訊
            kernel = np.ones((5, 5), np.uint8)
            thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
            thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)

            # 產生等高線
            cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            moving = 0
            for c in cnts:
                moving += cv2.contourArea(c)
                
            moving_li.append(moving/area)
            
#             for c in cnts:
#                 # 忽略太小的區域
#                 if cv2.contourArea(c) < 1000:
#                     continue


#             # 畫出等高線（除錯用）
#             cv2.drawContours(frame, cnts, -1, (0, 255, 255), 2)
#             plt.imshow(frame)
#             plt.show()

            # 更新平均影像
            cv2.accumulateWeighted(blur, avg_float, 0.01)
            avg = cv2.convertScaleAbs(avg_float)

        cap.release()
        cv2.destroyAllWindows()
    
    avg = np.mean(moving_li)
    std = np.std(moving_li)
    print(avg, std)
    
    C_li.append([avg, std])

Running: 47metersdown
0.2451873762435159 0.2496956241860898
Running: adogsjourneymovie
0.33529122549422025 0.3484996000428302
Running: aftermathmovie
0.20402419779112746 0.2597210213431093
Running: aftermovie
0.21242184133975378 0.23119890416151834
Running: alitamovie
0.26672879555742146 0.2690275402601083
Running: angelhasfallen
0.25070263657957015 0.28292585750320864
Running: angrybirdsmovie
0.34156295665012765 0.34148056657907155
Running: annabellemovie
0.09225343125204852 0.1320632262965794
Running: annamovie
0.32312398481337606 0.289832324645573
Running: apollo11movie
0.18732789344498338 0.2364072011887274
Running: arcticmovie
0.2592962563654242 0.31775367317583825
Running: artofracingmovie
0.32618515829154343 0.3450494323652046
Running: avengers
0.26165315823237045 0.26716557785874545
Running: beachbummovie
0.1903243230614347 0.19969096235431438
Running: bernadettefilm
0.19536306055002442 0.26847037362053605
Running: blindedbythelightmovie
0.2191467860304835 0.2509072521252532
Ru

In [66]:
C_arr = np.asarray(C_li)
np.save('./npy/C_arr.npy', C_arr)
len(C_arr)

97

# D_LightingKey

In [67]:
D_li = []
for movie in order: # for 2019
    lk = []
    print('Running:', movie)
    
    for scene in os.listdir(PATH + movie):
        framePath = PATH + movie + '/' + scene + '/frames/'
        frameList = [int(name.split('.')[0]) for name in os.listdir(framePath) if not name.startswith('.')]
        frameList.sort()
        keyframe = framePath + '/' + str(frameList[len(frameList)//2]) + '.jpg'
        img = cv2.imread(keyframe)
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        value = hsv[:,:,2].ravel()
        mean = np.mean(value)
        std = np.std(value)
        lk.append(mean*std)

    avg = np.mean(lk)
    std = np.std(lk)
    print(avg, std)
    
    D_li.append([avg, std])

Running: 47metersdown
3839.2971593633524 3297.384678714118
Running: adogsjourneymovie
3266.0220913929693 2096.407257286327
Running: aftermathmovie
3211.875122987606 2616.73586569727
Running: aftermovie
3176.9102668982273 2106.8836930783136
Running: alitamovie
2843.7807615784172 2350.9973078498624
Running: angelhasfallen
3686.1155810346154 2869.0750183839014
Running: angrybirdsmovie
7726.730432469832 2614.3218269550307
Running: annabellemovie
1384.3650013951321 1816.8625764431472
Running: annamovie
4566.2559678362495 2762.0351083522387
Running: apollo11movie
4122.71844038454 2754.6565366347445
Running: arcticmovie
2704.2718219421777 2309.4024877605807
Running: artofracingmovie
3953.5001117867687 2092.6239713535906
Running: avengers
3102.36333319997 3243.2185308534768
Running: beachbummovie
4613.309763031953 2397.1340624527847
Running: bernadettefilm
4764.443258361573 1989.941345784166
Running: blindedbythelightmovie
5079.046293541132 2795.069152927562
Running: booksmart
3997.88621725594

In [68]:
D_arr = np.asarray(D_li)
np.save('./npy/D_arr.npy', D_arr)
len(D_arr)

97

# Results

In [69]:
# First time succeed!
results = pd.DataFrame({
    'Movie': order,
    'A_avg': A_arr[:,0],
    'A_std': A_arr[:,1],
    'B_avg': B_arr[:,0],
    'B_std': B_arr[:,1],
    'C_avg': C_arr[:,0],
    'C_std': C_arr[:,1],
    'D_avg' : D_arr[:,0],
    'D_std' : D_arr[:,1]
})
results

Unnamed: 0,Movie,A_avg,A_std,B_avg,B_std,C_avg,C_std,D_avg,D_std
0,47metersdown,34.804878,23.842184,1.191448e+07,3.059149e+07,0.245187,0.249696,3839.297159,3297.384679
1,adogsjourneymovie,34.632479,29.109753,2.695759e+06,4.626123e+06,0.335291,0.348500,3266.022091,2096.407257
2,aftermathmovie,35.692308,26.611911,7.428488e+05,1.455065e+06,0.204024,0.259721,3211.875123,2616.735866
3,aftermovie,58.276596,54.904056,5.679151e+06,1.419376e+07,0.212422,0.231199,3176.910267,2106.883693
4,alitamovie,35.784314,29.192272,2.569164e+06,6.429860e+06,0.266729,0.269028,2843.780762,2350.997308
...,...,...,...,...,...,...,...,...,...
92,usmovie,43.010204,52.851833,2.260180e+06,6.481499e+06,0.175488,0.220206,3454.562570,3247.741628
93,whatmenwant,32.086957,19.626320,2.003662e+07,5.276056e+07,0.241500,0.260970,6006.485656,2582.258884
94,wonderparkmovie,27.282443,13.193380,1.571109e+08,4.445462e+08,0.430486,0.331031,5563.594289,2771.415032
95,xmenmovies,41.549296,33.954729,4.791049e+06,1.487502e+07,0.319549,0.279627,2953.689532,2301.564008


In [18]:
# # reload computed values
# A_arr = np.load('./npy/A_arr.npy')
# B_arr = np.load('./npy/B_arr.npy')
# C_arr = np.load('./npy/C_arr.npy')
# D_arr = np.load('./npy/D_arr.npy')

Unnamed: 0,Movie,A_AverageShotLength,B_ColorVariance,C_MotionContent,D_LightingKey
0,47metersdown,34.804878,1.191448e+07,0.245187,3839.297159
1,adogsjourneymovie,34.632479,2.695759e+06,0.335291,3266.022091
2,aftermathmovie,35.692308,7.428488e+05,0.204024,3211.875123
3,aftermovie,58.276596,5.679151e+06,0.212422,3176.910267
4,alitamovie,35.784314,2.569164e+06,0.266729,2843.780762
...,...,...,...,...,...
92,usmovie,43.010204,2.260180e+06,0.175488,3454.562570
93,whatmenwant,32.086957,2.003662e+07,0.241500,6006.485656
94,wonderparkmovie,27.282443,1.571109e+08,0.430486,5563.594289
95,xmenmovies,41.549296,4.791049e+06,0.319549,2953.689532


## Normalize

In [70]:
from sklearn.preprocessing import MinMaxScaler

In [71]:
def normalizing(data):
    scaler = MinMaxScaler()
    scaler.fit(data)
    print('Data range:', scaler.data_range_)
    return scaler.transform(data)

In [72]:
cols = results.columns[1:].tolist()
cols

['A_avg', 'A_std', 'B_avg', 'B_std', 'C_avg', 'C_std', 'D_avg', 'D_std']

In [73]:
for col in cols:
    arr = np.asarray(results[col].tolist()).reshape(-1, 1)
    results[col+'_norm'] = normalizing(arr)

Data range: [109.1367054]
Data range: [168.9261387]
Data range: [3.69651296e+08]
Data range: [7.50688905e+08]
Data range: [0.33823224]
Data range: [0.24930152]
Data range: [6532.97324982]
Data range: [2542.11874874]


In [75]:
results.to_csv('./csv/computed_2019.csv', index = 0)
results

Unnamed: 0,Movie,A_avg,A_std,B_avg,B_std,C_avg,C_std,D_avg,D_std,A_avg_norm,A_std_norm,B_avg_norm,B_std_norm,C_avg_norm,C_std_norm,D_avg_norm,D_std_norm
0,47metersdown,34.804878,23.842184,1.191448e+07,3.059149e+07,0.245187,0.249696,3839.297159,3297.384679,0.071101,0.066649,0.031517,0.040032,0.452157,0.471848,0.379584,0.835915
1,adogsjourneymovie,34.632479,29.109753,2.695759e+06,4.626123e+06,0.335291,0.348500,3266.022091,2096.407257,0.069522,0.097831,0.006578,0.005443,0.718553,0.868171,0.291833,0.363484
2,aftermathmovie,35.692308,26.611911,7.428488e+05,1.455065e+06,0.204024,0.259721,3211.875123,2616.735866,0.079233,0.083045,0.001295,0.001219,0.330456,0.512062,0.283545,0.568167
3,aftermovie,58.276596,54.904056,5.679151e+06,1.419376e+07,0.212422,0.231199,3176.910267,2106.883693,0.286168,0.250527,0.014649,0.018188,0.355284,0.397654,0.278193,0.367605
4,alitamovie,35.784314,29.192272,2.569164e+06,6.429860e+06,0.266729,0.269028,2843.780762,2350.997308,0.080076,0.098320,0.006235,0.007846,0.515845,0.549392,0.227201,0.463632
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,usmovie,43.010204,52.851833,2.260180e+06,6.481499e+06,0.175488,0.220206,3454.562570,3247.741628,0.146285,0.238378,0.005400,0.007915,0.246087,0.353559,0.320693,0.816387
93,whatmenwant,32.086957,19.626320,2.003662e+07,5.276056e+07,0.241500,0.260970,6006.485656,2582.258884,0.046198,0.041692,0.053489,0.069564,0.441256,0.517072,0.711315,0.554604
94,wonderparkmovie,27.282443,13.193380,1.571109e+08,4.445462e+08,0.430486,0.331031,5563.594289,2771.415032,0.002175,0.003610,0.424310,0.591465,1.000000,0.798101,0.643522,0.629013
95,xmenmovies,41.549296,33.954729,4.791049e+06,1.487502e+07,0.319549,0.279627,2953.689532,2301.564008,0.132899,0.126512,0.012246,0.019096,0.672010,0.591908,0.244025,0.444187
