In [1]:
# Basic import
import os
import sys
import json
import shutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import cv2

In [2]:
# Helper function
def writeProgress(msg, count, total):
    sys.stdout.write(msg + "{:.2%}\r".format(count/total))
    sys.stdout.flush()
    
def newPath(path):
    if not os.path.isdir(path):
        os.mkdir(path)

def read_json(src_path):
    with open(src_path, 'r') as json_file:
        data = json.load(json_file)
    return data

def write_json(data,dst_path):
    with open(dst_path, 'w') as outfile:
        json.dump(data, outfile)

## 2019 movies

In [None]:
PATH = './SplitTrailers_crop/'
trailers = os.listdir(PATH)
trailers.sort()
print(len(trailers), trailers[:10])

## 2018 movies

In [6]:
PATH = './2018SplitTrailers_crop/'
df_onehot = pd.read_csv('./csv/filename2genreMat_2018.csv')
df_onehot

Unnamed: 0,id,Action,Adventure,Animation,Biography,Comedy,Crime,Documentary,Drama,Family,...,History,Horror,Music,Mystery,Romance,Sci-Fi,Sport,Thriller,War,Western
0,tt0328810,0,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,tt10005184,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,tt10017502,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
3,tt10043732,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,tt10048096,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1400,tt9866700,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,0
1401,tt9879080,0,0,0,0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0
1402,tt9891764,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1403,tt9904014,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [7]:
trailers = df_onehot['id'].tolist()
trailers[:10]

['tt0328810',
 'tt10005184',
 'tt10017502',
 'tt10043732',
 'tt10048096',
 'tt10059624',
 'tt10077620',
 'tt10116528',
 'tt10160782',
 'tt10178206']

# A_AverageShotLength

In [8]:
A_li = []
for movie in trailers:
    frameCount = []
    print('Running:', movie)
    
    for scene in os.listdir(PATH + movie):
        framePath = PATH + movie + '/' +scene + '/frames/'
        frameCount.append(len(os.listdir(framePath)))
    
#     print(frameCount)
    avg = np.mean(frameCount)
    std = np.std(frameCount)
    print(avg, std)
    
    A_li.append([avg, std])

Running: tt0328810
74.32432432432432 77.63255588122531
Running: tt10005184
128.11111111111111 168.8131409079574
Running: tt10017502
31.970588235294116 14.59751246761634
Running: tt10043732
41.670731707317074 28.35061614938854
Running: tt10048096
28.48 27.72885861336525
Running: tt10059624
69.64285714285714 93.34039635568692
Running: tt10077620
50.43589743589744 49.143325118288324
Running: tt10116528
132.35714285714286 99.4532245709058
Running: tt10160782
26.629032258064516 14.973346072667347
Running: tt10178206
100.26666666666667 76.16280807148154
Running: tt10182822
32.18260869565217 19.331246438005
Running: tt10186696
28.176470588235293 12.564145296054834
Running: tt10212512
30.671641791044777 20.38757893709948
Running: tt10233502
85.11111111111111 101.45710034234442
Running: tt10298956
69.42857142857143 69.71033361163104
Running: tt10322976
56.353846153846156 50.27506822677718
Running: tt10344086
75.0 52.40229002629561
Running: tt10379122
109.04761904761905 67.17743115240287
Running

In [9]:
A_arr = np.asarray(A_li)
np.save('./npy/2018A_arr.npy', A_arr)
len(A_arr)

1405

# B_ColorVariance
* https://docs.opencv.org/3.4/de/d25/imgproc_color_conversions.html#color_convert_rgb_luv
* https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.det.html

In [10]:
B_li = []
for movie in trailers: #order:
    rho = []
    print('Running:', movie)
    
    for scene in os.listdir(PATH + movie):
        framePath = PATH + movie + '/' + scene + '/frames/'
        frameList = [int(name.split('.')[0]) for name in os.listdir(framePath) if not name.startswith('.')]
        frameList.sort()
        keyframe = framePath + '/' + str(frameList[len(frameList)//2]) + '.jpg'
        img = cv2.imread(keyframe)
        luv = cv2.cvtColor(img, cv2.COLOR_BGR2Luv)
        l = luv[:,:,0].ravel()
        u = luv[:,:,1].ravel()
        v = luv[:,:,2].ravel()
        x = np.asarray((l, u, v))
        cov = np.cov(x)
        det = np.linalg.det(cov)
        rho.append(det)
    
    avg = np.mean(rho)
    std = np.std(rho)
    print(avg, std)
    
    B_li.append([avg, std])

Running: tt0328810
7429596.013007359 6754839.757017302
Running: tt10005184
1056263.4396525347 2578889.3730854634
Running: tt10017502
3220427.9363187305 6070315.314554977
Running: tt10043732
5971662.823106107 17101600.83285153
Running: tt10048096
11678228.086752083 22263503.22996316
Running: tt10059624
31720023.7281803 40676136.5176146
Running: tt10077620
30357812.959164944 64114598.02058001
Running: tt10116528
2780284.691670873 4707617.573627054
Running: tt10160782
2243784.000355879 3978417.2039359314
Running: tt10178206
157144.88512865937 580153.1773766062
Running: tt10182822
37925569.8660351 279980446.10781837
Running: tt10186696
10631596.603357196 19801923.914988946
Running: tt10212512
4499704.621307288 22000003.46625037
Running: tt10233502
888790.918606402 1913762.1807559528
Running: tt10298956
1269934.320813021 2525434.571432871
Running: tt10322976
963044.238231098 2876127.3670830326
Running: tt10344086
11029683.94686463 14906021.95828414
Running: tt10379122
13511662.369765293 268

In [11]:
B_arr = np.asarray(B_li)
np.save('./npy/2018B_arr.npy', B_arr)
len(B_arr)

1405

# C_MotionContent
* https://blog.gtwang.org/programming/opencv-motion-detection-and-tracking-tutorial/

In [None]:
C_li = []
for movie in trailers: # [900:]:
    moving_li = []
    print('Running:', movie)
    
    for scene in os.listdir(PATH + movie):
    
        video  = PATH + movie + '/' + scene + '/clip.avi'

        cap = cv2.VideoCapture(video)

        w = int(cap.get(3))
        h = int(cap.get(4))

        # 計算畫面面積
        area = w * h

        # 初始化平均影像
        ret, frame = cap.read()

        avg = cv2.blur(frame,(4, 4))
        avg_float = np.float32(avg)
#         plt.imshow(frame)
#         plt.show()

        while(cap.isOpened()):
            # 讀取一幅影格
            ret, frame = cap.read()

            # 若讀取至影片結尾，則跳出
            if ret == False:
                break

            # 模糊處理
            blur = cv2.blur(frame, (4, 4))

            # 計算目前影格與平均影像的差異值
            diff = cv2.absdiff(frame, avg)

            # 將圖片轉為灰階
            gray = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)

            # 篩選出變動程度大於門檻值的區域
            ret, thresh = cv2.threshold(gray, 25, 255, cv2.THRESH_BINARY)

            # 使用型態轉換函數去除雜訊
            kernel = np.ones((5, 5), np.uint8)
            thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
            thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)

            # 產生等高線
            cnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            moving = 0
            for c in cnts:
                moving += cv2.contourArea(c)
                
            moving_li.append(moving/area)
            
#             for c in cnts:
#                 # 忽略太小的區域
#                 if cv2.contourArea(c) < 1000:
#                     continue


#             # 畫出等高線（除錯用）
#             cv2.drawContours(frame, cnts, -1, (0, 255, 255), 2)
#             plt.imshow(frame)
#             plt.show()

            # 更新平均影像
            cv2.accumulateWeighted(blur, avg_float, 0.01)
            avg = cv2.convertScaleAbs(avg_float)

        cap.release()
        cv2.destroyAllWindows()
    
    avg = np.mean(moving_li)
    std = np.std(moving_li)
    print(avg, std)
    
    C_li.append([avg, std])

Running: tt0328810
0.3364244009408313 0.33594326869652436
Running: tt10005184
0.09973185055368892 0.17080273013066433
Running: tt10017502
0.2683491626613395 0.3252183696876087
Running: tt10043732
0.31592309889581943 0.33043643883629714
Running: tt10048096
0.2964252601856748 0.31429400334713725
Running: tt10059624
0.29005881302209796 0.28828806803563617
Running: tt10077620
0.3501840260471298 0.2977930994863327
Running: tt10116528
0.20414772240195841 0.19442820906642425
Running: tt10160782
0.2236024705030461 0.24535867921597865
Running: tt10178206
0.05641373666065714 0.07546037068076807
Running: tt10182822
0.18020706533261893 0.28203530691125955
Running: tt10186696
0.2367593641451434 0.26955309181542236
Running: tt10212512
0.19686915028582955 0.24976609876863864
Running: tt10233502
0.09387688987706178 0.0949235008963231
Running: tt10298956
0.17153119070627826 0.27801983048696993
Running: tt10322976
0.2728566843319993 0.3057389990269048
Running: tt10344086
0.2267224927564283 0.28313909865

In [None]:
C_arr = np.asarray(C_li)
np.save('./npy/2018C_arr.npy', C_arr)
len(C_arr)

# D_LightingKey

In [None]:
D_li = []
for movie in trailers:
    lk = []
    print('Running:', movie)
    
    for scene in os.listdir(PATH + movie):
        framePath = PATH + movie + '/' + scene + '/frames/'
        frameList = [int(name.split('.')[0]) for name in os.listdir(framePath) if not name.startswith('.')]
        frameList.sort()
        keyframe = framePath + '/' + str(frameList[len(frameList)//2]) + '.jpg'
        img = cv2.imread(keyframe)
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        value = hsv[:,:,2].ravel()
        mean = np.mean(value)
        std = np.std(value)
        lk.append(mean*std)

    avg = np.mean(lk)
    std = np.std(lk)
    print(avg, std)
    
    D_li.append([avg, std])

In [None]:
D_arr = np.asarray(D_li)
np.save('./npy/2018D_arr.npy', D_arr)
len(D_arr)

# Results

In [None]:
# # 合併 2018 C_arr
# C_arr = np.append(np.load('./npy/2018C_arr_300.npy'), np.load('./npy/2018C_arr_600.npy'), axis=0)
# C_arr = np.append(C_arr, np.load('./npy/2018C_arr_900.npy'), axis=0)
# C_arr = np.append(C_arr, np.load('./npy/2018C_arr_900-end.npy'), axis=0)
# C_arr.shape

In [None]:
# reload computed values
A_arr = np.load('./npy/2018A_arr.npy')
B_arr = np.load('./npy/2018B_arr.npy')
C_arr = np.load('./npy/2018C_arr.npy')
D_arr = np.load('./npy/2018D_arr.npy')

In [None]:
print(A_arr.shape, B_arr.shape, C_arr.shape, D_arr.shape)

In [None]:
# First time succeed!
results = pd.DataFrame({
    'Movie': trailers,
    'A_avg': A_arr[:,0],
    'A_std': A_arr[:,1],
    'B_avg': B_arr[:,0],
    'B_std': B_arr[:,1],
    'C_avg': C_arr[:,0],
    'C_std': C_arr[:,1],
    'D_avg' : D_arr[:,0],
    'D_std' : D_arr[:,1]
})
results

## Normalize

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
def normalizing(data):
    scaler = MinMaxScaler()
    scaler.fit(data)
    print('Data range:', scaler.data_range_)
    return scaler.transform(data)

In [None]:
cols = results.columns[1:].tolist()
cols

In [None]:
for col in cols:
    arr = np.asarray(results[col].tolist()).reshape(-1, 1)
    results[col+'_norm'] = normalizing(arr)

In [None]:
results.to_csv('./csv/computed_2018.csv', index = 0)
results