In [1]:
import os
from pathlib import Path
#import tempfile
#import cv2
import numpy as np
#from colorthief import ColorThief
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.decomposition import MiniBatchSparsePCA
from sklearn.cluster import KMeans
#from skimage.filters.rank import entropy
#from skimage.morphology import disk*
#from PIL import Image, ImageStat
#from skimage import img_as_float
import pandas as pd
#from multiprocessing import Process
#import matplotlib.image as img
import matplotlib.pyplot as plt
import statistics
from scipy import spatial
#from numba import jit

# Importing feature data

In [None]:
# Location of the movie feature data, relative to the notebook's working directory.
feature_data_path = Path("../datasets/movie_features/")

# Empty placeholder frame; nothing is loaded into it in this cell.
movie_df = pd.DataFrame()

# Helper functions

In [None]:
# Arithmetic mean of a sized collection of numbers
def average(l):
    """Return the sum of ``l`` divided by its length.

    ``l`` must support both ``sum()`` and ``len()``; an empty collection
    raises ``ZeroDivisionError`` because the length is used as the divisor.
    """
    total = sum(l)
    count = len(l)
    return total / count

# Makes a list of unique values from a list
def unique(list1):
    """Return the distinct values of ``list1`` in order of first appearance.

    Two values count as duplicates when they compare equal (``==``), exactly
    as a list membership test would decide.

    Hashable values are tracked in a set, so the common case runs in linear
    time instead of rescanning the result list for every element. Unhashable
    values (lists, dicts, sets, ...) cannot go into a set and fall back to a
    linear scan, which keeps them supported.

    Parameters
    ----------
    list1 : iterable
        Values to de-duplicate.

    Returns
    -------
    list
        A new list holding the first occurrence of every distinct value.
    """
    unique_list = []
    seen_hashable = set()
    # Unhashable values kept so far; hashable values must still be compared
    # against these because e.g. frozenset({1}) == {1}.
    seen_unhashable = []

    for x in list1:
        try:
            hash(x)
        except TypeError:
            # Unhashable: compare against everything kept so far.
            if x not in unique_list:
                unique_list.append(x)
                seen_unhashable.append(x)
            continue

        if x in seen_hashable:
            continue
        if seen_unhashable and x in seen_unhashable:
            continue
        seen_hashable.add(x)
        unique_list.append(x)

    return unique_list

# Condensing all features into a single row per movie

In [None]:
# Polynomial regression on the movies
# Each fitted model is stored as a numpy.poly1d object (not as a string)
def polynomial_regression_alt(df, id):
    """Fit 1st- and 2nd-degree polynomials of each feature against the frame number.

    Only the rows whose ``movie_id`` equals ``id`` are used. The caller's
    ``df`` is never modified: the numeric conversion of ``frame_nr`` is done
    on a filtered copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``movie_id``, ``frame_nr`` (anything
        ``pd.to_numeric`` can convert) and ``saturation``, ``brightness``,
        ``entropy``, ``contrast``, ``colorfulness``.
    id
        Value compared against ``df['movie_id']`` to select the rows.

    Returns
    -------
    pandas.DataFrame
        One row with ``movie_id`` (``str(id)``) followed by the columns
        ``<feature>_model_1st`` for every feature and then
        ``<feature>_model_2nd`` for every feature; the values are the
        ``numpy.poly1d`` models built from ``numpy.polyfit``.
    """
    # Sharpness is not fitted (it was already disabled in this function).
    features = ('saturation', 'brightness', 'entropy', 'contrast', 'colorfulness')
    degrees = ((1, '1st'), (2, '2nd'))

    # Filter first, then convert/sort on the copy returned by ``assign`` so
    # the input frame keeps its original ``frame_nr`` column.
    movie_rows = (
        df.loc[df['movie_id'] == id]
        .assign(frame_nr=lambda rows: pd.to_numeric(rows['frame_nr'], downcast='integer'))
        .sort_values(by=['frame_nr'], axis=0, ascending=True)
    )
    frame_nr = movie_rows['frame_nr']

    polynomial_dict = {'movie_id': str(id)}
    # Degree is the outer loop so the column order stays: all 1st-degree
    # models, then all 2nd-degree models.
    for degree, suffix in degrees:
        for feature in features:
            coefficients = np.polyfit(frame_nr, movie_rows[feature], degree)
            polynomial_dict[feature + '_model_' + suffix] = np.poly1d(coefficients)

    # DataFrame.append was removed in pandas 2.0; build the single row directly.
    return pd.DataFrame([polynomial_dict])

In [None]:
# New dataframe condensing each movie into a single row
def movie_matrix(df, id):
    """Summarise one movie's frame-level features into a single-row DataFrame.

    For every feature (brightness, saturation, entropy, contrast,
    colorfulness) three statistics are computed over the rows whose
    ``movie_id`` equals ``id``:

    * ``avg_<feature>``   - arithmetic mean (via ``average``)
    * ``stdev_<feature>`` - sample standard deviation (``statistics.stdev``)
    * ``mean_<feature>``  - the MEDIAN (``statistics.median``)

    NOTE: despite their name, the ``mean_*`` columns hold medians. The column
    names are kept unchanged so existing consumers of this frame keep working.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``movie_id`` and the five feature columns.
    id
        Value compared against ``df['movie_id']`` to select the rows.

    Returns
    -------
    pandas.DataFrame
        One row: ``movie_id`` (``str(id)``), then the ``avg_*``, ``stdev_*``
        and ``mean_*`` columns, each group in the feature order above.

    Raises
    ------
    ZeroDivisionError
        From ``average`` when no row matches ``id``.
    statistics.StatisticsError
        From ``statistics.stdev`` when fewer than two rows match ``id``.
    """
    # Sharpness is not summarised (it was already disabled in this function).
    features = ('brightness', 'saturation', 'entropy', 'contrast', 'colorfulness')
    summaries = (
        ('avg', average),
        ('stdev', statistics.stdev),
        ('mean', statistics.median),  # historical name; the value is a median
    )

    # Select the movie's rows once instead of rebuilding the mask per statistic.
    movie_rows = df.loc[df['movie_id'] == id]
    values_by_feature = {feature: movie_rows[feature].tolist() for feature in features}

    matrix_dict = {'movie_id': str(id)}
    # Statistic is the outer loop so the column order stays avg_*, stdev_*, mean_*.
    for prefix, summarise in summaries:
        for feature in features:
            matrix_dict[prefix + '_' + feature] = summarise(values_by_feature[feature])

    # DataFrame.append was removed in pandas 2.0; build the single row directly.
    return pd.DataFrame([matrix_dict])