In [34]:
import cv2
import numpy as np
from sklearn.cluster import KMeans
import os
import pandas as pd
import webcolors

def get_main_colors(image, k=3):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.reshape(image.shape[0] * image.shape[1], 3)
    
    kmeans = KMeans(n_clusters=k)
    kmeans.fit(image)
    
    return kmeans.cluster_centers_

def get_brightness(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    r, g, b = np.split(image, 3, axis=-1)
    brightness = 0.299 * r + 0.587 * g + 0.114 * b
    return np.mean(brightness)

def analyze_image(image_path):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Unable to read image '{image_path}'")
        return None, None, None, None
    main_colors = get_main_colors(image)
    color1 = webcolors.rgb_to_hex(main_colors[0].astype(int))
    color2 = webcolors.rgb_to_hex(main_colors[1].astype(int))
    color3 = webcolors.rgb_to_hex(main_colors[2].astype(int))
    brightness = get_brightness(image)
    return color1, color2, color3, brightness

folder_path = 'ur22171966_downloaded_images'
image_files = os.listdir(folder_path)

image_names = []
color1_list = []
color2_list = []
color3_list = []
brightness_list = []


for image_file in image_files:
    image_path = os.path.join(folder_path, image_file)
    color1, color2, color3, brightness = analyze_image(image_path)
    
    if color1 is not None and brightness is not None:
        #print(f"Image: {image_file}")
        #print(f"Main colors: {main_colors}")
        #print(f"Brightness: {brightness}\n")
        image_names.append(image_file)
        color1_list.append(color1)
        color2_list.append(color2)
        color3_list.append(color3)
        brightness_list.append(brightness)
    else:
        print(f"Skipping '{image_file}'\n")

Error: Unable to read image 'ur22171966_downloaded_images\ur22171966_Alien³.jpg'
Skipping 'ur22171966_Alien³.jpg'

Error: Unable to read image 'ur22171966_downloaded_images\ur22171966_Amélie.jpg'
Skipping 'ur22171966_Amélie.jpg'

Error: Unable to read image 'ur22171966_downloaded_images\ur22171966_Carnivàle.jpg'
Skipping 'ur22171966_Carnivàle.jpg'

Error: Unable to read image 'ur22171966_downloaded_images\ur22171966_Les Misérables.jpg'
Skipping 'ur22171966_Les Misérables.jpg'

Error: Unable to read image 'ur22171966_downloaded_images\ur22171966_Léon The Professional.jpg'
Skipping 'ur22171966_Léon The Professional.jpg'

Error: Unable to read image 'ur22171966_downloaded_images\ur22171966_Rec².jpg'
Skipping 'ur22171966_Rec².jpg'



In [35]:
df = pd.DataFrame({
    "Img_File_Name": image_names,
    "Color1": color1_list,
    "Color2": color2_list,
    "Color3": color3_list,
    "Brightness": brightness_list
})

df.to_csv("image_analysis.csv", index=False, encoding="utf-8-sig")

In [37]:
#Merging colors and brightness to the user file

# Read in the csv files
df_ratings = pd.read_csv('ur22171966_ratings.csv')
df_colors = pd.read_csv('image_analysis.csv')

# Merge the two dataframes on the common column
merged_df = pd.merge(df_ratings, df_colors, on='Img_File_Name')

colors_columns = ['Color1', 'Color2', 'Color3', 'Brightness']
ratings_columns = ["UserID",
     "Title",
     "Img_Path",
     "Img_File_Name",
     "Year",
     "Description",
     "Directors",
     "Stars",
     "Viewer_Advisory",
     "Duration",
     "Genre",
     "Votes",
     "Movie_Rating",
     "User_Rating"]

final_merged = merged_df[ratings_columns + colors_columns]

# Select the columns you want to keep and save to a new file
#merged_df = merged_df[['Img_File_Name', 'Color1', 'Color2', 'Color3', 'Brightness']]
final_merged.to_csv('ur22171966_img_ratings.csv', index=False, encoding="utf-8-sig")