In [1]:
# Import the libraries used throughout the notebook.
# Standard library
import os
from io import BytesIO

# Third-party
import cv2
# Image analysis: dlib must first be installed in the ada environment
# (1: conda activate ada, 2: install dlib).
import dlib
# Face recognition library: it must first be installed in the ada environment
# (1: conda activate ada, 2: install face_recognition).
import face_recognition
import numpy as np
import pandas as pd
import requests
from PIL import Image
from PIL import Image, ImageDraw

In [2]:
# Load our additional dataset (actors_with_tropes.csv) and keep a single row
# per actor name; read_csv already returns a DataFrame.
actors = pd.read_csv('data/our_datasets/actors_with_tropes.csv').drop_duplicates(subset='ActorName')

In [3]:
# TMDB API key. Credentials must not be stored in the notebook, so the key is
# read from the TMDB_API_KEY environment variable (set it before starting
# Jupyter, e.g. `export TMDB_API_KEY=...`). The personal key that used to be
# hardcoded here has been exposed in the file history and should be revoked.
api_key = os.environ.get('TMDB_API_KEY', '')
if not api_key:
    # Warn directly in the cell output: get_image turns every rejected request
    # into its "no image" placeholder, so a missing key would otherwise go unnoticed.
    print('TMDB_API_KEY is not set: TMDB requests will fail until it is defined.')

In [4]:
def get_image(api_key, actor, timeout=10):
    """Fetch the first profile image of an actor from The Movie Database (TMDB).

    The actor is looked up by name with the person-search endpoint; the first
    search hit is taken as the match, and the first entry of that person's
    ``profiles`` image list is used.

    Args:
        api_key: TMDB API key.
        actor: Name of the actor whose image we want.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A tuple ``(image_url, images_height, images_width)``. When the lookup
        fails at any stage (request error or timeout, non-200 status, invalid
        JSON, no search result, no profile image) the placeholder
        ``('-', 'Nan', 'Nan')`` is returned instead.
    """
    # Single placeholder shared by every failure path.
    no_image = ('-', 'Nan', 'Nan')
    api_base = 'https://api.themoviedb.org/3'

    try:
        # Step 1: resolve the actor name to a TMDB person id.
        search = requests.get(
            f'{api_base}/search/person',
            params={'api_key': api_key, 'query': actor},
            timeout=timeout,
        )
        if search.status_code != 200:
            return no_image
        search_json = search.json()
        if 'results' not in search_json or not search_json['results']:
            return no_image
        person_id = search_json['results'][0]['id']

        # Step 2: list the images attached to that person.
        images = requests.get(
            f'{api_base}/person/{person_id}/images',
            params={'api_key': api_key},
            timeout=timeout,
        )
        if images.status_code != 200:
            return no_image
        images_json = images.json()
    except (requests.RequestException, ValueError):
        # Network failure/timeout, or a response body that is not valid JSON:
        # treat it like any other failed lookup so one bad request does not
        # abort a loop over many actors.
        return no_image

    if 'profiles' not in images_json or not images_json['profiles']:
        return no_image

    # Step 3: build the full-size URL of the first profile picture.
    profile = images_json['profiles'][0]
    image_url = f"https://image.tmdb.org/t/p/original{profile['file_path']}"
    return image_url, profile['height'], profile['width']

In [5]:
# Fetch the TMDB profile image of every actor. get_image performs HTTP
# requests, so it is called exactly once per actor and its tuple is unpacked,
# rather than repeating the same lookup for each of the three columns.
actor_images = {'Actor': [], 'Image URL': [], 'Image height': [], 'Image width': []}
for actor in actors['ActorName']:
    image_url, image_height, image_width = get_image(api_key, actor)
    actor_images['Actor'].append(actor)
    actor_images['Image URL'].append(image_url)
    actor_images['Image height'].append(image_height)
    actor_images['Image width'].append(image_width)

actor_images_df = pd.DataFrame(actor_images)
# Save to the path the following cell reads the table back from.
actor_images_df.to_csv('data/our_datasets/actor_images.csv', index=False)



In [None]:
# Load the actor image table from "actor_images.csv" and preview its first rows.
actor_images = pd.read_csv('data/our_datasets/actor_images.csv')
actor_images.head()

In [None]:
# Flag every cell holding the '-' placeholder (i.e. no image URL) and print the
# rows that contain at least one such cell.
missing = actor_images.eq('-')
missing_images = actor_images.loc[missing.any(axis='columns')]
print(missing_images)

In [None]:
# Keep only the rows of actor_images that contain no '-' placeholder,
# i.e. the actors for which an image URL is available.
actor_cleaned = actor_images.loc[~missing.any(axis='columns')]

In [None]:
def landmarks(row, timeout=10):
    """Download an actor's image and extract its facial landmarks and encodings.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) providing the keys
            ``'Actor'`` and ``'Image URL'``.
        timeout: Seconds to wait for the image download before giving up.

    Returns:
        pd.Series with the entries ``'Actor'``, ``'Landmarks'`` and
        ``'Encodings'``. ``'Landmarks'`` is the result of
        ``face_recognition.face_landmarks`` and ``'Encodings'`` the result of
        ``face_recognition.face_encodings`` on the downloaded image.
        ``'Encodings'`` is NaN when the landmark result is empty, and both
        entries are NaN when the image cannot be downloaded, decoded or
        processed.
    """
    actor_name = row['Actor']
    image_url = row['Image URL']

    try:
        # Download the image; fail early on HTTP errors instead of trying to
        # decode an error page as an image.
        response = requests.get(image_url, timeout=timeout)
        response.raise_for_status()

        # Let PIL normalise every image mode to RGB. Grayscale images get their
        # channel replicated (as before), and RGBA / palette / CMYK images,
        # which used to be rejected or misread, are now handled as well.
        with Image.open(BytesIO(response.content)) as pil_image:
            img_array_rgb = np.asarray(pil_image.convert('RGB'))

        # Extract facial landmarks
        face_landmarks_list = face_recognition.face_landmarks(img_array_rgb)

        if face_landmarks_list:
            # Extract facial encodings only when landmarks were found
            face_encodings_list = face_recognition.face_encodings(img_array_rgb)
        else:
            face_encodings_list = np.nan

    except Exception:
        # Deliberate best-effort: a single broken URL or undecodable image must
        # not abort the whole DataFrame.apply run, so the row is marked missing.
        face_landmarks_list = np.nan
        face_encodings_list = np.nan

    return pd.Series({'Actor': actor_name, 'Landmarks': face_landmarks_list, 'Encodings': face_encodings_list})


    
# Run landmarks() on every row of actor_cleaned (one call per actor) and save
# the resulting table of landmarks and encodings.
face_encodings2 = actor_cleaned.apply(landmarks, axis=1)
face_encodings2.to_csv('data/our_datasets/actor_features.csv')