In [None]:
# Purpose of this code: given a movie list, extract information about each movie from two websites using Selenium.
# Store the info in tuples, one tuple per movie. Example:
    # [(7.7, 77,  "Richard Donner", ["Chris Columbus", "Steven Spielberg"], "Los Goonies son un grupo de amigos que viven en Goon Docks, Astoria, pero sus casas han sido compradas y van a ser demolidas. Sin embargo, vivirán su última aventura en busca de un tesoro que pueda salvar el barrio.", "Aventura", "1h 54min", "Los Goonies"),  ...]

# info to extract: 
    # Puntuación de IMDB (en caso de que la tenga).
    # Puntuación de Rotten Tomatoes (Tomatometer).
    # Dirección (director/a o directore/as de cada película).
    # Guionistas (de cada película).
    # Argumento.
    # Duración (en minutos).
    # Nombre de la película

# websites:
    # https://www.imdb.com/
    # https://www.rottentomatoes.com/

# Movies to look up, one positional tuple per movie.
# The scraping loop reads index 1 as the movie title and index 4 as the IMDb id.
# Indices 2 and 3 look like release year and release month (month may be None)
# -- TODO confirm against the source of this list.
lista_general = [
    ("Movie", "Multiverso", 2024, 11, "tt10225380"),
    ("Movie", "Detour 95", 2024, 10, "tt10844184"),
    ("Movie", "The Legend of Johnny Jones", 2024, 10, "tt10928904"),
    ("Movie", "Slasher House 3", 2024, 3, "tt11078340"),
    ("Movie", "Clank and the Golden Scar", 2024, 10, "tt11374514"),
    ("Movie", "Furiosa: A Mad Max Saga", 2024, 5, "tt12037194"),
    ("Movie", "Danya", 2024, None, "tt12529250"),
    ("Movie", "Adrienne", 2024, None, "tt13143064"),
    ("Movie", "Hitpig", 2024, None, "tt13176330"),
    ("Movie", "Mnemonix", 2024, 12, "tt13590374"),
]

# import libraries to pause execution
# -----------------------------------------------------------------------
from time import sleep  # Sleep is used to pause the execution of the program for a number of seconds

# import libraries for data processing
# -----------------------------------------------------------------------
import pandas as pd

# Import libraries for web browser automation with Selenium
# -----------------------------------------------------------------------
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException # errors raised by failed browser interactions
from selenium.webdriver.common.keys import Keys # to simulate keyboard events in Selenium
from selenium.webdriver.support.ui import Select # to interact with <select> elements on web pages
from selenium.webdriver.support.ui import WebDriverWait # to poll until a condition holds instead of sleeping blindly
from webdriver_manager.chrome import ChromeDriverManager # ChromeDriverManager manages the installation of the Chrome driver

# display settings
# -----------------------------------------------------------------------
# None lifts the column limit, so every column of a DataFrame is displayed.
pd.options.display.max_columns = None



In [10]:
# Start a Chrome session, load the IMDb home page and dismiss the cookie banner.
# `driver`, `url` and `nap_time` are left at notebook scope for the cells below.
# Each progress message is printed before the action it announces, so the log
# shows which step was running if that step fails.

# open browser
print("\nOpening Chrome 🧭...")
driver = webdriver.Chrome()

# maximize window
print("\nMaximizing window 🪟...")
driver.maximize_window()

# open website
url = "https://www.imdb.com/"
print(f"\nNavigating to ⛵ {url}...")
driver.get(url)

# pause to let the website (and its cookie banner) finish rendering
nap_time = 3
print(f"\nTaking a quick nap 😴... Only {nap_time} seconds though!")
sleep(nap_time)

# accept cookies
# The selector is built from generated class names and the banner is not
# guaranteed to be shown, so a missing button is reported instead of aborting
# the whole cell.
cookie_button_selector = "#__next > div > div > div.sc-jrcTuL.bPmWiM > div > button.icb-btn.sc-bcXHqe.sc-dkrFOg.sc-iBYQkv.dcvrLS.ddtuHe.dRCGjd"
try:
    driver.find_element("css selector", cookie_button_selector).click()
    print("\nAccepting cookies 🍪...")
except NoSuchElementException:
    print("\nNo cookie banner found, moving on 🍪...")

# Look up every movie of `lista_general` on IMDb: type its IMDb id into the
# site's search bar and submit it. Data extraction is not implemented yet
# (see the TODO inside the loop).
search_timeout = 10  # seconds to wait for the search box and for the search to navigate

for i, movie in enumerate(lista_general):
    movie_name = movie[1]
    movie_id = movie[4]
    print(f"\n<<{i}>>\nSearching for movie 🔍 '{movie_name}', id '{movie_id}'...")
    try:
        url_before_search = driver.current_url
        # Wait for the search box instead of assuming the previous page load is done.
        search_box = WebDriverWait(driver, search_timeout).until(
            lambda d: d.find_element("css selector", "#suggestion-search")
        )
        # Drop any text left in the box so ids are never concatenated.
        search_box.clear()
        search_box.send_keys(movie_id, Keys.ENTER)
        # Only go on once the browser has actually navigated away.
        # NOTE: this relies on consecutive ids being different; searching the same
        # id twice in a row would not change the URL and would time out here.
        WebDriverWait(driver, search_timeout).until(
            lambda d: d.current_url != url_before_search
        )
    except WebDriverException as error:
        # Selenium failures only (missing element, timeout, stale page...);
        # KeyboardInterrupt and programming errors are no longer swallowed.
        print(f"Couldn't find movie '{movie_name}', id '{movie_id}' ❌")
        print(f"Reason: {type(error).__name__}")
        driver.back()
        continue

    # NOTE: reaching this point means the search was submitted and the page
    # changed; it does not verify that the landing page is the movie's page.
    print(f"'{movie_name}' found! 🎉")
    print("\nExtracting data👽...")
    # TODO: find the elements in the page, extract the text and save the info in lists
    # rate css selector: #__next > main > div > section.ipc-page-background.ipc-page-background--base.sc-c41b9732-0.NeSef > section > div:nth-child(5) > section > section > div.sc-491663c0-4.yjUiO > div.sc-491663c0-6.lnlBxO > div.sc-491663c0-11.cvvyMK > div.sc-3a4309f8-0.bjXIAP.sc-1f50b7c-5.dwZnUO > div > div:nth-child(1) > a > span > div > div.sc-eb51e184-0.kFvAju > div.sc-eb51e184-2.czkfBq > span.sc-eb51e184-1.cxhhrI

print("\nLIST COMPLETED! ✅")

#create tuple




Opening Chrome 🧭...

Maximizing window 🪟...

Navigating to ⛵ https://www.imdb.com/...

Taking a quick nap 😴... Only 3 seconds though!

Accepting cookies 🍪...

<<0>>
Searching for movie 🔍 'Multiverso', id 'tt10225380'...
'Multiverso' found! 🎉

Extracting data👽...

<<1>>
Searching for movie 🔍 'Detour 95', id 'tt10844184'...
'Detour 95' found! 🎉

Extracting data👽...

<<2>>
Searching for movie 🔍 'The Legend of Johnny Jones', id 'tt10928904'...
'The Legend of Johnny Jones' found! 🎉

Extracting data👽...

<<3>>
Searching for movie 🔍 'Slasher House 3', id 'tt11078340'...
'Slasher House 3' found! 🎉

Extracting data👽...

<<4>>
Searching for movie 🔍 'Clank and the Golden Scar', id 'tt11374514'...
'Clank and the Golden Scar' found! 🎉

Extracting data👽...

<<5>>
Searching for movie 🔍 'Furiosa: A Mad Max Saga', id 'tt12037194'...
'Furiosa: A Mad Max Saga' found! 🎉

Extracting data👽...

<<6>>
Searching for movie 🔍 'Danya', id 'tt12529250'...
'Danya' found! 🎉

Extracting data👽...

<<7>>
Searching for