# Importing the dependencies

In [2]:
import pandas as pd
import sqlite3

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from bs4 import BeautifulSoup 

import google.generativeai as genai

import os
import time
from dotenv import load_dotenv


# Load environment variables via python-dotenv, then read the API key from the
# process environment (None when the variable is unset) and pass it to the
# google.generativeai client.
load_dotenv()
GOOGLE_API_KEY_MOVIE_RECOMMENDER = os.environ.get("GOOGLE_API_KEY_MOVIE_RECOMMENDER")
genai.configure(api_key=GOOGLE_API_KEY_MOVIE_RECOMMENDER)

# Loading the dataset

In [3]:
# Relative path to the SQLite file that holds both movie tables.
database = "../SQL_Database/Movies.db"

# Use a single connection for both reads and close it explicitly afterwards;
# opening a fresh connection inline for each query left both of them unclosed.
connection = sqlite3.connect(database)
try:
    database_key_based = pd.read_sql_query("SELECT m.* FROM Movies_Key_Based AS m", connection)
    database_query_based = pd.read_sql_query("SELECT m.* FROM Movies_Database AS m", connection)
finally:
    connection.close()

In [4]:
# Preview the first rows of the table loaded from Movies_Key_Based.
database_key_based.head()

Unnamed: 0,id,title,keywords,review_summary,tags,embeddings
0,283995,Guardians of the Galaxy Vol. 2,"['demi god', 'alien creature', 'sarcasm', 'cra...",Guardians of the Galaxy Vol. 2 elicited a gene...,adventure action sci-fi chris pratt zoe saldañ...,"[-0.010018928121777062, -0.042597577593544884,..."
1,480530,Creed II,"['baby', 'training montage', 'sequel', 'boxing...",Creed II elicits a mixed response from audienc...,drama michael b. jordan sylvester stallone tes...,"[-0.012480250747134571, -0.02905849380429892, ..."
2,299536,Avengers: Infinity War,"['superhero', 'ensemble cast', 'marvel cinemat...",Avengers: Infinity War elicits a generally pos...,adventure action sci-fi robert downey jr. chri...,"[-0.01140899767743463, -0.027857139652446076, ..."
3,299534,Avengers: Endgame,"['time travel', 'superhero', 'super villain', ...",Avengers: Endgame elicited a largely positive ...,adventure sci-fi action robert downey jr. chri...,"[-0.00412223552630982, -0.031019326010432745, ..."
4,337167,Fifty Shades Freed,"['sex scene', 'wedding ceremony', 'bondage', '...",The audience reaction to Fifty Shades Freed is...,drama romance dakota johnson jamie dornan eric...,"[-0.0075680177194708525, -0.024964091224630152..."


In [5]:
# Preview the first rows of the table loaded from Movies_Database.
database_query_based.head()

Unnamed: 0,id,IMDB_ID,title,release_year,genres,vote_average,cast,Director,keywords,reviews,review_sentiment,review_summary,poster_path,backdrop_path
0,283995,tt3896198,Guardians of the Galaxy Vol. 2,2017,"['Adventure', 'Action', 'Sci-Fi']",7.623,"['Chris Pratt', 'Zoe Saldaña', 'Dave Bautista'...",James Gunn,"['demi god', 'alien creature', 'sarcasm', 'cra...","[""Despite being a huge comic book nerd I was n...","['positive', 'positive', 'positive', 'positive...",Guardians of the Galaxy Vol. 2 elicited a gene...,/y4MBh0EjBlMuOzv9axM4qJlmhzz.jpg,/aJn9XeesqsrSLKcHfHP4u5985hn.jpg
1,480530,tt6343314,Creed II,2018,['Drama'],6.99,"['Michael B. Jordan', 'Sylvester Stallone', 'T...",Steven Caple Jr.,"['baby', 'training montage', 'sequel', 'boxing...","[""This movie is not as good as the first Creed...","['negative', 'positive', 'positive', 'positive...",Creed II elicits a mixed response from audienc...,/v3QyboWRoA4O9RbcsqH8tJMe8EB.jpg,/xTYGN1b3XkOtODryXTKgdXLtPMz.jpg
2,299536,tt4154756,Avengers: Infinity War,2018,"['Adventure', 'Action', 'Sci-Fi']",8.26,"['Robert Downey Jr.', 'Chris Hemsworth', 'Mark...",Anthony RussoJoe Russo,"['superhero', 'ensemble cast', 'marvel cinemat...","[""Avengers infinity war is an emotional roller...","['positive', 'positive', 'positive', 'positive...",Avengers: Infinity War elicits a generally pos...,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,/mDfJG3LC3Dqb67AZ52x3Z0jU0uB.jpg
3,299534,tt4154796,Avengers: Endgame,2019,"['Adventure', 'Sci-Fi', 'Action']",8.268,"['Robert Downey Jr.', 'Chris Evans', 'Mark Ruf...",Anthony RussoJoe Russo,"['time travel', 'superhero', 'super villain', ...","[""But its a pretty good film. A bit of a mess ...","['positive', 'positive', 'positive', 'positive...",Avengers: Endgame elicited a largely positive ...,/or06FN3Dka5tukK1e9sl16pB3iy.jpg,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg
4,337167,tt4477536,Fifty Shades Freed,2018,"['Drama', 'Romance']",6.699,"['Dakota Johnson', 'Jamie Dornan', 'Eric Johns...",James Foley,"['sex scene', 'wedding ceremony', 'bondage', '...","[""The first of the three that is actually emot...","['positive', 'negative', 'negative', 'negative...",The audience reaction to Fifty Shades Freed is...,/9ZedQHPQVveaIYmDSTazhT3y273.jpg,/9ywA15OAiwjSTvg3cBs9B7kOCBF.jpg


In [6]:
# Preview the last rows of the table loaded from Movies_Database.
database_query_based.tail()

Unnamed: 0,id,IMDB_ID,title,release_year,genres,vote_average,cast,Director,keywords,reviews,review_sentiment,review_summary,poster_path,backdrop_path
909,484889,tt6076226,Rise of the Footsoldier 3,2017,"['Action', 'Crime', 'Drama', 'Thriller']",6.3,"['Craig Fairbrass', 'Terry Stone', 'Roland Man...",Zackary Adler,[],"[""There's a glut of these sort of films but th...","['positive', 'positive', 'negative', 'positive...",Audience reaction to Rise of the Footsoldier 3...,/1LgOIQNpTfWKY3wizRrQ6CvNAXK.jpg,/vC0x4816uI8yHSLaCzjfdTTSDK1.jpg
910,592230,tt7394674,Blood Quantum,2019,['Horror'],5.8,"['Michael Greyeyes', 'Elle', 'Máijá Tailfeathe...",Jeff Barnaby,[],"[""With the lack of a trailer or really any inf...","['positive', 'positive', 'negative', 'positive...","Audience reaction to Blood Quantum is mixed, w...",/pQnfrys3nyOpUxktxK2CBnm7Rv8.jpg,/xg1adjc8iEsQ4znJNjUQSvuiPjr.jpg
911,574638,tt9577852,Rolling Thunder Revue: A Bob Dylan Story by Ma...,2019,"['Documentary', 'Music']",7.1,"['Bob Dylan', 'Allen Ginsberg', 'Patti Smith',...",Martin Scorsese,[],"[""My ex and I saw Bob Dylan perform in 1984, a...","['positive', 'positive', 'positive', 'positive...",Audience reaction to *Rolling Thunder Revue: A...,/ixxELBgYj9OH8hz0XCrcZOJpIx9.jpg,/4MYtYsSGzQUUYQqHGWBNeEkT91s.jpg
912,299782,tt0069049,The Other Side of the Wind,2018,['Drama'],6.7,"['John Huston', 'Oja Kodar', 'Peter Bogdanovic...",Orson Welles,"['film business', 'nudity', 'female nudity', '...","[""Years ago I saw a documentary that included ...","['positive', 'positive', 'positive', 'positive...",The Other Side of the Wind elicits a mixed res...,/kFky1paYEfHxfCYByEc9g7gn6Zk.jpg,/wXUcSJG6dqNKgIRgqYqX98UA1wz.jpg
913,431093,tt5537228,Everybody Loves Somebody,2017,"['Romance', 'Comedy']",6.6,"['Karla Souza', 'José María Yázpik', ""Ben O'To...",Catalina Aguilar Mastretta,[],['This is definitely a light comedy worth reco...,"['positive', 'positive', 'negative', 'positive...","""Everybody Loves Somebody"" elicits a mixed res...",/bZKpuVXmYu6gaprjfkOdvlvY5Z9.jpg,/aAeUP99GEhJr2KIHzfA9chqqXUD.jpg


# Fetching the Synopsis from the IMDb Website
Here, our aim is to fetch the synopsis of each movie from the IMDb website. This gives us text data for every movie, which can be used for further analysis and movie discussion.

In [22]:
# FIXME: this function is not yet correct; more work is needed to identify the right section of the page.
def load_synopsis(imdb_id, *, scroll_count=3, scroll_delay=1, wait_timeout=10, min_length=100):
    """Scrape the longest plot text from a title's IMDb plot-summary page.

    Opens the ``plotsummary`` page for ``imdb_id`` in a Safari WebDriver
    session, scrolls to the bottom of the page several times, waits for the
    elements matching ``li[id^="ps"] .ipc-html-content-inner-div`` and returns
    the longest of their texts.

    Args:
        imdb_id: IMDb title identifier, e.g. ``"tt6343314"``.
        scroll_count: How many times to scroll to the bottom of the page.
        scroll_delay: Seconds to sleep after each scroll.
        wait_timeout: Seconds to wait for the plot elements to be present.
        min_length: Texts with ``len(text) <= min_length`` are discarded
            (the original author treated these as likely plot keywords).

    Returns:
        The longest remaining text, or ``""`` when nothing qualifies or when a
        ``TimeoutException`` / ``NoSuchElementException`` is raised.

    Raises:
        Any other exception (for example a failure to start the browser)
        propagates to the caller unchanged.
    """
    # Bound before the try block so the cleanup in `finally` is safe even when
    # the browser never started; otherwise `driver.quit()` would raise
    # UnboundLocalError and hide the real start-up error.
    driver = None
    try:
        options = webdriver.SafariOptions()
        # NOTE(review): Safari's driver may not honour a headless flag, in
        # which case a visible window still opens — confirm.
        options.add_argument("--headless")
        driver = webdriver.Safari(options=options)
        driver.get(f'https://www.imdb.com/title/{imdb_id}/plotsummary/?ref_=tt_stry_pl#synopsis')

        # Scroll before locating elements — presumably so that content
        # rendered on scroll is present in the DOM.
        for _ in range(scroll_count):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(scroll_delay)

        elements = WebDriverWait(driver, wait_timeout).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'li[id^="ps"] .ipc-html-content-inner-div'))
        )

        # Keep only texts longer than the threshold, then pick the longest;
        # `default` covers the case where every candidate was filtered out.
        candidates = [text for text in (element.text for element in elements) if len(text) > min_length]
        return max(candidates, key=len, default="")

    except (TimeoutException, NoSuchElementException) as e:
        print(f"Error scraping synopsis for {imdb_id}: {e}")  # Log the error
        return ""

    finally:
        # Always close the browser, but only if one was actually created.
        if driver is not None:
            driver.quit()


In [23]:
# Try the scraper on a single title (tt6343314 is the IMDB_ID listed for Creed II in the table above).
a = load_synopsis("tt6343314")

In [24]:
# Display the text returned by the scraper.
a

"Years after Adonis Creed made a name for himself under Rocky Balboa's mentorship, the young boxer becomes the Heavyweight Champion of the World. While life is good with that victory and his marriage to Bianca, trouble comes to Philadelphia when Ivan Drago, the Russian boxer who killed Adonis' father, Apollo, arrives with his son, Viktor, to challenge Adonis. Against Rocky's advice, Adonis accepts the challenge without his mentor's participation and pays the price in a punishing bout he wins only by a technicality. Now injured and demoralized, Adonis cannot bring himself to back into the game, leaving his spirit and title in jeopardy. Together, Adonis' family and Rocky must find a way to rekindle Adonis' fighting spirit to face the future in whatever choice he makes. Meanwhile, the Drago family have its own troubles trying to regain the respect in their homeland that they lost at Rocky's hands as they wonder whether is it truly worth it.—Kenneth Chisholm (kchishol@rogers.com)"