In [1]:
!pip install gradio

Collecting gradio
  Downloading gradio-4.44.1-py3-none-any.whl.metadata (15 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.19.3 (from gradio)
  Downloading huggingface_hub-0.25.1-py3-none-any.whl.metadata (13 kB)
Collecting importlib-resources<7.0,>=1.3 (from gradio)
  Downloading importlib_resources-6.4.5-py3-none-any.whl.metadata (4.0 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp311-none-win_amd64.whl.metadata (51 kB)
     ---------------------------------------- 0.0/51.7 kB ? eta -:--:--
     ---------------------------------------  51.2/51.7 kB ? eta -:--:--
     -------------------------------------- 51.7/51.7 kB 881.2 kB/s eta 0:00:00
Collecting pydantic>=2.0 (from

  You can safely remove it manually.


In [5]:
#Collecting Movie Posters
import html
import os
import time

import numpy as np
import pandas as pd
import requests
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# ==========================
# 1. Load and Preprocess Data
# ==========================

# Location of the raw movie dataset. Set the MOVIES_CSV_PATH environment variable to
# run the notebook on another machine; the default is the original author's local path.
MOVIES_CSV_PATH = os.environ.get(
    'MOVIES_CSV_PATH',
    'C:/Users/dehey/Desktop/Data_Analytics_Science/Homework/Project_4/BootCamp24-project-4-group-08/Project_4_Web_app_Deployment/16k_Movies.csv',
)

# Load the dataset
df = pd.read_csv(MOVIES_CSV_PATH)

# Handle missing values: rows without a title or description are dropped,
# missing ratings take the column mean, and missing genres become an empty string.
df = df.dropna(subset=['Title', 'Description'])
df['Rating'] = df['Rating'].fillna(df['Rating'].mean())
df['Genres'] = df['Genres'].fillna('')

# Combine 'Description' and 'Genres' into the single text field used for similarity
df['Combined_Features'] = df['Description'] + ' ' + df['Genres']

**DO NOT RE-RUN CELL #3!!!** *I've already collected all the movie posters. This took approximately 1 hour 30 minutes to run and complete:*

In [None]:
# ==========================
# 2. Obtain Poster URLs
# ==========================

# Your TMDb API key. Create a TMDb account, then either export the key as the
# TMDB_API_KEY environment variable or replace the empty default below.
# Reading it from the environment keeps the secret out of the saved notebook.
TMDB_API_KEY = os.environ.get('TMDB_API_KEY', '')

# Base URL for TMDb images; a poster path returned by the API is appended to it
TMDB_IMAGE_BASE_URL = 'https://image.tmdb.org/t/p/w500'

def get_poster_url(title, year=None, timeout=10):
    """
    Look up a movie on the TMDb search endpoint and return its poster URL.

    Parameters
    ----------
    title : str
        Movie title sent as the search query.
    year : int or None
        Optional release year used to narrow the search.
    timeout : float
        Seconds to wait for TMDb before giving up on the request.

    Returns
    -------
    str or None
        TMDB_IMAGE_BASE_URL joined with the first result's poster path, or None
        when the request fails, the response is not HTTP 200, the body is not
        valid JSON, there are no results, or the first result has no poster.
    """
    search_url = "https://api.themoviedb.org/3/search/movie"
    params = {
        'api_key': TMDB_API_KEY,
        'query': title,
        'year': year,  # requests leaves out parameters whose value is None
        'language': 'en-US',
        'page': 1,
        'include_adult': False
    }
    try:
        # Without a timeout a single stalled connection would hang the whole fetch loop
        response = requests.get(search_url, params=params, timeout=timeout)
        if response.status_code != 200:
            return None
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or malformed body: treat it like "no poster" so the caller
        # can continue; the row stays null and is retried on the next run.
        return None

    results = data.get('results') or []
    if not results:
        return None

    poster_path = results[0].get('poster_path')
    if poster_path:
        return TMDB_IMAGE_BASE_URL + poster_path
    return None

# Resume from the cached frame when it exists; otherwise start from a copy of df
# with an empty Poster_URL column.
try:
    df_posters = pd.read_pickle('df_with_posters.pkl')
except FileNotFoundError:
    df_posters = df.copy().assign(Poster_URL=None)
    print("Initialized Poster_URL column.")
else:
    print("Loaded existing poster URLs.")

# Function to extract year from 'Release Date'
def extract_year(release_date):
    """
    Return the calendar year of a release date as an int.

    Parameters
    ----------
    release_date : scalar
        Any value pandas can parse as a date (e.g. a date string or Timestamp).

    Returns
    -------
    int or None
        The year, or None when the value is missing or cannot be parsed.
    """
    if pd.isnull(release_date):
        return None
    try:
        # errors='coerce' turns unparseable values into NaT instead of raising
        parsed = pd.to_datetime(release_date, errors='coerce')
    except (TypeError, ValueError, OverflowError):
        # Narrow on purpose: a bare except would also swallow KeyboardInterrupt
        return None
    if pd.isnull(parsed):
        return None
    return int(parsed.year)

# Number of TMDb lookups between checkpoint saves
SAVE_EVERY = 1000
# Pause between requests to respect TMDb rate limits (40 requests per 10 seconds)
REQUEST_DELAY_SECONDS = 0.25

# Iterate over the DataFrame to fetch poster URLs
fetched_count = 0
for idx, row in df_posters.iterrows():
    if pd.isnull(row['Poster_URL']):
        title = row['Title']
        year = extract_year(row['Release Date'])
        poster_url = get_poster_url(title, year)
        df_posters.at[idx, 'Poster_URL'] = poster_url
        fetched_count += 1
        if poster_url:
            print(f"Fetched poster for: {title}")
        else:
            print(f"No poster found for: {title}")
        # Checkpoint on the number of lookups done, not on the index label:
        # labels have gaps after dropna and rows that already have a poster
        # are skipped, so `idx % 1000` could go a long time without saving.
        if fetched_count % SAVE_EVERY == 0:
            df_posters.to_pickle('df_with_posters.pkl')
            print("Saved progress.")
        time.sleep(REQUEST_DELAY_SECONDS)
    else:
        print(f"Poster already exists for: {row['Title']}")

# Save the final DataFrame with poster URLs
df_posters.to_pickle('df_with_posters.pkl')
print("Completed fetching all poster URLs.")

In [6]:
# ==========================
# 3. Compute Cosine Similarity
# ==========================

# Continue from the cached frame, which carries the Poster_URL column
df = pd.read_pickle('df_with_posters.pkl')

# Blank out missing text so every row has a string to vectorize
df['Combined_Features'] = df['Combined_Features'].fillna('')

# Build the TF-IDF matrix over the combined text, ignoring English stop words
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['Combined_Features'])

# Pairwise similarity of every movie against every other movie
# (linear kernel on TF-IDF rows, used here as the cosine similarity matrix)
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

print("Cosine similarity matrix computed.")

Cosine similarity matrix computed.


In [7]:
# ==========================
# 4. Recommendation Function
# ==========================

# Define a default placeholder image URL
DEFAULT_POSTER_URL = 'https://via.placeholder.com/200x300?text=No+Image'


# Create a reverse mapping from normalized title to row POSITION in df.
# Positions (0..len-1) are stored rather than df.index labels because the values
# are used to index cosine_sim and with .iloc; labels stop matching positions
# once rows have been dropped without resetting the index.
_normalized_titles = df['Title'].str.lower().str.strip()
indices = pd.Series(range(len(df)), index=_normalized_titles)
# Keep only the first row per title: Series.drop_duplicates() would dedupe the
# values, not the titles, and a repeated title would make indices[title] a Series.
indices = indices[~indices.index.duplicated(keep='first')]

def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """
    Given a movie title, return the most similar movies.

    Parameters
    ----------
    title : str
        Title to look up; matching is case-insensitive and ignores
        leading/trailing whitespace.
    cosine_sim : array-like
        Pairwise similarity matrix, indexed by row position in df.
    top_n : int
        Number of recommendations to return (10 by default).

    Returns
    -------
    list of dict or str
        One dict per recommendation with the keys 'Title', 'Poster_URL',
        'Genres' and 'Rating', ordered from most to least similar. If the
        title is not in the lookup table, an error message string is returned.
    """
    not_found = "Movie title not found. Please check the spelling and try again."
    if not isinstance(title, str):
        return not_found

    key = title.lower().strip()
    if key not in indices:
        return not_found

    idx = indices[key]
    # A title that occurs more than once yields a Series; use its first entry
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Rank every movie by its similarity to the query, highest first
    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Drop the query movie explicitly: a movie with identical features can tie
    # with it and sort ahead, so slicing off the first entry is not reliable.
    movie_indices = [pos for pos, _ in ranked if pos != idx][:top_n]

    # Retrieve the display fields for the selected rows
    picks = df.iloc[movie_indices]
    recommended_titles = picks['Title'].tolist()
    recommended_posters = picks['Poster_URL'].tolist()
    recommended_genres = picks['Genres'].tolist()
    recommended_ratings = picks['Rating'].tolist()

    # Combine all information into a list of dictionaries
    return [
        {
            'Title': movie_title,
            # Movies without a stored poster fall back to the placeholder image
            'Poster_URL': poster if pd.notnull(poster) else DEFAULT_POSTER_URL,
            'Genres': genre,
            'Rating': rating
        }
        for movie_title, poster, genre, rating in zip(
            recommended_titles, recommended_posters, recommended_genres, recommended_ratings
        )
    ]


In [8]:
# Testing: look up one title and inspect the returned recommendation records
get_recommendations('The Dark Knight Rises')

[{'Title': 'Batman Begins',
  'Poster_URL': 'https://image.tmdb.org/t/p/w500/4MpN4kIEqUjW8OPtOQJXlTdHiJV.jpg',
  'Genres': 'Action,Crime,Drama',
  'Rating': 8.3},
 {'Title': 'The Dark Knight',
  'Poster_URL': 'https://image.tmdb.org/t/p/w500/qJ2tW6WMUDux911r6m7haRef0WH.jpg',
  'Genres': 'Action,Crime,Drama,Thriller',
  'Rating': 8.9},
 {'Title': 'Batman Returns',
  'Poster_URL': 'https://image.tmdb.org/t/p/w500/jKBjeXM7iBBV9UkUcOXx3m7FSHY.jpg',
  'Genres': 'Action,Crime,Fantasy',
  'Rating': 8.1},
 {'Title': 'Batman v Superman: Dawn of Justice',
  'Poster_URL': 'https://image.tmdb.org/t/p/w500/5UsK3grJvtQrtzEgqNlDljJW96w.jpg',
  'Genres': 'Action,Adventure,Sci-Fi',
  'Rating': 7.0},
 {'Title': 'Batman Forever',
  'Poster_URL': 'https://image.tmdb.org/t/p/w500/mzzNBVwTiiY94xAXDMWJpNPW2US.jpg',
  'Genres': 'Action,Adventure',
  'Rating': 6.8},
 {'Title': 'The LEGO Batman Movie',
  'Poster_URL': 'https://image.tmdb.org/t/p/w500/snGwr2gag4Fcgx2OGmH9otl6ofW.jpg',
  'Genres': 'Animation,Acti

In [9]:

# ==========================
# 5. Enhanced Gradio Interface
# ==========================
def format_recommendations(recommendations):
    """
    Render recommendations (or an error message) as an HTML fragment.

    Parameters
    ----------
    recommendations : list of dict or str
        Either a list of dicts with the keys 'Title', 'Poster_URL', 'Genres'
        and 'Rating', or an error message string.

    Returns
    -------
    str
        A red, centered paragraph for an error message; otherwise a flex
        container holding one 'movie-card' div per recommendation.
    """
    if isinstance(recommendations, str):
        # If an error message is returned
        return f"<p style='color: red; text-align: center;'>{html.escape(recommendations)}</p>"

    cards = []
    for movie in recommendations:
        # Escape every value before it enters the markup: the attributes are
        # single-quoted, so an apostrophe in a title would otherwise end the
        # alt attribute early, and '<' or '&' would be parsed as HTML.
        title = html.escape(str(movie['Title']))
        poster = html.escape(str(movie['Poster_URL']))
        genres = html.escape(str(movie['Genres']))
        rating = html.escape(str(movie['Rating']))
        cards.append(f"""
            <div class='movie-card' style='width: 200px; text-align: center;'>
                <img src='{poster}' alt='{title}' style='width: 100%; height: auto; border-radius: 8px; box-shadow: 0 4px 8px rgba(0,0,0,0.2);'>
                <h3 style='font-size: 1.1em; margin-top: 10px;'>{title}</h3>
                <p style='color: #555;'>{genres}</p>
                <p style='color: #777;'>Rating: {rating}</p>
            </div>
            """)

    return (
        "<div style='display: flex; flex-wrap: wrap; gap: 20px; justify-content: center;'>"
        + "".join(cards)
        + "</div>"
    )

#Testing
#format_recommendations(get_recommendations('The Dark Knight Rises'))

In [10]:
# Distinct movie titles, in order of first appearance, collected for autocomplete
all_titles = df['Title'].drop_duplicates().tolist()
all_titles[:10]

['Dekalog (1988)',
 'Three Colors: Red',
 'The Conformist',
 'Tokyo Story',
 'The Leopard (re-release)',
 'The Godfather',
 'Boyhood',
 'Lawrence of Arabia (re-release)',
 'Fanny and Alexander (re-release)',
 'Playtime']

In [11]:
import gradio as gr
# Page-level styling for the app, kept apart from the layout code below
APP_CSS = """
    body {
        background-color: #f0f2f5;
        font-family: Arial, sans-serif;
    }
    .header {
        text-align: center;
        padding: 20px;
    }
    .recommendation-section {
        margin-top: 20px;
    }
    .movie-card {
        transition: transform 0.2s;
    }
    .movie-card:hover {
        transform: scale(1.05);
    }
    .gradio-container {
        max-width: 1200px;
        margin: auto;
    }
"""

# Build the Gradio Blocks interface: header, input row, output row, footer
with gr.Blocks(css=APP_CSS) as demo:
    # Title and one-line usage hint
    gr.Markdown("<h1 style='text-align: center; color: #333;'>🎬 Movie Recommender System</h1>")
    gr.Markdown("<p style='text-align: center; color: #555;'>Enter a movie title to receive top 10 recommendations based on similarity.</p>")

    # Title textbox and the button that triggers the lookup
    with gr.Row():
        input_movie = gr.Textbox(
            label="Movie Title",
            placeholder="Enter a movie title...",
            lines=1,
            interactive=True,
        )
        recommend_btn = gr.Button("Get Recommendations", variant="primary")

    # HTML area that receives the rendered movie cards or the error message
    with gr.Row():
        recommendations_output = gr.HTML(label="Recommended Movies")

    # On click: look up recommendations for the typed title and render them as HTML
    recommend_btn.click(
        fn=lambda title: format_recommendations(get_recommendations(title)),
        inputs=input_movie,
        outputs=recommendations_output,
    )

    # Footer
    gr.Markdown("<p style='text-align: center; margin-top: 40px; color: #888;'>© 2024 MovieRecommender Inc.</p>")

# Start the app on a local URL only (no public share link)
demo.launch(share=False)

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


