# In [48]:
import pandas as pd
import numpy as np
from ast import literal_eval
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
import os

print("Starting application...")

try:
    # Both CSV files must be present in the working directory; fail fast otherwise.
    required_files = ('tmdb_5000_movies.csv', 'tmdb_5000_credits.csv')
    if not all(os.path.exists(path) for path in required_files):
        raise FileNotFoundError("CSV files not found in current directory")

    print("Loading datasets...")
    movies = pd.read_csv('tmdb_5000_movies.csv')
    credits = pd.read_csv('tmdb_5000_credits.csv')
    print("Datasets loaded successfully")

    # Give the credits key the same name as the movies key, then join on it.
    credits = credits.rename(columns={'movie_id': 'id'})
    movies_data = movies.merge(credits, on='id')

    # Snapshot the untouched titles before any cleaning is applied.
    original_titles = movies_data['original_title'].copy()

    # Remove the columns that are not needed and expose the title as 'title'.
    movies_data = movies_data.drop(columns=['title_x', 'title_y', 'spoken_languages'])
    movies_data = movies_data.rename(columns={'original_title': 'title'})

    print("Processing features...")
    # These columns are stored as stringified literals; parse every cell into
    # the Python object it represents.
    features = ['keywords', 'cast', 'crew', 'genres', 'production_companies']
    for feature in features:
        movies_data[feature] = movies_data[feature].apply(literal_eval)

    # Extract director's name
    def get_director(crew):
        """Return the name of the first crew member whose job is 'Director'.

        Args:
            crew: Iterable of dicts, each providing 'job' and 'name' keys.

        Returns:
            The 'name' of the first matching member, or ``np.nan`` when no
            member has the 'Director' job.
        """
        director_names = (person['name'] for person in crew if person['job'] == 'Director')
        return next(director_names, np.nan)

    # Extract top 10 elements from a list
    def get_list(elements, limit=10):
        """Return the 'name' of up to ``limit`` leading entries of ``elements``.

        Args:
            elements: A list of dicts carrying a 'name' key. Any non-list value
                (for example a NaN cell) is treated as having no entries.
            limit: Maximum number of names to return. Defaults to 10, the
                previously hard-coded cut-off; ``None`` returns every name.

        Returns:
            A list of names in their original order, or ``[]`` when
            ``elements`` is not a list.
        """
        if not isinstance(elements, list):
            return []
        # Slicing already copes with lists shorter than the limit, so no
        # explicit length check is needed.
        return [element['name'] for element in elements[:limit]]

    # Derive the director column from the crew entries, then reduce each
    # list-valued column to the names it contains.
    movies_data['director'] = movies_data['crew'].apply(get_director)
    for feature in ('cast', 'keywords', 'genres', 'production_companies'):
        name_lists = movies_data[feature].apply(get_list)
        movies_data[feature] = name_lists

    # Clean and preprocess text data
    def clean_data(text):
        """Normalise a value to lowercase with every space character removed.

        Args:
            text: A string, a list of strings, or any other value (e.g. NaN).

        Returns:
            A normalised string for string input, a list of normalised
            strings for list input, and ``''`` for any other type.
        """
        def squash(value):
            # Drop spaces first, then lowercase the result.
            return str.lower(value.replace(" ", ""))

        if isinstance(text, str):
            return squash(text)
        if isinstance(text, list):
            return [squash(item) for item in text]
        return ''

    # Store cleaned titles separately and keep original ones
    movies_data['clean_title'] = movies_data['title'].apply(clean_data)

    # The overview is free-form prose, so it is only lowercased. Running it
    # through clean_data would strip every space and fuse the whole plot into
    # one token, leaving no individual words to match on. Non-string cells
    # (missing overviews) become '' exactly as clean_data would make them.
    movies_data['overview'] = movies_data['overview'].apply(
        lambda text: text.lower() if isinstance(text, str) else ''
    )

    # Names and tags are collapsed by clean_data into one lowercase,
    # space-free token per entry.
    for feature in ['cast', 'keywords', 'director', 'genres', 'production_companies']:
        movies_data[feature] = movies_data[feature].apply(clean_data)

    print("Creating feature soup...")
    # Create a 'soup' of features using cleaned data
    def create_soup(features):
        """Build the space-separated feature text ("soup") for one movie row.

        Args:
            features: A row-like mapping providing 'clean_title', 'overview',
                'keywords', 'cast', 'director' and 'genres'. Each field may be
                a string or a list of strings.

        Returns:
            The fields concatenated in that order, separated by single spaces.
            List fields are space-joined; values that are neither strings nor
            lists contribute an empty string.
        """
        def as_text(value):
            # A string is already text. Passing it to ' '.join() would iterate
            # it character by character ("avatar" -> "a v a t a r"), which is
            # what previously happened to the title and the overview.
            if isinstance(value, str):
                return value
            if isinstance(value, (list, tuple)):
                return ' '.join(value)
            return ''

        fields = ('clean_title', 'overview', 'keywords', 'cast', 'director', 'genres')
        return ' '.join(as_text(features[field]) for field in fields)

    movies_data['soup'] = movies_data.apply(create_soup, axis=1)

    print("Creating vectorizer...")
    # Token counts over the soup, with English stop words removed
    count_vectorizer = CountVectorizer(stop_words='english')
    count_matrix = count_vectorizer.fit_transform(movies_data['soup'])

    print("Computing similarity matrix...")
    # Cosine similarity of every movie against every other movie.
    # NOTE(review): nothing in the visible code reads cosine_sim, and this
    # presumably materialises a full movies x movies matrix -- consider
    # removing it once confirmed unused.
    cosine_sim = cosine_similarity(count_matrix, count_matrix)

    # Reset index and create a reverse mapping from cleaned title to row position
    movies_data = movies_data.reset_index()
    indices = pd.Series(movies_data.index, index=movies_data['clean_title'])
    # Keep only the first row for each title. Series.drop_duplicates() compares
    # the values (row positions, which are already unique), so it never removed
    # a repeated title; de-duplicate on the index labels instead.
    indices = indices[~indices.index.duplicated(keep='first')]

    def get_recommendations(title="", overview="", cast="", director=""):
        """Recommend up to ten movies matching the supplied details.

        The inputs are turned into one query string, vectorised with the
        enclosing scope's ``count_vectorizer`` and compared against
        ``count_matrix`` by cosine similarity. Display titles are read from
        ``original_titles``.

        Args:
            title: Movie title; collapsed to one lowercase, space-free token.
            overview: Free-text plot description; lowercased, words kept.
            cast: Comma-separated cast names; each name is collapsed to one
                lowercase, space-free token. Empty entries are ignored.
            director: Director name; collapsed like ``title``.

        Returns:
            A newline-separated string of recommended titles, best match
            first, or a human-readable message when no input was given, when
            nothing matched, or when an error occurred.
        """
        def collapse(text):
            # Same normalisation the dataset applies to names and titles.
            return text.replace(" ", "").lower()

        try:
            # Treat None and whitespace-only values as "not provided".
            title = (title or "").strip()
            overview = (overview or "").strip()
            director = (director or "").strip()
            cast_members = [
                collapse(member.strip())
                for member in (cast or "").split(",")
                if member.strip()
            ]

            # Handle empty inputs
            if not any([title, overview, cast_members, director]):
                return "Please provide at least one input field."

            # Construct query
            query_parts = []
            if title:
                query_parts.append(collapse(title))
            if overview:
                # Keep word boundaries: collapsing the overview would turn the
                # whole description into a single token.
                query_parts.append(overview.lower())
            query_parts.extend(cast_members)
            if director:
                query_parts.append(collapse(director))

            query = " ".join(query_parts)

            # Transform query using the same vectorizer
            query_vec = count_vectorizer.transform([query])

            # Calculate similarity scores
            sim_scores = cosine_similarity(query_vec, count_matrix).flatten()

            # Take the ten highest scores, then drop entries with no overlap
            # at all: a zero score means the movie shares nothing with the
            # query, so listing it would be arbitrary.
            top_indices = sim_scores.argsort()[::-1][:10]
            movie_indices = [index for index in top_indices if sim_scores[index] > 0]
            if not movie_indices:
                return "No matching movies found. Try different or additional details."

            # Get the original titles of recommended movies
            recommended_movies = original_titles.iloc[movie_indices].tolist()

            # Format the output
            return "\n".join(str(movie) for movie in recommended_movies)
        except Exception as e:
            # UI boundary: report the failure as text instead of raising.
            return f"Error generating recommendations: {str(e)}"

    print("Creating Gradio interface...")
    # One text box per argument of get_recommendations, in positional order.
    input_labels = ("Movie Title", "Plot Overview", "Cast (comma-separated)", "Director")
    input_boxes = [gr.Textbox(label=label) for label in input_labels]
    output_box = gr.Textbox(label="Recommended Movies")
    iface = gr.Interface(
        fn=get_recommendations,
        inputs=input_boxes,
        outputs=output_box,
        title="Movie Recommendation System",
        description="Enter movie details to get recommendations. At least one field is required.",
    )

    # Start the server only when this file is executed as a script.
    if __name__ == "__main__":
        print("Launching interface...")
        launch_options = {
            "server_name": "0.0.0.0",  # Listen on all network interfaces
            "server_port": 7860,       # Use Gradio's default port
            "share": True,             # Enable sharing
            "debug": True,
        }
        iface.launch(**launch_options)

except FileNotFoundError as e:
    print(f"Error: {e}")
    print("Please ensure 'tmdb_5000_movies.csv' and 'tmdb_5000_credits.csv' are in the same directory as this script.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

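# Recorded error output:
# ImportError: cannot import name 'deprecated' from 'typing_extensions' (/Users/charansaikondapaneni/opt/anaconda3/lib/python3.9/site-packages/typing_extensions.py)
# NOTE(review): presumably the installed typing_extensions predates its `deprecated` helper; upgrading it (e.g. `pip install -U typing_extensions`) likely resolves this -- confirm.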