# Combining All Methods

# In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

# In [None]:
# Function to fetch movie data from TMDb
def fetch_movie_data(movie_title, api_key, timeout=10):
    """Search TMDb for movies matching *movie_title*.

    Args:
        movie_title: Title text sent as the ``query`` parameter.
        api_key: TMDb API key sent as the ``api_key`` parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The list found under the ``results`` key of the JSON response, or an
        empty list when the request fails, the server answers with a non-200
        status, or the response has no ``results`` key.
    """
    search_url = 'https://api.themoviedb.org/3/search/movie'
    params = {
        'api_key': api_key,
        'query': movie_title,
    }

    # Without a timeout, requests may block forever on an unresponsive server.
    try:
        response = requests.get(search_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Report connection-level failures the same way as HTTP errors below.
        print(f"Error fetching movie data: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching movie data: {response.status_code}")
        return []

    return response.json().get('results', [])

# In [None]:
# Function to scrape IMDb movie reviews
def scrape_imdb_reviews(imdb_url, timeout=10):
    """Download *imdb_url* and extract the text of each review element.

    Args:
        imdb_url: URL of the page to scrape.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list with the text of every ``<div>`` whose class attribute is
        ``text show-more__control``; an empty list when the request fails,
        the server answers with a non-200 status, or no such element exists.
    """
    # Without a timeout, requests may block forever on an unresponsive server.
    try:
        page = requests.get(imdb_url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching IMDb reviews: {exc}")
        return []

    # Do not parse error pages as if they were review listings.
    if page.status_code != 200:
        print(f"Error fetching IMDb reviews: {page.status_code}")
        return []

    soup = BeautifulSoup(page.content, 'html.parser')
    return [
        review.get_text()
        for review in soup.find_all('div', class_='text show-more__control')
    ]

# In [None]:
# Function to preprocess text
def preprocess_text(text):
    """Normalize *text* into a space-separated string of stemmed words.

    The text is tokenized with NLTK; tokens that are not purely alphabetic
    or whose lower-cased form is an English stop word are discarded, and the
    remaining lower-cased tokens are reduced with the Porter stemmer.
    """
    english_stops = set(stopwords.words('english'))
    porter = PorterStemmer()

    stems = []
    for token in word_tokenize(text):
        if not token.isalpha():
            continue
        lowered = token.lower()
        if lowered in english_stops:
            continue
        stems.append(porter.stem(lowered))

    return ' '.join(stems)

# In [None]:
# Main function
def _fetch_imdb_id(tmdb_movie_id, api_key, timeout=10):
    """Return the IMDb title id for a TMDb movie id, or None if unavailable."""
    url = f'https://api.themoviedb.org/3/movie/{tmdb_movie_id}/external_ids'
    try:
        response = requests.get(url, params={'api_key': api_key}, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching IMDb id: {exc}")
        return None
    if response.status_code != 200:
        print(f"Error fetching IMDb id: {response.status_code}")
        return None
    return response.json().get('imdb_id') or None


def main():
    """Run the demo pipeline for a single, hard-coded movie title.

    Steps: search TMDb for the title, resolve the first hit to its IMDb id,
    scrape IMDb reviews, compute TF-IDF cosine similarity between each review
    and the TMDb overview, train a Naive Bayes classifier on the reviews that
    have a sample label, and print the results.

    The TMDb API key is read from the ``TMDB_API_KEY`` environment variable.
    The function prints a message and returns early when any stage yields no
    usable data.
    """
    # Local import keeps this block self-contained; credentials must not be
    # committed to source, so the key comes from the environment instead.
    import os

    tmdb_api_key = os.environ.get('TMDB_API_KEY')
    if not tmdb_api_key:
        print("TMDb API key not set; export TMDB_API_KEY before running")
        return

    movie_title = 'Inception'  # Replace with the movie title you want to search for

    # Fetch movie data from TMDb
    movie_data = fetch_movie_data(movie_title, tmdb_api_key)
    if not movie_data:
        print(f"No movie found for '{movie_title}'")
        return

    # Use the first search hit; its overview may be missing or empty.
    movie = movie_data[0]
    movie_description = movie.get('overview') or ''

    # The TMDb 'id' is a numeric TMDb identifier, not an IMDb title id, so it
    # has to be translated before an IMDb URL can be built.
    imdb_id = _fetch_imdb_id(movie['id'], tmdb_api_key)
    if not imdb_id:
        print(f"No IMDb id found for '{movie_title}'")
        return

    # NOTE(review): the scraper's selector targets review text, so this points
    # at the title's reviews page -- confirm against current IMDb markup.
    imdb_url = f'https://www.imdb.com/title/{imdb_id}/reviews'
    imdb_reviews = scrape_imdb_reviews(imdb_url)
    if not imdb_reviews:
        print(f"No IMDb reviews found for '{movie_title}'")
        return

    # Preprocess IMDb reviews and movie description
    preprocessed_reviews = [preprocess_text(review) for review in imdb_reviews]
    preprocessed_description = preprocess_text(movie_description)

    # Create TF-IDF vectors for reviews and movie description.
    # fit_transform raises ValueError when preprocessing left no vocabulary.
    vectorizer = TfidfVectorizer()
    try:
        tfidf_reviews = vectorizer.fit_transform(preprocessed_reviews)
    except ValueError as exc:
        print(f"Could not vectorize reviews: {exc}")
        return
    tfidf_description = vectorizer.transform([preprocessed_description])

    # Calculate cosine similarity between IMDb reviews and movie description
    cosine_similarities = cosine_similarity(tfidf_reviews, tfidf_description)

    # Sample labels (you can label the reviews according to your own criteria).
    # Only as many reviews as there are labels can take part in training, so
    # both sequences are cut to the same length.
    sample_labels = ["Positive", "Negative", "Positive", "Negative", "Positive"]
    labeled_count = min(len(imdb_reviews), len(sample_labels))
    labeled_reviews = imdb_reviews[:labeled_count]
    labels = sample_labels[:labeled_count]

    # train_test_split needs at least one sample on each side of the split.
    if labeled_count >= 2:
        X_train, X_test, y_train, y_test = train_test_split(
            labeled_reviews, labels, test_size=0.2, random_state=42
        )

        # Create TF-IDF vectors for reviews
        tfidf_vectorizer = TfidfVectorizer()
        X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
        X_test_tfidf = tfidf_vectorizer.transform(X_test)

        # Train a Naive Bayes classifier
        clf = MultinomialNB()
        clf.fit(X_train_tfidf, y_train)
        y_pred = clf.predict(X_test_tfidf)

        # Calculate accuracy
        accuracy = accuracy_score(y_test, y_pred)
        print(f"Naive Bayes Accuracy: {accuracy:.2f}")
    else:
        print("Not enough labelled reviews to train the Naive Bayes classifier")

    def label_for(index):
        # Reviews beyond the sample labels have no sentiment assigned.
        return labels[index] if index < len(labels) else "Unlabeled"

    # Output recommendations based on cosine similarity and sentiment analysis
    print("Movie Description:")
    print(movie_description)
    print("\nIMDb Reviews:")
    for i, review in enumerate(imdb_reviews):
        print(f"Review {i+1}: {review} (Sentiment: {label_for(i)})")
    print("\nCosine Similarity Scores:")
    for i, similarity_score in enumerate(cosine_similarities):
        print(f"Review {i+1}: {similarity_score[0]:.4f} (Sentiment: {label_for(i)})")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()