In [3]:
import numpy as np
import pandas as pd
import ast
import re
import spacy
import wordninja
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
import joblib
from flask import Flask, request, jsonify

# Load data
movies = pd.read_csv('tmdb_5000_movies.csv')
credits = pd.read_csv('tmdb_5000_credits.csv')

# Merge data
movies = movies.merge(credits, on='title')
movies = movies[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]

# Convert columns
def convert(text):
    L = []
    for i in ast.literal_eval(text):
        L.append(i['name']) 
    return L

movies.dropna(inplace=True)
movies['genres'] = movies['genres'].apply(convert)
movies['keywords'] = movies['keywords'].apply(convert)

def convert_cast(text):
    L = []
    counter = 0
    if isinstance(text, list):
        for i in text:
            if counter < 3:
                L.append(i)
                counter += 1
    return L

movies['cast'] = movies['cast'].apply(convert_cast)
movies['cast'] = movies['cast'].apply(lambda x: x[0:3])

def fetch_director(text):
    L = []
    for i in ast.literal_eval(text):
        if i['job'] == 'Director':
            L.append(i['name'])
    return L 

movies['crew'] = movies['crew'].apply(fetch_director)

def collapse(L):
    L1 = []
    for i in L:
        L1.append(i.replace(" ",""))
    return L1

movies['cast'] = movies['cast'].apply(collapse)
movies['crew'] = movies['crew'].apply(collapse)
movies['genres'] = movies['genres'].apply(collapse)
movies['keywords'] = movies['keywords'].apply(collapse)

movies['overview'] = movies['overview'].apply(lambda x: x.split())
movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']
new = movies.drop(columns=['overview', 'genres', 'keywords', 'cast', 'crew'])
new['tags'] = new['tags'].apply(lambda x: " ".join(x))

# Text cleaning and preprocessing
def clean_text(text):
    return re.sub(r'\s+', '', text)

new['tags'] = new['tags'].apply(clean_text)

def reinsert_spaces(text):
    text = re.sub(r'(?<!^)(?<!\s)(?<!\d)(?<![a-z])(?=[A-Z])', ' ', text)
    pattern = '|'.join(f'(?<!\w){word}(?!\w)' for word in common_words)
    text = re.sub(pattern, r' \0 ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

common_words = ['the', 'and', 'a', 'in', 'of', 'to', 'is', 'with', 'on', 'for', 'when', 'who', 'which', 'where', 'how', 'why', 'as', 'it', 'at', 'an', 'by', 'or', 'from', 'that', 'this', 'but', 'was', 'has', 'been', 'will', 'shall', 'may', 'could', 'would', 'might', 'can']
new['tags'] = new['tags'].apply(reinsert_spaces)

# TF-IDF Vectorizer
tfidf = TfidfVectorizer(max_features=5000, stop_words='english')
tfidf_vector = tfidf.fit_transform(new['tags']).toarray()

# Matrix factorization with SVD
svd = TruncatedSVD(n_components=100)
svd_matrix = svd.fit_transform(tfidf_vector)
svd_similarity = cosine_similarity(svd_matrix)

# Recommendation Functions
def recommend(movie):
    index = new[new['title'] == movie].index[0]
    distances = sorted(list(enumerate(svd_similarity[index])), reverse=True, key=lambda x: x[1])
    for i in distances[1:6]:
        print(new.iloc[i[0]].title)

def hybrid_recommend(movie, alpha=0.5):
    index = new[new['title'] == movie].index[0]
    content_distances = sorted(list(enumerate(svd_similarity[index])), reverse=True, key=lambda x: x[1])
    collab_distances = sorted(list(enumerate(svd_similarity[index])), reverse=True, key=lambda x: x[1])
    combined_scores = {}
    for i, score in content_distances[1:]:
        combined_scores[i] = combined_scores.get(i, 0) + alpha * score
    for i, score in collab_distances[1:]:
        combined_scores[i] = combined_scores.get(i, 0) + (1 - alpha) * score
    sorted_scores = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)
    for i in sorted_scores[:5]:
        print(new.iloc[i[0]].title)

# Flask API
app = Flask(__name__)

@app.route('/recommend', methods=['GET'])
def recommend_route():
    movie = request.args.get('movie')
    if movie in new['title'].values:
        recommendations = recommend(movie)
        return jsonify({'recommendations': recommendations})
    else:
        return jsonify({'error': 'Movie not found'}), 404

if __name__ == '__main__':
    app.run(debug=True)

# Save models
joblib.dump(tfidf, 'tfidf_vectorizer.pkl')
joblib.dump(svd, 'svd_model.pkl')

print("Models saved and server started.")


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with watchdog (windowsapi)


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
