In [2]:
import threading
from flask import Flask, request, jsonify
from flask_cors import CORS
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity
import time

# Load data
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

# Merge ratings and movies data
data = pd.merge(ratings, movies, on='movieId')

# Sample data: select popular movies and active users
top_movies = data['movieId'].value_counts().nlargest(500).index
top_users = data['userId'].value_counts().nlargest(500).index
data_sampled = data[data['movieId'].isin(top_movies) & data['userId'].isin(top_users)]

# Create user-item rating matrix and convert to sparse matrix
user_movie_matrix = data_sampled.pivot_table(index='userId', columns='title', values='rating').fillna(0)
user_movie_matrix_sparse = csr_matrix(user_movie_matrix.values)

# Initialize Flask app
app = Flask(__name__)
CORS(app)  # Enable CORS

# Recommendation system API: Accepts a movie title and returns similar movies
@app.route('/recommend', methods=['GET'])
def recommend():
    start_time = time.time()  # Record start time
    movie_title = request.args.get('movie_title')
    
    if movie_title not in user_movie_matrix.columns:
        return jsonify({"error": "Movie not found in dataset"})
    
    # Call the function to get similar movies
    similar_movies = get_similar_movies(movie_title, user_movie_matrix)
    
    # Calculate response time
    response_time = time.time() - start_time
    print(f"Response Time: {response_time:.4f} seconds")
    
    return jsonify({"recommended_movies": similar_movies, "response_time": response_time})

# Function to find similar movies based on cosine similarity
def get_similar_movies(movie_title, user_movie_matrix, top_n=5):
    movie_vector = user_movie_matrix[movie_title].values.reshape(1, -1)
    similarity_scores = cosine_similarity(movie_vector, user_movie_matrix.values.T)[0]
    similarity_df = pd.DataFrame(similarity_scores, index=user_movie_matrix.columns, columns=['similarity'])
    similar_movies = similarity_df.sort_values(by='similarity', ascending=False).iloc[1:top_n+1]
    return similar_movies.index.tolist()

# Function to run the Flask app in a separate thread
def run_app():
    app.run(debug=False, use_reloader=False)

# Start the Flask app in a background thread
thread = threading.Thread(target=run_app)
thread.start()


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit


In [3]:
from sklearn.model_selection import train_test_split

# Split into training and testing sets
train_data, test_data = train_test_split(data_sampled, test_size=0.2, random_state=42)

# Calculate precision and recall based on test data
def evaluate_recommendations(test_data, user_movie_matrix, top_n=5):
    relevant_count = 0  # Count of relevant recommended movies
    retrieved_count = 0  # Total recommended movies
    total_relevant_count = 0  # Total liked movies

    for user_id in test_data['userId'].unique():
        user_ratings = test_data[test_data['userId'] == user_id]
        liked_movies = user_ratings[user_ratings['rating'] >= 4.0]['title'].tolist()

        if liked_movies:
            # Use the first liked movie as a reference to generate recommendations
            movie_title = liked_movies[0]
            if movie_title in user_movie_matrix.columns:
                recommended_movies = get_similar_movies(movie_title, user_movie_matrix, top_n=top_n)

                # Count relevant movies
                relevant_count += len(set(recommended_movies) & set(liked_movies))
                retrieved_count += len(recommended_movies)
                total_relevant_count += len(liked_movies)

    # Calculate precision and recall
    precision = relevant_count / retrieved_count if retrieved_count else 0
    recall = relevant_count / total_relevant_count if total_relevant_count else 0
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")

# Run the evaluation function
evaluate_recommendations(test_data, user_movie_matrix)


Precision: 0.1261
Recall: 0.0176


In [5]:
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Split into training and testing sets
train_data, test_data = train_test_split(data_sampled, test_size=0.2, random_state=42)

# Calculate precision and recall for different values of top_n
def evaluate_recommendations(test_data, user_movie_matrix, max_n=10):
    precision_values = []
    recall_values = []
    n_values = list(range(1, max_n + 1))

    for top_n in n_values:
        relevant_count = 0  # Count of relevant recommended movies
        retrieved_count = 0  # Total recommended movies
        total_relevant_count = 0  # Total liked movies

        for user_id in test_data['userId'].unique():
            user_ratings = test_data[test_data['userId'] == user_id]
            liked_movies = user_ratings[user_ratings['rating'] >= 4.0]['title'].tolist()

            if liked_movies:
                # Use the first liked movie as a reference to generate recommendations
                movie_title = liked_movies[0]
                if movie_title in user_movie_matrix.columns:
                    recommended_movies = get_similar_movies(movie_title, user_movie_matrix, top_n=top_n)

                    # Count relevant movies
                    relevant_count += len(set(recommended_movies) & set(liked_movies))
                    retrieved_count += len(recommended_movies)
                    total_relevant_count += len(liked_movies)

        # Calculate precision and recall
        precision = relevant_count / retrieved_count if retrieved_count else 0
        recall = relevant_count / total_relevant_count if total_relevant_count else 0
        precision_values.append(precision)
        recall_values.append(recall)

    return n_values, precision_values, recall_values


In [None]:
# Run the evaluation function and get results
n_values, precision_values, recall_values = evaluate_recommendations(test_data, user_movie_matrix, max_n=10)

# Plot precision and recall
plt.figure(figsize=(10, 6))
plt.plot(n_values, precision_values, marker='o', label='Precision')
plt.plot(n_values, recall_values, marker='s', label='Recall')

# Add labels and title
plt.xlabel('Top N Recommendations')
plt.ylabel('Score')
plt.title('Precision and Recall vs. Top N Recommendations')
plt.legend()
plt.grid(True)
plt.show()
