<a href="https://colab.research.google.com/github/benasphy/Recommendation_System_Neo4j/blob/main/Recommendation_System_Neo4j.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from neo4j import GraphDatabase
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Use the new Neo4j URI
NEO4J_URI = "neo4j+s://e3c132c9.databases.neo4j.io"
NEO4J_USER = "neo4j"  # Replace with your Neo4j username
NEO4J_PASSWORD = "c4GoxNxc7ZU4sv8reohTvAh9RH3rf1GXXmielB2TZps"  # Replace with your Neo4j password

# Connect to Neo4j
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

def fetch_data():
    query = """
    MATCH (p:Person)-[w:WATCHED]->(m:Movie)
    RETURN p.name AS user, m.title AS movie, w.rating AS rating
    """
    with driver.session() as session:
        result = session.run(query)
        data = [{"user": record["user"], "movie": record["movie"], "rating": record["rating"]} for record in result]
    return pd.DataFrame(data)

df = fetch_data()

# Initialize the LabelEncoder and fit it to the user names
label_encoder = LabelEncoder()
label_encoder.fit(df['user'].unique())

# Encode user names
df['user'] = label_encoder.transform(df['user'])

# Encode movie names
label_encoder_movie = LabelEncoder()
df['movie'] = label_encoder_movie.fit_transform(df['movie'])

# Prepare features and labels
X = df[['user', 'movie']]
y = df['rating']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Naive Bayes model
model = GaussianNB()
model.fit(X_train, y_train)

# Predict ratings
y_pred = model.predict(X_test)

# Calculate RMSE
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"RMSE: {rmse}")

# Recommend for a user
def recommend_for_user(user_name, model, df, label_encoder, label_encoder_movie):
    user_id = label_encoder.transform([user_name])[0]  # Encode the user name

    # Identify unseen movies
    user_movies = df[df['user'] == user_id]['movie']
    all_movies = df['movie'].unique()
    unseen_movies = pd.DataFrame({'movie': [m for m in all_movies if m not in user_movies]})
    unseen_movies['user'] = user_id

    # Predict ratings for unseen movies
    unseen_movies['predicted_rating'] = model.predict(unseen_movies[['user', 'movie']])
    recommendations = unseen_movies.sort_values(by='predicted_rating', ascending=False)

    # Decode movie titles
    recommendations['movie_title'] = label_encoder_movie.inverse_transform(recommendations['movie'])

    return recommendations[['movie_title', 'predicted_rating']]

def collaborative_filtering(user_name, df, label_encoder, label_encoder_movie):
    user_id = label_encoder.transform([user_name])[0]

    # Find movies watched by the user
    user_movies = df[df['user'] == user_id]['movie']

    # Find other users who watched the same movies
    similar_users = df[df['movie'].isin(user_movies) & (df['user'] != user_id)]['user'].unique()

    # Find movies watched by similar users
    similar_users_movies = df[df['user'].isin(similar_users)]['movie'].unique()

    # Exclude movies already watched by the user
    recommendations = [m for m in similar_users_movies if m not in user_movies.values]

    # Prepare recommendation DataFrame
    recommended_movies = pd.DataFrame({'movie': recommendations})
    recommended_movies['movie_title'] = label_encoder_movie.inverse_transform(recommended_movies['movie'])

    return recommended_movies['movie_title'].tolist()

# Example recommendations for a user
user_name = "Alice"  # Replace with the user you want to get recommendations for

# Model-based recommendations
model_recommendations = recommend_for_user(user_name, model, df, label_encoder, label_encoder_movie)
print(f"Model-based Top Recommendations for {user_name}:")
print(model_recommendations.head())

# Collaborative filtering recommendations
cf_recommendations = collaborative_filtering(user_name, df, label_encoder, label_encoder_movie)
print(f"Collaborative Filtering Recommendations for {user_name}:")
print(cf_recommendations)

# Function to generate Cypher query for Neo4j
def generate_recommendation_query(user_name, recommendations):
    queries = []
    for movie_title in recommendations:
        query = f"""
        MATCH (u:Person {{name: '{user_name}'}})
        MATCH (m:Movie {{title: '{movie_title}'}})
        MERGE (u)-[:RECOMMENDED]->(m)
        """
        queries.append(query)
    return queries

# Write collaborative filtering recommendations to Neo4j
cf_queries = generate_recommendation_query(user_name, cf_recommendations)

# Execute the recommendation queries
def execute_queries(queries):
    with driver.session() as session:
        for query in queries:
            session.run(query)

execute_queries(cf_queries)
print(f"Collaborative Filtering Recommendations for {user_name} have been written to Neo4j.")


RMSE: 0.7071067811865476
Model-based Top Recommendations for Alice:
    movie_title  predicted_rating
0    The Matrix                 4
1       Titanic                 4
2  The Notebook                 4
Collaborative Filtering Recommendations for Alice:
['Interstellar']
Collaborative Filtering Recommendations for Alice have been written to Neo4j.
