1️⃣ Carregar os dados do Neo4j

In [None]:
import os

import pandas as pd
from py2neo import Graph

# Connection settings are read from the environment so real credentials never
# need to be committed with the notebook. The fallbacks are the values this
# cell used before, so a local default setup keeps working unchanged.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "password")

graph = Graph(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

# One row per RATED relationship: the user's id, the movie's id and the score.
query = """
MATCH (u:User)-[r:RATED]->(m:Movie)
RETURN u.id AS user_id, m.id AS movie_id, r.score AS rating
"""
data = graph.run(query).to_data_frame()

# An empty result cannot be encoded or split by the following cells, so stop
# here with an explicit message instead of a less obvious error further down.
if data.empty:
    raise ValueError("Neo4j query returned no (User)-[:RATED]->(Movie) rows")


2️⃣ Preparar os dados para Machine Learning

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Encoders that map the raw user / movie IDs to integer codes.
encoder_user = LabelEncoder()
encoder_movie = LabelEncoder()

# Encode into a copy of the feature columns rather than overwriting `data`.
# Overwriting made this cell unsafe to re-run: the second run would fit the
# encoders on the already-encoded integers and lose the raw-ID mapping.
X = data[["user_id", "movie_id"]].copy()
X["user_id"] = encoder_user.fit_transform(X["user_id"])
X["movie_id"] = encoder_movie.fit_transform(X["movie_id"])

# Target: the rating column returned by the query.
y = data["rating"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


3️⃣ Treinar um modelo de Regressão Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

# fit() returns the estimator itself, so construction and training are chained.
# The fixed random_state keeps the trained forest reproducible.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Show the fitted estimator as the cell's output.
model


4️⃣ Avaliar a performance do modelo

In [None]:
from sklearn.metrics import mean_squared_error

# Predict on the held-out split and compare against the true ratings.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

# Report the mean squared error rounded to two decimals.
print(f"Erro Quadrático Médio (MSE): {mse:.2f}")


5️⃣ Salvar o modelo treinado

In [None]:
from pathlib import Path

import joblib

MODEL_PATH = "models/recommender.pkl"

# Create the target directory first: on a fresh checkout "models/" may not
# exist, and the dump would then fail when opening the file.
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)

# NOTE(review): only the model is persisted here; the fitted LabelEncoders
# (encoder_user / encoder_movie) are not. Presumably they are needed to map raw
# IDs at prediction time; confirm whether they should be saved as well.
joblib.dump(model, MODEL_PATH)
