In [None]:
from lightfm import LightFM
from lightfm.data import Dataset
import numpy as np
import pandas as pd
from scipy import sparse
from sklearn.feature_extraction.text import TfidfVectorizer

# 1. Preparação dos dados
def prepare_features(treino_df, itens_df):
    """Build the user-feature frame and the TF-IDF item-feature matrix.

    Both frames are accessed through the ``select`` DataFrame method and the
    item rows are materialised with ``collect`` (PySpark-style API).

    Args:
        treino_df: Training frame exposing the columns ``userId``,
            ``userType``, ``historySize``, ``timeOnPageHistory`` and
            ``scrollPercentageHistory``.
        itens_df: Item frame exposing the text columns ``title`` and
            ``body``.

    Returns:
        A ``(user_features, content_features)`` tuple: the projection of
        ``treino_df`` onto the user columns listed above, and the sparse
        TF-IDF matrix (vocabulary capped at 1000 terms) with one row per
        collected item, in collection order.
    """
    # User features: plain projection of the training frame.
    user_features = treino_df.select(
        'userId',
        'userType',
        'historySize',
        'timeOnPageHistory',
        'scrollPercentageHistory'
    )

    # Item features: TfidfVectorizer expects an iterable of text documents,
    # not a DataFrame, so build one "title body" string per item. Missing
    # values are replaced by empty strings so tokenisation does not fail.
    item_rows = itens_df.select('title', 'body').collect()
    documents = [
        f"{row['title'] or ''} {row['body'] or ''}" for row in item_rows
    ]
    vectorizer = TfidfVectorizer(max_features=1000)
    content_features = vectorizer.fit_transform(documents)

    # TODO: add a temporal item feature (days elapsed since ``issued``).
    # It is intentionally not computed here because the two-element return
    # value has no slot for it; extend the item-feature matrix (or the
    # return contract) before introducing it.

    return user_features, content_features

# 2. Treinamento do modelo
def train_model(interactions, user_features, item_features):
    """Create a LightFM model with the WARP loss and fit it.

    Args:
        interactions: Interaction matrix passed as the first argument to
            ``LightFM.fit``.
        user_features: Forwarded to ``fit`` as ``user_features``.
        item_features: Forwarded to ``fit`` as ``item_features``.

    Returns:
        The fitted ``LightFM`` instance.
    """
    # Constructor hyper-parameters; the seed is fixed for repeatable runs.
    hyperparams = {
        'learning_rate': 0.05,
        'loss': 'warp',
        'random_state': 42,
        'user_alpha': 1e-6,
        'item_alpha': 1e-6,
    }
    # Side features plus the training schedule handed to ``fit``.
    fit_options = {
        'user_features': user_features,
        'item_features': item_features,
        'epochs': 30,
        'num_threads': 4,
    }

    recommender = LightFM(**hyperparams)
    recommender.fit(interactions, **fit_options)
    return recommender

# 3. Avaliação do modelo
def evaluate_model(model, test_interactions, user_features, item_features):
    """Return the mean precision@10 of ``model`` on ``test_interactions``.

    Args:
        model: Fitted model passed to ``precision_at_k``.
        test_interactions: Held-out interaction matrix to score against.
        user_features: Forwarded to ``precision_at_k`` as ``user_features``.
        item_features: Forwarded to ``precision_at_k`` as ``item_features``.

    Returns:
        The mean of the scores returned by ``precision_at_k`` with ``k=10``.
    """
    # Imported locally: the module-level imports only pull names *from*
    # lightfm and never bind the name ``lightfm`` itself, so the attribute
    # path ``lightfm.evaluation`` would raise NameError.
    from lightfm.evaluation import precision_at_k

    scores = precision_at_k(
        model,
        test_interactions,
        user_features=user_features,
        item_features=item_features,
        k=10
    )

    return scores.mean()