In [202]:
import os
import sys

import numpy as np
import pandas as pd
import pymysql
import scipy.sparse as sps
from sqlalchemy import create_engine

from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.data import Dataset
from lightfm.evaluation import auc_score

In [203]:
# Connection URL for the `invest` MySQL database. It is read from the
# INVEST_DB_URL environment variable when that is set, so real credentials do
# not have to be stored in the notebook; the fallback is the original local URL.
db_connection_str = os.environ.get('INVEST_DB_URL', 'mysql+pymysql://root:123@db/invest')
db_connection = create_engine(db_connection_str)

# Load the three source tables in full: clients, client/product interactions, products.
df_clientes = pd.read_sql('SELECT * FROM cli_Clientes', con=db_connection)
df_cliente_produto = pd.read_sql('SELECT * FROM cli_Produtos', con=db_connection)
df_produtos = pd.read_sql('SELECT * FROM Produtos', con=db_connection)

In [204]:
# Peek at two randomly chosen interaction rows; no random_state is passed,
# so the rows shown differ from run to run.
df_cliente_produto.sample(2)

Unnamed: 0,userid,produtoid,valorrendimento,dataefetivacao
2553,486,804,1168.4,2019-08-14
7367,2555,6085,,2019-11-28


In [205]:
# keep only the interactions whose userid is strictly positive
# (the surviving rows keep their original index labels; the index is not reset)

has_valid_user = df_cliente_produto['userid'].gt(0)
df_cliente_produto = df_cliente_produto[has_valid_user]

In [206]:
# create mapping dictionaries: raw id -> 0-based position of that id in its source table
# (if an id appears more than once, the position of its last occurrence is kept)

user_id = list(df_clientes['userid'].values)
user_dict = {raw_id: position for position, raw_id in enumerate(user_id)}

product_id = list(df_produtos['dataId'].values)
product_dict = {raw_id: position for position, raw_id in enumerate(product_id)}

In [207]:
# creating interactions matrix
# rows follow user_dict, columns follow product_dict; a cell is 1 when the
# client has at least one recorded interaction with the product.

n_interactions = len(df_cliente_produto)

# Translate the raw ids of every interaction row to matrix coordinates in one
# pass. Working on whole columns (rather than `df[col][i]`, which looks rows up
# by index *label*) stays correct after rows have been filtered out of
# df_cliente_produto and its index is no longer 0..n-1.
user_rows = df_cliente_produto['userid'].map(user_dict)
product_cols = df_cliente_produto['produtoid'].map(product_dict)

# Ids without an entry in the mapping dictionaries come back as NaN from
# Series.map; fail with KeyError, as a direct dictionary lookup would.
unmapped = user_rows.isna() | product_cols.isna()
if unmapped.any():
    first_bad = df_cliente_produto[unmapped].iloc[0]
    raise KeyError(
        "no matrix position for userid=" + str(first_bad['userid'])
        + ", produtoid=" + str(first_bad['produtoid']))

user_item_matrix = sps.lil_matrix((len(df_clientes), len(df_produtos)), dtype=np.int8)
# Element-wise assignment: (user_rows[k], product_cols[k]) <- 1 for every k.
# Repeated (user, product) pairs simply set the same cell again.
user_item_matrix[
    user_rows.to_numpy(dtype=np.int64),
    product_cols.to_numpy(dtype=np.int64)] = 1

# Report progress once, in the same format as the last line the row-by-row
# version left on screen, instead of writing to stdout for every row.
if n_interactions:
    sys.stdout.write(
        "\rProcessing row " + str(n_interactions - 1) + "/ " + str(n_interactions))
    sys.stdout.flush()

Processing row 48332/ 48333

In [208]:
# splitting into train and test: 20% of the interactions go to the test set,
# using a fixed-seed random state

split_rng = np.random.RandomState(2020)
interactions_train, interactions_test = random_train_test_split(
    user_item_matrix, test_percentage=0.2, random_state=split_rng)

In [209]:
# training model: LightFM with WARP loss, fitted on the training interactions only

model = LightFM(
    no_components=10, learning_rate=0.05, loss='warp', random_state=2019
).fit(interactions_train, epochs=10, num_threads=16, verbose=True)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9


In [213]:
# evaluating model: mean of the AUC values returned for the test interactions
# (the training interactions are passed via train_interactions)

auc_values = auc_score(
    model,
    interactions_test,
    train_interactions=interactions_train,
    num_threads=4,
)
auc_values.mean()

0.9892913

In [211]:
def get_recommendations(user_id, interactions_matrix, user_dict, user_products, df_produtos,
                        n_recommendations=5, fitted_model=None):
    """Return the best-scoring products that the user does not hold yet.

    Parameters
    ----------
    user_id : raw user id; must be a key of ``user_dict``.
    interactions_matrix : user x item matrix; only its ``shape`` is read, to
        learn how many items have to be scored.
    user_dict : mapping from raw user id to the user's row in the matrix.
    user_products : array-like of raw product ids (``dataId`` values) the user
        already holds; may be empty.
    df_produtos : product table with a ``dataId`` column. Row ``k`` is taken to
        be the product behind matrix column ``k`` (this is how ``product_dict``
        is built from ``df_produtos['dataId']`` in this notebook).
    n_recommendations : how many products to return (default 5).
    fitted_model : object exposing ``predict(user_index, item_indices)``. When
        omitted, the notebook-level ``model`` is used.

    Returns
    -------
    pandas.DataFrame
        Up to ``n_recommendations`` rows of ``df_produtos``, best score first.

    Raises
    ------
    KeyError
        If ``user_id`` is not present in ``user_dict``.
    """
    predictor = model if fitted_model is None else fitted_model

    n_items = interactions_matrix.shape[1]
    user_x = user_dict[user_id]
    scores = pd.Series(predictor.predict(user_x, np.arange(n_items)))

    # remove from the scores the products the user already has.
    # `scores` is indexed by matrix column position, whereas `user_products`
    # holds raw product ids, so translate the ids to positions first.
    owned_mask = df_produtos['dataId'].isin(user_products).to_numpy()
    owned_positions = np.flatnonzero(owned_mask)
    scores = scores.drop(labels=owned_positions)

    ranked_positions = scores.sort_values(ascending=False).index.tolist()
    return df_produtos.iloc[ranked_positions[:n_recommendations]]

In [212]:
# get recommendation for user_id 100
user_id = 100

# get user products: pick this user's rows with a boolean mask. A client with
# no recorded product yields an empty array here, whereas
# groupby('userid').get_group(user_id) raises KeyError for such a client.
user_products = df_cliente_produto.loc[
    df_cliente_produto['userid'] == user_id, 'produtoid'].values

get_recommendations(user_id, user_item_matrix, user_dict, user_products, df_produtos)

Unnamed: 0,dataId,AdministradorFundo,AtivoDisponivel,Carencia,CarteiraCNPJ,CNPJAdministrador,DataDeVencimento,DescontoIR,DescricaoAtivo,DescricaoCarencia,...,TipoCarteira,TipoCota,TipoIndexador,TipoRegimeCondomonioCVM,Tributacao,ValorMinimoAplicacaoAdicional,ValorMinimoAplicaInicial,ValorMinimoResgate,ValorMinimo,Vencimento
5972,5972,SANTANDER SECURITIES SERVICES BRASIL DTVM S.A.,1,,30.353.590/0001-05,03.502.968/0001-04,,22.5,O FUNDO tem por objetivo investir em ativos fi...,Qualquer dia,...,Fundo de Renda Fixa,Abertura,,Aberto,Longo prazo,30.0,30.0,30.0,,
6085,6085,SANTANDER SECURITIES SERVICES BRASIL DTVM S.A.,1,,30.509.107/0001-20,03.502.968/0001-04,,22.5,A Carteira SAM PRIVATE PI MULTIMERCADO CP FI n...,Qualquer dia,...,Fundo Multimercado,Fechamento,,Aberto,Longo prazo,30.0,30.0,30.0,,
5386,5386,SANTANDER SECURITIES SERVICES BRASIL DTVM S.A.,1,,09.216.026/0001-46,03.502.968/0001-04,,22.5,O FUNDO tem por objetivo investir em ativos fi...,Qualquer dia,...,FIC de Fundo de Renda Fixa,Fechamento,,Aberto,Longo prazo,30.0,30.0,30.0,,
6082,6082,Santander Securities Services Brtasil DTVM SA,1,,30.509.143/0001-94,03.502.968/0001-04,,22.5,A Carteira TAG EQUILÍBRIO PI WEALTH FIC FIM CP...,Qualquer dia,...,FIC de Fundo Multimercado,Fechamento,,Aberto,Longo prazo,100.0,100.0,100.0,,
2965,2965,BEM - DISTRIBUIDORA DE TÍTULOS E VALORES\nMOBI...,1,,15.586.797/0001-28,03.502.968/0001-04,,22.5,A exposição dos ativos ao risco depende da Dat...,Qualquer dia,...,FIC de Fundo Multimercado,Fechamento,,Aberto,Longo prazo,500.0,500.0,500.0,,
