In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

from src.config import DADOS_BRUTOS, DADOS_TRATADOS
from src.auxiliares_ml import downcast_dataframe

In [2]:
# Carregar os dados

df = pd.read_parquet(DADOS_TRATADOS)

In [3]:
# Filtrar colunas relevantes

df = df[['Customer Name', 'Product ID', 'Product Name', 'Sales', 'Category', 'Sub-Category']]

In [4]:
# Codificar os nomes dos clientes e IDs dos produtos

customer_encoder = LabelEncoder()
product_encoder = LabelEncoder()
category_encoder = LabelEncoder()
subcategory_encoder = LabelEncoder()

df['Customer ID Enc'] = customer_encoder.fit_transform(df['Customer Name'])
df['Product ID Enc'] = product_encoder.fit_transform(df['Product ID'])
df['Category Enc'] = category_encoder.fit_transform(df['Category'])
df['Sub-Category Enc'] = subcategory_encoder.fit_transform(df['Sub-Category'])

In [5]:
# Normalizar as vendas

scaler = MinMaxScaler()
df['Sales Normalized'] = scaler.fit_transform(df[['Sales']])

In [6]:
# Criar um conjunto de dados TensorFlow

customer_ids = df['Customer ID Enc'].values
product_ids = df['Product ID Enc'].values
category_ids = df['Category Enc'].values
subcategory_ids = df['Sub-Category Enc'].values
sales = df['Sales Normalized'].values

In [7]:
# Definir dimensões dos embeddings

num_customers = len(customer_encoder.classes_)
num_products = len(product_encoder.classes_)
num_categories = len(category_encoder.classes_)
num_subcategories = len(subcategory_encoder.classes_)
embedding_dim = 16

In [8]:
# Criando os embeddings

customer_embeddings = tf.keras.layers.Embedding(input_dim=num_customers, output_dim=embedding_dim)
product_embeddings = tf.keras.layers.Embedding(input_dim=num_products, output_dim=embedding_dim)
category_embeddings = tf.keras.layers.Embedding(input_dim=num_categories, output_dim=embedding_dim)
subcategory_embeddings = tf.keras.layers.Embedding(input_dim=num_subcategories, output_dim=embedding_dim)

In [9]:
# Definição do modelo

customer_input = tf.keras.layers.Input(shape=(1,))
product_input = tf.keras.layers.Input(shape=(1,))
category_input = tf.keras.layers.Input(shape=(1,))
subcategory_input = tf.keras.layers.Input(shape=(1,))

customer_vec = tf.keras.layers.Flatten()(customer_embeddings(customer_input))
product_vec = tf.keras.layers.Flatten()(product_embeddings(product_input))
category_vec = tf.keras.layers.Flatten()(category_embeddings(category_input))
subcategory_vec = tf.keras.layers.Flatten()(subcategory_embeddings(subcategory_input))

In [10]:
# Concatenar todas as informações

concat_vec = tf.keras.layers.Concatenate()([customer_vec, product_vec, category_vec, subcategory_vec])

In [11]:
# Camadas densas para aprender padrões

dense_1 = tf.keras.layers.Dense(64, activation='relu')(concat_vec)
dense_2 = tf.keras.layers.Dense(32, activation='relu')(dense_1)
output = tf.keras.layers.Dense(1, activation='linear')(dense_2)

model = tf.keras.Model([customer_input, product_input, category_input, subcategory_input], output)
model.compile(optimizer='adam', loss='mse')

In [12]:
# Treinar o modelo

model.fit([customer_ids, product_ids, category_ids, subcategory_ids], sales, epochs=20, batch_size=32, verbose=1)

Epoch 1/20
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 9.4769e-04
Epoch 2/20
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 5.6021e-04
Epoch 3/20
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 2.3102e-04
Epoch 4/20
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 1.8066e-04
Epoch 5/20
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 1.0529e-04
Epoch 6/20
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 8.4916e-05
Epoch 7/20
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 5.7225e-05
Epoch 8/20
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 4.0977e-05
Epoch 9/20
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 3.0727e-05
Epoch 10/20
[1m307/307[0m [32m━━━━━━━━━━━━━━━━━━━━[

<keras.src.callbacks.history.History at 0x2c9585ebe10>

### NOTICE:

"Let's use the already trained model to generate the best recommendations for the customer, **Irene Maddox."**

In [14]:
# Obter o ID da cliente "Irene Maddox"

irene_id = customer_encoder.transform(['Irene Maddox'])[0]

In [15]:
# Criar recomendações para Irene

product_scores = model.predict([
    np.full((num_products,), irene_id),
    np.arange(num_products),
    np.zeros(num_products),  # Categoria neutra
    np.zeros(num_products)   # Subcategoria neutra
])

[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


In [16]:
# Obter os 7 melhores produtos recomendados

recommended_product_ids = np.argsort(product_scores.flatten())[::-1][:7]
recommended_products = product_encoder.inverse_transform(recommended_product_ids)

In [17]:
# Criar um DataFrame para exibir as recomendações

recommended_df = df[df['Product ID'].isin(recommended_products)][['Product ID', 'Product Name', 'Category', 'Sub-Category']].drop_duplicates()
recommended_df = recommended_df.set_index('Product ID').loc[recommended_products].reset_index()

recommended_df = pd.DataFrame({
    'Product ID': recommended_products
}).merge(df[['Product ID', 'Product Name', 'Category', 'Sub-Category']], on='Product ID', how='left').drop_duplicates()

recommended_df.insert(0, 'Ranking', range(1, len(recommended_df) + 1))

### NOTICE 2:

Here are the top seven recommendations for **Irene Maddox."**

In [19]:
# Dataframe com as top 7 recomendações para "Irene Maddox"

recommended_df

Unnamed: 0,Ranking,Product ID,Product Name,Category,Sub-Category
0,1,TEC-MA-10002412,Cisco TelePresence System EX90 Videoconferenci...,Technology,Machines
1,2,TEC-CO-10004722,Canon imageCLASS 2200 Advanced Copier,Technology,Copiers
6,3,OFF-BI-10001120,Ibico EPK-21 Electric Binding System,Office Supplies,Binders
9,4,TEC-AC-10003870,Logitech Z-906 Speaker sys - home theater - 5....,Technology,Accessories
13,5,TEC-PH-10002885,Apple iPhone 5,Technology,Phones
19,6,OFF-SU-10000151,High Speed Automatic Electric Letter Opener,Office Supplies,Supplies
22,7,FUR-CH-10004086,Hon 4070 Series Pagoda Armless Upholstered Sta...,Furniture,Chairs


### NOTICE 3:

Creating a function to generalize recommendations for any chosen customer.

In [21]:
# Função para determinar as melhores 7 recomendações para o cliente escolhido 

def recomendar_produtos(nome_cliente, df, model, customer_encoder, product_encoder, num_products):
    """
    Retorna os 7 melhores produtos recomendados para um cliente específico.

    Parâmetros:
    - nome_cliente (str): Nome do cliente para gerar recomendações.
    - df (DataFrame): DataFrame original contendo os dados.
    - model (tf.keras.Model): Modelo treinado para recomendações.
    - customer_encoder (LabelEncoder): Encoder dos nomes dos clientes.
    - product_encoder (LabelEncoder): Encoder dos produtos.
    - num_products (int): Número total de produtos.

    Retorno:
    - DataFrame com os 7 produtos recomendados.
    """
    try:
        cliente_id = customer_encoder.transform([nome_cliente])[0]
    except ValueError:
        # Cliente não encontrado, atribuímos um ID fictício (-1)
        cliente_id = -1

    if cliente_id == -1:
        # Cliente novo → Recomendação baseada nos produtos mais vendidos
        produtos_mais_vendidos = (
            df.groupby("Product ID")["Sales"]
            .sum()
            .reset_index()
            .sort_values(by="Sales", ascending=False)
            .head(7)
        )
        recommended_products = product_encoder.inverse_transform(produtos_mais_vendidos["Product ID"].values)
    else:
        # Cliente conhecido → Prever pontuação para todos os produtos
        product_scores = model.predict([
            np.full((num_products,), cliente_id),
            np.arange(num_products),
            np.zeros(num_products),
            np.zeros(num_products)
        ])

        # Obter os 7 melhores produtos recomendados
        recommended_product_ids = np.argsort(product_scores.flatten())[::-1][:7]
        recommended_products = product_encoder.inverse_transform(recommended_product_ids)

    # Criar um DataFrame com as recomendações
    recommended_df = pd.DataFrame({'Product ID': recommended_products})
    recommended_df = recommended_df.merge(
        df[['Product ID', 'Product Name', 'Category', 'Sub-Category']],
        on='Product ID',
        how='left'
    ).drop_duplicates()

    recommended_df.insert(0, 'Ranking', range(1, len(recommended_df) + 1))

    return recommended_df

### NOTICE 4:

Testing the function for the customer **"Darrin Van Huff."**

In [23]:
# Utilizando a função

resultado = recomendar_produtos("Darrin Van Huff", df, model, customer_encoder, product_encoder, num_products)

print(50*'___')
print("Recomendações sugeridas:")
print(50*'___')


[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
______________________________________________________________________________________________________________________________________________________
Recomendações sugeridas:
______________________________________________________________________________________________________________________________________________________


In [24]:
# Dataframe com as top 7 recomendações para "Darrin Van Huff"

resultado

Unnamed: 0,Ranking,Product ID,Product Name,Category,Sub-Category
0,1,TEC-MA-10002412,Cisco TelePresence System EX90 Videoconferenci...,Technology,Machines
1,2,TEC-CO-10004722,Canon imageCLASS 2200 Advanced Copier,Technology,Copiers
6,3,OFF-BI-10001120,Ibico EPK-21 Electric Binding System,Office Supplies,Binders
9,4,OFF-SU-10000151,High Speed Automatic Electric Letter Opener,Office Supplies,Supplies
12,5,TEC-PH-10002885,Apple iPhone 5,Technology,Phones
18,6,OFF-AP-10000275,Sanyo Counter Height Refrigerator with Crisper...,Office Supplies,Appliances
21,7,OFF-BI-10000545,GBC Ibimaster 500 Manual ProClick Binding System,Office Supplies,Binders


### CONCLUSION:

We now have a versatile and scalable model that can provide the best recommendations for any selected customer.