## Geração dos DataSets de Clientes, Produtos e Interações

In [1]:
! pip install Faker



### Importação das bibliotecas

In [3]:
import string
import random
from faker import Faker
from uuid import uuid4

import pandas as pd
import numpy as np

### Variáveis de configuração

In [9]:
# Base row count; the dataset cells below derive their sizes from it.
number_of_rows = 100_000

# Value written to the `action_id` column of every generated interaction.
VIEWED = 1

# Column layouts of the three generated datasets.
columns_customers = ['uuid', 'name']
columns_products = ['sku', 'title', 'description', 'price']
columns_interactions = ['customer_id', 'product_id', 'action_id', 'timestamp']

# Fake-data generator created with the 'pt-BR' locale.
fake = Faker('pt-BR')

### Gerando DataSet de Clientes

In [3]:
# Build the customers dataset: one row per synthetic customer, holding a
# random UUID4 string and a Faker-generated name.
# The row count is number_of_rows / 100. An integer `range` is used as the
# loop counter instead of a float-valued `np.arange`, whose elements were
# never read.
df_customers = pd.DataFrame(
    columns=columns_customers,
    data=[
        [str(uuid4()), fake.name()]
        for _ in range(number_of_rows // 100)
    ],
)

# Persist the dataset. index=False keeps the positional index out of the
# file, so reading it back does not produce a stray "Unnamed: 0" column.
df_customers.to_csv('customers.csv', index=False)

### Gerando DataSet de Produtos

In [10]:
# Build the products dataset: each row has a random 8-character SKU, a
# Faker sentence as title, a Faker text as description and an integer price
# between 1 and 1000 (inclusive).
# The row count is number_of_rows / 1000, computed with integer division so
# the loop counter is a plain `range` and not a float `np.arange`.

# Characters a SKU is drawn from. string.hexdigits contains both lower- and
# upper-case a-f; built once here since it does not change per row.
sku_alphabet = list(string.hexdigits)

df_products = pd.DataFrame(
    columns=columns_products,
    data=[
        [
            # Sampled with replacement; uniqueness of SKUs is not enforced.
            ''.join(np.random.choice(sku_alphabet, 8)),
            fake.sentence(),
            fake.text(),
            random.randint(1, 1000),
        ]
        for _ in range(number_of_rows // 1000)
    ],
)

# Persist the dataset as CSV and as a JSON array of records.
# index=False keeps the positional index out of the CSV, so reading it back
# does not produce a stray "Unnamed: 0" column.
df_products.to_csv('products.csv', index=False)
df_products.to_json('products.json', orient='records', indent=4)

### Gerando DataSet de Interações

In [6]:
# Build the interactions dataset: `number_of_rows` events, each pairing a
# random customer with a random product, tagged with the VIEWED action and a
# timestamp drawn from the last day.

# Reload the persisted datasets. The SKU column is forced to str so that a
# file whose SKUs all happen to look numeric is not parsed as numbers.
df_customers = pd.read_csv('customers.csv')
df_products = pd.read_csv('products.csv', dtype={'sku': str})

# Only a random 70% of the customers take part in the interactions.
df_sample_customers = df_customers.sample(frac=0.7).reset_index(drop=True)

# Draw all customer/product pairs in two vectorised calls (uniform, with
# replacement) instead of calling DataFrame.sample() twice per generated row.
df_interactions = pd.DataFrame(
    {
        'customer_id': np.random.choice(
            df_sample_customers['uuid'].to_numpy(), size=number_of_rows
        ),
        'product_id': np.random.choice(
            df_products['sku'].to_numpy(), size=number_of_rows
        ),
        'action_id': VIEWED,
        # POSIX timestamp (float) of a random moment between one day ago and now.
        'timestamp': [
            fake.date_time_between(start_date='-1d', end_date='now').timestamp()
            for _ in range(number_of_rows)
        ],
    },
    columns=columns_interactions,
)

# Persist the dataset. index=False keeps the positional index out of the file.
df_interactions.to_csv('interactions.csv', index=False)