<a href="https://colab.research.google.com/github/ewertonUrso/99/blob/main/fast_shop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

##############################
# 1. Importing data about SALES
# Loads the sales file, keeps only the sales dated after 1900-01-01 and up
# to the end of 2019, sorts them by date and normalises the column types.
f = '/content/drive/MyDrive/Fast Shop/fast_shop_vendas.csv'
sales = pd.read_csv(f)

# Parse the sale date once; every later step reuses this datetime column
# instead of parsing it again.
sales['data'] = pd.to_datetime(sales['data'])

# Date window:
# - the lower bound drops the records dated 1900-01-01 or earlier;
# - January and February 2020 are dropped because these months are not
#   complete, which also removes any later (e.g. 2029) dates.
# The upper bound is exclusive on 2020-01-01 so that a sale carrying a time
# of day on 2019-12-31 is kept (`<= '2019-12-31'` compares against midnight
# and would discard it). Rows with a missing date fail both comparisons and
# are dropped as well.
sales = sales[(sales['data'] > '1900-01-01')
              & (sales['data'] < '2020-01-01')].copy()  # independent frame

# Sorting the dataset by the sale's date; a stable sort keeps the file order
# of the sales made on the same date, so the result is reproducible.
sales = sales.sort_values('data', kind='mergesort')

# Day and year-month of the sale, as text. They are derived after the
# filtering so that missing dates cannot turn the integer parts into floats
# (which would produce labels such as '12.0').
# The month is not zero-padded (e.g. '2019-1'), as in the original output.
sales['dia_venda'] = sales['data'].dt.day.astype('string')
sales['mes_venda'] = (sales['data'].dt.year.astype('string')
                      + '-'
                      + sales['data'].dt.month.astype('string'))

# Identifiers and categories are stored as pandas strings
sales = sales.astype({name: 'string'
                      for name in ['produto', 'linha', 'vendedor_id',
                                   'cliente_id', 'categoria', 'nf']})

# Monetary columns use a decimal comma; swap it for a dot (literal
# replacement, not a regular expression) before converting to float
sales['valor'] = sales['valor'].str.replace(',', '.', regex=False).astype('float')
sales['margem'] = sales['margem'].str.replace(',', '.', regex=False).astype('float')
sales['custo'] = sales['custo'].str.replace(',', '.', regex=False).astype('float')

# NOTE(review): astype('bool') maps NaN and every non-empty string to True;
# confirm that ped_app is stored as 0/1 (or True/False) in the file.
sales['ped_app'] = sales['ped_app'].astype('bool')

##############################
# 2. Importing data about SELLERS
# In the original data some sellers had their observation duplicated,
# but the repetition was deleted
f = '/content/drive/MyDrive/Fast Shop/fast_shop_vendedores.csv'

# Read the file, store the three columns as pandas strings and
# rename 'sexo' to 'sexo_vendedor'
sellers = (
    pd.read_csv(f)
    .astype({'vendedor_id': 'string',
             'filial': 'string',
             'sexo': 'string'})
    .rename(columns={'sexo': 'sexo_vendedor'})
)

##############################
# 3. Importing data about BRANCHES
# The original data brings duplicated branches with different states.
# To work around this issue, the state of each branch was chosen
# based on the state of the branch's customers
f = '/content/drive/MyDrive/Fast Shop/fast_shop_filiais.csv'

# Read the file and store both columns as pandas strings
branches = pd.read_csv(f).astype({'filial': 'string',
                                  'cod_uf': 'string'})

##############################
# 4. Importing data about CUSTOMERS
f = '/content/drive/MyDrive/Fast Shop/fast_shop_clientes.csv'

# Read the file, set the column types ('idade' as a 64-bit integer, the
# others as pandas strings) and rename 'sexo' and 'idade' so that their
# names state they describe the customer
customers = (
    pd.read_csv(f)
    .astype({'cliente_id': 'string',
             'sexo': 'string',
             'idade': 'int64',
             'cod_uf': 'string'})
    .rename(columns={'sexo': 'sexo_cliente',
                     'idade': 'idade_cliente'})
)

##############################
# 5. Importing data about UF
f = '/content/drive/MyDrive/Fast Shop/fast_shop_uf.csv'

# Read the file and store every column used below as a pandas string
uf = pd.read_csv(f).astype({'cod_uf': 'string',
                            'Unidade': 'string',
                            'UF': 'string',
                            'Região': 'string'})

# Strip the 'Região ' prefix from the region names
uf['Região'] = uf['Região'].str.replace('Região ', '')

# Lower-case names for the 'Unidade' and 'Região' columns
uf = uf.rename(columns={'Unidade': 'unidade', 'Região': 'região'})

##############################
# Merging datasets
# Sales are joined to their seller, then to their customer and finally to
# the UF table through the 'cod_uf' key. merge() defaults to an inner join,
# so rows without a match on any of the keys are dropped.
# NOTE(review): `branches` is not used in this join - confirm that this is
# intended.
d = (
    sales
    .merge(sellers, on='vendedor_id')
    .merge(customers, on='cliente_id')
    .merge(uf, on='cod_uf')
)

# Inspection expressions: size of the merged data and the first rows of
# branch '18'. Their values are not stored anywhere.
d.shape
d[d['filial'] == '18'].head()

##############################
# Exporting data
# Writes the merged dataset as a comma-separated file, without the index
f = '/content/drive/MyDrive/Fast Shop/fast_shop_data.csv'
d.to_csv(f, sep=',', index=False)
