### Focos principais — otimização de vendas, projeção de ROI e recomendação de produtos — no projeto da empresa DataWise Solutions

In [1]:
# Coleta e Processamento de Dados
# Vamos carregar o arquivo CSV fornecido para verificar sua estrutura e conteúdo.
# importando as bibliotecas

import pandas as pd

In [16]:
# Load the dataset from 'ifood_df.csv' (path is relative to the working directory).
data = pd.read_csv(filepath_or_buffer='ifood_df.csv')

In [4]:
# Preview the first ten rows to understand how the data is structured.
data.head(n=10)

Unnamed: 0,Income,Kidhome,Teenhome,Recency,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds,...,marital_Together,marital_Widow,education_2n Cycle,education_Basic,education_Graduation,education_Master,education_PhD,MntTotal,MntRegularProds,AcceptedCmpOverall
0,58138.0,0,0,58,635,88,546,172,88,88,...,0,0,0,0,1,0,0,1529,1441,0
1,46344.0,1,1,38,11,1,6,2,1,6,...,0,0,0,0,1,0,0,21,15,0
2,71613.0,0,0,26,426,49,127,111,21,42,...,1,0,0,0,1,0,0,734,692,0
3,26646.0,1,0,26,11,4,20,10,3,5,...,1,0,0,0,1,0,0,48,43,0
4,58293.0,1,0,94,173,43,118,46,27,15,...,0,0,0,0,0,0,1,407,392,0
5,62513.0,0,1,16,520,42,98,0,42,14,...,1,0,0,0,0,1,0,702,688,0
6,55635.0,0,1,34,235,65,164,50,49,27,...,0,0,0,0,1,0,0,563,536,0
7,33454.0,1,0,32,76,10,56,3,1,23,...,0,0,0,0,0,0,1,146,123,0
8,30351.0,1,0,19,14,0,24,3,3,2,...,1,0,0,0,0,0,1,44,42,0
9,5648.0,1,1,68,28,0,6,1,1,13,...,1,0,0,0,0,0,1,36,23,1


In [5]:
# Basic descriptive statistics (count, mean, std, min, quartiles, max)
# for a handful of key variables.
descritiva = data.loc[
    :, ['Income', 'MntWines', 'MntMeatProducts', 'MntGoldProds', 'Recency']
].describe()

In [6]:
# Pairwise Pearson correlation between a handful of key variables,
# to see how they relate to one another.
correlacao = data.loc[
    :, ['Income', 'MntWines', 'MntMeatProducts', 'MntGoldProds', 'Recency']
].corr(method='pearson')

In [7]:
# Show both summary tables. A bare `descritiva, correlacao` tuple is rendered
# as a single plain-text repr, which is hard to read; `display` (a builtin
# inside IPython/Jupyter kernels) renders each DataFrame as its own rich table.
display(descritiva, correlacao)

(              Income     MntWines  MntMeatProducts  MntGoldProds      Recency
 count    2205.000000  2205.000000      2205.000000   2205.000000  2205.000000
 mean    51622.094785   306.164626       165.312018     44.057143    49.009070
 std     20713.063826   337.493839       217.784507     51.736211    28.932111
 min      1730.000000     0.000000         0.000000      0.000000     0.000000
 25%     35196.000000    24.000000        16.000000      9.000000    24.000000
 50%     51287.000000   178.000000        68.000000     25.000000    49.000000
 75%     68281.000000   507.000000       232.000000     56.000000    74.000000
 max    113734.000000  1493.000000      1725.000000    321.000000    99.000000,
                    Income  MntWines  MntMeatProducts  MntGoldProds   Recency
 Income           1.000000  0.730495         0.702500      0.417653  0.006716
 MntWines         0.730495  1.000000         0.593119      0.390194  0.016470
 MntMeatProducts  0.702500  0.593119         1.000000 

In [8]:
# Feature engineering: new columns to better understand customer behaviour.
# Per-capita income: Income divided by the household size used here,
# i.e. Kidhome + Teenhome + 1.
# NOTE(review): the "+ 1" counts a single adult per household. If couples
# should count as two adults (the frame carries marital_* indicator columns),
# this divisor needs adjusting — confirm the intended definition.
data['Renda_per_Capita'] = data['Income'].div(
    data['Kidhome'].add(data['Teenhome']).add(1)
)

In [9]:
# Engagement rate: total amount spent on products (MntTotal) as a share of
# income, used as a proxy for how engaged the customer is with the brand.
data['Taxa_Engajamento'] = data['MntTotal'].div(data['Income'])

In [10]:
# Days since last purchase: bucket 'Recency' into three ordered categories.
#   Recency <= 30        -> 'recente'
#   30 < Recency <= 60   -> 'moderado'
#   Recency > 60         -> 'distante'

# Boolean masks spelling out the rule above. They are not consumed by the
# pd.cut call below; kept so any later cell that refers to `conditions`
# still works — TODO confirm whether anything uses it and drop it if not.
conditions = [
    (data['Recency'] <= 30),
    (data['Recency'] > 30) & (data['Recency'] <= 60),
    (data['Recency'] > 60)
]
choices = ['recente', 'moderado', 'distante']

# Open-ended outer edges make the bins agree with the rule above: with the
# default right-closed intervals this yields (-inf, 30], (30, 60], (60, inf).
# A finite upper edge of 100 would silently turn any Recency above 100 into
# NaN instead of 'distante'. pd.cut already returns an ordered categorical,
# so no extra pd.Categorical wrapper is needed.
data['Dias_Ultima_Compra'] = pd.cut(
    data['Recency'],
    bins=[float('-inf'), 30, 60, float('inf')],
    labels=choices,
)

In [14]:
# Preview the first ten rows of the newly created features.
# These features may help reveal deeper patterns in the data and improve modelling.
data.loc[:, ['Renda_per_Capita', 'Taxa_Engajamento', 'Dias_Ultima_Compra']].head(n=10)

Unnamed: 0,Renda_per_Capita,Taxa_Engajamento,Dias_Ultima_Compra
0,58138.0,0.026299,moderado
1,15448.0,0.000453,moderado
2,71613.0,0.01025,recente
3,13323.0,0.001801,recente
4,29146.5,0.006982,distante
5,31256.5,0.01123,recente
6,27817.5,0.01012,moderado
7,16727.0,0.004364,moderado
8,15175.5,0.00145,recente
9,1882.666667,0.006374,distante
