In [1]:
import pandas as pd

### 1 - Original data

In [3]:
# Dataset
df_food_delivery = pd.read_csv("dados/dataset.csv")

In [4]:
# Shape
df_food_delivery.shape

(260645, 7)

In [5]:
# Head
df_food_delivery.head()

Unnamed: 0,id_transacao,horario_pedido,localidade,nome_item,quantidade_item,latitude,longitude
0,0x7901ee,2019-01-16 18:33:00,7,bebida,2,41.794132,-88.01014
1,0x7901ee,2019-01-16 18:33:00,7,pizza,2,41.794132,-88.01014
2,0x7901ee,2019-01-16 18:33:00,7,sobremesa,2,41.794132,-88.01014
3,0x12b47f,2019-09-04 12:36:00,3,salada,1,41.88449,-87.627059
4,0x12b47f,2019-09-04 12:36:00,3,sobremesa,1,41.88449,-87.627059


In [7]:
# Unique
df_food_delivery.nunique()

id_transacao       100000
horario_pedido      76799
localidade              9
nome_item               4
quantidade_item         5
latitude                9
longitude               9
dtype: int64

### 2 - Data pivot

In [8]:
# Create a pivot table
df_pivot = df_food_delivery.pivot_table(index = ["id_transacao"], columns = ["nome_item"], values = "quantidade_item")

In [9]:
# Replace possible NA values generated in pivot, by 0 and transform the index into column
df_pivot = df_pivot.fillna(0).reset_index()

In [11]:
type(df_pivot)

pandas.core.frame.DataFrame

In [12]:
df_pivot.columns

Index(['id_transacao', 'bebida', 'pizza', 'salada', 'sobremesa'], dtype='object', name='nome_item')

In [13]:
df_pivot.head()

nome_item,id_transacao,bebida,pizza,salada,sobremesa
0,0x10000a,0.0,1.0,0.0,1.0
1,0x100058,0.0,2.0,0.0,2.0
2,0x1000c8,4.0,4.0,1.0,5.0
3,0x10014c,0.0,1.0,0.0,1.0
4,0x1001d8,3.0,3.0,0.0,3.0


In [14]:
df_pivot.shape

(100000, 5)

In [15]:
df_pivot.nunique()

nome_item
id_transacao    100000
bebida               6
pizza                6
salada               6
sobremesa            5
dtype: int64

In [16]:
# Cannot have null values
df_pivot.isnull().sum()

nome_item
id_transacao    0
bebida          0
pizza           0
salada          0
sobremesa       0
dtype: int64

### 3 - Merge column

In [18]:
# Merge "localidade" column to df_pivot2
df_pivot2 = df_pivot.merge(df_food_delivery[['id_transacao', 'localidade']])

In [19]:
df_pivot2.head()

Unnamed: 0,id_transacao,bebida,pizza,salada,sobremesa,localidade
0,0x10000a,0.0,1.0,0.0,1.0,9
1,0x10000a,0.0,1.0,0.0,1.0,9
2,0x100058,0.0,2.0,0.0,2.0,6
3,0x100058,0.0,2.0,0.0,2.0,6
4,0x1000c8,4.0,4.0,1.0,5.0,9


In [20]:
df_pivot2.nunique()

id_transacao    100000
bebida               6
pizza                6
salada               6
sobremesa            5
localidade           9
dtype: int64

### End