In [2]:
import kagglehub
import shutil
import os
import pandas as pd
import dash
from dash import dcc, html
import plotly.express as px

  from .autonotebook import tqdm as notebook_tqdm


# Dashboard

In [4]:
# Directory that holds the Olist CSV files, relative to the notebook.
data_path = "./data"


def _read_olist_csv(file_name):
    """Read the CSV file `file_name` located under `data_path` into a DataFrame."""
    return pd.read_csv(f"{data_path}/{file_name}")


# One DataFrame per CSV file; variable names mirror the file names.
customers_dataset = _read_olist_csv("olist_customers_dataset.csv")
geolocation_dataset = _read_olist_csv("olist_geolocation_dataset.csv")
orders_dataset = _read_olist_csv("olist_orders_dataset.csv")
order_items_dataset = _read_olist_csv("olist_order_items_dataset.csv")
order_payments_dataset = _read_olist_csv("olist_order_payments_dataset.csv")
order_reviews_dataset = _read_olist_csv("olist_order_reviews_dataset.csv")
products_dataset = _read_olist_csv("olist_products_dataset.csv")
sellers_dataset = _read_olist_csv("olist_sellers_dataset.csv")

### 1. Frequency of each order status

In [5]:
# Count how many orders fall into each status and expose the result as a
# two-column frame ("Status", "Count") ready for plotting.
order_status_counts = (
    orders_dataset["order_status"]
    .value_counts()
    .reset_index()
    .set_axis(["Status", "Count"], axis=1)
)
order_status_counts

Unnamed: 0,Status,Count
0,delivered,96478
1,shipped,1107
2,canceled,625
3,unavailable,609
4,invoiced,314
5,processing,301
6,created,5
7,approved,2


In [6]:
# Bar chart of the number of orders per status; each status gets its own
# colour and every bar is annotated with its count.
fig_orders = (
    px.bar(
        order_status_counts,
        x="Status",
        y="Count",
        text="Count",
        color="Status",
        color_discrete_sequence=px.colors.qualitative.Set2,
        title="📦 Order Status in E-Commerce",
    )
    # Thin black outline around every bar.
    .update_traces(marker_line_width=1, marker_line_color="black")
    .update_layout(
        template="plotly_white",
        title_font_size=20,
        xaxis_title="Order Status",
        yaxis_title="Order count",
        xaxis_tickangle=-30,
    )
)

fig_orders

### 2. Frequency of each payment type

In [7]:
# Count how many payment records use each payment type and expose the result
# as a two-column frame ("Payment type", "Count") ready for plotting.
payment_type_counts = (
    order_payments_dataset["payment_type"]
    .value_counts()
    .reset_index()
    .set_axis(["Payment type", "Count"], axis=1)
)
payment_type_counts

Unnamed: 0,Payment type,Count
0,credit card,76795
1,boleto,19784
2,voucher,5775
3,debit card,1529
4,not defined,3


In [8]:
# Bar chart of the number of payment records per payment type; each type gets
# its own colour and every bar is annotated with its count.
fig_payments = (
    px.bar(
        payment_type_counts,
        x="Payment type",
        y="Count",
        text="Count",
        color="Payment type",
        color_discrete_sequence=px.colors.qualitative.Set1,
        title="💳 Payment Types in E-Commerce",
    )
    # Thin black outline around every bar.
    .update_traces(marker_line_width=1, marker_line_color="black")
    .update_layout(
        template="plotly_white",
        title_font_size=20,
        xaxis_title="Payment Type",
        yaxis_title="Payment Count",
        xaxis_tickangle=-30,
    )
)

fig_payments

### 3. Monthly purchase period

In [9]:
# Parse the purchase timestamps and tag every order with its calendar month.
# Both assignments modify `orders_dataset` in place (later cells see the parsed
# column and the extra "Year-month" column).
orders_dataset["order_purchase_timestamp"] = pd.to_datetime(orders_dataset["order_purchase_timestamp"])
orders_dataset["Year-month"] = orders_dataset["order_purchase_timestamp"].dt.to_period("M")

# Number of orders per month, in chronological order.
# The index and the values are named explicitly instead of relying on the
# column names produced by `value_counts().reset_index()`: those names differ
# between pandas 1.x ("index", "Year-month") and pandas 2.x ("Year-month",
# "count"), and with the 1.x naming a later `["Year-month"].astype(str)` would
# stringify the counts instead of the periods.
orders_by_month = (
    orders_dataset["Year-month"]
    .value_counts()
    .sort_index()
    .rename_axis("Year-month")
    .reset_index(name="Count")
)
# Convert the monthly periods to plain "YYYY-MM" strings for use as axis labels.
orders_by_month["Year-month"] = orders_by_month["Year-month"].astype(str)
orders_by_month.head(5)

Unnamed: 0,Year-month,Count
0,2016-09,4
1,2016-10,324
2,2016-12,1
3,2017-01,800
4,2017-02,1780


In [10]:
# Line chart (with point markers) of the number of orders placed per month.
fig_timestamp = (
    px.line(
        orders_by_month,
        x="Year-month",
        y="Count",
        markers=True,
        line_shape="linear",
        color_discrete_sequence=["#1f77b4"],
        labels={"Year-month": "Period", "Count": "Quantity of Orders"},
        title="📅 Monthly Order Count",
    )
    .update_layout(
        template="plotly_white",
        plot_bgcolor="rgba(0,0,0,0)",  # transparent plotting area
        showlegend=False,
        title_font_size=20,
        xaxis_title="Period",
        yaxis_title="Quantity of Orders",
        xaxis_tickangle=-45,
    )
    # Grid lines on both axes.
    .update_xaxes(showgrid=True)
    .update_yaxes(showgrid=True)
)

fig_timestamp

### 4. Top 10 product categories by revenue (price + freight)

In [11]:
# Join every order item with its product record so each item carries its
# product category.
df_orders_items = pd.merge(order_items_dataset, products_dataset, on="product_id")

# Amount per item = item price + freight.
df_orders_items["total_price"] = df_orders_items["price"].add(df_orders_items["freight_value"])

# Sum the amounts per category, then keep the ten largest totals.
category_totals = df_orders_items.groupby("product_category_name")["total_price"].sum()
revenue_by_category = category_totals.reset_index()
top_categories = revenue_by_category.nlargest(10, "total_price").rename(
    columns={
        "product_category_name": "Product category",
        "total_price": "Total price",
    }
)
top_categories

Unnamed: 0,Product category,Total price
11,beleza saude,1441248.07
66,relogios presentes,1305541.61
13,cama mesa banho,1241681.72
32,esporte lazer,1156656.48
44,informatica acessorios,1059272.4
54,moveis decoracao,902511.79
72,utilidades domesticas,778397.77
26,cool stuff,719329.95
8,automotivo,685384.32
40,ferramentas jardim,584219.21


In [12]:
# Horizontal bar chart of the ten categories with the highest revenue
# (price + freight); each bar is annotated with its total.
fig_product_category = px.bar(
    top_categories, 
    x="Total price", 
    y="Product category", 
    title="💰 Top 10 Product Categories by Revenue", 
    labels={"Total price": "Revenue", "Product category": "Category"},
    orientation="h",
    text="Total price",
    color="Product category",
    color_discrete_sequence=px.colors.qualitative.Set3,
)

fig_product_category.update_layout(
    title_font_size=20,
    # The x axis shows summed revenue, not a count; the title now agrees with
    # the `labels` mapping and the chart title above.
    xaxis_title="Revenue",
    yaxis_title="Category",
    template="plotly_white",
    plot_bgcolor="rgba(0,0,0,0)",
    xaxis_tickangle=0,
    # The category names are already on the y axis, so the legend is hidden.
    showlegend=False
)

fig_product_category.update_traces(
    textposition="outside",
    marker=dict(line=dict(width=1, color="black"))
)

fig_product_category

### 5. Average value for each type of payment

In [13]:
# Pair every payment record with its order (inner join on the order id).
df_payments = pd.merge(order_payments_dataset, orders_dataset, on="order_id")

# Mean payment value per payment type, highest first.
avg_ticket = (
    df_payments.groupby("payment_type")["payment_value"]
    .mean()
    .reset_index()
    .set_axis(["Payment type", "Mean value"], axis=1)
    .sort_values(by="Mean value", ascending=False)
)
avg_ticket

Unnamed: 0,Payment type,Mean value
1,credit card,163.319021
0,boleto,145.034435
2,debit card,142.57017
4,voucher,65.703354
3,not defined,0.0


In [14]:
# Bar chart of the mean payment value per payment type; bars are annotated
# automatically with their value.
fig_avg_ticket = px.bar(
    avg_ticket, 
    x="Payment type", 
    y="Mean value", 
    title="💳 Average Ticket by Payment Type",
    # Keys must match the column names of `avg_ticket`; the previous keys
    # ("payment_type", "payment_value") referred to columns that no longer
    # exist after the rename, so the mapping was silently ignored.
    labels={"Payment type": "Payment Type", "Mean value": "Mean Value"},
    text_auto=True,
)

# Thin black outline around every bar.
fig_avg_ticket.update_traces(
    marker=dict(line=dict(width=1, color="black"))
)

fig_avg_ticket.update_layout(
    title_font_size=20,
    xaxis_title="Payment Type",
    yaxis_title="Mean Value",
    xaxis_tickangle=-30,
    template="plotly_white",
)

fig_avg_ticket

### 6. Product Categories by Revenue

In [15]:
# `top_categories` (with `df_orders_items` and `revenue_by_category`) is
# already built in section 4 from the same merge and aggregation. It is reused
# here rather than recomputed, so the two charts share one table and the merge
# runs only once.
top_categories

Unnamed: 0,Product category,Total price
11,beleza saude,1441248.07
66,relogios presentes,1305541.61
13,cama mesa banho,1241681.72
32,esporte lazer,1156656.48
44,informatica acessorios,1059272.4
54,moveis decoracao,902511.79
72,utilidades domesticas,778397.77
26,cool stuff,719329.95
8,automotivo,685384.32
40,ferramentas jardim,584219.21


In [16]:
# Vertical bar chart of the ten categories with the highest revenue; each
# category gets its own colour and every bar is annotated with its total.
fig_top_categories = (
    px.bar(
        top_categories,
        x="Product category",
        y="Total price",
        text="Total price",
        color="Product category",
        color_discrete_sequence=px.colors.qualitative.Set3,
        labels={"Product category": "Category", "Total price": "Revenue"},
        title="💵 Top 10 Product Categories by Revenue",
    )
    # Value labels above the bars, thin black outline around each bar.
    .update_traces(
        textposition="outside",
        marker_line_width=1,
        marker_line_color="black",
    )
    .update_layout(
        template="plotly_white",
        plot_bgcolor="rgba(0,0,0,0)",  # transparent plotting area
        title_font_size=20,
        xaxis_title="Category",
        yaxis_title="Revenue",
        xaxis_tickangle=-45,
    )
)

fig_top_categories

## -------------------------------------------------------------------

## Dash development

In [17]:
app = dash.Dash(__name__)

# Single-page layout: a heading followed by the six figures built above,
# stacked vertically in the order they were created.
app.layout = html.Div([
    html.H1("E-Commerce Dashboard"),
    dcc.Graph(figure=fig_orders),
    dcc.Graph(figure=fig_payments),
    dcc.Graph(figure=fig_timestamp),
    dcc.Graph(figure=fig_product_category),
    dcc.Graph(figure=fig_avg_ticket),
    dcc.Graph(figure=fig_top_categories)
])

if __name__ == "__main__":
    from threading import Timer
    import webbrowser

    # One definition of the address, used both for the URL opened in the
    # browser and for the server itself, so the two cannot diverge.
    DASH_HOST = "127.0.0.1"
    DASH_PORT = 8050

    def open_browser():
        """Open the dashboard URL in a new browser window."""
        webbrowser.open_new(f"http://{DASH_HOST}:{DASH_PORT}/")

    # Open the browser one second after starting, from a background timer,
    # because the server call below blocks.
    Timer(1, open_browser).start()

    # `run_server` is deprecated in favour of `run` and no longer works on
    # Dash 3. Prefer `run` when it exists and fall back to `run_server` on
    # older Dash releases; `or` short-circuits, so `run_server` is only
    # looked up when `run` is missing.
    run_app = getattr(app, "run", None) or app.run_server
    run_app(host=DASH_HOST, port=DASH_PORT, debug=False)