In [1]:
# importing the necessary libraries
import os

import dash
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from dash import dcc, html

In [2]:
# Location of the merged e-commerce CSV.
# The default is the original machine-specific path; set the MERGED_DATA_CSV
# environment variable to run the notebook elsewhere without editing this cell.
DATA_PATH = os.environ.get(
    "MERGED_DATA_CSV",
    r"C:\Users\user\Desktop\#\DA\practice\python\E - commerce\cleaned_data sets\merged_data.csv",
)

# low_memory=False makes pandas read the file in one pass instead of chunked
# type inference.
df = pd.read_csv(DATA_PATH, low_memory=False)

In [3]:
# Cast every column to an appropriate dtype so the visualisations below treat
# identifiers as text, timestamps as datetimes and measures as numbers.

# Identifiers and categorical/text data -> str.
# NOTE(review): astype(str) turns missing values into the literal string 'nan';
# confirm the cleaned CSV has no gaps in these columns.
TEXT_COLUMNS = [
    'order_id',
    'customer_id',
    'order_status',
    'product_id',
    'seller_id',
    'product_category_name',
    'customer_unique_id',
    'customer_zip_code_prefix',
    'customer_city',
    'customer_state',
    'payment_type',
    'seller_zip_code_prefix',
    'seller_city',
    'seller_state',
    'product_category_name_eng',
]

# Timestamps -> datetime64; unparseable values become NaT (errors='coerce').
DATETIME_COLUMNS = [
    'order_purchase_timestamp',
    'order_approved_at',
    'order_delivered_carrier_date',
    'order_delivered_customer_date',
    'order_estimated_delivery_date',
    'shipping_limit_date',
]

# Counts and amounts -> numeric; unparseable values become NaN (errors='coerce').
NUMERIC_COLUMNS = [
    'order_item_id',
    'price',
    'freight_value',
    'product_weight_g',
    'payment_sequential',
    'payment_installments',
    'payment_value',
]

for column in TEXT_COLUMNS:
    df[column] = df[column].astype(str)

for column in DATETIME_COLUMNS:
    df[column] = pd.to_datetime(df[column], errors='coerce')

for column in NUMERIC_COLUMNS:
    df[column] = pd.to_numeric(df[column], errors='coerce')


In [4]:
# Remove rows whose payment_type is a placeholder rather than a real method.
INVALID_PAYMENT_TYPES = ['not_defined', 'UNKNOWN']

# .copy() detaches the filtered frame from the original so that later column
# assignments on `df` do not raise SettingWithCopyWarning.
df = df[~df['payment_type'].isin(INVALID_PAYMENT_TYPES)].copy()


In [6]:
# Sanity check: list the distinct payment_type values currently present in df.
df['payment_type'].unique()

array(['credit_card', 'voucher', 'boleto', 'debit_card'], dtype=object)

In [15]:

# Initialize the Dash app
# This object receives the dashboard layout (app.layout) and is the one started
# by the __main__ guard at the end of this cell.
app = dash.Dash(__name__)

def update_layout(fig):
    """Apply the dashboard's shared look to a figure and hand it back.

    Hides grid lines and zero lines on both axes, sets white plot and paper
    backgrounds, and switches the font to 12pt Arial.

    Args:
        fig: A figure object exposing ``update_xaxes``, ``update_yaxes`` and
            ``update_layout``. It is modified in place.

    Returns:
        The same ``fig`` object, so the call can be nested inside other calls.
    """
    axis_style = {'showgrid': False, 'zeroline': False}
    fig.update_xaxes(**axis_style)
    fig.update_yaxes(**axis_style)

    base_font = {'family': "Arial", 'size': 12}
    fig.update_layout(plot_bgcolor='white', paper_bgcolor='white', font=base_font)
    return fig

# Helper that renders a sales amount as a dollar string
def format_sales(value):
    """Return ``value`` as text: a leading ``$``, thousands separators, two decimals."""
    amount = format(value, ",.2f")
    return "$" + amount

# Define the dashboard layout
#
# NOTE(review): the aggregated frames and scalar KPIs used below
# (sales_performance_daily, top_10_categories, aov, ...) are not defined in the
# visible cells; presumably they are computed in earlier cells - confirm the
# notebook still works after Restart Kernel -> Run All.

def _graph(graph_id, fig):
    """Wrap a Plotly figure in a dcc.Graph after applying the shared styling."""
    return dcc.Graph(id=graph_id, figure=update_layout(fig))


def _section(title, *children):
    """Build one padded dashboard section: an H2 heading followed by its content."""
    return html.Div(
        style={'padding': '20px'},
        children=[html.H2(title, style={'textAlign': 'left'}), *children],
    )


def _metric(text):
    """Render a headline figure (KPI) as a paragraph with a 20px font."""
    return html.P(text, style={'fontSize': '20px'})


app.layout = html.Div(style={'fontFamily': 'Arial'}, children=[
    html.Div([
        html.H1(
            "Baana Group Sales Dashboard",
            style={'textAlign': 'center', 'color': '#000', 'backgroundColor': '#fff', 'padding': '10px'},
        ),
    ]),

    _section(
        "Sales Performance Analysis",
        _graph('daily-sales', px.line(
            sales_performance_daily,
            x='order_purchase_timestamp', y='total_sales',
            title='Total Sales by Day', markers=True,
        )),
        _graph('monthly-sales', px.line(
            sales_performance_monthly,
            x='order_purchase_timestamp', y='total_sales',
            title='Total Sales by Month', markers=True,
        )),
        # Quarterly and yearly frames are sorted first so the line is drawn in
        # chronological order.
        _graph('quarterly-sales', px.line(
            sales_performance_quarterly.sort_values('order_purchase_timestamp'),
            x='order_purchase_timestamp', y='total_sales',
            title='Total Sales by Quarter', markers=True,
        )),
        _graph('yearly-sales', px.line(
            sales_performance_yearly.sort_values('order_purchase_timestamp'),
            x='order_purchase_timestamp', y='total_sales',
            title='Total Sales by Year', markers=True,
        )),
        _graph('top-10-categories', px.bar(
            top_10_categories,
            x='product_category_name', y='total_revenue',
            title='Top 10 Product Categories', color='total_revenue',
        )),
        _graph('top-10-categories-orders', px.bar(
            top_10_categories_orders,
            x='product_category_name', y='order_count',
            title='Top 10 Categories by Order Count', color='order_count',
        )),
        # format_sales keeps the AOV formatting consistent with the other
        # currency values (dollar sign, thousands separator, two decimals).
        _metric(f"Average Order Value (AOV): {format_sales(aov)}"),
    ),

    _section(
        "Payment Methods Analysis",
        _graph('payment-distribution', px.pie(
            payment_distribution,
            names='payment_type', values='count',
            title='Payment Method Distribution', hole=0.3,
        )),
    ),

    _section(
        "Order Fulfillment and Delivery Efficiency",
        _metric(f"Average Delivery Time: {average_delivery_time:.2f} days"),
        _metric(f"Percentage of Delayed Orders: {delayed_orders_percentage:.2f}%"),
    ),

    _section(
        "Geographic Insights",
        # Each treemap tile shows the state label and its revenue as currency.
        _graph('sales-by-states', px.treemap(
            state_revenue,
            path=['customer_state'], values='total_revenue',
            title='Sales by State',
        ).update_traces(textinfo='label+value', texttemplate='%{label}: $%{value:,.2f}')),
        _graph('top-10-states', px.bar(
            top_10_states,
            x='customer_state', y='total_revenue',
            title='Top 10 States by Revenue', color='total_revenue',
        )),
    ),

    _section(
        "Sales by Day of the Week",
        _graph('sales-by-day', px.bar(
            sales_by_day.reset_index(),
            x='order_day_of_week', y='total_sales',
            title='Sales by Day of the Week', color='total_sales',
        )),
    ),

    _section(
        "Advanced Metrics",
        # NOTE(review): the title says "Average" but the y column is
        # total_payment_value, and the tick values 1/3/4 are unexplained -
        # confirm both against the cell that builds orders_payment_value.
        _graph('orders-vs-payment', px.scatter(
            orders_payment_value,
            x='order_id',
            y='total_payment_value',
            title='Orders vs Average Payment Value',
        ).update_xaxes(tickvals=[1, 3, 4])),  # Specify the x-axis ticks here
        _graph('heatmap-order-distribution', px.density_heatmap(
            order_distribution_hour,
            x='order_hour', y='total_orders',
            title='Order Distribution by Hour',
        )),
        _graph('correlation-price-freight', px.scatter(
            correlation_data,
            x='price', y='freight_value',
            title='Correlation Between Price and Freight Value',
        )),
        # NOTE(review): this plots the same frame and columns as
        # 'top-10-categories' (revenue), although the title refers to delivery
        # performance - confirm the intended metric.
        _graph('delivery-performance-top-products', px.bar(
            top_10_categories,
            x='product_category_name', y='total_revenue',
            title='Delivery Performance of Top 10 Categories', color='total_revenue',
        )),
    ),

    _section(
        "Customer Insights",
        _graph('customer-acquisition', px.line(
            customer_acquisition,
            x='order_purchase_timestamp', y='customer_count',
            title='Customer Acquisition Over Time', markers=True,
        )),
        _graph('top-10-cities', px.bar(
            top_10_cities,
            x='customer_city', y='order_volume',
            title='Top 10 Cities by Order Volume', color='order_volume',
        )),
    ),
])

if __name__ == '__main__':
    # Start the development server with debug mode enabled.
    # `run_server` is deprecated in favour of `run` and is no longer available
    # in Dash 3; fall back to it only on older releases that predate `run`.
    start_server = app.run if hasattr(app, 'run') else app.run_server
    start_server(debug=True)