In [1]:
import kagglehub
import shutil
import os
import pandas as pd
import dash
from dash import dcc, html
import plotly.express as px

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Directory (relative to the notebook) that holds the Olist CSV files.
data_path = "./data"

# Build each file location first so the read calls stay short.
closed_deals_csv = f"{data_path}/olist_closed_deals_dataset.csv"
marketing_qualified_leads_csv = f"{data_path}/olist_marketing_qualified_leads_dataset.csv"

closed_deals_dataset = pd.read_csv(closed_deals_csv)
marketing_qualified_leads_dataset = pd.read_csv(marketing_qualified_leads_csv)

### 1. Evolution of Business Closing Over Time

In [3]:
# Parse the deal-won timestamp and derive a monthly period from it.
# Both columns are written back onto the loaded frame, as before.
closed_deals_dataset["won_date"] = pd.to_datetime(closed_deals_dataset["won_date"])
closed_deals_dataset["year_month"] = closed_deals_dataset["won_date"].dt.to_period("M")

# One row per month: number of rows (closed deals) whose won_date falls in it.
# The period is converted to a plain string ("YYYY-MM") for plotting.
business_clising = (
    closed_deals_dataset
    .groupby("year_month")
    .size()
    .reset_index(name="Count")
    .rename(columns={"year_month": "Period"})
    .astype({"Period": str})
)

business_clising

Unnamed: 0,Period,Count
0,2017-12,3
1,2018-01,73
2,2018-02,113
3,2018-03,147
4,2018-04,207
5,2018-05,122
6,2018-06,57
7,2018-07,37
8,2018-08,33
9,2018-09,23


In [4]:
# Line chart of closed deals per month.
# `labels` maps *column names* of the plotted frame to display text. The frame's
# columns are "Period" and "Count", so the y-axis label must be keyed on "Count";
# the previous key "Number deals" matched no column and was silently ignored.
business_closing_fig = px.line(
    business_clising,
    x="Period",
    y="Count",
    markers=True,
    title="Evolution of Business Closing Over Time",
    labels={"Period": "Month-year", "Count": "Number of Business Closing"},
)

business_closing_fig

### 2. Distribution of Business Segments

In [5]:
# Frequency table of business segments, most common first (value_counts order).
# Naming the index and the counts explicitly yields the two display columns
# without a separate `.columns = [...]` assignment.
business_segment = (
    closed_deals_dataset["business_segment"]
    .value_counts()
    .rename_axis("Business segment")
    .reset_index(name="Count")
)

business_segment.head(3)

Unnamed: 0,Business segment,Count
0,home decor,105
1,health beauty,93
2,car accessories,77


In [6]:
# Bar chart of deal counts per business segment; bars are annotated with their
# value and coloured by the same count on a continuous scale.
segment_axis_labels = {"Business segment": "Business segment", "Count": "Count"}

business_segment_fig = px.bar(
    business_segment,
    x="Business segment",
    y="Count",
    title="Distribution of Business Segments",
    labels=segment_axis_labels,
    text_auto=True,
    color="Count",
    color_continuous_scale="viridis",
)

business_segment_fig

### 3. Lead Profile

In [7]:
# Number of rows for every (lead type, behaviour profile) combination present
# in the closed-deals data, with display-friendly column names.
lead_profile = (
    closed_deals_dataset
    .groupby(["lead_type", "lead_behaviour_profile"])
    .size()
    .reset_index(name="Count")
    .rename(
        columns={
            "lead_type": "Lead type",
            "lead_behaviour_profile": "Lead behaviour profile",
        }
    )
)

lead_profile.head(3)

Unnamed: 0,Lead type,Lead behaviour profile,Count
0,industry,cat,49
1,industry,eagle,25
2,industry,"eagle, wolf",2


In [8]:
# Grouped bar chart: one cluster per lead type, one bar per behaviour profile,
# each bar annotated with its count.
lead_profile_labels = {
    "Lead type": "Lead type",
    "Count": "Count",
    "Lead behaviour profile": "Lead behaviour profile",
}

lead_profile_fig = px.bar(
    lead_profile,
    x="Lead type",
    y="Count",
    color="Lead behaviour profile",
    title="Lead profile",
    labels=lead_profile_labels,
    text_auto=True,
    barmode="group",
)

lead_profile_fig