In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
import statsmodels.api
from statsmodels.tsa.seasonal import seasonal_decompose
import panel as pn
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:

# Load the sales records; the first CSV column holds the previously saved row index.
data = pd.read_csv('./data/data.csv', index_col='Unnamed: 0')

# Discard rows with negative weekly sales.
# A boolean mask is used instead of dropping by index label: a label-based drop
# would also remove non-negative rows if the saved index ever contained repeated
# labels. `~(x < 0)` keeps exactly the rows the original comparison kept
# (including rows where Weekly_Sales is missing).
data = data.loc[~(data['Weekly_Sales'] < 0)].copy()

# Relabel type 'A' stores whose Size is below 40000 as type 'C'.
data.loc[(data['Size'] < 40000) & (data['Type'] == 'A'), 'Type'] = 'C'

# Sorted list of department identifiers offered in the selector.
departments = sorted(data['Dept'].unique())

# DataFrame for general metrics in period by department (Period, Sales, Percentage of total sales and Position)
departments_total_sales = data.groupby('Dept').agg(total_sales=('Weekly_Sales', 'sum'))
# Explicit 64-bit integers: plain 'int' is platform dependent.
departments_total_sales['total_sales'] = departments_total_sales['total_sales'].astype('int64')
departments_total_sales['percentage'] = (
    departments_total_sales['total_sales'] / departments_total_sales['total_sales'].sum() * 100
).round(2)
# After the reset the positional index (0 = best seller) is used as the sales ranking.
departments_total_sales = (
    departments_total_sales
    .sort_values(by='total_sales', ascending=False)
    .reset_index()
)

In [3]:
# Function that returns an empty plot to start the dashboard before selecting a Department
def empty_plot():
    """Return a blank 5x5 inch matplotlib figure with the dashboard's grey background.

    Returns:
        matplotlib.figure.Figure: a figure with no axes.
    """
    fig = plt.figure(figsize=(5, 5))
    fig.patch.set_facecolor('#f0f0f0')
    # Unregister the figure from pyplot so repeated calls do not accumulate open
    # figures (and so the notebook does not echo one empty figure per call).
    # The Figure object itself remains usable by whoever receives it.
    plt.close(fig)
    return fig


In [4]:
# Function that returns a pie plot that shows the percentage of Stores with department included
def plot_stores():
    """Draw a donut chart of stores selling the department versus stores that do not.

    Reads the module-level ``type_of_store`` frame (columns ``Store``,
    ``No_store`` and ``Total_stores``), which ``display_dashboard`` assigns
    before calling this function.

    Returns:
        matplotlib.figure.Figure: the donut chart, with the rounded percentage
        in the centre and the ``(selling/total)`` counts beneath it.
    """
    # Compute each total once; they feed both the wedges and the labels.
    stores_selling = type_of_store['Store'].sum()
    stores_not_selling = type_of_store['No_store'].sum()
    total_stores = type_of_store['Total_stores'].sum()
    stores_percentage = stores_selling / total_stores * 100

    fig, ax = plt.subplots(figsize=(5, 5))
    fig.patch.set_facecolor('#f0f0f0')

    # width=0.3 turns the pie into a ring so the text fits in the hole.
    ax.pie([stores_selling, stores_not_selling], colors=['darkblue', 'grey'],
           wedgeprops=dict(width=0.3, edgecolor='w'))
    ax.text(0, 0.1, f'{round(stores_percentage)}%', ha='center', va='center',
            fontsize=40, fontweight='bold')
    # Built from local names: reusing the f-string's own quote character inside
    # the braces is a SyntaxError on Python versions before 3.12.
    ax.annotate(f'({stores_selling}/{total_stores})', xy=(-0.30, -0.23), fontsize=20)
    fig.tight_layout()
    # Unregister from pyplot so figures do not pile up on every dashboard refresh.
    plt.close(fig)
    return fig


In [5]:
# Function that returns a bar plot showing the stores with and without the department by type of store
def plot_stores_by_type():
    """Draw a stacked horizontal bar chart of department presence per store type.

    Reads the module-level ``type_of_store`` frame (index = store type, columns
    ``Store`` and ``No_store``), which ``display_dashboard`` assigns before
    calling this function.

    Returns:
        matplotlib.figure.Figure: one bar per store type; the coloured segment
        is the ``Store`` count and the grey segment the ``No_store`` count.
    """
    fig, ax = plt.subplots(figsize=(5, 5))
    fig.patch.set_facecolor('#f0f0f0')

    # Three entries of the tab20c palette, one per bar.
    colors = plt.colormaps["tab20c"]([2, 1, 0])

    ax.barh(type_of_store.index, type_of_store['Store'], color=colors, height=0.6)
    # Stack the "not selling" segment to the right of the "selling" segment.
    ax.barh(type_of_store.index, type_of_store['No_store'], left=type_of_store['Store'],
            color='grey', height=0.6, label='Not selling')

    ax.legend()
    ax.tick_params(axis='x', labelsize=15)
    ax.tick_params(axis='y', labelsize=25)
    ax.set_title('By type of store', fontsize=20)
    fig.tight_layout()
    # Unregister from pyplot so figures do not pile up on every dashboard refresh.
    plt.close(fig)
    return fig

In [6]:
# Function that returns a box plot of the weekly sales of the department
def plot_distribution():
    """Draw a box plot of the selected department's weekly sales.

    Reads the ``Weekly_Sales`` column of the module-level ``department_df``
    frame, which ``display_dashboard`` assigns before calling this function.

    Returns:
        matplotlib.figure.Figure: a 3x8 inch figure containing the box plot.
    """
    fig, ax = plt.subplots(figsize=(3, 8))
    fig.patch.set_facecolor('#f0f0f0')
    # seaborn returns the Axes it drew on; keep it under its own name instead of
    # rebinding `fig`, so figure-level and axes-level styling stay distinguishable.
    sns.boxplot(department_df['Weekly_Sales'], color='#2385BD', saturation=0.7, width=0.5,
                linecolor='#061C80', linewidth=2, ax=ax)
    ax.patch.set_facecolor('#f0f0f0')
    ax.grid(True)
    fig.tight_layout()
    # Unregister from pyplot so figures do not pile up on every dashboard refresh.
    plt.close(fig)
    return fig

In [7]:
# Function that returns a seasonal decompose of the weekly sales of the department (4 plots: Weekly_Sales, Trend, Seasonal and Residual)
def seasonal_plot2():
    """Build a four-row Plotly figure with the multiplicative seasonal decomposition.

    Reads the ``Weekly_Sales`` column of the module-level ``department_df``
    frame. If anything inside the decomposition/plotting step raises, the
    exception text is shown as a centred annotation instead of the traces.

    Returns:
        plotly.graph_objects.Figure: the stacked observed / trend / seasonal /
        residual line charts, or the annotated fallback figure.
    """
    panel_titles = ('Observed Sales', 'Trend', 'Seasonal', 'Residuals')
    fig = make_subplots(rows=4, cols=1, shared_xaxes=True, vertical_spacing=0.05,
                        subplot_titles=panel_titles)

    try:
        result = seasonal_decompose(department_df['Weekly_Sales'], model='multiplicative')

        # One (series, line colour) pair per subplot row, top to bottom.
        components = (
            (result.observed, 'darkblue'),
            (result.trend, 'blue'),
            (result.seasonal, 'blue'),
            (result.resid, 'blue'),
        )
        for row_number, (title, (series, colour)) in enumerate(zip(panel_titles, components), start=1):
            trace = go.Scatter(
                x=series.index,
                y=series,
                mode='lines',
                line=dict(color=colour),
                name=title,
            )
            fig.add_trace(trace, row=row_number, col=1)

        fig.update_layout(
            height=800,
            showlegend=False,
            plot_bgcolor='#f0f0f0',
            title_text="Seasonal Decomposition of Weekly Sales",
            title_x=0.5,
            margin=dict(t=50, b=20, l=50, r=50),
            font=dict(size=12),
        )

        # Black frame around every subplot.
        frame = dict(showline=True, linewidth=1, linecolor='black', mirror=True)
        fig.update_xaxes(**frame)
        fig.update_yaxes(**frame)

    except Exception as e:
        # Best effort: report the problem inside the figure rather than failing the refresh.
        fig.add_annotation(
            text=f'Not enough data to perform a seasonal decompose: {e!s}',
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=20, color='darkred')
        )

    return fig

# Function that returns an empty plot to start the dashboard before selecting a Department (Plotly library)
def empty_plot2():
    """Return a Plotly figure with no traces, used as the initial placeholder."""
    return go.Figure()



In [8]:
# Function that returns a regression plot if Pearson test p-value < 1% (alpha)
# Otherwise the plot is faded (alpha applied) and a text with the p-value result is displayed
def quan_quan_regression(var):
    """Plot weekly sales against a numeric feature, gated by a Pearson test.

    Reads the module-level ``department_df`` frame, which ``display_dashboard``
    assigns before calling this function.

    Args:
        var: name of the ``department_df`` column to compare with ``Weekly_Sales``.

    Returns:
        matplotlib.figure.Figure: a regression plot when the Pearson p-value
        (rounded to 6 decimals) is below 0.01; otherwise the same plot faded,
        without ticks or labels, overlaid with a message showing the p-value.
    """
    pearson_pval = round(pearsonr(department_df['Weekly_Sales'], department_df[var])[1], 6)
    significant = pearson_pval < 0.01

    fig, ax = plt.subplots(figsize=(4, 4))
    fig.patch.set_facecolor('#f0f0f0')

    # The two outcomes differ only in transparency, so draw once and fade when needed.
    fade = {} if significant else {'alpha': 0.2}
    sns.regplot(x=department_df[var], y=department_df['Weekly_Sales'], ci=None,
                scatter_kws={'s': 20, **fade},
                line_kws={'color': 'darkblue', **fade},
                ax=ax)
    ax.patch.set_facecolor('#f0f0f0')

    if not significant:
        # Strip the axes decoration so the message is the only readable element.
        ax.set_xlabel('')
        ax.set_ylabel('')
        ax.set_xticks([])
        ax.set_yticks([])
        ax.annotate(f'Weekly Sales and {var} Uncorrelated \n\nP-value = {pearson_pval} > 0.01',
                    xy=(0.1, 0.43), xycoords='axes fraction',
                    fontweight='bold', fontsize='large', color='darkred')

    fig.tight_layout()
    # Unregister from pyplot so figures do not pile up on every dashboard refresh.
    plt.close(fig)
    return fig

# Function that returns a box plot per category if ANOVA test p-value < 1% (alpha)
# Otherwise the plot is faded (alpha applied) and a text with the p-value result is displayed
def qual_quan_regression(var):
    """Plot weekly sales per level of a feature, gated by a one-way ANOVA.

    Reads the module-level ``department_df`` frame, which ``display_dashboard``
    assigns before calling this function.

    Args:
        var: name of the ``department_df`` column used as the explanatory
            variable in the formula ``Weekly_Sales ~ var``.

    Returns:
        matplotlib.figure.Figure: horizontal box plots when the ANOVA p-value
        (rounded to 6 decimals) is below 0.01; otherwise the same plot faded,
        without ticks or labels, overlaid with a message showing the p-value.
    """
    # Use the formula namespace exported by statsmodels.api (the module this
    # notebook imports) rather than relying on `statsmodels.formula.api` having
    # been loaded as an import side effect.
    result = statsmodels.api.formula.ols(f'Weekly_Sales ~ {var}', data=department_df).fit()
    anova = statsmodels.api.stats.anova_lm(result)
    anova_pval = round(anova['PR(>F)'].iloc[0], 6)
    significant = anova_pval < 0.01

    fig, ax = plt.subplots(figsize=(8, 4))
    fig.patch.set_facecolor('#f0f0f0')

    # The two outcomes differ only in transparency of the box elements.
    if significant:
        fade = {}
    else:
        fade = {f'{part}props': {'alpha': 0.1}
                for part in ('box', 'whisker', 'flier', 'median', 'cap')}
    sns.boxplot(data=department_df, y=var, x='Weekly_Sales', hue=var, orient='h', ax=ax, **fade)
    ax.patch.set_facecolor('#f0f0f0')

    # hue duplicates the y axis, so the legend carries no information: remove it
    # if seaborn created one.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    if not significant:
        # Strip the axes decoration so the message is the only readable element.
        ax.set_xlabel('')
        ax.set_ylabel('')
        ax.set_xticks([])
        ax.set_yticks([])
        ax.annotate(f'Weekly Sales and {var} Uncorrelated \n\nP-value = {anova_pval} > 0.01',
                    xy=(0.1, 0.43), xycoords='axes fraction',
                    fontweight='bold', fontsize='xx-large', color='darkred')

    fig.tight_layout()
    # Unregister from pyplot so figures do not pile up on every dashboard refresh.
    plt.close(fig)
    return fig


In [9]:
# Function that updates the dashboard (plots and stats) when department selected and button clicked
def display_dashboard(event):
    """Recompute every statistic and plot for the currently selected department.

    Registered as the click callback of the "Display Dashboard" button. It
    rebuilds the module-level ``department_df`` and ``type_of_store`` frames
    (which the plotting functions read) and then assigns new content to the
    Markdown and plot panes.

    Args:
        event: the click event supplied by the button; not used.
    """
    global department_df, type_of_store

    dept = selected_department.value
    dept_rows = data.loc[data['Dept'] == dept]

    # One row per date: every listed column averaged over that date's rows.
    department_df = dept_rows.groupby('Date').agg({
        'Weekly_Sales': 'mean', 'IsHoliday': 'mean', 'Temperature': 'mean',
        'CPI': 'mean', 'Unemployment': 'mean', 'Fuel_Price': 'mean',
    }).reset_index()
    department_df['Date'] = pd.to_datetime(department_df['Date'])
    # groupby ordered the dates in their unparsed form; sort again after parsing
    # so the series is chronological whatever the stored date format is.
    department_df = department_df.set_index('Date').sort_index()

    # Per store type: number of distinct stores carrying the department and their sales.
    type_of_store = dept_rows.groupby('Type').agg({'Store': 'nunique', 'Weekly_Sales': 'sum'})
    # Make sure all three store types appear, even those with no store selling it.
    for t in ('A', 'B', 'C'):
        if t not in type_of_store.index:
            type_of_store.loc[t] = [0, 0]
    type_of_store['Total_stores'] = [data.loc[data.Type == t]['Store'].nunique()
                                     for t in type_of_store.index]
    type_of_store['No_store'] = type_of_store['Total_stores'] - type_of_store['Store']
    type_of_store = type_of_store.sort_values(by='Total_stores')

    department_s.object = f"# <span style='color:white;'>Department {dept}</span>"

    # Look the department up once. Values are pulled into locals before
    # formatting because reusing the f-string's own quote character inside the
    # braces is a SyntaxError on Python versions before 3.12.
    ranking_row = departments_total_sales.loc[departments_total_sales['Dept'] == dept]
    total_sales = ranking_row['total_sales'].values[0]
    sales_share = ranking_row['percentage'].values[0]
    # departments_total_sales is ordered by descending sales with a fresh
    # 0-based index, so index + 1 is the rank.
    sales_rank = ranking_row.index[0] + 1

    period.object = f'# 📆 Period: {data.Date.min()} / {data.Date.max()}'
    sales.object = f'# 💰 Sales: {format_to_money(total_sales)}'
    percentage_sales.object = f'# 📈 Percentage of the total sales: {sales_share}%'
    position.object = f'# 🏁 Position in sales: {sales_rank}/{departments_total_sales.shape[0]}'

    plot1.object = plot_stores()
    plot2.object = plot_stores_by_type()
    plot3.object = plot_distribution()
    plot4.object = seasonal_plot2()
    plot5.object = quan_quan_regression('Temperature')
    plot6.object = qual_quan_regression('IsHoliday')
    plot7.object = quan_quan_regression('CPI')
    plot8.object = quan_quan_regression('Unemployment')
    plot9.object = quan_quan_regression('Fuel_Price')


In [10]:
# Function to transform a number to monetary format (Total sales in period)
def format_to_money(amount):
    """Format ``amount`` as dollars with thousands separators and two decimals.

    Args:
        amount: numeric value to format.

    Returns:
        str: for example ``"$1,234.50"`` for ``1234.5``.
    """
    return f"${amount:,.2f}"

# Function to display the department selected
def display_selected_department(selected):
    """Return the white Markdown heading announcing the chosen department.

    Args:
        selected: value currently chosen in the department selector.

    Returns:
        str: a level-1 Markdown heading wrapped in a white ``<span>``.
    """
    heading_template = "# <span style='color:white;'>Department: {}</span>"
    return heading_template.format(selected)


# Padding, rounded corners and drop shadow shared by both card styles
_CARD_CHROME = {
    "padding": "15px",
    "border-radius": "15px",
    "box-shadow": "2px 2px 5px rgba(0, 0, 0, 0.3)",
}

# Style for primary blocks (plots): light grey background
rounded_style = {"background": "#f0f0f0", **_CARD_CHROME}

# Style for background blocks (groups of plots): dark blue background
rounded_style2 = {"background": "#01388B", **_CARD_CHROME}

# Drop-down listing every department; its value drives the whole dashboard
selected_department = pn.widgets.Select(
    name='Select Department',
    options=departments,
    width=400,
    height=100,
    styles=rounded_style,
)

# Heading bound to the drop-down, so it re-renders whenever the selection changes
markdown_pane = pn.bind(display_selected_department, selected_department)

# Button that recomputes the stats and plots for the department currently selected
button = pn.widgets.Button(
    name='Display Dashboard',
    button_type='primary',
)
button.on_click(display_dashboard)

# Elements displayed (initialised as empty in the first render; display_dashboard fills them in)
department_s = pn.pane.Markdown("# <span style='color:white;'>Department -</span>")

# Headline statistics, created in display order
period, sales, percentage_sales, position = (
    pn.pane.Markdown(heading)
    for heading in (
        '# 📆 Period: ',
        '# 💰 Sales: ',
        '# 📈 Percentage of the total sales: ',
        '# 🏁 Position in sales: ',
    )
)


def _matplotlib_placeholder(width, height):
    """Return a Matplotlib pane of the given size holding a fresh empty figure."""
    return pn.pane.Matplotlib(empty_plot(), height=height, width=width)


# Store presence (plot1, plot2) and weekly sales distribution (plot3)
plot1 = _matplotlib_placeholder(width=300, height=300)
plot2 = _matplotlib_placeholder(width=300, height=300)
plot3 = _matplotlib_placeholder(width=330, height=610)

# Seasonal decomposition (Plotly)
plot4 = pn.pane.Plotly(empty_plot2(), height=800, width=1320)

# Weekly sales versus each external feature
plot5 = _matplotlib_placeholder(width=400, height=400)
plot6 = _matplotlib_placeholder(width=860, height=400)
plot7 = _matplotlib_placeholder(width=400, height=400)
plot8 = _matplotlib_placeholder(width=400, height=400)
plot9 = _matplotlib_placeholder(width=400, height=400)


#--------------------------------------Dashboard--------------------------------------------

pn.extension(design="material", raw_css=["body { background-color: #2E2E2E; }"])


def _white_heading(text):
    """Return a level-1 Markdown heading rendered in white (for the dark blue cards)."""
    return pn.pane.Markdown(f"# <span style='color:white;'>{text}</span>")


def _feature_card(title, plot, margin):
    """Return a light card holding a level-2 title above one correlation plot."""
    return pn.Column(title, plot, styles=rounded_style, margin=margin)


# --- Section 1: department selector ---------------------------------------------------
selector_section = pn.Column(
    _white_heading("Department Analysis"),
    selected_department,
    markdown_pane,
    button,
    styles=rounded_style2,
    margin=(20, 0, 30, 20),
)

# --- Section 2: headline stats, store presence and sales distribution -----------------
stats_card = pn.Column(
    period,
    sales,
    percentage_sales,
    position,
    styles=rounded_style,
    margin=(0, 0, 0, 20),
)
logo_card = pn.Column(
    pn.pane.Image('./data/walmart-logo-477.png', width=442, height=350),
    styles=rounded_style,
    margin=(45, 45, 0, 20),
)
presence_card = pn.Column(
    pn.pane.Markdown("# Presence in Stores"),
    pn.Row(plot1, margin=(0, 20, 0, 20)),
    pn.Row(plot2, margin=(0, 20, 0, 20)),
    styles=rounded_style,
    margin=(0, 45, 0, 0),
)
distribution_card = pn.Column(
    pn.pane.Markdown("# Distribution of Weekly Sales"),
    pn.Row(plot3, margin=(0, 10, 0, 10)),
    styles=rounded_style,
)
overview_section = pn.Column(
    department_s,
    pn.Row(
        pn.Column(stats_card, logo_card),
        presence_card,
        distribution_card,
        margin=(0, 18, 10, 0),
    ),
    styles=rounded_style2,
    margin=(0, 0, 30, 20),
)

# --- Section 3: seasonal decomposition ------------------------------------------------
seasonal_section = pn.Column(
    _white_heading("Seasonal Decompose"),
    pn.Row(
        pn.Column(plot4, styles=rounded_style, margin=(10, 20, 20, 20)),
    ),
    styles=rounded_style2,
    margin=(0, 0, 30, 20),
)

# --- Section 4: weekly sales against the other features -------------------------------
correlation_section = pn.Column(
    _white_heading("Weekly Sales correlation with other features"),
    pn.Row(
        _feature_card('## Temperature vs Weekly Sales', plot5, (10, 5, 5, 20)),
        _feature_card('## IsHoliday vs Weekly Sales', plot6, (10, 10, 5, 5)),
    ),
    pn.Row(
        _feature_card('## CPI vs Weekly Sales', plot7, (5, 5, 5, 20)),
        _feature_card('## Unemployment vs Weekly Sales', plot8, (5, 5, 5, 5)),
        _feature_card('## Fuel Price vs Weekly Sales', plot9, (5, 20, 5, 5)),
    ),
    styles=rounded_style2,
    margin=(0, 0, 20, 20),
)

# Stack the four sections top to bottom and open the dashboard
dashboard = pn.Column(
    selector_section,
    overview_section,
    seasonal_section,
    correlation_section,
)

dashboard.show()


Using Panel interactively in VSCode notebooks requires the jupyter_bokeh package to be installed. You can install it with:

   pip install jupyter_bokeh

or:
    conda install jupyter_bokeh

and try again.



Launching server at http://localhost:64751


<panel.io.server.Server at 0x261baa01550>

<Figure size 500x500 with 0 Axes>

<Figure size 500x500 with 0 Axes>

<Figure size 500x500 with 0 Axes>

<Figure size 500x500 with 0 Axes>

<Figure size 500x500 with 0 Axes>

<Figure size 500x500 with 0 Axes>

<Figure size 500x500 with 0 Axes>

<Figure size 500x500 with 0 Axes>

### Dashboard Example
***
![dashboard example](./Images/dashboard_example28.png)