In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


In [None]:
# Mount Google Drive inside the Colab filesystem so files stored there can be read.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
# Read the Superstore workbook from the mounted Drive folder and preview the first rows.
superstore_path = '/content/drive/MyDrive/Data Analysis/Data sets/Superstore.xls'
df = pd.read_excel(superstore_path)
df.head(4)

In [None]:
# Store postal codes as text rather than numbers.
# - Casting through the nullable 'Int64' dtype first means the text has no
#   trailing ".0", which a float column (numeric codes plus missing values)
#   would otherwise produce.
# - The nullable 'string' dtype keeps missing codes as <NA>; a plain
#   .astype(str) would turn them into the literal text "<NA>".
# - zfill(5) restores leading zeros that are lost when codes are read as
#   numbers (assumes 5-digit US ZIP codes — TODO confirm for this dataset).
postal_codes = df['Postal Code'].astype('Int64')
df['Postal Code'] = postal_codes.astype('string').str.zfill(5)
df['Postal Code'].dtypes

In [None]:
# Parse the order and ship dates as day/month/year.
# errors='coerce' converts anything that does not match the format into NaT,
# which would otherwise happen silently; count the values lost that way and
# surface them as a warning so a wrong format is noticed immediately.
for date_col in ('Order Date', 'Ship Date'):
    was_missing = df[date_col].isna()
    parsed_dates = pd.to_datetime(df[date_col], format='%d/%m/%Y', errors='coerce')
    # Only values that were present before parsing and are NaT afterwards count.
    n_unparsed = int((parsed_dates.isna() & ~was_missing).sum())
    if n_unparsed:
        warnings.warn(
            f"{n_unparsed} value(s) in '{date_col}' did not match "
            "format %d/%m/%Y and were set to NaT"
        )
    df[date_col] = parsed_dates
df.info()

In [None]:
# Split the order date into the calendar parts used for grouping:
# year, month number, full month name and weekday name.
order_dt = df['Order Date'].dt
df['Year'] = order_dt.year
df['Month'] = order_dt.month
df['Month Name'] = order_dt.strftime('%B')
df['Weekday'] = order_dt.day_name()

In [None]:
# Preview the first two values of the derived Year column.
df['Year'].iloc[:2]

In [None]:
# Profit margin: profit as a fraction of sales, per row.
# A zero-sales row divides by zero: non-zero profit / 0 gives +/-inf and
# 0 / 0 gives NaN. fillna() only catches NaN, so map the infinities to NaN
# first and then treat every undefined margin as 0.
raw_margin = df['Profit'] / df['Sales']
df['Profit_Margin'] = raw_margin.replace([np.inf, -np.inf], np.nan).fillna(0)
df['Profit_Margin'].head(2)

In [None]:
# Margin expressed in percent. The stored column is unrounded; only the
# two-row preview below is rounded to two decimals.
margin_pct = df['Profit_Margin'].mul(100)
df['Profit_Margin_Percent'] = margin_pct
df['Profit_Margin_Percent'].round(2).head(2)

In [None]:
# Display pandas' default summary statistics for df.
df.describe()

In [None]:
# List the columns that describe() included in its summary.
df.describe().columns

In [None]:
# Sales amount scaled by the fraction that remains after the row's discount.
kept_fraction = 1 - df['Discount']
df['Revenue_After_Discount'] = df['Sales'] * kept_fraction
df['Revenue_After_Discount'].head(2)

In [None]:
# Print the distinct values of each categorical column, one array per line.
for categorical_col in ('Ship Mode', 'Segment', 'Region', 'Category', 'Sub-Category'):
    print(df[categorical_col].unique())


In [None]:
!pip install dash dash-daq pandas plotly

In [None]:
!pip install dash-bootstrap-components

In [None]:
import plotly.express as px
import dash
from dash import dcc, html
import dash_bootstrap_components as dbc


# ---------------------------------------------------------------------------
# Overview dashboard: headline KPIs plus region, state, category, month,
# segment, ship-mode and customer charts, all built from the shared `df`.
# ---------------------------------------------------------------------------
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.title = "Superstore Dashboard"

# Four KPI cards in one row: sales total, quantity total, profit total and
# the mean discount expressed in percent.
kpi_cards = dbc.Row([
    dbc.Col(dbc.Card([dbc.CardHeader("Total Sales"), dbc.CardBody(f"${df['Sales'].sum():,.2f}")]), width=3),
    dbc.Col(dbc.Card([dbc.CardHeader("Total Quantity"), dbc.CardBody(f"{df['Quantity'].sum():,}")]), width=3),
    dbc.Col(dbc.Card([dbc.CardHeader("Total Profit"), dbc.CardBody(f"${df['Profit'].sum():,.2f}")]), width=3),
    dbc.Col(dbc.Card([dbc.CardHeader("Avg Discount"), dbc.CardBody(f"{df['Discount'].mean()*100:.2f}%")]), width=3),
], className="mb-4")


# Total sales per region; bar labels use a 2-significant-digit SI format.
region_sales = df.groupby('Region', as_index=False)['Sales'].sum()
fig_region = px.bar(region_sales, x='Region', y='Sales', title="Sales by Region", text='Sales')
fig_region.update_traces(texttemplate='%{text:.2s}', textposition='outside')


# The ten states with the highest total sales.
state_sales = df.groupby('State', as_index=False)['Sales'].sum().sort_values('Sales', ascending=False).head(10)
fig_state = px.bar(state_sales, x='State', y='Sales', title="Top 10 States by Sales", text='Sales')
fig_state.update_traces(texttemplate='%{text:.2s}', textposition='outside')


# One point per row of df: sales against profit, coloured by category.
fig_cat = px.scatter(df, x='Sales', y='Profit', color='Category', title='Sales vs Profit by Category', hover_data=['Sub-Category'])


# Profit distribution per sub-category, with every point drawn next to its box.
fig_subcat = px.box(df, x='Sub-Category', y='Profit', color='Category', title="Profit Distribution by Sub-Category")
fig_subcat.update_traces(boxpoints='all', jitter=0.3)


# Sales and profit per calendar month number (all years pooled).
# The month number is derived from 'Order Date' here rather than read from the
# shared df['Month'] column, so the chart does not depend on whichever month
# representation that column currently holds when this cell is (re-)run.
monthly = (
    df.assign(Month=df['Order Date'].dt.month)
    .groupby('Month', as_index=False)
    .agg({'Sales': 'sum', 'Profit': 'sum'})
)
fig_month = px.line(monthly, x='Month', y=['Sales', 'Profit'], title="Monthly Sales vs Profit")
fig_month.update_traces(mode='lines+markers+text', textposition='top center')
fig_month.update_layout(hovermode='x unified')


# Row counts per customer segment.
fig_segment = px.histogram(df, x='Segment', title='Orders by Customer Segment', text_auto=True)


# Row counts per ship mode.
fig_shipmode = px.histogram(df, x='Ship Mode', title='Orders by Shipping Preference', text_auto=True)


# The three customers with the most distinct order IDs.
top_customers = df.groupby('Customer Name', as_index=False).agg({'Order ID': 'nunique'})
top_customers = top_customers.sort_values('Order ID', ascending=False).head(3)
fig_customers = px.bar(top_customers, x='Customer Name', y='Order ID', title='Top 3 Customers by Orders', text='Order ID')
fig_customers.update_traces(textposition='outside')


# Page layout: title, KPI row, then the charts in rows of one or two.
app.layout = dbc.Container([
    html.H1("Superstore Dashboard Overview", className="text-center mt-4 mb-4"),
    kpi_cards,
    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_region), md=6),
        dbc.Col(dcc.Graph(figure=fig_state), md=6),
    ]),
    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_cat), md=6),
        dbc.Col(dcc.Graph(figure=fig_subcat), md=6),
    ]),
    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_month), md=12),
    ]),
    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_segment), md=6),
        dbc.Col(dcc.Graph(figure=fig_shipmode), md=6),
    ]),
    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_customers), md=12),
    ])
], fluid=True)


# Start the Dash server for this layout.
if __name__ == '__main__':
    app.run(debug=True)

In [None]:
# ---------------------------------------------------------------------------
# Sales by Geography dashboard: KPIs, a state/city/sub-category/product
# sunburst, top and bottom city trends, and quantity breakdowns.
# ---------------------------------------------------------------------------

# Year-month label (text form of the monthly period) for the time-series
# charts. It lives on a local frame so the shared df['Month'] column is not
# overwritten as a side effect of building this dashboard.
geo_df = df.assign(Month=df['Order Date'].dt.to_period('M').astype(str))


app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.title = "Sales by Geography Dashboard"


# Headline figures; the margin is total profit over total sales, in percent.
total_sales = df['Sales'].sum()
total_profit = df['Profit'].sum()
total_quantity = df['Quantity'].sum()
total_products = df['Product Name'].nunique()
avg_profit_margin = (total_profit / total_sales) * 100

kpi_cards = dbc.Row([
    dbc.Col(dbc.Card([dbc.CardHeader("Total Sales"), dbc.CardBody(f"${total_sales:,.2f}")]), width=2),
    dbc.Col(dbc.Card([dbc.CardHeader("Total Profit"), dbc.CardBody(f"${total_profit:,.2f}")]), width=2),
    dbc.Col(dbc.Card([dbc.CardHeader("Total Quantity"), dbc.CardBody(f"{total_quantity:,}")]), width=2),
    dbc.Col(dbc.Card([dbc.CardHeader("Total Products"), dbc.CardBody(f"{total_products:,}")]), width=2),
    dbc.Col(dbc.Card([dbc.CardHeader("Profit Margin"), dbc.CardBody(f"{avg_profit_margin:.2f}%")]), width=2),
])


# Hierarchical view of sales: state, then city, sub-category and product.
fig_decomposition = px.sunburst(
    df,
    path=['State', 'City', 'Sub-Category', 'Product Name'],
    values='Sales',
    title="Decomposition: Sales by State → City → Sub-Category → Product",
)


# Total sales per city, computed once and reused for both rankings below.
city_sales = df.groupby('City')['Sales'].sum()

# Monthly sales for the three cities with the highest total sales.
top_cities = city_sales.sort_values(ascending=False).head(3).index.tolist()
top3_city_time = geo_df[geo_df['City'].isin(top_cities)]
fig_top3 = px.line(
    top3_city_time.groupby(['Month', 'City'], as_index=False)['Sales'].sum(),
    x='Month', y='Sales', color='City',
    title="Top 3 Cities by Sales Over Time"
)


# Monthly sales for the three cities with the lowest total sales.
bottom_cities = city_sales.sort_values(ascending=True).head(3).index.tolist()
bottom3_city_time = geo_df[geo_df['City'].isin(bottom_cities)]
fig_bottom3 = px.line(
    bottom3_city_time.groupby(['Month', 'City'], as_index=False)['Sales'].sum(),
    x='Month', y='Sales', color='City',
    title="Bottom 3 Cities by Sales Over Time"
)


# The ten cities with the largest total quantity.
top10_qty = df.groupby('City', as_index=False)['Quantity'].sum().sort_values('Quantity', ascending=False).head(10)
fig_top10_qty = px.bar(top10_qty, x='City', y='Quantity', text='Quantity', title="Top 10 Cities by Quantity")
fig_top10_qty.update_traces(textposition='outside')


# Quantity per (state, sub-category) pair drawn as a heatmap.
qty_by_cat_state = df.groupby(['State', 'Sub-Category'], as_index=False)['Quantity'].sum()
fig_qty_heatmap = px.density_heatmap(
    qty_by_cat_state, x='State', y='Sub-Category', z='Quantity',
    title="Quantity by Sub-Category and State", color_continuous_scale='Blues'
)


# Page layout: title, KPI row, sunburst, then two rows of paired charts.
app.layout = dbc.Container([
    html.H1("Sales by Geography Dashboard", className="text-center mt-4 mb-4"),
    kpi_cards,
    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_decomposition), md=12)
    ]),
    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_top3), md=6),
        dbc.Col(dcc.Graph(figure=fig_bottom3), md=6),
    ]),
    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_top10_qty), md=6),
        dbc.Col(dcc.Graph(figure=fig_qty_heatmap), md=6),
    ])
], fluid=True)


# Start the Dash server for this layout.
if __name__ == '__main__':
    app.run(debug=True)


In [None]:

# ---------------------------------------------------------------------------
# Customer Segment Insights dashboard: KPIs, monthly sales/profit by segment
# and by ship mode, profit by segment and category, and the busiest cities
# broken down by segment and by ship mode.
# ---------------------------------------------------------------------------

# Month start timestamp of each order date, used as the time axis. It lives on
# a local frame so the shared df['Month'] column is not overwritten as a side
# effect of building this dashboard.
segment_df = df.assign(Month=df['Order Date'].dt.to_period('M').dt.to_timestamp())


app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.title = "Customer Segment Insights"


# Headline figures; orders are counted as distinct order IDs.
total_sales = df['Sales'].sum()
total_quantity = df['Quantity'].sum()
total_profit = df['Profit'].sum()
total_orders = df['Order ID'].nunique()
avg_discount = df['Discount'].mean() * 100

kpi_cards = dbc.Row([
    dbc.Col(dbc.Card([dbc.CardHeader("Total Sales"), dbc.CardBody(f"${total_sales:,.2f}")]), width=2),
    dbc.Col(dbc.Card([dbc.CardHeader("Total Quantity"), dbc.CardBody(f"{total_quantity:,}")]), width=2),
    dbc.Col(dbc.Card([dbc.CardHeader("Total Profit"), dbc.CardBody(f"${total_profit:,.2f}")]), width=2),
    dbc.Col(dbc.Card([dbc.CardHeader("Total Orders"), dbc.CardBody(f"{total_orders:,}")]), width=2),
    dbc.Col(dbc.Card([dbc.CardHeader("Avg Discount"), dbc.CardBody(f"{avg_discount:.2f}%")]), width=2),
])


# Monthly sales, one line per segment.
seg_sales = segment_df.groupby(['Month', 'Segment'], as_index=False)['Sales'].sum()
fig_seg_sales = px.line(seg_sales, x='Month', y='Sales', color='Segment', title="Sales by Segment Over Time")


# Monthly profit, one line per segment.
seg_profit = segment_df.groupby(['Month', 'Segment'], as_index=False)['Profit'].sum()
fig_seg_profit = px.line(seg_profit, x='Month', y='Profit', color='Segment', title="Profit by Segment Over Time")


# Monthly sales, one line per ship mode.
ship_sales = segment_df.groupby(['Month', 'Ship Mode'], as_index=False)['Sales'].sum()
fig_ship_sales = px.line(ship_sales, x='Month', y='Sales', color='Ship Mode', title="Sales by Ship Mode Over Time")


# Monthly profit, one line per ship mode.
ship_profit = segment_df.groupby(['Month', 'Ship Mode'], as_index=False)['Profit'].sum()
fig_ship_profit = px.line(ship_profit, x='Month', y='Profit', color='Ship Mode', title="Profit by Ship Mode Over Time")


# Total profit per (segment, category) pair as grouped bars.
seg_cat_profit = df.groupby(['Segment', 'Category'], as_index=False)['Profit'].sum()
fig_seg_cat_profit = px.bar(seg_cat_profit, x='Segment', y='Profit', color='Category',
                            barmode='group', title="Profit by Segment and Category")


# The ten cities with the most distinct orders, ranked once and shared by the
# two breakdown charts below so both show the same cities in the same order.
orders_per_city = df.groupby('City')['Order ID'].nunique().sort_values(ascending=False)
top_order_cities = orders_per_city.head(10).index.tolist()
top_city_rows = df[df['City'].isin(top_order_cities)]

# Distinct orders in those cities split by customer segment. Grouping by
# Segment is what makes this chart differ from the ship-mode chart; without
# it both charts would plot identical city totals.
city_seg_orders = top_city_rows.groupby(['City', 'Segment'], as_index=False)['Order ID'].nunique()
fig_top_city_seg = px.bar(
    city_seg_orders, x='City', y='Order ID', color='Segment', barmode='group',
    category_orders={'City': top_order_cities},
    title="Top 10 Cities by Orders in Segment", text='Order ID',
)
fig_top_city_seg.update_traces(textposition='outside')


# Distinct orders in the same cities split by ship mode.
city_ship_orders = top_city_rows.groupby(['City', 'Ship Mode'], as_index=False)['Order ID'].nunique()
fig_top_city_ship = px.bar(
    city_ship_orders, x='City', y='Order ID', color='Ship Mode', barmode='group',
    category_orders={'City': top_order_cities},
    title="Top 10 Cities by Orders in Ship Mode", text='Order ID',
)
fig_top_city_ship.update_traces(textposition='outside')


# Page layout: title, KPI row, then the charts in rows of one or two.
app.layout = dbc.Container([
    html.H1("Customer Segment Insights", className="text-center mt-4 mb-4"),
    kpi_cards,
    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_seg_sales), md=6),
        dbc.Col(dcc.Graph(figure=fig_seg_profit), md=6),
    ]),
    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_ship_sales), md=6),
        dbc.Col(dcc.Graph(figure=fig_ship_profit), md=6),
    ]),
    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_seg_cat_profit), md=12),
    ]),
    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_top_city_seg), md=6),
        dbc.Col(dcc.Graph(figure=fig_top_city_ship), md=6),
    ]),
], fluid=True)


# Start the Dash server for this layout.
if __name__ == '__main__':
    app.run(debug=True)

In [None]:

# ---------------------------------------------------------------------------
# Product Performance dashboard: top-3 sub-category lists plus product and
# sub-category sales/profit charts.
# ---------------------------------------------------------------------------
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.title = "Product Performance Dashboard"


# Top three sub-categories by total sales and by mean discount.
top_sub_sales = df.groupby('Sub-Category', as_index=False)['Sales'].sum().sort_values('Sales', ascending=False).head(3)
top_sub_discount = df.groupby('Sub-Category', as_index=False)['Discount'].mean().sort_values('Discount', ascending=False).head(3)

# Row-level profit margin. A zero-sales row would yield +/-inf, and a single
# infinite value makes its sub-category mean infinite; map those to NaN so
# mean() skips them.
df['Profit Margin'] = (df['Profit'] / df['Sales']).replace([np.inf, -np.inf], np.nan)
# Top three sub-categories by the mean of their row-level margins.
top_sub_profit_margin = df.groupby('Sub-Category', as_index=False)['Profit Margin'].mean().sort_values('Profit Margin', ascending=False).head(3)


# Ten products with the highest total sales, with total profit and the number
# of distinct orders they appear in.
basket = df.groupby('Product Name', as_index=False).agg({
    'Sales': 'sum',
    'Profit': 'sum',
    'Order ID': 'nunique'
}).rename(columns={'Order ID': 'Transactions'}).sort_values('Sales', ascending=False).head(10)

# Bar labels are formatted to two decimals; unformatted sums would be shown
# with their full floating-point tail.
fig_basket = px.bar(basket, x='Product Name', y='Sales', text='Sales', title='Top Basket Items by Sales')
fig_basket.update_traces(texttemplate='%{text:,.2f}', textposition='outside')


# Every sub-category ranked by total sales.
subcat_perf = df.groupby('Sub-Category', as_index=False)['Sales'].sum().sort_values('Sales', ascending=False)
fig_subcat_perf = px.bar(subcat_perf, x='Sub-Category', y='Sales', text='Sales', title='Top Performing Sub-Categories')
fig_subcat_perf.update_traces(texttemplate='%{text:,.2f}', textposition='outside')


# One point per product: total sales against total profit.
prod_perf = df.groupby('Product Name', as_index=False).agg({'Sales': 'sum', 'Profit': 'sum'})
fig_prod_scatter = px.scatter(prod_perf, x='Sales', y='Profit', hover_name='Product Name',
                              title='Top Products by Sales vs. Profit')


# Fifteen best-selling products; bar height is sales, the label is profit.
top15 = prod_perf.sort_values('Sales', ascending=False).head(15)
fig_top15 = px.bar(top15, x='Product Name', y='Sales', text='Profit', title='Top 15 Products by Sales (with Profit)')
fig_top15.update_traces(texttemplate='%{text:,.2f}', textposition='outside')


# Page layout: title, three top-3 cards, then two rows of paired charts.
app.layout = dbc.Container([
    html.H1("Product Performance Insights", className="text-center mt-4 mb-4"),

    html.H4("Top 3 Sub-Categories"),
    dbc.Row([
        dbc.Col(dbc.Card([dbc.CardHeader("By Sales"), dbc.CardBody(html.Ul([html.Li(f"{row['Sub-Category']} (${row['Sales']:.2f})") for _, row in top_sub_sales.iterrows()]))]), width=4),
        dbc.Col(dbc.Card([dbc.CardHeader("By Avg Discount"), dbc.CardBody(html.Ul([html.Li(f"{row['Sub-Category']} ({row['Discount']*100:.2f}%)") for _, row in top_sub_discount.iterrows()]))]), width=4),
        dbc.Col(dbc.Card([dbc.CardHeader("By Profit Margin"), dbc.CardBody(html.Ul([html.Li(f"{row['Sub-Category']} ({row['Profit Margin']*100:.2f}%)") for _, row in top_sub_profit_margin.iterrows()]))]), width=4),
    ], className="mb-4"),

    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_basket), md=6),
        dbc.Col(dcc.Graph(figure=fig_subcat_perf), md=6),
    ]),
    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_prod_scatter), md=6),
        dbc.Col(dcc.Graph(figure=fig_top15), md=6),
    ]),
], fluid=True)


# Start the Dash server for this layout.
if __name__ == '__main__':
    app.run(debug=True)


In [None]:

# ---------------------------------------------------------------------------
# Financial Analysis dashboard: profit against discount (by category and by
# sub-category), profit per discount range, and profit per 'AC' group.
# ---------------------------------------------------------------------------

# Work on a private copy so the shared `df` is neither filtered nor given
# extra columns by this cell; re-running other cells then still sees every row.
fin_df = df.copy()

if 'AC' not in fin_df.columns:
    # NOTE(review): placeholder only — when the data has no 'AC' column, random
    # labels are generated, so the "Profit by AC" chart carries no real signal.
    # A local RandomState(0) keeps the labels reproducible without reseeding
    # NumPy's global generator.
    ac_rng = np.random.RandomState(0)
    fin_df['AC'] = ac_rng.choice(['North', 'South', 'East', 'West'], size=len(fin_df))

# Keep only rows with positive sales; .copy() makes the result an independent
# frame so the column assignment further down does not write into a slice.
fin_df = fin_df[fin_df['Sales'] > 0].copy()


app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.title = "Financial Analysis Dashboard"


# One point per row: discount against profit, coloured by category.
fig_pvm_category = px.scatter(
    fin_df,
    x='Discount',
    y='Profit',
    color='Category',
    hover_data=['Sub-Category', 'Sales'],
    title='PVM by Category: Profit vs Discount'
)


# Same scatter, coloured by sub-category.
fig_pvm_subcat = px.scatter(
    fin_df,
    x='Discount',
    y='Profit',
    color='Sub-Category',
    hover_data=['Product Name', 'Category'],
    title='PVM by Sub-Category: Profit vs Discount'
)

# Bucket discounts into ranges; include_lowest=True puts a discount of exactly
# 0 into the first bin.
fin_df['Discount Bin'] = pd.cut(fin_df['Discount'], bins=[0, 0.1, 0.2, 0.3, 0.5, 1.0], include_lowest=True)
# observed=False is passed explicitly so empty ranges are still listed,
# independent of the pandas version's default.
discount_profit = fin_df.groupby('Discount Bin', as_index=False, observed=False)['Profit'].sum()
# pd.cut yields Interval objects, which Plotly cannot serialise to JSON;
# use their text form as the axis labels instead.
discount_profit['Discount Bin'] = discount_profit['Discount Bin'].astype(str)
fig_discount_profit = px.bar(
    discount_profit, x='Discount Bin', y='Profit', text='Profit',
    title='Profit by Discount Range'
)
fig_discount_profit.update_traces(textposition='outside')


# Total profit per 'AC' group.
ac_profit = fin_df.groupby('AC', as_index=False)['Profit'].sum()
fig_ac_profit = px.bar(
    ac_profit, x='AC', y='Profit', text='Profit', title='Profit by AC'
)
fig_ac_profit.update_traces(textposition='outside')


# Page layout: title, then two rows of paired charts.
app.layout = dbc.Container([
    html.H1("Financial Performance Insights", className="text-center mt-4 mb-4"),

    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_pvm_category), md=6),
        dbc.Col(dcc.Graph(figure=fig_pvm_subcat), md=6),
    ]),
    dbc.Row([
        dbc.Col(dcc.Graph(figure=fig_discount_profit), md=6),
        dbc.Col(dcc.Graph(figure=fig_ac_profit), md=6),
    ])
], fluid=True)


# Start the Dash server for this layout.
if __name__ == "__main__":
    app.run(debug=True)
