# In [1]:
from bokeh.plotting import figure, output_file, show, gridplot
from bokeh.models import FactorRange, NumeralTickFormatter
import pandas as pd
from bokeh.models import Div
from math import pi
from bokeh.transform import cumsum
from itertools import cycle

# Load the Superstore orders from CSV (point 'Superstore.csv' at the real file).
# 'Order Date' is parsed as a date, with ambiguous values read day-first.
data = pd.read_csv(
    'Superstore.csv',
    dayfirst=True,
    parse_dates=['Order Date'],
)

# Add a 'Year' column holding the year of each order date as a string.
data['Year'] = data['Order Date'].dt.year.astype(str)

# Distinct values found in the 'Category' column.
categories = pd.unique(data['Category']).tolist()

# Create the first figure (top 20 products by loss)
# Total profit per product, lowest first, limited to 20 rows. Labels and bar
# lengths are both taken from this one series so they stay in the same order.
loss_by_product = data.groupby('Product Name')['Profit'].sum().sort_values().head(20)
top_losses = loss_by_product.index.tolist()
p1 = figure(y_range=top_losses, width=750, title="Top 20 Products by Loss", toolbar_location=None, tools="")
# `y` and `right` are paired by position; a separately grouped series would be
# ordered alphabetically by product name and attach values to the wrong labels.
p1.hbar(y=top_losses, right=loss_by_product.tolist(), height=0.9, color="red", legend_label="Loss")
p1.ygrid.grid_line_color = None
p1.xaxis.axis_label = "Loss"

# Create the second figure (top 5 products by profit)
# Total profit per product, highest first, limited to 5 rows. Labels and bar
# lengths are both taken from this one series so they stay in the same order.
profit_by_product = data.groupby('Product Name')['Profit'].sum().sort_values(ascending=False).head(5)
top_profits = profit_by_product.index.tolist()
p2 = figure(y_range=top_profits, width=750, title="Top 5 Products by Profit", toolbar_location=None, tools="")
# `y` and `right` are paired by position; a separately grouped series would be
# ordered alphabetically by product name and attach values to the wrong labels.
p2.hbar(y=top_profits, right=profit_by_product.tolist(), height=0.9, color="green", legend_label="Profit")
p2.ygrid.grid_line_color = None
p2.xaxis.axis_label = "Profit"

# Create the third figure (profit year-wise)
# Total profit per year. groupby sorts its keys, so the index lists the years
# in ascending order; the axis labels are taken from that same index so every
# bar is paired with its own year (unique() would give first-appearance order).
profit_by_year = data.groupby('Year')['Profit'].sum()
years = profit_by_year.index.tolist()
p3 = figure(x_range=FactorRange(*years),width=750, height=550, title="Profit Year-wise", toolbar_location=None, tools="")
p3.vbar(x=years, top=profit_by_year.tolist(), width=0.9, color="blue", legend_label="Profit")
p3.xgrid.grid_line_color = None
p3.y_range.start = 0
p3.yaxis.axis_label = "Profit"
p3.yaxis.formatter = NumeralTickFormatter(format="0,0")

# Create the fourth figure (top 5 customers by profit)
# Total profit per customer, highest first, limited to 5 rows. Labels and bar
# lengths are both taken from this one series so they stay in the same order.
profit_by_customer = data.groupby('Customer Name')['Profit'].sum().sort_values(ascending=False).head(5)
top_customers_by_profit = profit_by_customer.index.tolist()
p4 = figure(y_range=top_customers_by_profit, width=750, title="Top Customers by Profit", toolbar_location=None, tools="")
# `y` and `right` are paired by position; a separately grouped series would be
# ordered alphabetically by customer name and attach values to the wrong labels.
p4.hbar(y=top_customers_by_profit, right=profit_by_customer.tolist(), height=0.9, color="purple", legend_label="Profit")
p4.ygrid.grid_line_color = None
p4.xaxis.axis_label = "Profit"
p4.xaxis.formatter = NumeralTickFormatter(format="0,0")

# Fifth figure: total sales per region, drawn as vertical bars.
# The result is a frame with one 'Region' column and one 'Sales' column.
sales_by_region = data.groupby('Region', as_index=False)['Sales'].sum()
p5 = figure(
    x_range=sales_by_region['Region'],
    width=750,
    height=650,
    title="Sales by Region",
    toolbar_location=None,
    tools="",
)
p5.vbar(
    x=sales_by_region['Region'],
    top=sales_by_region['Sales'],
    width=0.9,
    color="orange",
    legend_label="Sales",
)
# Axis cosmetics: labelled, comma-formatted value axis starting at zero,
# and no vertical grid lines.
p5.yaxis.axis_label = "Sales"
p5.yaxis.formatter = NumeralTickFormatter(format="0,0")
p5.y_range.start = 0
p5.xgrid.grid_line_color = None

# Sixth figure: total sales per category, drawn as a single line.
sales_by_category = data.groupby('Category')['Sales'].sum()
# Turn the grouped series into a frame with 'Category' and 'Sales' columns.
sales_by_category = sales_by_category.reset_index()
p6 = figure(
    x_range=sales_by_category['Category'],
    width=750,
    height=550,
    title="Sales by Category",
    toolbar_location=None,
    tools="",
)
p6.line(
    x=sales_by_category['Category'],
    y=sales_by_category['Sales'],
    line_width=2,
    line_color="purple",
    legend_label="Sales",
)
# Axis cosmetics: labelled, comma-formatted value axis starting at zero,
# and no vertical grid lines.
p6.yaxis.axis_label = "Sales"
p6.yaxis.formatter = NumeralTickFormatter(format="0,0")
p6.y_range.start = 0
p6.xgrid.grid_line_color = None

# Create the seventh figure (year-wise line chart for Sales by Category)
# One row per (Year, Category) pair with the summed sales.
sales_by_category_yearwise = data.groupby(['Year', 'Category'])['Sales'].sum().reset_index()

p7 = figure(x_range=sales_by_category['Category'], width=950,height=850, title="Sales by Category (Year-wise)", toolbar_location=None, tools="")
# Give each year's line its own colour so the legend entries can be told
# apart; the colour list repeats if there are more years than colours.
year_colors = cycle([
    "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
    "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf",
])
for year, color in zip(sales_by_category_yearwise['Year'].unique(), year_colors):
    subset = sales_by_category_yearwise[sales_by_category_yearwise['Year'] == year]
    p7.line(x=subset['Category'], y=subset['Sales'], line_width=2, line_color=color, legend_label=f"Sales {year}")

p7.xgrid.grid_line_color = None
p7.y_range.start = 0
p7.yaxis.axis_label = "Sales"
p7.yaxis.formatter = NumeralTickFormatter(format="0,0")

# Heading placed above the charts ("Global Superstore Analysis").
page_title = Div(text="<h1 style='text-align:left;'>Global Superstore Analysis</h1>")

# Arrange the heading and the seven figures row by row.
layout = gridplot([
    [page_title],
    [p1, p2],
    [p3, p4],
    [p5, p6],
    [p7],
])


# Direct output to an HTML file, then display the grid.
output_file("global_superstore_analysis_page.html")
show(layout)
