In [None]:
import pandas as pd
import plotly.express as px

# Load the raw sales export and normalise the columns used by the cells below.
df = pd.read_csv("data/raw/Chocolate_Sales.csv").assign(
    # "Amount" is handled as text here: drop every "$" and "," and cast to float.
    Amount=lambda frame: (
        frame["Amount"]
        .str.replace("$", "", regex=False)
        .str.replace(",", "", regex=False)
        .astype(float)
    ),
    # dayfirst=True: ambiguous numeric dates are read as day-then-month.
    Date=lambda frame: pd.to_datetime(frame["Date"], dayfirst=True),
    # Month label as a string, derived from the already-parsed Date column
    # (assign evaluates keyword arguments in order, so Date is a datetime here).
    Month=lambda frame: frame["Date"].dt.to_period("M").astype(str),
)

# Preview the first rows of the cleaned frame.
df.head()

In [None]:
# Column dtypes and non-null counts; info() prints its report rather than returning it.
df.info()
# Summary statistics; as the last expression in the cell this is the displayed output.
df.describe()

In [None]:
# Histogram of the per-row Amount values (nbins=30).
fig = px.histogram(
    data_frame=df,
    x="Amount",
    nbins=30,
    title="Distribution of Sales Amount",
)
fig.show()

In [None]:
# Sum Amount per Country, ordered from the largest total to the smallest.
sales_by_country = (
    df.groupby("Country", as_index=False)["Amount"]
    .sum()
    .sort_values(by="Amount", ascending=False)
)

# One bar per country, each with its own colour.
fig = px.bar(
    data_frame=sales_by_country,
    x="Country",
    y="Amount",
    color="Country",
    title="Total Sales by Country",
)
fig.show()

In [None]:
# Sum Amount per Product, ordered from the largest total to the smallest.
sales_by_product = (
    df.groupby("Product", as_index=False)["Amount"]
    .sum()
    .sort_values(by="Amount", ascending=False)
)

# Horizontal bars: products on the y axis, totals on the x axis.
fig = px.bar(
    data_frame=sales_by_product,
    x="Amount",
    y="Product",
    orientation="h",
    color="Product",
    title="Total Sales by Product",
)
# The legend would only repeat the y-axis labels, so hide it; order the
# product axis by each bar's total instead of by row order.
fig.update_layout(
    showlegend=False,
    yaxis={"categoryorder": "total ascending"},
)
fig.show()

In [None]:
# Total Amount for each Month label.
monthly_sales = (
    df.groupby("Month", as_index=False)["Amount"]
    .sum()
)

# Line chart of the monthly totals, with a marker on every data point.
fig = px.line(
    data_frame=monthly_sales,
    x="Month",
    y="Amount",
    markers=True,
    title="Monthly Sales Trend",
)
fig.show()

In [None]:
# Total Amount per Sales Person; keep the ten largest totals.
top_sellers = (
    df.groupby("Sales Person", as_index=False)["Amount"]
    .sum()
    .sort_values(by="Amount", ascending=False)
    .head(10)
)

# Horizontal bars: sales people on the y axis, totals on the x axis.
fig = px.bar(
    data_frame=top_sellers,
    x="Amount",
    y="Sales Person",
    orientation="h",
    title="Top 10 Sales People by Total Sales",
)
# Order the y axis by each bar's total instead of by row order.
fig.update_layout(yaxis={"categoryorder": "total ascending"})
fig.show()

In [None]:
# One point per row: Boxes Shipped against Amount, coloured by Country.
# Sales Person and Product are added to the hover tooltip.
fig = px.scatter(
    data_frame=df,
    x="Boxes Shipped",
    y="Amount",
    color="Country",
    hover_data=["Sales Person", "Product"],
    title="Boxes Shipped vs Sales Amount",
)
fig.show()

In [None]:
# Box plot of Amount for each Country; only outlier points are drawn individually.
fig = px.box(
    data_frame=df,
    x="Country",
    y="Amount",
    color="Country",
    points="outliers",
    title="Sales Amount Distribution by Country",
)
fig.show()

In [None]:
# Total Amount for every (Month, Country) pair.
monthly_country = (
    df.groupby(["Month", "Country"], as_index=False)["Amount"]
    .sum()
)

# One line per country across the Month labels, with a marker on every point.
fig = px.line(
    data_frame=monthly_country,
    x="Month",
    y="Amount",
    color="Country",
    markers=True,
    title="Monthly Sales Trend by Country",
)
fig.show()

In [None]:
# Monthly Sales by Product (stacked area)
# Each value is the Amount summed within a single (Month, Product) pair; no
# running total is computed, so this shows per-month sales, not cumulative sales.
product_monthly = (
    df.groupby(["Month", "Product"], as_index=False)["Amount"]
    .sum()
)

# One filled area per product along the Month labels.
fig = px.area(
    data_frame=product_monthly,
    x="Month",
    y="Amount",
    color="Product",
    title="Monthly Sales by Product (Stacked Area)",
)
fig.show()