# Title: Analyzing Sales Performance by Region in a Retail Company

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the shopping data; the path is relative to the notebook's working directory.
df = pd.read_csv("datasets/customer_shopping_data.csv")

# display() renders a plain str through its repr (quoted, with "\n" shown as a
# literal backslash-n), so captions are printed and only the frames are displayed.
print("Data Preview:")
display(df.head())
print("\nData Tail:")
display(df.tail())

# Non-null value counts of every column, grouped per mall and per category.
display(df.groupby("shopping_mall").count())
display(df.groupby("category").count())


In [None]:
def sales_stat(df):
    """Sum ``price`` per mall, per category, and per (mall, category) pair.

    Every total is printed under its own heading before the function returns.

    Args:
        df: DataFrame providing the "shopping_mall", "category" and "price"
            columns.

    Returns:
        A 3-tuple of Series holding the summed prices, grouped by
        "shopping_mall", by "category", and by both keys, in that order.
    """
    def _price_total(keys):
        # Sum of the "price" column for each distinct value of *keys*.
        return df.groupby(keys)["price"].sum()

    # Building the tuple evaluates all three aggregations before any output
    # is written.
    report = (
        ("\nTotal Sales by Branch:\n", _price_total("shopping_mall")),
        ("\nTotal Sales by Category:\n", _price_total("category")),
        ("\nTotal Sales:\n", _price_total(["shopping_mall", "category"])),
    )
    for heading, totals in report:
        print(heading, totals)
    return tuple(totals for _, totals in report)

def plot_pie(data, title):
    """Draw *data* as a pie chart on a new 8x8 figure and show it.

    Args:
        data: Values for the wedges; ``data.index`` supplies the wedge labels.
        title: Text placed above the chart.
    """
    _, axes = plt.subplots(figsize=(8, 8))
    # Each wedge is annotated with its share, formatted to one decimal place.
    axes.pie(data, labels=data.index, autopct='%1.1f%%', startangle=140)
    axes.set_title(title)
    plt.show()

def plot_groupbar(df):
    """Show a grouped bar chart of summed ``price`` per mall and category.

    Args:
        df: DataFrame providing the "shopping_mall", "category" and "price"
            columns.
    """
    # Rows are malls, columns are categories, cells hold the summed price.
    sales_table = df.pivot_table(
        index="shopping_mall",
        columns="category",
        values="price",
        aggfunc="sum",
    )
    axes = sales_table.plot.bar()
    axes.set_title("Sales by Category across Branches")
    axes.set_xlabel("Shopping Mall Branch")
    axes.set_ylabel("Total Sales")
    plt.xticks(rotation=45)
    axes.legend(title="Product Category")
    plt.show()

In [None]:
# Aggregate once; sales_stat also prints each total as it goes.
branch_sales, category_sales, combined_sales = sales_stat(df)

# Show each total again, ordered from smallest to largest.
for _totals in (branch_sales, category_sales, combined_sales):
    display(_totals.sort_values())

# One pie chart per single-key breakdown.
for _series, _title in (
    (branch_sales, "Sales Distribution by Branch"),
    (category_sales, "Sales Distribution by Product Category"),
):
    plot_pie(_series, _title)

# Grouped bars for the mall-by-category breakdown.
plot_groupbar(df)