In [None]:
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler

# Directory that holds the three raw CSV exports. Change this one constant to
# point the notebook at a different copy of the data.
DATA_DIR = "D:/Users/Vipra Nagaich/Downloads"

customers = pd.read_csv(f"{DATA_DIR}/Customers.csv")
products = pd.read_csv(f"{DATA_DIR}/Products.csv")
transactions = pd.read_csv(f"{DATA_DIR}/Transactions.csv")

# One encoder per column: fit_transform() refits the encoder each time, so a
# single shared instance would end up holding only the Category classes and
# Region_Encoded could no longer be mapped back with inverse_transform().
region_encoder = LabelEncoder()
category_encoder = LabelEncoder()

customers['Region_Encoded'] = region_encoder.fit_transform(customers['Region'])
products['Category_Encoded'] = category_encoder.fit_transform(products['Category'])

# Backward-compatible alias: code that still refers to `label_encoder` sees an
# encoder fitted on Category, exactly as it did when one instance was shared.
label_encoder = category_encoder

# Attach customer and product attributes to every transaction. The default
# inner join drops transactions whose key has no match in the lookup table.
# validate='many_to_one' raises MergeError if a CustomerID / ProductID is
# duplicated in its lookup table, instead of silently multiplying transaction
# rows (which would inflate every count and sum computed from merged_data).
merged_data = pd.merge(transactions, customers, on='CustomerID', validate='many_to_one')
merged_data = pd.merge(merged_data, products, on='ProductID', validate='many_to_one')

# Convert the date columns to datetime so the .dt accessor can be used on them.
merged_data['SignupDate'] = pd.to_datetime(merged_data['SignupDate'])
merged_data['TransactionDate'] = pd.to_datetime(merged_data['TransactionDate'])

merged_data.head()

In [None]:
# Number of missing values in each column of the merged frame.
merged_data.isna().sum()

In [None]:
# Summary statistics for every column of the merged frame; include='all'
# makes describe() report on non-numeric columns as well as numeric ones.
merged_data.describe(include='all')

In [None]:
# merged_data is transaction-level (transactions joined to customers), so
# value_counts() on Region would count transactions, not customers. The chart
# is titled and labelled as a customer count, so count distinct CustomerIDs
# per region instead. Only customers that appear in merged_data are counted.
customers_per_region = (
    merged_data.groupby("Region")["CustomerID"]
    .nunique()
    .sort_values(ascending=False)  # largest region first, like value_counts()
)

fig, ax = plt.subplots(figsize=(6, 4))
customers_per_region.plot(kind="bar", ax=ax)
ax.set_title("Customer Distribution by Region")
ax.set_xlabel("Region")
ax.set_ylabel("Count of Customers")
fig.tight_layout()
plt.show()

In [None]:
# Sum of TotalValue for each Category, ordered from largest to smallest.
sales_by_category = (
    merged_data.groupby("Category")["TotalValue"]
    .sum()
    .sort_values(ascending=False)
)

# Draw on an explicit Axes rather than relying on pyplot's "current figure".
fig, ax = plt.subplots(figsize=(6, 4))
sales_by_category.plot(kind="bar", ax=ax)
ax.set_title("Total Sales by Category")
ax.set_xlabel("Category")
ax.set_ylabel("Total Sales (USD)")
fig.tight_layout()
plt.show()

In [None]:
# Make sure the column is datetime before using the .dt accessor.
merged_data["TransactionDate"] = pd.to_datetime(merged_data["TransactionDate"])

# Group transactions by year-month period and count TransactionIDs in each.
transactions_by_month = (
    merged_data
    .groupby(merged_data["TransactionDate"].dt.to_period("M"))["TransactionID"]
    .count()
)

fig, ax = plt.subplots(figsize=(8, 4))
transactions_by_month.plot(marker="o", ax=ax)
ax.set_title("Transactions Over Time (Monthly)")
ax.set_xlabel("Month")
ax.set_ylabel("Number of Transactions")
fig.tight_layout()
plt.show()

In [None]:
# Make sure the column is datetime, then store each row's signup year as a
# new column on merged_data.
merged_data["SignupDate"] = pd.to_datetime(merged_data["SignupDate"])
merged_data["signup_year"] = merged_data["SignupDate"].dt.year

# Sum of TotalValue across all rows sharing a signup year.
signups_vs_sales = (
    merged_data.groupby("signup_year")["TotalValue"]
    .sum()
)

# Earliest and latest signup years present in the grouped result.
oldest_year = signups_vs_sales.index.min()
newest_year = signups_vs_sales.index.max()

signups_vs_sales

In [None]:
# Index label (a year-month period) of the largest value in
# transactions_by_month, which must already exist from the monthly
# transactions cell. idxmax() returns the first such label if there is a tie.
peak_month = transactions_by_month.idxmax()
peak_month

In [None]:
# Month number of each transaction, stored as a new column on merged_data.
# NOTE(review): .dt.month drops the year, so the same month from different
# years is pooled together - confirm this is intended.
merged_data["TransactionMonth"] = merged_data["TransactionDate"].dt.month

# Number of TransactionIDs for every (Region, TransactionMonth) pair, as a
# flat table with a TransactionCount column.
tx_per_region_month = merged_data.groupby(["Region", "TransactionMonth"])["TransactionID"].count()
monthly_region_tx = tx_per_region_month.reset_index(name="TransactionCount")

# For each region, idxmax() gives the row label with the highest count; use
# those labels to pull the full rows out of the flat table.
busiest_row_labels = monthly_region_tx.groupby("Region")["TransactionCount"].idxmax()
max_month_for_each_region = monthly_region_tx.loc[busiest_row_labels]

print("Month with highest transactions for each region:")
print(max_month_for_each_region)

In [None]:
# Reshape the flat counts into a month x region grid: one row per
# TransactionMonth, one column per Region, cells holding TransactionCount.
pivot_table = monthly_region_tx.pivot(
    index="TransactionMonth",
    columns="Region",
    values="TransactionCount",
)

# Annotated heatmap of the grid, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(pivot_table, annot=True, fmt=".0f", cmap="Blues", ax=ax)
ax.set_title("Transactions by Month and Region (Heatmap)")
ax.set_xlabel("Region")
ax.set_ylabel("Month")
fig.tight_layout()
plt.show()

In [None]:
# Grouped bar chart of the month x region grid: one cluster of bars per
# month, one bar per region. DataFrame.plot creates the figure and returns
# its Axes.
ax = pivot_table.plot.bar(figsize=(10, 6))
ax.set_title("Transactions by Month and Region (Grouped Bar)")
ax.set_xlabel("Month")
ax.set_ylabel("Number of Transactions")
ax.legend(title="Region")
ax.figure.tight_layout()
plt.show()

In [None]:
# Number of TransactionIDs for every (Region, Category) pair, as a flat
# table with a TransactionCount column.
tx_per_region_category = merged_data.groupby(["Region", "Category"])["TransactionID"].count()
category_region_tx = tx_per_region_category.reset_index(name="TransactionCount")

# For each region, idxmax() gives the row label with the highest count; use
# those labels to pull the full rows out of the flat table.
top_row_labels = category_region_tx.groupby("Region")["TransactionCount"].idxmax()
max_category_per_region = category_region_tx.loc[top_row_labels]

print("Category with highest transactions for each region:")
print(max_category_per_region)

# Bars grouped by region on the x-axis and coloured by category.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=category_region_tx,
    x="Region",
    y="TransactionCount",
    hue="Category",
    palette="viridis",
    ax=ax,
)
ax.set_title("Transactions by Category and Region")
ax.set_xlabel("Region")
ax.set_ylabel("Number of Transactions")
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()