Import necessary libraries :

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Load datasets :

In [None]:
# Read the three source tables; the relative file names are resolved against
# the notebook's working directory.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

Convert date columns to datetime :

In [None]:
# Parse the date columns so the later `.dt` accessors (monthly grouping)
# work on real datetimes rather than on the raw CSV values.
for frame, date_column in (
    (customers, 'SignupDate'),
    (transactions, 'TransactionDate'),
):
    frame[date_column] = pd.to_datetime(frame[date_column])

1. Overview of datasets :

In [None]:
# Structure (columns, dtypes, non-null counts) and summary statistics of the
# customers table.
print("Customers Dataset Overview")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
customers.info()
print(customers.describe(include='all'))

In [None]:
# Structure (columns, dtypes, non-null counts) and summary statistics of the
# products table.
print("\nProducts Dataset Overview")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
products.info()
print(products.describe(include='all'))

In [None]:
# Structure (columns, dtypes, non-null counts) and summary statistics of the
# transactions table.
print("\nTransactions Dataset Overview")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
transactions.info()
print(transactions.describe(include='all'))

2. Missing Values Check (Ensure Data Integrity) :

In [None]:
# Report the number of missing entries per column for each table.
print("\nMissing Values:")
for label, frame in (
    ("Customers:\n", customers),
    ("Products:\n", products),
    ("Transactions:\n", transactions),
):
    # isna() is the same check as isnull(); summing the boolean mask counts
    # the missing cells column by column.
    print(label, frame.isna().sum())

3. Basic Analysis (Analyzes unique counts, revenue, and transaction details) :

Customers :

In [None]:
# Distinct customer IDs, followed by the number of customer rows per region.
unique_customer_count = customers['CustomerID'].nunique()
customers_per_region = customers['Region'].value_counts()

print("\nNumber of unique customers:", unique_customer_count)
print(customers_per_region)

Products :

In [None]:
# Distinct product IDs, followed by the number of product rows per category.
unique_product_count = products['ProductID'].nunique()
products_per_category = products['Category'].value_counts()

print("\nNumber of unique products:", unique_product_count)
print(products_per_category)

Transactions :

In [None]:
# Headline transaction figures: distinct transaction IDs, summed TotalValue
# and mean TotalValue per transaction row.
transaction_values = transactions['TotalValue']
transaction_count = transactions['TransactionID'].nunique()

print("\nNumber of transactions:", transaction_count)
print("Total revenue generated: $", transaction_values.sum())
print("Average transaction value: $", transaction_values.mean())

4. Visualizations :

Customers by Region :

In [None]:
# Bar chart of the number of customer rows in each region.
fig, ax = plt.subplots(figsize=(8, 5))
# NOTE(review): newer seaborn releases warn when `palette` is passed without
# `hue`; confirm against the installed seaborn version before changing the call.
sns.countplot(data=customers, x='Region', palette='viridis', ax=ax)
ax.set_title("Customer Count by Region")
ax.set_xlabel("Region")
ax.set_ylabel("Count")
plt.show()

Top 10 Products by Revenue :

In [None]:
# Sum TotalValue per product, attach the product attributes, and keep the ten
# highest-revenue rows. `top_products` is reused by the insights cell below.
revenue_by_product = transactions.groupby('ProductID', as_index=False)['TotalValue'].sum()
top_products = (
    revenue_by_product
    .merge(products, on='ProductID')  # default inner join on ProductID
    .sort_values(by='TotalValue', ascending=False)
    .head(10)
)

# Horizontal bar chart: one bar per product name, length = revenue.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(data=top_products, y='ProductName', x='TotalValue', palette='magma', ax=ax)
ax.set_title("Top 10 Products by Revenue")
ax.set_xlabel("Revenue ($)")
ax.set_ylabel("Product Name")
plt.show()

Transactions Over Time :

In [None]:
# Count transactions per calendar month and order the months chronologically.
monthly_transaction_counts = (
    transactions['TransactionDate']
    .dt.to_period("M")
    .value_counts()
    .sort_index()
)

# One bar per month that has at least one transaction.
fig, ax = plt.subplots(figsize=(12, 6))
monthly_transaction_counts.plot.bar(ax=ax, color='skyblue')
ax.set_title("Monthly Transaction Count")
ax.set_xlabel("Month")
ax.set_ylabel("Number of Transactions")
plt.show()

5. Derive Business Insights :

In [None]:
# Headline findings computed from the loaded tables and from the
# `top_products` table built in the "Top 10 Products by Revenue" cell.
total_revenue = transactions['TotalValue'].sum()

# mode() returns the most frequent region, which is the largest group but not
# necessarily a majority, so its actual share of customers is reported.
top_region = customers['Region'].mode()[0]
top_region_share = (customers['Region'] == top_region).mean()

# Share of all revenue contributed by the rows in `top_products`; guarded so
# a zero revenue total does not produce a division warning / NaN.
top_products_share = (
    top_products['TotalValue'].sum() / total_revenue if total_revenue else 0.0
)

# Month (as a pandas Period) with the highest number of transactions.
peak_month = (
    transactions['TransactionDate'].dt.to_period("M").value_counts().idxmax()
)

print("\nBusiness Insights:")
print("1. Customers from {} form the largest regional group ({:.1%} of customers).".format(
    top_region, top_region_share
))
print("2. The top revenue-generating product is {}.".format(top_products.iloc[0]['ProductName']))
print("3. Total revenue generated is ${:,.2f}.".format(total_revenue))
# The original line printed len(top_products) as the number of products that
# "account for the top 10% of revenue", a figure the code never computed.
print("4. The top {} products account for {:.1%} of total revenue.".format(
    len(top_products), top_products_share
))
print("5. The number of transactions peaked in {}.".format(peak_month))