In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the customer table from the working directory and preview the
# first five rows as plain text.
customers = pd.read_csv("Customers.csv")
print(customers.head(5))

In [None]:
# Read the product table from the working directory and preview the
# first five rows as plain text.
products = pd.read_csv("Products.csv")
print(products.head(5))

In [None]:
# Read the transaction table from the working directory and preview the
# first five rows as plain text.
transactions = pd.read_csv("Transactions.csv")
print(transactions.head(5))

In [None]:
# Parse the date columns into pandas datetimes; later cells use the `.dt`
# accessor on both of them.
customers = customers.assign(
    SignupDate=pd.to_datetime(customers['SignupDate'])
)
transactions = transactions.assign(
    TransactionDate=pd.to_datetime(transactions['TransactionDate'])
)

In [None]:
# Print the per-column count of missing values for each source table,
# in the order customers, products, transactions.
for source_table in (customers, products, transactions):
    print(source_table.isna().sum())

In [None]:
# Print how many fully duplicated rows each source table contains,
# in the order customers, products, transactions.
for source_table in (customers, products, transactions):
    duplicate_rows = source_table.duplicated()
    print(duplicate_rows.sum())

In [None]:
# Build one wide table: each transaction row gets its customer's columns,
# then its product's columns. Both joins use pandas' default inner join, so
# transactions with no matching CustomerID / ProductID are dropped, and any
# column name shared by both sides receives the default _x / _y suffixes
# (a later cell relies on the resulting "Price_y" name).
transactions_with_customers = pd.merge(transactions, customers, on="CustomerID")
merged_data = pd.merge(transactions_with_customers, products, on="ProductID")
print(merged_data.head(5))

**Customer Growth by Region**

In [None]:
# Count new customers per signup year within each region.
customers['SignupYear'] = customers['SignupDate'].dt.year
signup_trends = customers.groupby(['SignupYear', 'Region']).size().reset_index(name='CustomerCount')

# Plot one line per region.
plt.figure(figsize=(10, 6))
sns.lineplot(data=signup_trends, x='SignupYear', y='CustomerCount', hue='Region', marker='o')
# Years are whole numbers on a continuous axis; without explicit ticks
# matplotlib may place fractional ones (e.g. 2022.5) when only a few years
# are present. Pin the ticks to the years that actually occur.
signup_years = [int(year) for year in sorted(signup_trends['SignupYear'].unique())]
plt.xticks(signup_years)
plt.title('Customer Signup Trends by Region')
plt.xlabel('Year')
plt.ylabel('Number of Customers')
plt.legend(title='Region')
plt.show()


**Top Revenue-Generating Products**

In [None]:
# Sum TotalValue per product name and keep the ten largest totals,
# ordered from highest to lowest.
revenue_by_product = merged_data.groupby('ProductName')['TotalValue'].sum()
product_revenue = revenue_by_product.sort_values(ascending=False).head(10)

# Bar chart of those ten products, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(10, 6))
product_revenue.plot(kind='bar', color='skyblue', ax=ax)
ax.set_title('Top 10 Revenue-Generating Products')
ax.set_xlabel('Product Name')
ax.set_ylabel('Total Revenue (USD)')
plt.xticks(rotation=45)
plt.show()

**Seasonal Trends**

In [None]:
# Tag every transaction with its calendar month number. Only the month is
# kept, so the same month from different years is pooled together.
merged_data['TransactionMonth'] = merged_data['TransactionDate'].dt.month

# Total revenue for each month number, as a two-column frame.
revenue_per_month = merged_data.groupby('TransactionMonth')['TotalValue'].sum()
monthly_revenue = revenue_per_month.reset_index()

# One bar per month; hue mirrors x only to colour the bars from the palette,
# so the legend is suppressed.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=monthly_revenue,
    x='TransactionMonth',
    y='TotalValue',
    hue='TransactionMonth',
    palette='viridis',
    legend=False,
    ax=ax,
)
ax.set_title('Monthly Revenue Trends')
ax.set_xlabel('Month')
ax.set_ylabel('Total Revenue (USD)')
plt.show()

**High-Value Customers**

In [None]:
# Total revenue per customer, highest spender first.
customer_revenue = merged_data.groupby('CustomerID')['TotalValue'].sum().sort_values(ascending=False)

# Running share of overall revenue as customers are added in that order.
# The result stays indexed by CustomerID.
customer_revenue_cumsum = customer_revenue.cumsum() / customer_revenue.sum()

# For the chart, replace the CustomerID index with the customer's rank
# (1 = highest revenue). Plotting the CustomerID-indexed series directly
# would put customer IDs on an axis that is labelled "Customer Rank".
cumulative_share_by_rank = customer_revenue_cumsum.reset_index(drop=True)
cumulative_share_by_rank.index = cumulative_share_by_rank.index + 1

# Plot the Pareto-style cumulative curve with an 80% reference line.
plt.figure(figsize=(10, 6))
cumulative_share_by_rank.plot(drawstyle='steps-post', color='purple')
plt.axhline(y=0.8, color='r', linestyle='--', label='80% of Revenue')
plt.title('Cumulative Revenue Contribution by Customers')
plt.xlabel('Customer Rank')
plt.ylabel('Cumulative Revenue Contribution')
plt.legend()
plt.show()

**Product Pricing Impact**

In [None]:
# Group products by price ranges.
# If the merge left a suffixed "Price_y" column, expose it as "Price";
# rename() ignores the mapping when that column is absent. Reassigning
# (instead of inplace=True) keeps the cell safe to re-run.
merged_data = merged_data.rename(columns={"Price_y": "Price"})

# Bin edges in USD; intervals are right-closed, e.g. (0, 20], (20, 50].
price_bins = [0, 20, 50, 100, 200]
price_labels = ['0-20', '20-50', '50-100', '100-200']
# pd.cut returns NaN for values outside the edges and groupby then drops
# those rows, so prices above the last edge would silently vanish from the
# chart. Add an open-ended top bin only when the data actually needs it.
if merged_data['Price'].max() > price_bins[-1]:
    price_bins.append(float('inf'))
    price_labels.append('200+')

merged_data['PriceRange'] = pd.cut(merged_data['Price'], bins=price_bins, labels=price_labels)
# observed=False keeps every price range in the result, even empty ones.
price_sales = merged_data.groupby('PriceRange', observed=False)['Quantity'].sum().reset_index()

# Plot price range vs sales volume.
plt.figure(figsize=(10, 6))
sns.barplot(data=price_sales, x='PriceRange', y='Quantity', hue='PriceRange', dodge=False, palette='coolwarm', legend=False)
plt.title('Sales Volume by Price Range')
plt.xlabel('Price Range (USD)')
plt.ylabel('Total Quantity Sold')
plt.show()