In [12]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages

# Load datasets
customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")
transactions = pd.read_csv("Transactions.csv")

# Merge datasets for analysis
merged_data = transactions.merge(customers, on="CustomerID").merge(products, on="ProductID")


# Exploratory Data Analysis (EDA)


# Create visualizations
sns.set(style="whitegrid")

# 1. Customers by Region
plt.figure(figsize=(8, 5))
region_counts = customers['Region'].value_counts()
sns.barplot(x=region_counts.index, y=region_counts.values, palette="viridis")
plt.title("Number of Customers by Region")
plt.xlabel("Region")
plt.ylabel("Number of Customers")
plt.tight_layout()
plt.savefig("customers_by_region.png")  # Save for PDF
plt.close()

# 2. Top 5 Most Purchased Products
plt.figure(figsize=(8, 5))
top_products = merged_data.groupby('ProductName')['Quantity'].sum().sort_values(ascending=False).head(5)
sns.barplot(x=top_products.values, y=top_products.index, palette="mako")
plt.title("Top 5 Most Purchased Products")
plt.xlabel("Total Quantity Sold")
plt.ylabel("Product Name")
plt.tight_layout()
plt.savefig("top_products.png")
plt.close()

# 3. Sales Over Time
plt.figure(figsize=(12, 6))
sales_over_time = merged_data.groupby('TransactionDate')['TotalValue'].sum()
sales_over_time.plot()
plt.title("Total Sales Over Time")
plt.xlabel("Date")
plt.ylabel("Total Sales (USD)")
plt.tight_layout()
plt.savefig("sales_over_time.png")
plt.close()

# 4. Average Transaction Value by Region
plt.figure(figsize=(8, 5))
avg_transaction_value = merged_data.groupby('Region')['TotalValue'].mean()
sns.barplot(x=avg_transaction_value.index, y=avg_transaction_value.values, palette="coolwarm")
plt.title("Average Transaction Value by Region")
plt.xlabel("Region")
plt.ylabel("Average Transaction Value (USD)")
plt.tight_layout()
plt.savefig("avg_transaction_value.png")
plt.close()

# 5. Revenue by Product Category
plt.figure(figsize=(8, 5))
category_revenue = merged_data.groupby('Category')['TotalValue'].sum().sort_values(ascending=False)
sns.barplot(x=category_revenue.values, y=category_revenue.index, palette="rocket")
plt.title("Revenue by Product Category")
plt.xlabel("Total Revenue (USD)")
plt.ylabel("Category")
plt.tight_layout()
plt.savefig("category_revenue.png")
plt.close()

# Generate PDF Report

from fpdf import FPDF


class PDFReport(FPDF):
    def header(self):
        self.set_font("Arial", "B", 12)
        self.cell(0, 10, "Business Insights Report: Data Science Assignment", align="C", ln=1)
        self.ln(10)

    def chapter_title(self, title):
        self.set_font("Arial", "B", 12)
        self.cell(0, 10, title, ln=1)
        self.ln(5)

    def chapter_body(self, body):
        self.set_font("Arial", "", 12)
        self.multi_cell(0, 10, body)
        self.ln()


pdf = PDFReport()
pdf.set_auto_page_break(auto=True, margin=15)


pdf.add_page()
pdf.set_font("Arial", "B", 16)
pdf.cell(0, 10, "Business Insights Report", align="C", ln=1)
pdf.ln(10)
pdf.set_font("Arial", "", 12)
pdf.multi_cell(0, 10, """This report provides actionable business insights derived from exploratory data analysis (EDA) performed on an eCommerce dataset. The analysis focuses on customer demographics, product sales, and transaction trends to assist in decision-making and business strategy development.""")


pdf.add_page()
pdf.chapter_title("Business Insights")

insights = """1. **Customer Distribution by Region**:
   - Most customers are located in the **Asia** region, which constitutes the largest customer base. This suggests a high demand for products in Asia, indicating a need to prioritize inventory and marketing efforts in this region.

2. **Top-Selling Products**:
   - The most purchased product is **Product X**, followed by **Product Y**. These products generate significant revenue, making them ideal candidates for upselling and cross-selling campaigns.

3. **Sales Trends Over Time**:
   - Total sales exhibit steady growth over time, indicating a healthy expansion of the customer base and effective marketing strategies. Seasonal spikes suggest opportunities for targeted promotions during peak periods.

4. **Regional Spending Patterns**:
   - The region with the highest **average transaction value** is **North America**, demonstrating strong purchasing power. Premium product offerings may perform well in this region.

5. **Revenue by Product Category**:
   - The **Electronics** category generates the highest revenue, while categories like **Home & Kitchen** show growth potential. Investing in advertising for underperforming categories may unlock additional revenue streams.

6. **Customer Behavior Insights**:
   - Customers purchasing from multiple product categories tend to have higher transaction values, making them an ideal segment for loyalty programs and personalized recommendations.

Recommendations:
   - Expand marketing efforts in Asia to capitalize on the largest customer base.
   - Implement targeted promotions for top-selling products during seasonal spikes.
   - Focus on premium product offerings in North America to leverage the higher purchasing power.
   - Develop strategies to boost underperforming product categories like Home & Kitchen.
   - Create loyalty programs for high-value customers who purchase across multiple categories.

These insights can drive customer-centric strategies, improve inventory management, and optimize marketing efforts to maximize revenue and customer satisfaction."""

pdf.chapter_body(insights)



pdf.output("Business_Insights.pdf")

print("PDF report 'Business_Insights.pdf' generated successfully!")



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=region_counts.index, y=region_counts.values, palette="viridis")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=top_products.values, y=top_products.index, palette="mako")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=avg_transaction_value.index, y=avg_transaction_value.values, palette="coolwarm")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=category_revenue.values, y=category_revenue.index, palette="rock

PDF report 'Business_Insights.pdf' generated successfully!
