In [20]:
!pip install fpdf



In [21]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from fpdf import FPDF

In [22]:
# Read the three input tables from CSV files in the working directory,
# in the order: customers, products, transactions.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_path)
    for csv_path in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [23]:
# Convert each date column to pandas datetime values, overwriting the
# column on the frame it belongs to.
for frame, date_column in (
    (customers_df, 'SignupDate'),
    (transactions_df, 'TransactionDate'),
):
    frame[date_column] = pd.to_datetime(frame[date_column])

In [24]:
# Build one analysis table: attach customer attributes to each transaction
# via CustomerID, then product attributes via ProductID. Both joins use
# merge's default join type.
transactions_with_customers = pd.merge(transactions_df, customers_df, on='CustomerID')
merged_df = pd.merge(transactions_with_customers, products_df, on='ProductID')


# 1. Summary statistics

In [25]:
# Descriptive statistics of the merged table as returned by DataFrame.describe().
# The result is only stored here; no later cell shown in this notebook reads it.
summary_stats = merged_df.describe()

# 2. Top 5 regions by transaction count


In [26]:
# Number of merged transaction rows per Region value.
# Note: no head(5) cut is applied here, so every region is kept; the chart
# plots all of them and the PDF text later takes the first three.
region_transaction_counts = merged_df['Region'].value_counts()


# 3. Top 5 products by quantity sold


In [27]:
# Sum the Quantity column per product name, then keep the five products
# with the largest totals (largest first).
units_by_product = merged_df.groupby('ProductName')['Quantity'].sum()
top_products = units_by_product.sort_values(ascending=False).head(5)


# 4. Revenue by category


In [28]:
# Sum TotalValue per product category and order the categories from the
# highest total to the lowest.
category_totals = merged_df.groupby('Category')['TotalValue'].sum()
revenue_by_category = category_totals.sort_values(ascending=False)


# 5. Customer lifetime value


In [29]:
# Sum TotalValue per customer and keep the five customers with the
# largest totals (largest first).
spend_by_customer = merged_df.groupby('CustomerID')['TotalValue'].sum()
customer_lifetime_value = spend_by_customer.sort_values(ascending=False).head(5)


# Visualizations


In [30]:
def save_bar_chart(series, title, xlabel, ylabel, filename):
    """Draw `series` as a bar chart and write it to `filename`.

    Args:
        series: pandas Series; its index supplies the bar labels and its
            values the bar heights.
        title: Text for the chart title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        filename: Path of the image file to write.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=series.index, y=series.values, ax=ax)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.tick_params(axis="x", labelrotation=45)
    # Rotated tick labels can extend below the default bottom margin and be
    # cut off in the saved image; tight_layout makes room for them.
    fig.tight_layout()
    fig.savefig(filename)
    # Release the figure once it has been written to disk.
    plt.close(fig)


# One chart per metric; the file names are reused when the PDF is assembled.
save_bar_chart(
    region_transaction_counts,
    title="Top Regions by Transaction Count",
    xlabel="Region",
    ylabel="Transaction Count",
    filename="Anshul_Meshram_region_transaction_counts.png",
)
save_bar_chart(
    top_products,
    title="Top 5 Products by Quantity Sold",
    xlabel="Product",
    ylabel="Quantity Sold",
    filename="Anshul_Meshram_top_products.png",
)
save_bar_chart(
    revenue_by_category,
    title="Revenue by Product Category",
    xlabel="Category",
    ylabel="Revenue",
    filename="Anshul_Meshram_revenue_by_category.png",
)

# PDF Report

In [31]:
class PDF(FPDF):
    """FPDF subclass providing the report's page header and two text helpers."""

    def _bold_line(self, text, align):
        """Write `text` in bold 12 pt Arial on its own line with the given alignment."""
        self.set_font('Arial', 'B', 12)
        # Positional args: width 0, height 10, text, border 0, ln 1, alignment.
        self.cell(0, 10, text, 0, 1, align)

    def header(self):
        """Write the centred report title (overrides FPDF's header hook)."""
        self._bold_line('EDA Report - eCommerce Transactions', 'C')

    def chapter_title(self, title):
        """Write `title` as a left-aligned bold line followed by a 5-unit break."""
        self._bold_line(title, 'L')
        self.ln(5)

    def chapter_body(self, body):
        """Write `body` as regular 12 pt Arial text, then a line break."""
        self.set_font('Arial', '', 12)
        # multi_cell handles the explicit "\n" separators in `body`.
        self.multi_cell(0, 10, body)
        self.ln()

In [32]:
# Create the report document (default constructor arguments) and start its
# first page; later cells append content to this same `pdf` object.
pdf = PDF()
pdf.add_page()

In [33]:
# "Business Insights" section: one numbered line per metric, each showing
# the underlying Series rendered as a plain dict.
pdf.chapter_title("Business Insights")
insight_lines = [
    f"1. Top regions by transaction count: {region_transaction_counts.head(3).to_dict()}",
    f"2. Top products by quantity sold: {top_products.to_dict()}",
    f"3. Revenue by category: {revenue_by_category.head(3).to_dict()}",
    f"4. Top customers by lifetime value: {customer_lifetime_value.to_dict()}",
]
pdf.chapter_body("\n".join(insight_lines))

In [34]:
# "Visualizations" section: embed the three saved charts in order,
# each at a width of 150.
pdf.chapter_title("Visualizations")
for chart_file in (
    "Anshul_Meshram_region_transaction_counts.png",
    "Anshul_Meshram_top_products.png",
    "Anshul_Meshram_revenue_by_category.png",
):
    pdf.image(chart_file, w=150)

In [35]:
# Write the assembled report to disk. As the last expression of the cell,
# the call's return value is echoed as the cell output.
pdf.output("Anshul_Meshram_EDA.pdf")

''

In [36]:
# Final status message; it is printed unconditionally once this cell runs.
print("EDA Report and visualizations generated successfully with proper naming convention.")


EDA Report and visualizations generated successfully with proper naming convention.
