In [1]:
# Make the user's Google Drive available inside the Colab runtime.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


Mounted at /content/drive


In [2]:
import os

# Switch the working directory to the assignment folder on the mounted Drive.
# Adjust path as needed.
assignment_dir = '/content/drive/MyDrive/eCommerce_Assignment'
os.chdir(assignment_dir)


In [3]:
import pandas as pd

# Read the three source tables (paths are relative to the current working
# directory), in the same order as the names on the left-hand side.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)


In [4]:
# First look at each table: column schema of customers, the first rows of
# products, and summary statistics for transactions.
# DataFrame.info() writes its report itself and returns None, so it is called
# bare; wrapping it in print() would append a stray "None" line to the output.
customers.info()
print(products.head())
print(transactions.describe())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   CustomerID    200 non-null    object
 1   CustomerName  200 non-null    object
 2   Region        200 non-null    object
 3   SignupDate    200 non-null    object
dtypes: object(4)
memory usage: 6.4+ KB
None
  ProductID              ProductName     Category   Price
0      P001     ActiveWear Biography        Books  169.30
1      P002    ActiveWear Smartwatch  Electronics  346.30
2      P003  ComfortLiving Biography        Books   44.12
3      P004            BookWorld Rug   Home Decor   95.69
4      P005          TechPro T-Shirt     Clothing  429.31
          Quantity   TotalValue       Price
count  1000.000000  1000.000000  1000.00000
mean      2.537000   689.995560   272.55407
std       1.117981   493.144478   140.73639
min       1.000000    16.080000    16.08000
25%       2.000000   295.295000

In [5]:
# Data-quality checks: missing values per customer column, and the number of
# product rows that are exact duplicates of an earlier row.
missing_per_column = customers.isnull().sum()
duplicate_product_rows = products.duplicated().sum()

print(missing_per_column)
print(duplicate_product_rows)


CustomerID      0
CustomerName    0
Region          0
SignupDate      0
dtype: int64
0


In [6]:
# Remove, in place, every row that has a missing value in any column,
# for each of the three tables.
for frame in (customers, products, transactions):
    frame.dropna(inplace=True)


In [7]:
# Remove, in place, rows that exactly repeat an earlier row, for each of the
# three tables.
for table in (customers, products, transactions):
    table.drop_duplicates(inplace=True)


In [8]:
# How many customers belong to each region.
customers_per_region = customers['Region'].value_counts()
print(customers_per_region)


Region
South America    59
Europe           50
North America    46
Asia             45
Name: count, dtype: int64


In [9]:
# How many products are listed in each category.
products_per_category = products['Category'].value_counts()
print(products_per_category)


Category
Books          26
Electronics    26
Clothing       25
Home Decor     23
Name: count, dtype: int64


In [10]:
# Store the month number of each transaction in a new 'Month' column.
# NOTE(review): only the month-of-year is kept, so if the data covers more
# than one calendar year the same month of different years is summed
# together below - confirm the date range before reading this as a trend.
transaction_dates = pd.to_datetime(transactions['TransactionDate'])
transactions['Month'] = transaction_dates.dt.month

# Total transaction value per month.
monthly_sales = transactions.groupby('Month')['TotalValue'].sum()
print(monthly_sales)


Month
1     66376.39
2     51459.27
3     47828.73
4     57519.06
5     64527.74
6     48771.18
7     71366.39
8     63436.74
9     70603.75
10    47063.22
11    38224.37
12    62818.72
Name: TotalValue, dtype: float64


# Customer count per region, kept in a variable for later use.
region_column = customers.loc[:, 'Region']
region_distribution = region_column.value_counts()
print(region_distribution)


In [12]:
# Attach each transaction's product attributes, total the units sold per
# category, and rank the categories from most to least units.
transactions_with_products = transactions.merge(products, on='ProductID')
units_per_category = transactions_with_products.groupby('Category')['Quantity'].sum()
top_categories = units_per_category.sort_values(ascending=False)
print(top_categories)


Category
Books          681
Home Decor     639
Electronics    627
Clothing       590
Name: Quantity, dtype: int64


In [14]:
# Total spend per customer, ranked from highest to lowest; keep the first five.
spend_per_customer = transactions.groupby('CustomerID')['TotalValue'].sum()
ranked_customers = spend_per_customer.sort_values(ascending=False)
top_customers = ranked_customers.head(5)
print(top_customers)


CustomerID
C0141    10673.87
C0054     8040.39
C0065     7663.70
C0156     7634.45
C0082     7572.91
Name: TotalValue, dtype: float64


In [15]:
# Total revenue per product, ranked from highest to lowest; keep the first five.
revenue_per_product = transactions.groupby('ProductID')['TotalValue'].sum()
ranked_products = revenue_per_product.sort_values(ascending=False)
high_revenue_products = ranked_products.head(5)
print(high_revenue_products)


ProductID
P029    19513.80
P079    17946.91
P048    17905.20
P020    15060.92
P062    14592.24
Name: TotalValue, dtype: float64


In [17]:
!pip install fpdf


Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fpdf
  Building wheel for fpdf (setup.py) ... [?25l[?25hdone
  Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40704 sha256=702ef75677ba8ab0fb44ae63a03790b006677863361d16d68f9ab6a741e3d33f
  Stored in directory: /root/.cache/pip/wheels/65/4f/66/bbda9866da446a72e206d6484cd97381cbc7859a7068541c36
Successfully built fpdf
Installing collected packages: fpdf
Successfully installed fpdf-1.7.2


In [18]:
import calendar

from fpdf import FPDF

# Headline figures, derived from the tables and aggregates built in earlier
# cells, so the report states actual findings instead of shipping with
# unfilled "[Top ...]" template placeholders.
top_region = customers['Region'].value_counts().idxmax()
top_category = top_categories.idxmax()
sales_by_month = transactions.groupby('Month')['TotalValue'].sum()
top_month = calendar.month_name[int(sales_by_month.idxmax())]
top_products = ", ".join(str(product_id) for product_id in high_revenue_products.index)

# Initialize PDF
pdf = FPDF()
pdf.add_page()

# Title. A width of 0 stretches the cell to the right margin, so align="C"
# centres the text between the page margins.
pdf.set_font("Arial", size=16, style="B")
pdf.cell(0, 10, txt="EDA Business Insights", ln=True, align="C")
pdf.ln(10)

# Switch to the regular 12 pt body font; without this the insights would
# inherit the bold 16 pt title font.
pdf.set_font("Arial", size=12)

# Add Insights
insights = [
    f"1. Regional Focus: The majority of customers are located in {top_region}, contributing significantly to the customer base. Marketing efforts should prioritize this region to maximize reach.",
    f"2. Product Popularity: The {top_category} category has the highest sales volume, indicating a strong customer preference. Expanding product options in this category could drive additional sales.",
    f"3. Seasonality in Sales: Sales peak during {top_month}, highlighting strong seasonality. Running promotional campaigns and discounts during these months could capitalize on customer demand.",
    "4. High-Value Customers: The top 5 customers collectively contribute a significant percentage of the total revenue. Implementing a loyalty program tailored to these high-value customers could enhance retention and profitability.",
    f"5. Top Revenue-Generating Products: Products such as {top_products} generate the highest revenue. These should be prioritized for stock availability, pricing strategies, and premium marketing efforts."
]

# One wrapped paragraph per insight, separated by a small vertical gap.
for insight in insights:
    pdf.multi_cell(0, 10, txt=insight)
    pdf.ln(5)

# Save PDF
pdf.output("Utkarsh_Alshi_EDA.pdf")


''

In [2]:
!pip install fpdf


Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fpdf
  Building wheel for fpdf (setup.py) ... [?25l[?25hdone
  Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40704 sha256=4763f7796943275f735aedf31bf0450d6a3801b3219a85ded624082b79861870
  Stored in directory: /root/.cache/pip/wheels/65/4f/66/bbda9866da446a72e206d6484cd97381cbc7859a7068541c36
Successfully built fpdf
Installing collected packages: fpdf
Successfully installed fpdf-1.7.2


In [4]:
from fpdf import FPDF

class PDF(FPDF):
    """Report document with a titled header, a page-number footer and two
    helpers for writing a chapter heading and its body text."""

    def header(self):
        """Write the report name, bold and centred, then leave a 10-unit gap.

        Overrides ``FPDF.header``.
        """
        dark_gray = (33, 37, 41)
        self.set_font("Arial", "B", 14)
        self.set_text_color(*dark_gray)
        self.cell(0, 10, "Exploratory Data Analysis Report", border=0, ln=True, align="C")
        self.ln(10)

    def footer(self):
        """Write ``Page <n>`` in italic light gray, centred.

        Overrides ``FPDF.footer``.
        """
        light_gray = (169, 169, 169)
        # A negative y is passed so the footer is placed relative to the
        # bottom of the page (per the FPDF set_y documentation).
        self.set_y(-15)
        self.set_font("Arial", "I", 10)
        self.set_text_color(*light_gray)
        page_label = f"Page {self.page_no()}"
        self.cell(0, 10, page_label, align="C")

    def chapter_title(self, title):
        """Write ``title`` as a bold blue, left-aligned heading followed by a
        5-unit gap."""
        blue = (0, 102, 204)
        self.set_font("Arial", "B", 12)
        self.set_text_color(*blue)
        self.cell(0, 10, title, ln=True, align="L")
        self.ln(5)

    def chapter_body(self, body):
        """Write ``body`` as wrapped dark-gray text and finish with a line
        break."""
        dark_gray = (33, 37, 41)
        self.set_font("Arial", size=11)
        self.set_text_color(*dark_gray)
        self.multi_cell(0, 8, body)
        self.ln()

def _section_text(finding, *recommendations):
    """Return a chapter body: the finding, a blank line, a 'Recommendation:'
    label, then one '  - ' bullet per recommendation."""
    bullets = "\n".join("  - " + item for item in recommendations)
    return finding + "\n\n" + "Recommendation:\n" + bullets


# Create the document with automatic page breaks and open the first page.
pdf = PDF()
pdf.set_auto_page_break(auto=True, margin=15)
pdf.add_page()

# Cover block: large bold report name, then author and date in regular type.
pdf.set_font("Arial", style="B", size=22)
pdf.set_text_color(33, 37, 41)  # Dark gray
pdf.cell(0, 10, "Exploratory Data Analysis Report", ln=True, align='C')
pdf.ln(20)
pdf.set_font("Arial", size=14)
for cover_line, gap_after in (
    ("Prepared by: Utkarsh Alshi", 5),
    ("Date: January 27, 2025", 40),
):
    pdf.cell(0, 10, cover_line, ln=True, align='C')
    pdf.ln(gap_after)

# Section heading that introduces the chapters.
pdf.set_font("Arial", style="B", size=16)
pdf.set_text_color(0, 0, 0)
pdf.cell(0, 10, "Business Insights and Recommendations", ln=True, align='L')
pdf.ln(10)

# Chapter texts: one finding plus two recommendations each.
body1 = _section_text(
    "South America is the most dominant region in terms of customer base and sales contributions. "
    "This region has a high concentration of customers, suggesting significant revenue potential.",
    "Increase marketing investments and localized campaigns in South America.",
    "Strengthen the supply chain and customer service for this region to boost customer satisfaction.",
)
body2 = _section_text(
    "The 'Books' category outperformed all others in terms of sales volume, highlighting it as a customer favorite.",
    "Expand the 'Books' category by introducing new titles and exclusive collections.",
    "Offer bundling discounts and creative promotions to attract more buyers.",
)
body3 = _section_text(
    "July emerged as the month with the highest sales activity, demonstrating a clear seasonality in demand.",
    "Plan strategic promotional campaigns, such as discounts and targeted advertisements, for July.",
    "Ensure inventory levels are prepared in advance to avoid stockouts during this peak period.",
)
body4 = _section_text(
    "Customer C0141 alone contributes significantly to the overall revenue, representing a high-value segment.",
    "Develop a personalized loyalty program tailored to high-value customers like C0141.",
    "Offer exclusive benefits, early access to sales, and personalized communication to enhance retention.",
)
body5 = _section_text(
    "Product P029 has the highest revenue generation, indicating strong appeal among customers.",
    "Ensure consistent stock availability for Product P029 to meet demand.",
    "Invest in premium marketing strategies to amplify demand and analyze similar products for upselling opportunities.",
)

# Write the chapters in order: heading first, then its body.
for heading, text in (
    ("1. Regional Performance:", body1),
    ("2. Product Popularity:", body2),
    ("3. Seasonal Trends:", body3),
    ("4. High-Value Customers:", body4),
    ("5. Revenue-Generating Products:", body5),
):
    pdf.chapter_title(heading)
    pdf.chapter_body(text)

# Write the file to the current working directory and report where it went.
output_path = "Enhanced_EDA_Report_Utkarsh_Alshi.pdf"
pdf.output(output_path)
print(f"PDF successfully created: {output_path}")


PDF successfully created: Enhanced_EDA_Report_Utkarsh_Alshi.pdf


In [5]:
# List the contents of the current working directory.
!ls


Enhanced_EDA_Report_Utkarsh_Alshi.pdf  sample_data


In [6]:
from google.colab import files

# Hand the generated report to the browser as a file download.
report_file = "Enhanced_EDA_Report_Utkarsh_Alshi.pdf"
files.download(report_file)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>