# EcomScrape Analysis

Quick exploration of the latest scraped products. Make sure you have run the scraper so that `outputs/processed/latest_products.json` exists.

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

%matplotlib inline

# Location of the scraper's most recent processed output (relative path).
data_path = Path("outputs/processed/latest_products.json")

# Load the products when the file is present; otherwise stop with a hint
# about how to produce it.
if data_path.exists():
    df = pd.read_json(data_path)
else:
    raise FileNotFoundError(f"Data file not found: {data_path}. Run the scraper first.")

# Last expression of the cell: renders the first rows as a preview.
df.head()

## Basic counts

Show the number of products and a preview.

In [None]:
# One row per product, so the row count is the product count.
product_total = len(df)
print(f"Number of products: {product_total}")

# Preview the first ten rows.
df.head(10)

## Top categories

Count products per category to see where coverage is strongest.

In [None]:
# Fall back to an empty Series when there is no "category" column, and label
# missing categories "unknown" so those rows are still counted.
category_labels = df.get("category", pd.Series(dtype=str)).fillna("unknown")

# Products per category, most frequent first.
category_counts = category_labels.value_counts()
category_counts.head(10)

## Average price per category

Compute the mean of `price_current` for each category. Rows without a price are excluded, and products with no category are grouped under `unknown`.

In [None]:
# Rows without a current price cannot contribute to a mean.
priced_rows = df.dropna(subset=["price_current"])

# Grouping key: the category label, with missing values labelled "unknown"
# (an empty Series is used when the column is absent).
category_key = df.get("category", pd.Series(dtype=str)).fillna("unknown")

# Mean current price per category, most expensive categories first.
mean_by_category = priced_rows.groupby(category_key)["price_current"].mean()
avg_price = mean_by_category.sort_values(ascending=False)

# Show the ten highest averages as a one-column table.
avg_price.head(10).to_frame("avg_price")

## Price distribution

Histogram of current prices.

In [None]:
# Histogram of all known current prices in 20 bins.
fig, ax = plt.subplots(figsize=(8, 4))

# Missing prices are dropped before binning.
known_prices = df["price_current"].dropna()
ax.hist(known_prices, bins=20, color="#4C78A8", edgecolor="white")

# Title and axis labels in a single call.
ax.set(
    title="Price distribution",
    xlabel="Price (currency as scraped)",
    ylabel="Count",
)
plt.tight_layout()
plt.show()

## Counts per category

Bar chart for the top categories by product count.

In [None]:
# Bar chart of the ten most common categories.
top_counts = category_counts.head(10)

if top_counts.empty:
    # The counts are empty when the data has no "category" column (the
    # counting cell falls back to an empty Series) or no rows at all.
    # pandas' bar plot can raise on an empty Series, so report the gap
    # instead of failing the cell.
    print("No category data available to plot.")
else:
    fig, ax = plt.subplots(figsize=(8, 4))
    top_counts.plot(kind="bar", ax=ax, color="#72B7B2")
    ax.set_title("Top categories by count")
    ax.set_xlabel("Category")
    ax.set_ylabel("Count")
    # Slanted, right-aligned tick labels keep long category names readable.
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    plt.show()