In [1]:
import pandas as pd
import numpy as np

# -----------------------------
# Mock Sales Dataset
# -----------------------------
# A fixed seed makes the mock data (and every table derived from it) identical
# on each Restart & Run All; without it the numbers change on every run.
SEED = 42
rng = np.random.default_rng(SEED)

# One row per calendar day, starting 2024-01-01.
N_DAYS = 180

data = {
    "date": pd.date_range(start="2024-01-01", periods=N_DAYS, freq="D"),
    "book_title": ["Business Growth Playbook"] * N_DAYS,
    # Each day is assigned exactly one platform and one format.
    "platform": rng.choice(
        ["Amazon KDP", "IngramSpark", "Barnes & Noble"], size=N_DAYS, p=[0.6, 0.25, 0.15]
    ),
    "format": rng.choice(
        ["Ebook", "Paperback", "Audiobook"], size=N_DAYS, p=[0.45, 0.4, 0.15]
    ),
    "units_sold": rng.poisson(lam=8, size=N_DAYS),
    "list_price": rng.choice([9.99, 14.99, 19.99], size=N_DAYS),
    # NOTE: the fee is drawn independently of the platform column.
    "platform_fee_pct": rng.choice([0.30, 0.40, 0.55], size=N_DAYS),
    "page_views": rng.integers(50, 500, size=N_DAYS),  # upper bound exclusive
    "ad_spend": rng.uniform(5, 60, size=N_DAYS),
    "reviews": rng.poisson(lam=0.3, size=N_DAYS),
    "avg_rating": np.round(rng.uniform(3.8, 4.8, size=N_DAYS), 2)
}

df = pd.DataFrame(data)

# Revenue calculations
# gross_revenue: units sold times list price.
# royalty_earned: the share of gross revenue left after the platform fee.
df["gross_revenue"] = df["units_sold"] * df["list_price"]
df["royalty_earned"] = df["gross_revenue"] * (1 - df["platform_fee_pct"])


In [2]:
# Total units and gross revenue for each book format.
units_vs_revenue = df.groupby("format").agg(
    units_sold=("units_sold", "sum"),
    gross_revenue=("gross_revenue", "sum"),
)
units_vs_revenue


Unnamed: 0_level_0,units_sold,gross_revenue
format,Unnamed: 1_level_1,Unnamed: 2_level_1
Audiobook,230,3387.7
Ebook,504,7489.96
Paperback,656,10143.44


In [3]:
# Total royalties per platform, largest earner first.
royalty_summary = (
    df["royalty_earned"]
    .groupby(df["platform"])
    .sum()
    .sort_values(ascending=False)
)
royalty_summary


platform
Amazon KDP        6775.3695
IngramSpark       3178.5140
Barnes & Noble    2193.0445
Name: royalty_earned, dtype: float64

In [4]:
# Trailing 7-calendar-day mean of units sold, computed separately per platform.
#
# A time-based window ("7D") is used instead of a fixed count of 7 rows: a
# row-count window covers a platform's last 7 *records*, which spans more than
# 7 days whenever that platform has no row on some dates.
# Time-based windows require dates in increasing order within each platform.
df["7_day_sales_velocity"] = (
    df.set_index("date")
      .groupby("platform")["units_sold"]
      .transform(lambda units: units.rolling("7D").mean())
      .to_numpy()  # transform keeps the input row order, so assign by position
)


In [5]:
# Tag each row with the month number taken from its date, then total units
# sold per month number (the year is not part of the grouping key).
df["month"] = df["date"].dt.month
monthly_sales = df["units_sold"].groupby(df["month"]).sum()

monthly_sales


month
1    243
2    248
3    234
4    228
5    240
6    197
Name: units_sold, dtype: int64

In [6]:
# Units, gross revenue and royalties for every platform/format pair,
# returned as a flat table with the two keys as ordinary columns.
platform_format = df.groupby(["platform", "format"], as_index=False)[
    ["units_sold", "gross_revenue", "royalty_earned"]
].sum()

platform_format


Unnamed: 0,platform,format,units_sold,gross_revenue,royalty_earned
0,Amazon KDP,Audiobook,145,2173.55,1236.1695
1,Amazon KDP,Ebook,314,4756.86,2551.802
2,Amazon KDP,Paperback,322,5201.78,2987.398
3,Barnes & Noble,Audiobook,34,474.66,249.3135
4,Barnes & Noble,Ebook,78,1164.22,657.7995
5,Barnes & Noble,Paperback,136,2113.64,1285.9315
6,IngramSpark,Audiobook,51,739.49,444.1865
7,IngramSpark,Ebook,112,1568.88,957.814
8,IngramSpark,Paperback,198,2828.02,1776.5135


In [7]:
# Per-row conversion rate: units sold per page view.
df["conversion_rate"] = df["units_sold"].div(df["page_views"])

# Unweighted mean of the per-row rates.
df["conversion_rate"].mean()


np.float64(0.04083998808766726)

In [9]:
import numpy as np

# Marketing ROI per row: royalty earned net of ad spend, relative to ad spend.
df["marketing_roi"] = (
    (df["royalty_earned"] - df["ad_spend"]) / df["ad_spend"]
)

# Dividing by a zero ad_spend produces +/-inf; treat those rows as missing
# so they do not distort the mean.
df["marketing_roi"] = df["marketing_roi"].replace(
    [np.inf, -np.inf],
    np.nan
)

# Mean of the per-row ROI values (Series.mean skips NaN by default).
mean_marketing_roi = df["marketing_roi"].mean()

# End the cell with the value so the result is shown, like the other cells.
mean_marketing_roi


In [10]:
# Per platform: total review count and the plain (unweighted) mean of the
# per-row avg_rating values.
rating_analysis = df.groupby("platform").agg(
    reviews=("reviews", "sum"),
    avg_rating=("avg_rating", "mean"),
)

rating_analysis


Unnamed: 0_level_0,reviews,avg_rating
platform,Unnamed: 1_level_1,Unnamed: 2_level_1
Amazon KDP,35,4.307048
Barnes & Noble,6,4.315357
IngramSpark,10,4.25766


In [11]:
# Assumed opening stock of printed copies.
starting_inventory = 2000

# Only Paperback rows count as print sales.
is_paperback = df["format"].eq("Paperback")
print_sales = df.loc[is_paperback, "units_sold"].sum()

# Sell-through: fraction of the starting inventory that has been sold.
sell_through_rate = print_sales / starting_inventory

sell_through_rate


np.float64(0.328)

In [12]:
# Add the calendar year, then lay out units sold as a month x year table
# (one column per year) so the same month can be compared across years.
df["year"] = df["date"].dt.year

yoy_sales = (
    df.groupby(["year", "month"])["units_sold"]
      .sum()
      .unstack(level="year")
)
yoy_sales


year,2024
month,Unnamed: 1_level_1
1,243
2,248
3,234
4,228
5,240
6,197
