In [1]:
# Step 2: Import libraries

import pandas as pd
import numpy as np
import random

# Seed both random number generators from a single constant so that a fresh
# Restart & Run All reproduces the same values; change SEED to draw a
# different sample.
SEED = 42
random.seed(SEED)     # Python's built-in `random` module
np.random.seed(SEED)  # NumPy's legacy global generator

In [2]:
# Step 3: Create list of marketing business books

# One (title, author) pair per book. Both lists below are derived from this
# table, which keeps them the same length and in the same order.
book_catalog = [
    ("Digital Marketing Strategy", "John Smith"),
    ("Content Marketing Mastery", "Sarah Johnson"),
    ("Social Media Marketing Guide", "Michael Brown"),
    ("Marketing Analytics Essentials", "Emily Davis"),
    ("Brand Management Handbook", "David Wilson"),
    ("Email Marketing Secrets", "Laura Taylor"),
    ("Growth Hacking Techniques", "James Anderson"),
    ("Consumer Behavior Explained", "Olivia Thomas"),
    ("Marketing Data Science", "Daniel Moore"),
    ("SEO Optimization Blueprint", "Sophia Martin"),
]

# Position-aligned lists: authors[i] is the author paired with books[i].
books = [title for title, _author in book_catalog]

authors = [author for _title, author in book_catalog]

In [3]:
# Step 4: Define demographic categories

# Age brackets, listed from youngest to oldest.
# NOTE(review): the labels "46-60" and "60+" both include age 60 - confirm
# which bracket a 60-year-old is meant to fall into.
age_groups = [
    "18-25",
    "26-35",
    "36-45",
    "46-60",
    "60+",
]

genders = [
    "Male",
    "Female",
]

# Income bands, lowest to highest.
income_levels = [
    "Low",
    "Medium",
    "High",
]

# Highest education level attained, in ascending order.
education_levels = [
    "High School",
    "Bachelor",
    "Master",
    "PhD",
]

# Sales regions.
regions = [
    "North",
    "South",
    "East",
    "West",
    "Central",
]

In [4]:
# Step 5: Create dataset

# Generation parameters, collected in one place so the synthetic dataset can
# be re-tuned without touching the generation logic.
N_RECORDS = 500                # number of sales records to generate
UNITS_SOLD_RANGE = (50, 500)   # inclusive integer bounds for units sold
PRICE_RANGE = (15, 60)         # lower / upper bound for the unit price
COST_RATIO_RANGE = (0.4, 0.7)  # unit cost as a fraction of the unit price

# books[i] and authors[i] are paired by position, so a length mismatch would
# either raise IndexError part-way through generation or leave authors unused.
assert len(books) == len(authors), "books and authors must have the same length"


def make_sales_record(record_id):
    """Build one synthetic sales record as a dict.

    The random draws happen in a fixed order (book, units sold, price, cost
    ratio, then age group, gender, income level, education level, region).
    Reordering them changes the data generated for a given seed.

    Args:
        record_id: Value stored under "Book_ID". It numbers the record, not
            the title, so the same title appears under many different IDs.

    Returns:
        dict: One row with the book's title and author, the sales figures
        (each rounded to 2 decimals) and the randomly chosen demographic
        fields.
    """
    book_index = random.randint(0, len(books) - 1)
    units_sold = random.randint(*UNITS_SOLD_RANGE)
    price = round(random.uniform(*PRICE_RANGE), 2)
    cost = round(price * random.uniform(*COST_RATIO_RANGE), 2)

    revenue = round(units_sold * price, 2)
    total_cost = round(units_sold * cost, 2)
    profit = round(revenue - total_cost, 2)

    # Revenue is always positive with the default ranges; the guard only
    # matters if the ranges are changed so that units or price can be zero.
    if revenue:
        profit_margin = round((profit / revenue) * 100, 2)
    else:
        profit_margin = float("nan")

    return {
        "Book_ID": record_id,
        "Book_Title": books[book_index],
        "Author": authors[book_index],
        "Units_Sold": units_sold,
        "Price": price,
        "Cost": cost,
        "Revenue": revenue,
        "Total_Cost": total_cost,
        "Profit": profit,
        "Profit_Margin (%)": profit_margin,
        "Age_Group": random.choice(age_groups),
        "Gender": random.choice(genders),
        "Income_Level": random.choice(income_levels),
        "Education_Level": random.choice(education_levels),
        "Region": random.choice(regions),
    }


# Record IDs start at 1.
data = [make_sales_record(i + 1) for i in range(N_RECORDS)]

In [5]:
# Step 6: Convert to DataFrame

# Each dict in `data` becomes one row; the dict keys become the columns.
df = pd.DataFrame(data=data)

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,Book_ID,Book_Title,Author,Units_Sold,Price,Cost,Revenue,Total_Cost,Profit,Profit_Margin (%),Age_Group,Gender,Income_Level,Education_Level,Region
0,1,Content Marketing Mastery,Sarah Johnson,62,48.37,22.9,2998.94,1419.8,1579.14,52.66,26-35,Male,High,High School,Central
1,2,Growth Hacking Techniques,James Anderson,66,16.34,7.61,1078.44,502.26,576.18,53.43,60+,Male,High,Bachelor,Central
2,3,Growth Hacking Techniques,James Anderson,162,35.21,17.02,5704.02,2757.24,2946.78,51.66,18-25,Male,High,PhD,East
3,4,Brand Management Handbook,David Wilson,129,24.69,15.53,3185.01,2003.37,1181.64,37.1,18-25,Male,Medium,High School,East
4,5,Email Marketing Secrets,Laura Taylor,359,26.9,11.11,9657.1,3988.49,5668.61,58.7,46-60,Male,Medium,High School,Central


In [6]:
# Step 7: Save dataset

# index=False keeps the DataFrame's row index out of the CSV file.
output_csv = "marketing_business_books_dataset.csv"
df.to_csv(output_csv, index=False)

print("Dataset saved successfully!")

Dataset saved successfully!


In [7]:
# Step 8: Dataset info
# Prints a summary of the frame: index range, each column's non-null count
# and dtype, and the memory usage.

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Book_ID            500 non-null    int64  
 1   Book_Title         500 non-null    object 
 2   Author             500 non-null    object 
 3   Units_Sold         500 non-null    int64  
 4   Price              500 non-null    float64
 5   Cost               500 non-null    float64
 6   Revenue            500 non-null    float64
 7   Total_Cost         500 non-null    float64
 8   Profit             500 non-null    float64
 9   Profit_Margin (%)  500 non-null    float64
 10  Age_Group          500 non-null    object 
 11  Gender             500 non-null    object 
 12  Income_Level       500 non-null    object 
 13  Education_Level    500 non-null    object 
 14  Region             500 non-null    object 
dtypes: float64(6), int64(2), object(7)
memory usage: 58.7+ KB


In [8]:
# Total revenue across all sales records.
total_revenue = df["Revenue"].sum()

# Round for display only: summing many floats can leave binary floating-point
# noise beyond the second decimal. `total_revenue` itself keeps the raw sum.
print("Total Revenue:", round(total_revenue, 2))

Total Revenue: 5333622.84


In [9]:
# Total profit across all sales records.
total_profit = df["Profit"].sum()

# Round for display only: summing many floats can leave binary floating-point
# noise beyond the second decimal. `total_profit` itself keeps the raw sum.
print("Total Profit:", round(total_profit, 2))

Total Profit: 2378099.09


In [10]:
# Unweighted mean of the per-record margins: every record counts equally,
# whatever its revenue.
margin_pct = df["Profit_Margin (%)"]
avg_margin = margin_pct.mean()

print("Average Profit Margin:", round(avg_margin, 2), "%")

Average Profit Margin: 44.62 %


In [11]:
# Total revenue for each age bracket.
records_by_age = df.groupby("Age_Group")
revenue_by_age = records_by_age["Revenue"].sum()

print(revenue_by_age)

Age_Group
18-25    1218339.49
26-35    1009900.17
36-45     945866.57
46-60    1107333.40
60+      1052183.21
Name: Revenue, dtype: float64


In [12]:
# Total revenue for each gender.
records_by_gender = df.groupby("Gender")
revenue_by_gender = records_by_gender["Revenue"].sum()

print(revenue_by_gender)

Gender
Female    2426379.79
Male      2907243.05
Name: Revenue, dtype: float64


In [13]:
# Total revenue for each sales region.
records_by_region = df.groupby("Region")
revenue_by_region = records_by_region["Revenue"].sum()

print(revenue_by_region)

Region
Central    1017866.49
East       1025443.71
North      1060886.35
South       999879.46
West       1229546.83
Name: Revenue, dtype: float64


In [14]:
# Mean profit margin per title (unweighted average over that title's records).
records_by_title = df.groupby("Book_Title")
margin_by_book = records_by_title["Profit_Margin (%)"].mean()

print(margin_by_book)

Book_Title
Brand Management Handbook         44.397292
Consumer Behavior Explained       43.309273
Content Marketing Mastery         45.114200
Digital Marketing Strategy        46.283500
Email Marketing Secrets           44.295833
Growth Hacking Techniques         44.256852
Marketing Analytics Essentials    47.050182
Marketing Data Science            45.788727
SEO Optimization Blueprint        43.109000
Social Media Marketing Guide      42.564727
Name: Profit_Margin (%), dtype: float64


In [15]:
# Units sold per title, ranked from highest to lowest. Despite the singular
# name, `top_book` holds the full ranking; the best seller is its first entry.
units_by_title = df.groupby("Book_Title")["Units_Sold"].sum()
top_book = units_by_title.sort_values(ascending=False)

print(top_book)

Book_Title
Marketing Analytics Essentials    16618
Social Media Marketing Guide      15186
Growth Hacking Techniques         15183
Marketing Data Science            14803
Brand Management Handbook         14249
Email Marketing Secrets           13704
Consumer Behavior Explained       13336
Content Marketing Mastery         13023
Digital Marketing Strategy        10746
SEO Optimization Blueprint         9552
Name: Units_Sold, dtype: int64
