In [1]:
# Step 2: Import libraries
import pandas as pd
import numpy as np
import random

print("Libraries imported successfully")

Libraries imported successfully


In [2]:
# Step 3: Define number of records
num_records = 1000

print("Number of records:", num_records)

Number of records: 1000


In [3]:
# Step 4: Create book-related data

book_titles = [
    "Business Strategy", "Startup Guide", "Marketing 101",
    "Financial Freedom", "Leadership Secrets", "Entrepreneur Mindset",
    "Corporate Success", "Digital Marketing", "Investment Basics",
    "Management Skills"
]

authors = [
    "John Smith", "Sarah Johnson", "Michael Brown",
    "Emily Davis", "Robert Wilson", "Jessica Taylor"
]

genres = ["Business", "Finance", "Marketing", "Leadership"]

book_data = {
    "Book_ID": np.arange(1, num_records + 1),
    "Book_Title": np.random.choice(book_titles, num_records),
    "Author": np.random.choice(authors, num_records),
    "Genre": np.random.choice(genres, num_records)
}

book_df = pd.DataFrame(book_data)

book_df.head()

Unnamed: 0,Book_ID,Book_Title,Author,Genre
0,1,Investment Basics,John Smith,Marketing
1,2,Digital Marketing,Michael Brown,Leadership
2,3,Digital Marketing,Sarah Johnson,Leadership
3,4,Entrepreneur Mindset,Jessica Taylor,Finance
4,5,Startup Guide,John Smith,Leadership


In [4]:
# Step 5: Create customer demographics

customer_data = {
    "Customer_ID": np.random.randint(10000, 99999, num_records),
    "Age": np.random.randint(18, 70, num_records),
    "Gender": np.random.choice(["Male", "Female"], num_records),
    "Country": np.random.choice(
        ["UK", "USA", "Canada", "Australia", "Germany"], num_records
    ),
    "Income_Level": np.random.choice(
        ["Low", "Medium", "High"], num_records
    )
}

customer_df = pd.DataFrame(customer_data)

customer_df.head()

Unnamed: 0,Customer_ID,Age,Gender,Country,Income_Level
0,65632,57,Female,USA,Medium
1,27290,64,Female,Canada,High
2,36136,34,Female,UK,Low
3,81270,32,Male,UK,Medium
4,12622,40,Male,Australia,Medium


In [5]:
# Step 6: Create sales and profit data

price = np.random.uniform(10, 100, num_records).round(2)

quantity = np.random.randint(1, 10, num_records)

cost = price * np.random.uniform(0.4, 0.8, num_records)

sales = price * quantity

profit = sales - (cost * quantity)

profit_margin = (profit / sales) * 100

sales_data = {
    "Price": price,
    "Quantity": quantity,
    "Cost": cost.round(2),
    "Sales": sales.round(2),
    "Profit": profit.round(2),
    "Profit_Margin (%)": profit_margin.round(2)
}

sales_df = pd.DataFrame(sales_data)

sales_df.head()

Unnamed: 0,Price,Quantity,Cost,Sales,Profit,Profit_Margin (%)
0,26.04,1,17.87,26.04,8.17,31.39
1,14.14,4,6.72,56.56,29.67,52.46
2,84.18,5,51.94,420.9,161.22,38.3
3,84.71,5,54.83,423.55,149.41,35.27
4,78.12,5,31.3,390.6,234.08,59.93


In [6]:
# Step 7: Combine all data into one dataset

business_books_df = pd.concat(
    [book_df, customer_df, sales_df],
    axis=1
)

business_books_df.head()

Unnamed: 0,Book_ID,Book_Title,Author,Genre,Customer_ID,Age,Gender,Country,Income_Level,Price,Quantity,Cost,Sales,Profit,Profit_Margin (%)
0,1,Investment Basics,John Smith,Marketing,65632,57,Female,USA,Medium,26.04,1,17.87,26.04,8.17,31.39
1,2,Digital Marketing,Michael Brown,Leadership,27290,64,Female,Canada,High,14.14,4,6.72,56.56,29.67,52.46
2,3,Digital Marketing,Sarah Johnson,Leadership,36136,34,Female,UK,Low,84.18,5,51.94,420.9,161.22,38.3
3,4,Entrepreneur Mindset,Jessica Taylor,Finance,81270,32,Male,UK,Medium,84.71,5,54.83,423.55,149.41,35.27
4,5,Startup Guide,John Smith,Leadership,12622,40,Male,Australia,Medium,78.12,5,31.3,390.6,234.08,59.93


In [7]:
# Step 8: Add purchase date

business_books_df["Purchase_Date"] = pd.to_datetime(
    np.random.choice(
        pd.date_range("2023-01-01", "2025-12-31"),
        num_records
    )
)

business_books_df.head()

Unnamed: 0,Book_ID,Book_Title,Author,Genre,Customer_ID,Age,Gender,Country,Income_Level,Price,Quantity,Cost,Sales,Profit,Profit_Margin (%),Purchase_Date
0,1,Investment Basics,John Smith,Marketing,65632,57,Female,USA,Medium,26.04,1,17.87,26.04,8.17,31.39,2024-10-08
1,2,Digital Marketing,Michael Brown,Leadership,27290,64,Female,Canada,High,14.14,4,6.72,56.56,29.67,52.46,2024-11-11
2,3,Digital Marketing,Sarah Johnson,Leadership,36136,34,Female,UK,Low,84.18,5,51.94,420.9,161.22,38.3,2023-03-31
3,4,Entrepreneur Mindset,Jessica Taylor,Finance,81270,32,Male,UK,Medium,84.71,5,54.83,423.55,149.41,35.27,2024-06-05
4,5,Startup Guide,John Smith,Leadership,12622,40,Male,Australia,Medium,78.12,5,31.3,390.6,234.08,59.93,2025-10-23


In [8]:
# Step 9: Dataset info

business_books_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Book_ID            1000 non-null   int64         
 1   Book_Title         1000 non-null   object        
 2   Author             1000 non-null   object        
 3   Genre              1000 non-null   object        
 4   Customer_ID        1000 non-null   int64         
 5   Age                1000 non-null   int64         
 6   Gender             1000 non-null   object        
 7   Country            1000 non-null   object        
 8   Income_Level       1000 non-null   object        
 9   Price              1000 non-null   float64       
 10  Quantity           1000 non-null   int64         
 11  Cost               1000 non-null   float64       
 12  Sales              1000 non-null   float64       
 13  Profit             1000 non-null   float64       
 14  Profit_Ma

In [9]:
# Step 10: Save dataset

business_books_df.to_csv(
    "business_books_sales_dataset.csv",
    index=False
)

print("Dataset saved successfully")

Dataset saved successfully


In [10]:
# Step 11: Basic analysis

print("Total Sales:", business_books_df["Sales"].sum())

print("Total Profit:", business_books_df["Profit"].sum())

print("Average Profit Margin:",
      business_books_df["Profit_Margin (%)"].mean())

Total Sales: 285507.94999999995
Total Profit: 113946.04000000001
Average Profit Margin: 40.11762
