In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:
# Simulated bookstore dataset: one row per store, with chain, city,
# weekly customer count and average book price drawn at random.

# Seed NumPy's global RNG so that Restart Kernel -> Run All reproduces the
# same simulated values, both here and in every later cell that draws from
# np.random.
np.random.seed(42)

# Single source of truth for the number of simulated stores.
n_stores = 20

bookstores = pd.DataFrame({
    "store_id": range(1, n_stores + 1),
    # Chains are drawn with a 60/40 probability split.
    "company": np.random.choice(["Waterstones", "WHSmith"], n_stores, p=[0.6, 0.4]),
    # Cities are drawn uniformly, so a city may repeat or not appear at all.
    "city": np.random.choice([
        "London", "Manchester", "Liverpool", "Birmingham",
        "Leeds", "Bristol", "Oxford", "Cambridge"
    ], n_stores),
    # randint's upper bound is exclusive: values fall in 500..4999.
    "weekly_customers": np.random.randint(500, 5000, n_stores),
    "avg_book_price": np.random.uniform(8, 15, n_stores)
})

bookstores.head()


Unnamed: 0,store_id,company,city,weekly_customers,avg_book_price
0,1,Waterstones,Cambridge,3327,12.246813
1,2,WHSmith,Leeds,3961,11.280495
2,3,Waterstones,Birmingham,2401,13.606817
3,4,Waterstones,London,4462,14.037615
4,5,WHSmith,London,2455,9.368226


In [3]:
# Jo Nesbo sales simulation

# One random share per store, drawn uniformly from [0.02, 0.08). Despite the
# "percent" in the column name, the stored value is a fraction.
# The sample size follows the frame's row count rather than a literal, so the
# assignment cannot fail with a length mismatch if the store count changes.
bookstores["jo_nesbo_sales_percent"] = np.random.uniform(
    0.02, 0.08, len(bookstores)
)

# Weekly units sold = weekly customers x share; astype(int) drops the
# fractional part instead of rounding.
bookstores["jo_nesbo_books_sold_weekly"] = (
    bookstores["weekly_customers"] *
    bookstores["jo_nesbo_sales_percent"]
).astype(int)

bookstores.head()


Unnamed: 0,store_id,company,city,weekly_customers,avg_book_price,jo_nesbo_sales_percent,jo_nesbo_books_sold_weekly
0,1,Waterstones,Cambridge,3327,12.246813,0.028136,93
1,2,WHSmith,Leeds,3961,11.280495,0.055587,220
2,3,Waterstones,Birmingham,2401,13.606817,0.074359,178
3,4,Waterstones,London,4462,14.037615,0.023875,106
4,5,WHSmith,London,2455,9.368226,0.065035,159


In [4]:
# Simulated per-store customer demographics, keyed by the same store_id
# values as `bookstores`.

# Size every random column from the bookstores frame so the columns always
# match the length of the store_id column taken from it.
n_rows = len(bookstores)

demographics = pd.DataFrame({
    "store_id": bookstores["store_id"],
    # randint's upper bound is exclusive: ages 25..64, incomes 20000..69999.
    "avg_customer_age": np.random.randint(25, 65, n_rows),
    "avg_income": np.random.randint(20000, 70000, n_rows),
    # Education levels are drawn with the listed probabilities (they sum to 1).
    "education_level": np.random.choice(
        ["High School", "Bachelor", "Master", "PhD"], n_rows,
        p=[0.3, 0.4, 0.2, 0.1]
    )
})

demographics.head()


Unnamed: 0,store_id,avg_customer_age,avg_income,education_level
0,1,36,44365,High School
1,2,34,23621,Bachelor
2,3,64,53829,High School
3,4,42,48025,Bachelor
4,5,48,59665,Bachelor


In [10]:
# Worked example of the profit calculation on a small hand-written frame.
# pandas is already imported in the notebook's first cell, so it is not
# re-imported here.
# NOTE(review): this toy frame -- not the simulated `bookstores` /
# `demographics` frames built in the earlier cells -- is what the final cell
# writes to CSV; confirm that is intended.

profit_margin = 0.40   # fraction of the book price kept as profit
weeks_per_year = 52    # used to scale weekly profit to a yearly figure

# Build the example and derive all profit columns in one chain; each lambda
# sees the columns created by the keyword arguments before it.
dataset = pd.DataFrame({
    "avg_book_price": [10, 12, 15, 8, 20],
    "jo_nesbo_books_sold_weekly": [100, 80, 120, 90, 60]
}).assign(
    profit_per_book=lambda df: df["avg_book_price"] * profit_margin,
    weekly_profit=lambda df: (
        df["jo_nesbo_books_sold_weekly"] * df["profit_per_book"]
    ),
    yearly_profit=lambda df: df["weekly_profit"] * weeks_per_year,
)

# Bare last expression, as in the other cells, so the notebook renders the
# frame with its rich table display.
dataset.head()


   avg_book_price  jo_nesbo_books_sold_weekly  profit_per_book  weekly_profit  \
0              10                         100              4.0          400.0   
1              12                          80              4.8          384.0   
2              15                         120              6.0          720.0   
3               8                          90              3.2          288.0   
4              20                          60              8.0          480.0   

   yearly_profit  
0        20800.0  
1        19968.0  
2        37440.0  
3        14976.0  
4        24960.0  


In [12]:
# Write the profit table to a CSV file (path is relative to the working
# directory), leaving out the DataFrame index column.
csv_filename = "jo_nesbo_england_bookstore_dataset.csv"
dataset.to_csv(csv_filename, index=False)
