In [1]:
# Step 1: Import required libraries
import pandas as pd
import numpy as np

# Notebook display configuration: show every column of a frame and allow
# wide output before pandas wraps it.
pd.options.display.max_columns = None
pd.options.display.width = 1000


In [2]:
# Step 2: Build the sales dataset for River Island and ZARA
#
# One record per company and year. Revenue figures are approximate values.
sales_columns = [
    "Company",
    "Year",
    "Revenue (£ millions)",
    "Cost (£ millions)",
    "Operating Profit (£ millions)",
]

sales_records = [
    ("River Island", 2021, 750, 520, 80),
    ("River Island", 2022, 701, 510, 60),
    ("River Island", 2023, 578, 470, -33),
    ("River Island", 2024, 537, 480, -65),
    ("ZARA", 2021, 19000, 9500, 4200),
    ("ZARA", 2022, 21000, 10200, 5000),
    ("ZARA", 2023, 22540, 11000, 5400),
    ("ZARA", 2024, 24730, 11800, 6000),
]

# Transpose the row records into a column-oriented dict of lists.
sales_data = {
    column: list(values)
    for column, values in zip(sales_columns, zip(*sales_records))
}

sales_df = pd.DataFrame(sales_data)

sales_df


Unnamed: 0,Company,Year,Revenue (£ millions),Cost (£ millions),Operating Profit (£ millions)
0,River Island,2021,750,520,80
1,River Island,2022,701,510,60
2,River Island,2023,578,470,-33
3,River Island,2024,537,480,-65
4,ZARA,2021,19000,9500,4200
5,ZARA,2022,21000,10200,5000
6,ZARA,2023,22540,11000,5400
7,ZARA,2024,24730,11800,6000


In [3]:
# Step 3: Derive the operating profit margin
#
# Margin is operating profit as a percentage of revenue, computed per row
# and stored as a new column on sales_df.
operating_profit = sales_df["Operating Profit (£ millions)"]
revenue = sales_df["Revenue (£ millions)"]
sales_df["Profit Margin (%)"] = operating_profit.div(revenue).mul(100)

sales_df


Unnamed: 0,Company,Year,Revenue (£ millions),Cost (£ millions),Operating Profit (£ millions),Profit Margin (%)
0,River Island,2021,750,520,80,10.666667
1,River Island,2022,701,510,60,8.559201
2,River Island,2023,578,470,-33,-5.709343
3,River Island,2024,537,480,-65,-12.104283
4,ZARA,2021,19000,9500,4200,22.105263
5,ZARA,2022,21000,10200,5000,23.809524
6,ZARA,2023,22540,11000,5400,23.957409
7,ZARA,2024,24730,11800,6000,24.26203


In [4]:
# Step 4: Build the customer demographic dataset
#
# Each profile row is:
# (age group, customers %, female %, male %, average income £)
demographic_columns = [
    "Company",
    "Age Group",
    "Customers (%)",
    "Gender Female (%)",
    "Gender Male (%)",
    "Average Income (£)",
]

demographic_profiles = {
    "River Island": [
        ("18-24", 35, 65, 35, 22000),
        ("25-34", 30, 62, 38, 28000),
        ("35-44", 20, 58, 42, 35000),
        ("45-54", 15, 55, 45, 42000),
    ],
    "ZARA": [
        ("18-24", 40, 70, 30, 24000),
        ("25-34", 35, 68, 32, 30000),
        ("35-44", 15, 60, 40, 38000),
        ("45-54", 10, 55, 45, 45000),
    ],
}

# Flatten to one record per (company, age group), then transpose the
# records into a column-oriented dict of lists.
demographic_records = [
    (company, *profile)
    for company, profiles in demographic_profiles.items()
    for profile in profiles
]
demographics_data = {
    column: list(values)
    for column, values in zip(demographic_columns, zip(*demographic_records))
}

demographics_df = pd.DataFrame(demographics_data)

demographics_df


Unnamed: 0,Company,Age Group,Customers (%),Gender Female (%),Gender Male (%),Average Income (£)
0,River Island,18-24,35,65,35,22000
1,River Island,25-34,30,62,38,28000
2,River Island,35-44,20,58,42,35000
3,River Island,45-54,15,55,45,42000
4,ZARA,18-24,40,70,30,24000
5,ZARA,25-34,35,68,32,30000
6,ZARA,35-44,15,60,40,38000
7,ZARA,45-54,10,55,45,45000


In [5]:
# Step 5: Merge datasets
#
# "Company" repeats in both frames (one row per year in sales_df, one row per
# age group in demographics_df), so this is a many-to-many join: every yearly
# sales row is paired with every age-group row of the same company.
# The financial columns are therefore repeated once per age group. Do not sum
# or average them over merged_df without de-duplicating on (Company, Year).
merged_df = pd.merge(
    sales_df,
    demographics_df,
    on="Company",
    how="inner",              # the default join type, spelled out
    validate="many_to_many",  # states the intended cardinality explicitly
)

# Grain check: exactly one row per (company, year, age group). This fails if
# either input frame contains a repeated key combination.
assert not merged_df.duplicated(["Company", "Year", "Age Group"]).any(), (
    "merged_df has repeated (Company, Year, Age Group) rows"
)

merged_df.head(10)


Unnamed: 0,Company,Year,Revenue (£ millions),Cost (£ millions),Operating Profit (£ millions),Profit Margin (%),Age Group,Customers (%),Gender Female (%),Gender Male (%),Average Income (£)
0,River Island,2021,750,520,80,10.666667,18-24,35,65,35,22000
1,River Island,2021,750,520,80,10.666667,25-34,30,62,38,28000
2,River Island,2021,750,520,80,10.666667,35-44,20,58,42,35000
3,River Island,2021,750,520,80,10.666667,45-54,15,55,45,42000
4,River Island,2022,701,510,60,8.559201,18-24,35,65,35,22000
5,River Island,2022,701,510,60,8.559201,25-34,30,62,38,28000
6,River Island,2022,701,510,60,8.559201,35-44,20,58,42,35000
7,River Island,2022,701,510,60,8.559201,45-54,15,55,45,42000
8,River Island,2023,578,470,-33,-5.709343,18-24,35,65,35,22000
9,River Island,2023,578,470,-33,-5.709343,25-34,30,62,38,28000


In [6]:
# Step 6: Build the geographic customer distribution
#
# Maps each region to (River Island customers %, ZARA customers %).
customer_share_by_region = {
    "UK": (70, 20),
    "Europe": (15, 35),
    "Asia": (5, 25),
    "North America": (10, 20),
}

# Expand the per-region pairs into one list per column.
geo_data = {
    "Region": list(customer_share_by_region),
    "River Island Customers (%)": [
        river_island_pct
        for river_island_pct, zara_pct in customer_share_by_region.values()
    ],
    "ZARA Customers (%)": [
        zara_pct
        for river_island_pct, zara_pct in customer_share_by_region.values()
    ],
}

geo_df = pd.DataFrame(geo_data)

geo_df


Unnamed: 0,Region,River Island Customers (%),ZARA Customers (%)
0,UK,70,20
1,Europe,15,35
2,Asia,5,25
3,North America,10,20


In [7]:
# Step 7: Write each dataset to a CSV file
#
# Files are written to relative paths, without the row index.
# NOTE(review): geo_df is not written here; confirm whether that is intended.
csv_outputs = {
    "sales_dataset.csv": sales_df,
    "demographics_dataset.csv": demographics_df,
    "full_dataset.csv": merged_df,
}

for csv_name, frame in csv_outputs.items():
    frame.to_csv(csv_name, index=False)

print("Datasets saved successfully")


Datasets saved successfully
