# In [2]:
import pandas as pd
import random

# Primary conditions and their sampling weights (real condition data and
# weights from DWP 2023–2024). Each label is written next to its weight so the
# two parallel lists derived below cannot drift out of alignment.
_condition_table = (
    ("Not recorded", 0.003515),
    ("Arms or hands", 0.014359),
    ("Legs or feet", 0.027045),
    ("Back or neck", 0.052105),
    ("Stomach, liver, kidney or digestion", 0.001986),
    ("Heart, blood, blood pressure or circulation", 0.004430),
    ("Chest or breathing", 0.003819),
    ("Skin conditions and severe disfigurement", 0.000764),
    ("Deaf or hard of hearing", 0.077741),
    ("Difficulty in seeing", 0.047359),
    ("Difficulty in speaking", 0.001375),
    ("Learning disability", 0.103134),
    ("Progressive illness", 0.021542),
    ("Dyslexia", 0.095787),
    ("Epilepsy", 0.016496),
    ("Diabetes", 0.004430),
    ("Mental health condition", 0.336335),
    ("Cerebral Palsy", 0.005807),
    ("Spina Bifida", 0.001375),
    ("Other", 0.215552),
)

# Parallel lists in table order: conditions[k] is weighted by conditions_weights[k].
conditions = [label for label, _ in _condition_table]
conditions_weights = [weight for _, weight in _condition_table]

# Job roles and their sampling weights (ONS 2023 data). Role and weight are
# kept side by side so the two parallel lists derived below stay aligned.
_job_role_table = (
    ("Programmers and Software Development Professionals", 0.081586),
    ("Care Workers and Home Carers", 0.067079),
    ("Administrative and Clerical Assistants", 0.058145),
    ("Secondary Education Teaching Professionals", 0.054685),
    ("Financial Managers and Directors", 0.048133),
    ("Warehouse Operatives", 0.046883),
    ("Other Nursing Professionals", 0.045732),
    ("Sales Accounts and Business Development Managers", 0.044703),
    ("Business and Financial Project Management Professionals", 0.042443),
    ("Sales and Retail Assistants", 0.041998),
    ("Book-Keepers, Payroll Managers, and Wages Clerks", 0.041922),
    ("Primary Education Teaching Professionals", 0.041831),
    ("Managers and Directors in Retail and Wholesale", 0.037821),
    ("Finance and Investment Analysts and Advisers", 0.036236),
    ("Large Goods Vehicle (LGV) Drivers", 0.035280),
    ("IT Managers", 0.034324),
    ("Higher Education Teaching Professionals", 0.031798),
    ("Production Managers and Directors in Manufacturing", 0.031599),
    ("Customer Service Occupations", 0.030116),
    ("IT Business Analysts, Architects, and Systems Designers", 0.027668),
)

# Parallel lists in table order: job_roles[k] is weighted by job_weights[k].
job_roles = [role for role, _ in _job_role_table]
job_weights = [weight for _, weight in _job_role_table]

# Support services as (item, sampling weight, cost estimate) tuples. The cost
# estimates are linked to DWP data on the number of individuals receiving each
# element and the expenditure on that element.
supports = [
    ("Mental health support plan", 10, 770),
    ("Support worker", 10, 10000),
    ("Taxi to work", 10, 3233),
    ("Speech-to-text software", 3, 1200),
    ("Screen reader", 3, 1000),
    ("Flexible hours", 2, 500),
    ("Noise-cancelling headphones", 2, 300),
    ("Ramp installation", 2, 2000),
    ("Adapted desk", 1, 800),
    ("BSL interpreter", 1, 2800),
    ("Job coach", 1, 1600),
    ("Ergonomic equipment", 1, 750),
    ("Visual alarms", 1, 700),
    ("Voice recognition software", 1, 900),
    ("Accessible toilet adjustments", 1, 2500),
]

# Transpose the table into its three columns, then expose them as the item
# list, the matching weight list, and an item -> cost lookup.
_support_names, _support_weight_column, _support_cost_column = zip(*supports)
support_items_list = list(_support_names)
support_weights = list(_support_weight_column)
support_costs_dict = dict(zip(_support_names, _support_cost_column))

# Generate dummy data: one synthetic record per user, each a list of
# [name, condition, job role, supports text, total cost, notes].
NUM_RECORDS = 60000  # how many synthetic users to generate

data = []
for i in range(NUM_RECORDS):
    name = f"User_{i+1}"
    # Weighted single draws; [0] unwraps the one-element list random.choices returns.
    condition = random.choices(conditions, weights=conditions_weights, k=1)[0]
    job_role = random.choices(job_roles, weights=job_weights, k=1)[0]
    # Between 1 and 3 weighted draws WITH replacement, so an item can repeat.
    selected_supports = random.choices(support_items_list, weights=support_weights, k=random.randint(1, 3))
    # Remove duplicates while keeping draw order. dict.fromkeys is used rather
    # than set() because the iteration order of a set of strings depends on
    # per-process hash randomization, which made the joined "Support Provided"
    # text come out in a different order on each interpreter run.
    unique_supports = list(dict.fromkeys(selected_supports))
    support_provided = ", ".join(unique_supports)
    # Each distinct support is costed once, however many times it was drawn.
    approx_cost = sum(support_costs_dict[s] for s in unique_supports)
    notes = f"Support tailored for {condition.lower()} in role as {job_role.lower()}."
    data.append([name, condition, job_role, support_provided, approx_cost, notes])

# Build the results table from the generated rows and write it out as CSV
# (index=False keeps the DataFrame's row index out of the file).
# NOTE(review): the "2000" in the file name does not appear to match the number
# of rows written (one per entry in `data`) — confirm before renaming, since
# anything that reads this path would need to change too.
column_names = ["Name", "Condition", "Job Role", "Support Provided", "Approx. Cost (£)", "Notes"]
output_path = "Access_to_Work_Cost_Weighted_2000.csv"

df = pd.DataFrame(data, columns=column_names)
df.to_csv(output_path, index=False)