<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400&display=swap" rel="stylesheet">
<div style="font-family: 'Inter'; font-size: 24px; color: #749857;"><B>🍏 Show Up</b> for Health</div>
<div style="font-family: 'Inter'; font-size: 14px; color: #7a7979;"><b>Predicting Missed Appointments in Primary Care</b> - Brompton Health PCN</div>

[janduplessis883](https://github.com/janduplessis883)<BR>
[AlexAlexRose](https://github.com/AlexAlexRose)<BR>
[FabySp](https://github.com/FabySp)<BR>
[mogleyza](https://github.com/mogleyza)

# GitHub Username | Notebook Title 

### Importing Libraries

In [9]:
# Importing default Libraries
import matplotlib.pyplot as plt
import pandas as pd 
import numpy as np
import seaborn as sns
import warnings

# Render inline Matplotlib figures in the high-resolution 'retina' format
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

# Raise the pandas display limits so frames of up to 1000 rows / 1000 columns are shown in full
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000
# NOTE(review): this silences every Python warning for the rest of the session,
# including deprecation notices from pandas — consider narrowing the filter.
warnings.filterwarnings('ignore')

# "magic commands" to enable autoreload of imported packages, so edits to
# local modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Params

In [10]:
# All project data lives under one directory; every path below is derived from it.
_DATA_ROOT = '~/code/janduplessis883/data-showup/data/'

# Inputs: per-surgery raw exports (directory), weather observations and the IMD lookup table.
RAW_DATA = _DATA_ROOT + 'raw-data/'
WEATHER_DATA = _DATA_ROOT + 'weather/weather.csv'
IMD_DATA = _DATA_ROOT + 'imd-master/imd_master.csv'

# Output: directory that receives the generated CSV files.
OUTPUT_DATA = _DATA_ROOT + 'output-data/'

In [11]:
import math

# (latitude, longitude) in decimal degrees for each known surgery prefix.
# NOTE(review): 'HPVM' carries exactly the same coordinates as 'TCP' — confirm this is intended.
_SURGERY_COORDS = {
    'ECS': (51.488721, -0.191873),
    'SMW': (51.494474, -0.181931),
    'TCP': (51.48459, -0.171887),
    'HPVM': (51.48459, -0.171887),
    'KMC': (51.49807, -0.159918),
    'TGP': (51.482652, -0.178066),
}


def haversine_distance(surgery_prefix, lat2, lon2):
    """Return the great-circle distance in kilometres from a surgery to a point.

    Parameters
    ----------
    surgery_prefix : str
        One of the keys of ``_SURGERY_COORDS`` ('ECS', 'SMW', 'TCP', 'HPVM',
        'KMC', 'TGP'); selects the surgery's latitude/longitude.
    lat2, lon2 : float
        Latitude and longitude of the target point, in decimal degrees.

    Returns
    -------
    float
        Haversine distance in kilometres, using an Earth radius of 6371 km.
        NaN coordinates propagate to a NaN result.

    Raises
    ------
    ValueError
        If ``surgery_prefix`` is not a known surgery.
    """
    R = 6371.0  # Radius of the Earth in kilometers

    try:
        lat1, lon1 = _SURGERY_COORDS[surgery_prefix]
    except KeyError:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError(
            f"Unknown surgery prefix {surgery_prefix!r}; "
            f"expected one of {sorted(_SURGERY_COORDS)}"
        ) from None

    # Convert degrees to radians
    lat1_rad = math.radians(lat1)
    lon1_rad = math.radians(lon1)
    lat2_rad = math.radians(lat2)
    lon2_rad = math.radians(lon2)

    # Differences
    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad

    # Haversine formula
    a = math.sin(dlat / 2)**2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(dlon / 2)**2
    # Rounding can push `a` marginally above 1 for near-antipodal points, which
    # would make sqrt(1 - a) raise a domain error. The comparison is False for
    # NaN, so missing coordinates still yield NaN.
    if a > 1.0:
        a = 1.0
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return R * c  # in kilometers

In [15]:
def make_global_disease_register(surgery_list = ['ECS', 'TCP', 'TGP', 'SMW', 'KMC', 'HPVM']):
    """Build a single disease register across surgeries, add IMD data and save it as CSV.

    For each surgery prefix the function reads ``<prefix>_NHS_PTID.xlsx`` and
    the per-condition CSV exports from ``RAW_DATA/<prefix>/``, left-joins
    them, merges the IMD table on ``Postcode`` and adds a
    ``distance_from_surg`` column (kilometres, via ``haversine_distance``).
    The ``NHS number`` column is only used to attach the frailty export and
    is dropped afterwards.

    Parameters
    ----------
    surgery_list : list of str
        Surgery prefixes to process. The default list is never mutated.

    Returns
    -------
    pandas.DataFrame
        The concatenated register with every row containing a NaN removed
        (e.g. postcodes with no match in the IMD table). The same frame is
        written to ``OUTPUT_DATA/global_disease_register.csv``.
    """
    print('=== Preparing Global Disease Register + IMD2023 info ======================================')

    # The IMD table is identical for every surgery: read it once, not once per iteration.
    imd = pd.read_csv(IMD_DATA)

    # Per-condition exports, merged on 'Patient ID' in this order.
    condition_names = ('DEPRESSION', 'OBESITY', 'IHD', 'DM', 'HPT', 'NDHG', 'SMI')

    disease_register = []
    for surgery in surgery_list:
        register_path = f'{RAW_DATA}{surgery}/{surgery}'

        idnhs = pd.read_excel(f'{register_path}_NHS_PTID.xlsx', dtype='str').dropna()
        frail = pd.read_csv(f'{register_path}_FRAILTY.csv', dtype='str')
        condition_tables = [
            pd.read_csv(f'{register_path}_{name}.csv', dtype='str')
            for name in condition_names
        ]

        # The frailty export is keyed on NHS number; everything else on Patient ID.
        register = idnhs.merge(frail, how='left', on='NHS number').drop(columns='NHS number')
        for table in condition_tables:
            register = register.merge(table, how='left', on='Patient ID')
        # Values left missing by the left joins become 0.
        register = register.fillna(0)
        print(f'💊 {surgery} Disease Register completed')

        # Add IMD data and the distance from the surgery
        full_register = register.merge(imd, how='left', on='Postcode')
        print(f'🔸 {surgery} IMD2023')
        # Latitude/Longitude are presumably supplied by the IMD table — verify against its schema.
        full_register['distance_from_surg'] = full_register.apply(
            lambda row: haversine_distance(surgery, row['Latitude'], row['Longitude']),
            axis=1,
        )
        disease_register.append(full_register)

    global_register = pd.concat(disease_register, axis=0, ignore_index=True)
    print("🦠 Concat Registers into ONE REGISTER")

    # Rows still holding NaN at this point are removed.
    global_register = global_register.dropna()
    print('❌ Dropped NaN')

    output_path = f'{OUTPUT_DATA}global_disease_register.csv'
    global_register.to_csv(output_path, index=False)
    print(f'✅ Global Disease Register Saved to output-data: {global_register.shape}')
    print()
    return global_register

In [16]:
# Build the register for the default surgery list; the call also writes the CSV under OUTPUT_DATA.
disease_register = make_global_disease_register()

💊 ECS Disease Register completed
🔸 ECS IMD2023
💊 TCP Disease Register completed
🔸 TCP IMD2023
💊 TGP Disease Register completed
🔸 TGP IMD2023
💊 SMW Disease Register completed
🔸 SMW IMD2023
💊 KMC Disease Register completed
🔸 KMC IMD2023
💊 HPVM Disease Register completed
🔸 HPVM IMD2023
🦠 Concat Registers into ONE REGISTER
❌ Dropped NaN
✅ Global Disease Register Saved to output-data: (86301, 20)



### Make global disease register with NHS number

In [18]:
def make_global_disease_register_with_NHS(surgery_list = ['ECS', 'TCP', 'TGP', 'SMW', 'KMC', 'HPVM']):
    """Build the cross-surgery disease register, keeping the ``NHS number`` column, and save it.

    For each surgery prefix the function reads ``<prefix>_NHS_PTID.xlsx`` and
    the per-condition CSV exports from ``RAW_DATA/<prefix>/``, left-joins
    them, merges the IMD table on ``Postcode`` and adds a
    ``distance_from_surg`` column (kilometres, via ``haversine_distance``).
    The ``NHS number`` column is retained in the result.

    Parameters
    ----------
    surgery_list : list of str
        Surgery prefixes to process. The default list is never mutated.

    Returns
    -------
    pandas.DataFrame
        The concatenated register with every row containing a NaN removed
        (e.g. postcodes with no match in the IMD table). The same frame is
        written to ``OUTPUT_DATA/global_disease_register_with_NHS.csv``.
    """
    print('=== Preparing Global Disease Register + IMD2023 info ======================================')

    # The IMD table is identical for every surgery: read it once, not once per iteration.
    imd = pd.read_csv(IMD_DATA)

    # Per-condition exports, merged on 'Patient ID' in this order.
    condition_names = ('DEPRESSION', 'OBESITY', 'IHD', 'DM', 'HPT', 'NDHG', 'SMI')

    disease_register = []
    for surgery in surgery_list:
        register_path = f'{RAW_DATA}{surgery}/{surgery}'

        idnhs = pd.read_excel(f'{register_path}_NHS_PTID.xlsx', dtype='str').dropna()
        frail = pd.read_csv(f'{register_path}_FRAILTY.csv', dtype='str')
        condition_tables = [
            pd.read_csv(f'{register_path}_{name}.csv', dtype='str')
            for name in condition_names
        ]

        # The frailty export is keyed on NHS number; everything else on Patient ID.
        # 'NHS number' is deliberately kept in the resulting frame.
        register = idnhs.merge(frail, how='left', on='NHS number')
        for table in condition_tables:
            register = register.merge(table, how='left', on='Patient ID')
        # Values left missing by the left joins become 0.
        register = register.fillna(0)
        print(f'💊 {surgery} Disease Register completed')

        # Add IMD data and the distance from the surgery
        full_register = register.merge(imd, how='left', on='Postcode')
        print(f'🔸 {surgery} IMD2023')
        # Latitude/Longitude are presumably supplied by the IMD table — verify against its schema.
        full_register['distance_from_surg'] = full_register.apply(
            lambda row: haversine_distance(surgery, row['Latitude'], row['Longitude']),
            axis=1,
        )
        disease_register.append(full_register)

    global_register = pd.concat(disease_register, axis=0, ignore_index=True)
    print("🦠 Concat Registers into ONE REGISTER")

    # Rows still holding NaN at this point are removed.
    global_register = global_register.dropna()
    print('❌ Dropped NaN')

    output_path = f'{OUTPUT_DATA}global_disease_register_with_NHS.csv'
    global_register.to_csv(output_path, index=False)
    print(f'✅ Global Disease Register Saved to output-data: {global_register.shape}')
    print()
    return global_register

In [19]:
# Build the NHS-number variant for the default surgery list; the call also writes its CSV under OUTPUT_DATA.
global_disease_NHS = make_global_disease_register_with_NHS()

💊 ECS Disease Register completed
🔸 ECS IMD2023
💊 TCP Disease Register completed
🔸 TCP IMD2023
💊 TGP Disease Register completed
🔸 TGP IMD2023
💊 SMW Disease Register completed
🔸 SMW IMD2023
💊 KMC Disease Register completed
🔸 KMC IMD2023
💊 HPVM Disease Register completed
🔸 HPVM IMD2023
🦠 Concat Registers into ONE REGISTER
❌ Dropped NaN
✅ Global Disease Register Saved to output-data: (86301, 21)



In [20]:
# NOTE(review): the rendered output shows patient-level rows (NHS number, Patient ID,
# Postcode) — clear this cell's output before sharing or committing the notebook.
global_disease_NHS

Unnamed: 0,NHS number,Patient ID,Age in years,Postcode,Sex,Registration date,Ethnicity category,Language,FRAILTY,DEPRESSION,OBESITY,IHD,DM,HPT,NDHG,SMI,Latitude,Longitude,IMD2023,dist_to_station,distance_from_surg
0,?384557,17412852,58,W8 6QL,Male,1996-10-08 00:00:00,African,(XaJOr) Main spoken language Amharic,0,0,0,0,0,0,0,0,51.494874,-0.196148,15885.0,0.410963,0.745456
1,?491533,17407925,31,SW5 9LD,Female,1999-06-28 00:00:00,British or Mixed British,(XaG5t) Main spoken language English,0,0,0,0,0,0,0,0,51.487913,-0.194968,7626.0,0.076006,0.232365
2,?493228,17407922,50,SW5 9LA,Male,2005-01-28 00:00:00,Other White,(XaG5u) Main spoken language French,0,0,0,0,0,0,0,0,51.488687,-0.194981,7626.0,0.155802,0.215224
3,?510243,17407912,48,SW5 0NF,Female,1999-12-03 00:00:00,Other White,(XaG5u) Main spoken language French,0,0,0,0,0,0,0,0,51.493928,-0.186736,10320.0,0.229803,0.679501
4,?521958,19579898,55,SW5 0TN,Female,2000-03-29 00:00:00,Other White,(XaJD5) Main spoken language Italian,0,0,0,0,0,0,0,0,51.494199,-0.191479,11573.0,0.371809,0.609736
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87331,7289044835,62118862,0,W8 4RU,Female,2022-09-13 00:00:00,British or Mixed British,(XaG5t) Main spoken language English,0,0,0,0,0,0,0,0,51.508110,-0.193767,22084.0,0.204853,3.022256
87332,7289267796,62873973,25,SW10 9EX,Female,2023-02-20 00:00:00,Other White,(XaG5t) Main spoken language English,0,0,0,0,0,0,0,0,51.484674,-0.185533,24135.0,0.744862,0.944947
87333,7290369939,62648273,0,SW10 9AW,Female,2022-09-12 00:00:00,British or Mixed British,(XaG5t) Main spoken language English,0,0,0,0,0,0,0,0,51.487373,-0.190813,14414.0,0.327030,1.346511
87334,7296152354,62138222,1,SW6 1FY,Female,2022-10-20 00:00:00,Other White,(XaG64) Main spoken language Russian,0,0,0,0,0,0,0,0,51.486260,-0.194967,11772.0,0.098873,1.608872
