In [72]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from showupforhealth.params import *
pd.set_option('display.max_columns',45)

from sklearn.preprocessing import StandardScaler


In [73]:
data = pd.read_csv('/Users/alessio/code/janduplessis883/data-showup/data/output-data/full_train_data.csv')

## Explore the 'Age' feature

### Should under 16 been condidered 'depending' from adults so excluded from the study?

In [55]:
data['Appointment_status'].value_counts(normalize=True)

In [56]:
for i in range(16,1,-3):
    age_filter = (data['Age'] < i) & (data['Appointment_status'] == 0)
    print(f'We have {data[age_filter].shape[0]} DNAs under the age of {i}')

In [57]:
len(data['Age'].value_counts())

In [58]:
fig = plt.figure(figsize=(8, 5))
sns.histplot(data=data, x='Age', bins=len(data['Age'].value_counts()));

In [59]:
data.head()

## Split training set in X and y

In [67]:
X = data.drop(columns='Appointment_status')
y = data['Appointment_status']

In [68]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=42)

## Scale dataset

In [51]:
def standard_scaler(X_train, X_test):
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled

In [65]:
X_train_scaled, X_test_scaled = standard_scaler(X_train, X_test)

In [66]:
X_train_scaled

In [48]:
X_scaled_df = pd.DataFrame(X_train_scaled, columns=X.columns)
X_scaled_df

Unnamed: 0,temp,precipitation,Age,Sex,FRAILTY,DEPRESSION,OBESITY,IHD,DM,HPT,NDHG,SMI,IMD2023,dist_to_station,distance_to_surg,book_to_app_days,booked_by_clinician,registered_for_months,sin_week,cos_week,sin_Appointment_time,cos_Appointment_time,sin_month,cos_month,sin_day_of_week,cos_day_of_week,No_shows,Rota_ARRS,Rota_GP,Rota_HCA,Rota_Nurse,Ethnicity_Asian,Ethnicity_Black,Ethnicity_Mixed,Ethnicity_Other,Ethnicity_White
0,-1.978668,-0.268124,-0.389155,1.315082,-0.186677,1.984380,2.333295,-0.279561,2.821233,-0.638647,-0.412425,-0.248913,-1.410619,-0.662564,5.157992,-0.595881,-0.507051,-0.728277,0.696999,1.286306,1.201030,0.012694,0.754270,1.255306,0.842475,0.870146,0.204979,-0.278102,0.569711,-0.220398,-0.381282,-0.341907,-0.291362,-0.247856,-0.371365,0.753240
1,0.296496,-0.268124,0.979427,1.315082,0.627970,-0.503936,-0.428579,-0.279561,-0.354455,1.565809,-0.412425,-0.248913,0.660841,-0.877874,-0.584725,-0.012406,-0.507051,0.882431,1.441773,-0.141902,-0.995787,0.012694,1.267959,-0.694763,-1.500851,-1.083471,-0.468087,-0.278102,0.569711,-0.220398,-0.381282,-0.341907,-0.291362,-0.247856,-0.371365,0.753240
2,-1.225511,-0.268124,0.705711,1.315082,1.442618,1.984380,-0.428579,-0.279561,2.821233,1.565809,-0.412425,-0.248913,0.837988,-0.508888,-0.293876,-0.595881,-0.507051,0.345529,0.541667,1.356666,-0.995787,0.012694,0.754270,1.255306,-1.500851,-1.083471,2.762628,-0.278102,0.569711,-0.220398,-0.381282,-0.341907,-0.291362,-0.247856,-0.371365,0.753240
3,-0.456662,-0.268124,-0.526013,-0.760400,-1.001324,-0.503936,-0.428579,-0.279561,-0.354455,-0.638647,-0.412425,-0.248913,0.908483,0.707394,-0.323784,1.008675,-0.507051,-0.370341,1.290482,0.688991,-1.242648,0.762743,1.267959,0.732787,-0.664528,1.352649,-0.198861,-0.278102,-1.755277,-0.220398,2.622729,-0.341907,-0.291362,-0.247856,-0.371365,0.753240
4,0.955509,1.630823,0.660091,-0.760400,1.294500,1.984380,2.333295,3.577037,-0.354455,1.565809,-0.412425,-0.248913,-1.525925,-0.997591,1.402174,-0.450013,-0.507051,-0.131718,-1.119433,-0.777257,0.879314,-0.562838,-1.162841,-0.694763,0.171795,-1.083471,-0.198861,-0.278102,0.569711,-0.220398,-0.381282,-0.341907,-0.291362,-0.247856,2.692768,-1.327598
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
631056,-1.978668,-0.268124,-1.073446,1.315082,-1.001324,-0.503936,-0.428579,-0.279561,-0.354455,-0.638647,-0.412425,-0.248913,0.806997,-0.306767,0.183993,0.571069,-0.507051,-0.608965,0.976860,1.091885,-1.450762,2.573521,1.267959,0.732787,0.842475,0.870146,-0.468087,-0.278102,0.569711,-0.220398,-0.381282,-0.341907,-0.291362,-0.247856,-0.371365,0.753240
631057,-1.319655,-0.268124,-1.301543,-0.760400,-0.186677,-0.503936,-0.428579,-0.279561,-0.354455,-0.638647,-0.412425,-0.248913,1.185162,-0.416718,-0.312437,-0.595881,-0.507051,-1.086212,0.842772,1.197614,1.201030,0.012694,1.267959,0.732787,1.214673,-0.214030,-0.468087,-0.278102,0.569711,-0.220398,-0.381282,-0.341907,-0.291362,-0.247856,-0.371365,0.753240
631058,0.939818,-0.268124,-0.297916,1.315082,-0.779147,-0.503936,-0.428579,-0.279561,-0.354455,-0.638647,-0.412425,-0.248913,0.037825,1.780632,-0.562034,-0.595881,1.972187,-0.012406,0.541667,-1.298218,-0.995787,0.012694,0.052559,-1.408538,0.171795,-1.083471,-0.468087,3.595801,-1.755277,-0.220398,-0.381282,-0.341907,-0.291362,-0.247856,-0.371365,0.753240
631059,-0.174228,2.263806,-0.024200,1.315082,-1.001324,-0.503936,-0.428579,-0.279561,-0.354455,-0.638647,-0.412425,-0.248913,0.857671,0.245607,-0.427332,-0.595881,-0.507051,-0.787932,0.379041,-1.349222,-0.995787,0.012694,0.052559,-1.408538,-0.664528,1.352649,-0.198861,-0.278102,0.569711,-0.220398,-0.381282,-0.341907,-0.291362,-0.247856,-0.371365,0.753240
