## Data Science for Clustering Hospital based on Waiting Time and Distance

In [58]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import radians, sin, cos, sqrt, atan2

# Preprocessing
from sklearn.preprocessing import MinMaxScaler

from sklearn.neighbors import NearestNeighbors



In [59]:
# Import data
df = pd.read_csv('mockdata.csv')
df.head()

Unnamed: 0,name,long,lat,general_time,high_time,low_time,admitted_time
0,Toronto General Hospital,-75.0989,44.8255,3.33,4.42,2.9,24.44
1,Ottawa Civic Hospital,-79.0541,42.4423,2.72,4.1,2.48,20.12
2,Mississauga Trillium Health,-81.467,44.404,0.45,6.05,3.5,18.59
3,Hamilton Health Sciences,-83.3044,43.6272,1.99,3.86,2.59,15.13
4,Kingston General,-79.9998,45.3473,2.62,4.6,5.26,22.46


In [60]:
# User Input
long = -77
lat = 41
urgent = True
admitted = False
max_wait_time = 0
max_wait_time = 120


In [61]:
# Choosing correct waiting time column
if urgent and not admitted:
    wait_col = 'high_time'
elif not urgent and not admitted:
    wait_col = 'low_time'
elif admitted:
    wait_col = 'admitted_time'
else:
    wait_col = 'general_time'

df["wait_time"] = df[wait_col]

In [62]:
# Function to calculate Haversine distance
def haversine(lat1, lon1, lat2, lon2):
    R = 6371  # Earth radius in km
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat/2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2
    c = 2 * atan2(sqrt(a), sqrt(1-a))
    return R * c


In [63]:
# Column for distance calculation
df["distance"] = np.sqrt((df["lat"] - lat)**2 + (df["long"] - long)**2)
# df["distance"] = df.apply(
#     lambda row: haversine(lat, long, row["lat"], row["long"]),
#     axis=1
# )

## Run KNN to find Top Best K Hospitals

In [64]:
scaler = MinMaxScaler()
df[["distance", "wait_time"]] = scaler.fit_transform(df[["distance", "wait_time"]])

In [65]:
user_point = scaler.transform([[0, max_wait_time]])  # 0 distance, baseline wait




In [66]:
knn = NearestNeighbors(n_neighbors=5)   # Top 5 hospitals
knn.fit(df[["distance", "wait_time"]])

distances, indices = knn.kneighbors(user_point)



In [67]:
best_hospitals = df.iloc[indices[0]]
print("Recommended Hospitals:")
print(best_hospitals[["name", wait_col, "distance"]])

Recommended Hospitals:
                                    name  high_time  distance
17  The Ottawa Hospital - General Campus       6.07  0.473618
2            Mississauga Trillium Health       6.05  0.627603
29       Humber River Hospital (Toronto)       5.67  0.198967
20                    North Bay Regional       5.59  0.548904
16             Lakeridge Health (Oshawa)       5.56  0.290330


In [68]:
df.distance

0     0.428430
1     0.167381
2     0.627603
3     0.807430
4     0.578070
5     0.577418
6     0.787559
7     0.334709
8     0.280811
9     0.476128
10    1.000000
11    0.488787
12    0.869900
13    0.375177
14    0.525708
15    0.376296
16    0.290330
17    0.473618
18    0.528577
19    0.431097
20    0.548904
21    0.120373
22    0.891660
23    0.325333
24    0.948444
25    0.000000
26    0.415403
27    0.672182
28    0.628790
29    0.198967
Name: distance, dtype: float64