# Primary Health Care Accessibility in Ontario

In [1]:
#importing all necessary libraries for data analysis and visualization

import pandas as pd 
import numpy as np 
import seaborn as sns 
import matplotlib.pyplot as plt
import geopandas

## 1. Data Inspection and Cleaning 

In [4]:
#source data downloaded from:https://data.ontario.ca/dataset/family-health-team-fht-locations

# data text file loaded in pandas and inspected to identify significant attribute for our analysis 
url = 'Ministry_of_Health_Service_Provider_Locations.csv'
location_data = pd.read_csv(url, sep = ',',  encoding='latin-1')
location_data.info()
location_data.drop(['FRENCH_NAME', 'FRENCH_NAME_ALT', 'ENGLISH_NAME_ALT'], axis = 1)
location_data['SERVICE_TYPE'].unique()

#Service Type re-cateegorized to identify group of Primary Care Provider services:
location_data['NEW_SERVICE_TYPE'] = location_data['SERVICE_TYPE']
location_data.loc[location_data['SERVICE_TYPE'].str.contains('Family Health Team|Nurse Practitioner-Led Clinic|Indigenous Primary Health Care Organization|Integrated Community Health Services Centre|Nursing Station'), 'NEW_SERVICE_TYPE'] = 'Primary Care Provider'
location_data['NEW_SERVICE_TYPE'].unique()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11749 entries, 0 to 11748
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   ï»¿X                        11749 non-null  float64
 1   Y                           11749 non-null  float64
 2   OGF_ID                      11749 non-null  int64  
 3   MOH_SERVICE_PROVIDER_IDENT  11749 non-null  object 
 4   SERVICE_TYPE                11749 non-null  object 
 5   SERVICE_TYPE_DETAIL         9976 non-null   object 
 6   ENGLISH_NAME                11749 non-null  object 
 7   FRENCH_NAME                 88 non-null     object 
 8   ENGLISH_NAME_ALT            916 non-null    object 
 9   FRENCH_NAME_ALT             4 non-null      object 
 10  ADDRESS_LINE_1              11749 non-null  object 
 11  ADDRESS_LINE_2              2813 non-null   object 
 12  ADDRESS_DESCRIPTOR          2173 non-null   object 
 13  COMMUNITY                   117

array(['Licensed Lab and Specimen Collection Centre Locations',
       'Licenced Retirement Home', 'Pharmacy', 'Long-Term Care Home',
       'Community Support Services', 'Primary Care Provider',
       'Mental Health and Addiction Organization',
       'HIV/AIDS and Hep C Programs', 'Seniors Active Living Centre',
       'Hospital', "Children's Treatment Centre", 'Midwifery Clinic',
       'Public Health Unit Office Locations', 'Community Health Centre'],
      dtype=object)

In [7]:
# Mainly : primary care provider density: count/km2 for regional can be calculated 
# Q1. Rural areas vs highly populated cities, type of primary care provider services offered 
# Q2. primary care provider: km2/population identification to understand the accessibility by the population around the clinics
# A. Is the primary care provider serving a population size that is an outlier from provincial standards measured by the distance from any other provider 
# B. Is the primary care provider, nearest provider to a population centre which will be considered inaccessable; i.e. greater 30 min commute time especially for small towns perhaps in northern ontario 


In [9]:
#source data : https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=9810001901
census_data = pd.read_csv('population count per fsc.csv', sep = ',', skiprows = 11, skipfooter = 8, names = ['forward sortation area', 'population_2021', 'Total private dwelling_2021', 'private dwelling by usual resident_2021'], engine =  'python')
census_data.info()
location_data.info()
location_data = location_data.rename(columns = {'ï»¿X' :'X'})


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1646 entries, 0 to 1645
Data columns (total 4 columns):
 #   Column                                   Non-Null Count  Dtype 
---  ------                                   --------------  ----- 
 0   forward sortation area                   1646 non-null   object
 1   population_2021                          1646 non-null   object
 2   Total private dwelling_2021              1646 non-null   object
 3   private dwelling by usual resident_2021  1646 non-null   object
dtypes: object(4)
memory usage: 51.6+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11749 entries, 0 to 11748
Data columns (total 20 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   ï»¿X                        11749 non-null  float64
 1   Y                           11749 non-null  float64
 2   OGF_ID                      11749 non-null  int64  
 3   MOH_SERVICE_PROVIDER_IDENT  11749 non-nu

## 2. Data Preparation

In [26]:
#create FSC column and match the FSC values from population dataset to combined corresponding population that the clinics located in. 
location_data['FSC'] = location_data['POSTAL_CODE'].str[:3]
population_fsc = census_data.drop(['Total private dwelling_2021', 'private dwelling by usual resident_2021'], axis =1)
population_fsc.set_index('forward sortation area', inplace = True)
population_fsc_dict = population_fsc.to_dict()

location_data['matched_FSC'] = location_data['FSC'].apply(lambda x: population_fsc_dict['population_2021'].get(x))
location_data['population'] = location_data['matched_FSC']
location_data['population'] = location_data['population'].str.split(',').str.join('').apply(int)

#grouped_location_df = location_data.groupby('NEW_SERVICE_TYPE')
#grouped_location_df.info()

TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'