## Optimizing a Healthcare Network for Improved Service Delivery

# data Gathering

In [102]:
import pandas as pd
data=pd.read_html('https://www.washington-demographics.com/zip_codes_by_population',header=0)
population_data=data[0]
population_data.head()

Unnamed: 0,Rank,Zip Code,Population
0,1,99301,78767
1,2,98052,67779
2,3,98012,64417
3,4,98682,58178
4,5,98208,57306


# Getting info about data

In [103]:
population_data.shape

(564, 3)

In [104]:
population_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 564 entries, 0 to 563
Data columns (total 3 columns):
Rank          564 non-null object
Zip Code      564 non-null object
Population    564 non-null object
dtypes: object(3)
memory usage: 13.3+ KB


**There is no any null values**

In [105]:
population_data.describe()

Unnamed: 0,Rank,Zip Code,Population
count,564,564,564
unique,564,564,564
top,159,98029,7248
freq,1,1,1


In [106]:
population_data.duplicated().sum()

0

## Data cleaning

In [107]:
population_data.tail()

Unnamed: 0,Rank,Zip Code,Population
559,560,98853,16
560,561,98852,15
561,562,99136,12
562,563,99034,1
563,United States Census Bureau. B01001 SEX BY AGE...,United States Census Bureau. B01001 SEX BY AGE...,United States Census Bureau. B01001 SEX BY AGE...


In [108]:
#removing row number 563 beacuse it is not no use it contains description
population_data=population_data.drop([563],axis=0)

In [109]:
population_data.head()

Unnamed: 0,Rank,Zip Code,Population
0,1,99301,78767
1,2,98052,67779
2,3,98012,64417
3,4,98682,58178
4,5,98208,57306


In [110]:
#changing data type of populatin into integer
population_data = population_data.astype({"Population": int})

In [111]:
population_data['Population'].dtypes


dtype('int32')

In [112]:
#drop rank coloumn it is not in our use
population_data=population_data.drop(['Rank'],axis=1)

In [113]:
population_data.head()

Unnamed: 0,Zip Code,Population
0,99301,78767
1,98052,67779
2,98012,64417
3,98682,58178
4,98208,57306


## Data manipulation(Finalizing data for desired result)

In [114]:
#extracting only those zipcode data which is mentioned in the document
df=population_data[population_data['Zip Code'].isin(['98007','98290','98065','98801','98104'])]
df

Unnamed: 0,Zip Code,Population
25,98801,43291
51,98290,35990
96,98007,29174
189,98065,15548
198,98104,14144


In [115]:
df.reset_index(drop=True)

Unnamed: 0,Zip Code,Population
0,98801,43291
1,98290,35990
2,98007,29174
3,98065,15548
4,98104,14144


In [116]:
facility_id=['A','B','C','D','E']
Zipcode=['98007','98290','98065','98801','98104']
staff=[21,52,43,9,64]
df_1=pd.DataFrame({'Facility ID':facility_id,
                         'Zip Code':Zipcode,
                          'Facility Staff Count':staff})
df_1

Unnamed: 0,Facility ID,Zip Code,Facility Staff Count
0,A,98007,21
1,B,98290,52
2,C,98065,43
3,D,98801,9
4,E,98104,64


In [117]:
#merging df and df_1
final_data=pd.merge(df_1,df,on='Zip Code')
final_data.head()

Unnamed: 0,Facility ID,Zip Code,Facility Staff Count,Population
0,A,98007,21,29174
1,B,98290,52,35990
2,C,98065,43,15548
3,D,98801,9,43291
4,E,98104,64,14144


In [118]:
# Set the order of above dataframe
final_data = final_data[['Facility ID', 'Zip Code', 'Population', 'Facility Staff Count']]
final_data

Unnamed: 0,Facility ID,Zip Code,Population,Facility Staff Count
0,A,98007,29174,21
1,B,98290,35990,52
2,C,98065,15548,43
3,D,98801,43291,9
4,E,98104,14144,64


# Optimisation Technique


In [119]:
#finding staff_population ratio
final_data['staff to population ratio']=final_data['Population']/final_data['Facility Staff Count']
final_data

Unnamed: 0,Facility ID,Zip Code,Population,Facility Staff Count,staff to population ratio
0,A,98007,29174,21,1389.238095
1,B,98290,35990,52,692.115385
2,C,98065,15548,43,361.581395
3,D,98801,43291,9,4810.111111
4,E,98104,14144,64,221.0


In [120]:
#converting staff to population ratio to int
final_data = final_data.astype({"staff to population ratio": int})
final_data

Unnamed: 0,Facility ID,Zip Code,Population,Facility Staff Count,staff to population ratio
0,A,98007,29174,21,1389
1,B,98290,35990,52,692
2,C,98065,15548,43,361
3,D,98801,43291,9,4810
4,E,98104,14144,64,221


In [121]:
final_data.head()

Unnamed: 0,Facility ID,Zip Code,Population,Facility Staff Count,staff to population ratio
0,A,98007,29174,21,1389
1,B,98290,35990,52,692
2,C,98065,15548,43,361
3,D,98801,43291,9,4810
4,E,98104,14144,64,221


**here we can see Facility D has less staff to populatio ratio so we need to shift worker towards D**

In [127]:
#finding population percent in each area
population_percent=[]
total_population=final_data['Population'].sum()
for i in final_data['Population']:
    p=(i/total_population)*100
    population_percent.append(p)
#assigning staff in each area which is equally distributed
assigned_staff=[]
total_staff=final_data['Facility Staff Count'].sum()
for i in final_data.index:
    s=(population_percent[i]*total_staff)/100
    assigned_staff.append(round(s))

In [129]:
print(population_percent)
print(assigned_staff)

[21.118084359414247, 26.051959144968766, 11.254677987940383, 31.33690923436629, 10.238369273310315]
[40.0, 49.0, 21.0, 59.0, 19.0]


In [134]:
#our result with updated staff
calculted_data=final_data.drop(['Facility Staff Count','staff to population ratio'],axis=1)
calculted_data['updated staff count']=assigned_staff


In [138]:
calculted_data.head()

Unnamed: 0,Facility ID,Zip Code,Population,updated staff count
0,A,98007,29174,40.0
1,B,98290,35990,49.0
2,C,98065,15548,21.0
3,D,98801,43291,59.0
4,E,98104,14144,19.0


In [136]:
# install geopy
! pip install geopy

Collecting geopy
  Downloading https://files.pythonhosted.org/packages/ab/97/25def417bf5db4cc6b89b47a56961b893d4ee4fec0c335f5b9476a8ff153/geopy-1.22.0-py2.py3-none-any.whl (113kB)
Collecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/8b/62/26ec95a98ba64299163199e95ad1b0e34ad3f4e176e221c40245f211e425/geographiclib-1.50-py3-none-any.whl
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-1.22.0


In [140]:
from geopy.geocoders import Nominatim 
geolocator = Nominatim()
area = []
for i in calculted_data['Zip Code']:
    location = geolocator.geocode(i) 
    area.append(location.address)
    
area

calculted_data['Area'] = area
calculted_data

  


Unnamed: 0,Facility ID,Zip Code,Population,updated staff count,Area
0,A,98007,29174,40.0,"Bellevue, King County, Washington, 98007, Unit..."
1,B,98290,35990,49.0,"Snohomish County, Washington, 98290, United St..."
2,C,98065,15548,21.0,"King County, Washington, 98065, United States ..."
3,D,98801,43291,59.0,"Wenatchee, Chelan County, Washington, 98801, U..."
4,E,98104,14144,19.0,"International District/Chinatown, Seattle, Kin..."


## Summary

**So in this project, we modeled the number of staff members needed in each area**