In [1]:
import pandas as pd
from sqlalchemy import create_engine

In [2]:
#reading csv file into dataframe for hospitals
csv_file = 'Resources/Hospitals.csv'
hospital_data_df = pd.read_csv(csv_file)
hospital_data_df.head()

Unnamed: 0,X,Y,OBJECTID,ID,NAME,ADDRESS,CITY,STATE,ZIP,ZIP4,...,VAL_DATE,WEBSITE,STATE_ID,ALT_NAME,ST_FIPS,OWNER,TTL_STAFF,BEDS,TRAUMA,HELIPAD
0,-94.945477,29.74762,8497,76777520,HOUSTON METHODIST SAN JACINTO HOSPITAL ALEXAND...,1700 JAMES BOWIE DRIVE,BAYTOWN,TX,77520,NOT AVAILABLE,...,2017-12-18T00:00:00.000Z,http://www.houstonmethodist.org/locations/san-...,NOT AVAILABLE,NOT AVAILABLE,48.0,NON-PROFIT,-999.0,182.0,NOT AVAILABLE,Y
1,-82.881843,40.027143,8498,129043230,"WOODS AT PARKSIDE,THE",349 OLDE RIDENOUR ROAD,COLUMBUS,OH,43230,NOT AVAILABLE,...,2018-04-26T00:00:00.000Z,http://www.thewoodsatparkside.com/,1815,NOT AVAILABLE,39.0,PROPRIETARY,-999.0,50.0,NOT AVAILABLE,NOT AVAILABLE
2,-84.168027,39.774242,8499,130045404,DAYTON CHILDREN'S HOSPITAL,ONE CHILDRENS PLAZA,DAYTON,OH,45404,NOT AVAILABLE,...,2018-04-26T00:00:00.000Z,http://www.childrensdayton.org/cms/home/index....,1411,NOT AVAILABLE,39.0,NON-PROFIT,-999.0,155.0,PEDIATRIC LEVEL II,Y
3,-80.632972,41.005169,8500,128844512,VIBRA HOSPITAL OF MAHONING VALLEY,8049 SOUTH AVENUE,BOARDMAN,OH,44512,NOT AVAILABLE,...,2018-04-26T00:00:00.000Z,http://www.mahoningvalleyhospital.com/,1428,MAHONING VALLEY HOSPITAL BOARDMAN CAMPUS,39.0,PROPRIETARY,-999.0,45.0,NOT AVAILABLE,NOT AVAILABLE
4,-84.199398,39.74774,8501,129845417,HAVEN BEHAVIORAL SENIOR CARE OF DAYTON,"ONE ELIZABETH PLACE,E3 SUITE A",DAYTON,OH,45417,NOT AVAILABLE,...,2018-04-26T00:00:00.000Z,https://dayton.havenbehavioral.com/,1506,NOT AVAILABLE,39.0,PROPRIETARY,-999.0,32.0,NOT AVAILABLE,NOT AVAILABLE


In [3]:
#reading csv file into dataframe for income
csv_file = 'Resources/income.csv'
income_data_df = pd.read_csv(csv_file)
income_data_df.head()

Unnamed: 0,id,State_Code,State_Name,State_ab,County,City,Place,Type,Primary,Zip_Code,Area_Code,ALand,AWater,Lat,Lon,Mean,Median,Stdev,sum_w
0,1011000,1,Alabama,AL,Mobile County,Chickasaw,Chickasaw city,City,place,36611,251,10894952,909156,30.77145,-88.079697,38773,30506,33101,1638.260513
1,1011010,1,Alabama,AL,Barbour County,Louisville,Clio city,City,place,36048,334,26070325,23254,31.708516,-85.611039,37725,19528,43789,258.017685
2,1011020,1,Alabama,AL,Shelby County,Columbiana,Columbiana city,City,place,35051,205,44835274,261034,33.191452,-86.615618,54606,31930,57348,926.031
3,1011030,1,Alabama,AL,Mobile County,Satsuma,Creola city,City,place,36572,251,36878729,2374530,30.874343,-88.009442,63919,52814,47707,378.114619
4,1011040,1,Alabama,AL,Mobile County,Dauphin Island,Dauphin Island,Town,place,36528,251,16204185,413605152,30.250913,-88.171268,77948,67225,54270,282.320328


In [4]:
dup_hospital_data = hospital_data_df.duplicated()
dup_hospital_data

0       False
1       False
2       False
3       False
4       False
        ...  
7565    False
7566    False
7567    False
7568    False
7569    False
Length: 7570, dtype: bool

In [6]:
dup_income_data = income_data_df.duplicated()
dup_income_data

0        False
1        False
2        False
3        False
4        False
         ...  
32521    False
32522    False
32523    False
32524    False
32525    False
Length: 32526, dtype: bool

In [7]:
#creating sub df for just the columns we want
hospital_cols = ["ID", "ZIP", "STATE", "CITY", "POPULATION", "BEDS", "NAME", "OWNER", "STATUS", "TYPE"]
hospital_transformed = hospital_data_df[hospital_cols].copy()


income_cols = ["id", "Zip_Code", "Mean", "Median", "Stdev", "sum_w"]
income_transformed = income_data_df[income_cols].copy()

In [8]:
#rename the column headers
hospital_transformed = hospital_transformed.rename(columns= {"ID":"Hospital_id", 
                                                             "ZIP":"Zip", 
                                                             "STATE":"Us_State", 
                                                             "CITY":"City", 
                                                             "POPULATION":"Pop_100k", 
                                                             "BEDS":"Beds", 
                                                             "NAME":"Hospital_Name", 
                                                             "OWNER":"Type_Owner", 
                                                             "STATUS":"Status", 
                                                             "TYPE":"Care"})
income_transformed = income_transformed.rename(columns= {"Zip_Code":"Zip",
                                                         "sum_w":"Households"})

In [9]:
#creating connection to sql 
conn_str = 'postgres:postgres@localhost:5432/Income_vs_healthcare'
engine = create_engine(f'postgresql://{conn_str}')

In [10]:
#check connection
engine.table_names()

  engine.table_names()


['healthcare', 'income']

In [13]:
#loading cleaned data into sql db healthcare table
hospital_transformed.to_sql(name='healthcare',con=engine,if_exists='append',index=False)

In [14]:
#loading cleaned data into sql db income table
income_transformed.to_sql(name='income',con=engine,if_exists='append',index=False)

In [15]:
#check hospital data
pd.read_sql_query('select * from healthcare', con=engine).head()

Unnamed: 0,Hospital_id,Zip,Us_State,City,Pop_100k,Beds,Hospital_Name,Type_Owner,Status,Care
0,76777520,77520,TX,BAYTOWN,182,182.0,HOUSTON METHODIST SAN JACINTO HOSPITAL ALEXAND...,NON-PROFIT,OPEN,GENERAL ACUTE CARE
1,129043230,43230,OH,COLUMBUS,50,50.0,"WOODS AT PARKSIDE,THE",PROPRIETARY,OPEN,SPECIAL
2,130045404,45404,OH,DAYTON,155,155.0,DAYTON CHILDREN'S HOSPITAL,NON-PROFIT,OPEN,CHILDREN
3,128844512,44512,OH,BOARDMAN,45,45.0,VIBRA HOSPITAL OF MAHONING VALLEY,PROPRIETARY,OPEN,LONG TERM CARE
4,129845417,45417,OH,DAYTON,32,32.0,HAVEN BEHAVIORAL SENIOR CARE OF DAYTON,PROPRIETARY,OPEN,PSYCHIATRIC


In [16]:
#check income data
pd.read_sql_query('select * from income', con=engine).head()

Unnamed: 0,id,Zip,Mean,Median,Households,Stdev
0,1011000,36611,38773,30506,1638,33101
1,1011010,36048,37725,19528,258,43789
2,1011020,35051,54606,31930,926,57348
3,1011030,36572,63919,52814,378,47707
4,1011040,36528,77948,67225,282,54270
