In [1]:
#importing dependencies
import pandas as pd
import numpy as np
import gmaps
import requests
import json

#google developer API key
from config import gkey


In [2]:
#Reading in CSV files
#Hospital data was not encoded in UTF-8, recommended we try windows 1252 encoding
raw_ratings_df = pd.read_csv("../Resources/HospitalGeneralInformation.csv", encoding = 'cp1252')
raw_income_df = pd.read_csv("../Resources/Income_data_with_overlays.csv")


In [3]:

#*************CLEANING UP INCOME DATA ONE ACTION
#Creating a list to hold the desired column names
newColumnNames = []

#Loop through columns adding the first row item to the desired column names list
for column in raw_income_df.columns:
    name = raw_income_df.loc[0,column]
    newColumnNames.append(name)

#Changing column names to the correct names
raw_income_df.columns = newColumnNames

#Removing no longer needed first data row
raw_income_df.drop([0], inplace=True)

#List of the rows we want to analyze
columns_to_keep = ['Geographic Area Name',
                   'Estimate!!Households!!Total',
                   'Estimate!!Households!!Total!!Less than $10,000',
                   'Estimate!!Households!!Total!!$10,000 to $14,999',
                   'Estimate!!Households!!Total!!$15,000 to $24,999',
                   'Estimate!!Households!!Total!!$25,000 to $34,999',
                   'Estimate!!Households!!Total!!$35,000 to $49,999',
                   'Estimate!!Households!!Total!!$50,000 to $74,999',
                   'Estimate!!Households!!Total!!$75,000 to $99,999',
                   'Estimate!!Households!!Total!!$100,000 to $149,999',
                   'Estimate!!Households!!Total!!$150,000 to $199,999',
                   'Estimate!!Households!!Total!!$200,000 or more',
                   'Estimate!!Households!!Median income (dollars)',
                   'Estimate!!Households!!Mean income (dollars)'
                  ]

#Creating a new dataframe that keeps only the columns we want to analyze
income_df = raw_income_df.loc[:,columns_to_keep]

#Clean up column names now that we have the correct columns
#List of new names to be used
clean_names = ['State',
               'Total Households',
               '<$10,000(%)',
               '$10,000 to $14,999(%)',
               '$15,000 to $24,999(%)',
               '$25,000 to $34,999(%)',
               '$35,000 to $49,999(%)',
               '$50,000 to $74,999(%)',
               '$75,000 to $99,999(%)',
               '$100,000 to $149,999(%)',
               '$150,000 to $199,999(%)',
               '>$200,000(%)',
               'Median Income(dollars)',
               'Mean Income(dollars)'
              ]
#Change the badly formatted Column names to the list of clean names
income_df.columns = clean_names
income_df['Median Income(dollars)'] = income_df['Median Income(dollars)'].astype(float)

#Show Final clean income DF
#income_df.head()


In [4]:
#****************CLEANING UP RATINGS DATA
#create new DF to remove non-important columns

#Listing the columns to keep for easy editability
columns_to_keep = ['Hospital Name',
                    'Address',
                    'City',
                    'State',
                    'ZIP Code',
                    'County Name',
                    'Hospital Type',
                    'Hospital Ownership',
                    'Hospital overall rating',
                    'Mortality national comparison',
                    'Safety of care national comparison',
                    'Readmission national comparison',
                    'Patient experience national comparison',
                    'Effectiveness of care national comparison',
                    'Timeliness of care national comparison',
                    'Efficient use of medical imaging national comparison'
                   ]
#Creating new DF(ratings_df) with only columns of interest
ratings_df = raw_ratings_df.loc[:,columns_to_keep]

#Removing Rows where overall rating is "not available"
ratings_df = ratings_df[ratings_df['Hospital overall rating'] != "Not Available"]

#converting the overall ratings column to numeric type
ratings_df['Hospital overall rating'] = ratings_df['Hospital overall rating'].astype(int)


#TESTING TO SEE NEW DF
ratings_df.head()

Unnamed: 0,Hospital Name,Address,City,State,ZIP Code,County Name,Hospital Type,Hospital Ownership,Hospital overall rating,Mortality national comparison,Safety of care national comparison,Readmission national comparison,Patient experience national comparison,Effectiveness of care national comparison,Timeliness of care national comparison,Efficient use of medical imaging national comparison
0,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,3,Same as the national average,Above the national average,Same as the national average,Below the national average,Same as the national average,Same as the national average,Same as the national average
1,MARSHALL MEDICAL CENTER SOUTH,2505 U S HIGHWAY 431 NORTH,BOAZ,AL,35957,MARSHALL,Acute Care Hospitals,Government - Hospital District or Authority,3,Below the national average,Same as the national average,Above the national average,Same as the national average,Same as the national average,Above the national average,Below the national average
2,ELIZA COFFEE MEMORIAL HOSPITAL,205 MARENGO STREET,FLORENCE,AL,35631,LAUDERDALE,Acute Care Hospitals,Government - Hospital District or Authority,2,Below the national average,Same as the national average,Same as the national average,Below the national average,Same as the national average,Above the national average,Same as the national average
3,MIZELL MEMORIAL HOSPITAL,702 N MAIN ST,OPP,AL,36467,COVINGTON,Acute Care Hospitals,Voluntary non-profit - Private,2,Same as the national average,Not Available,Below the national average,Same as the national average,Below the national average,Above the national average,Not Available
4,CRENSHAW COMMUNITY HOSPITAL,101 HOSPITAL CIRCLE,LUVERNE,AL,36049,CRENSHAW,Acute Care Hospitals,Proprietary,3,Same as the national average,Not Available,Same as the national average,Not Available,Same as the national average,Above the national average,Not Available


In [5]:
#*/*/*/*/HERE WE HAVE CLEANED UP DATAFRAMES FOR OUR ANALYSIS


In [6]:
#MAPPING OUT US HOSPITALS W/ HIGHTEST RATINGS

In [7]:
#Create a Dataframe with hospitals rated above X overall rating


top_hospital_df = ratings_df.loc[ratings_df['Hospital overall rating'] > 4]

#create a way to hold longitude and latitude
#hospital_df["Lat"] = ''
#hospital_df["Lng"] = ''
Latitudes = []
Longitudes = []
 
#hospital_df.head()

In [8]:
#Create necessary components for api call
base_url = "https://maps.googleapis.com/api/geocode/json"
params = {"key": gkey}
    

In [9]:
#Loop through rows of the DF to pull Lat and Long from google api and add to DF
#FOR HOSPITAL DF for index, row in hospital_df.iterrows():
for index, row in top_hospital_df.iterrows():
    base_url = "https://maps.googleapis.com/maps/api/geocode/json?"

    
    #Pull Address from the dataframe and add to params for api call
    params['address'] = f"{row['Address']},{row['City']},{row['State']}"
    
    #Make request to google api and convert to json
    hospital_geodata = requests.get(base_url, params=params).json()
    
    #Add Longitude and Latitude to Dataframe
   # hospital_df.loc[index,"Lat"] = hospital_geodata["results"][0]["geometry"]["location"]["lat"]
    #hospital_df.loc[index,"Lng"] = hospital_geodata["results"][0]["geometry"]["location"]["lng"]
    #List instead
    Latitudes.append(hospital_geodata["results"][0]["geometry"]["location"]["lat"])
    Longitudes.append(hospital_geodata["results"][0]["geometry"]["location"]["lng"])
    
top_hospital_df['Lat']=Latitudes
top_hospital_df['Lng']=Longitudes
top_hospital_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_hospital_df['Lat']=Latitudes
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_hospital_df['Lng']=Longitudes


Unnamed: 0,Hospital Name,Address,City,State,ZIP Code,County Name,Hospital Type,Hospital Ownership,Hospital overall rating,Mortality national comparison,Safety of care national comparison,Readmission national comparison,Patient experience national comparison,Effectiveness of care national comparison,Timeliness of care national comparison,Efficient use of medical imaging national comparison,Lat,Lng
23,MARION REGIONAL MEDICAL CENTER,1256 MILITARY STREET SOUTH,HAMILTON,AL,35570,MARION,Acute Care Hospitals,Voluntary non-profit - Private,5,Same as the national average,Not Available,Same as the national average,Above the national average,Same as the national average,Above the national average,Not Available,34.126799,-87.991465
65,LAKELAND COMMUNITY HOSPITAL,42024 HIGHWAY 195 E,HALEYVILLE,AL,35565,WINSTON,Acute Care Hospitals,Voluntary non-profit - Church,5,Above the national average,Not Available,Above the national average,Same as the national average,Same as the national average,Above the national average,Same as the national average,34.241956,-87.591402
76,BAPTIST MEDICAL CENTER EAST,400 TAYLOR ROAD,MONTGOMERY,AL,36117,MONTGOMERY,Acute Care Hospitals,Government - Hospital District or Authority,5,Same as the national average,Above the national average,Above the national average,Above the national average,Same as the national average,Below the national average,Same as the national average,32.376024,-86.174858
145,MAYO CLINIC HOSPITAL,5777 EAST MAYO BOULEVARD,PHOENIX,AZ,85054,MARICOPA,Acute Care Hospitals,Voluntary non-profit - Private,5,Above the national average,Above the national average,Above the national average,Above the national average,Same as the national average,Same as the national average,Same as the national average,33.659052,-111.95645
146,BANNER HEART HOSPITAL,6750 EAST BAYWOOD AVENUE,MESA,AZ,85206,MARICOPA,Acute Care Hospitals,Voluntary non-profit - Private,5,Above the national average,Above the national average,Above the national average,Above the national average,Not Available,Not Available,Not Available,33.411242,-111.688921


In [10]:
#GMAPS needs a list of tuples for the marker layer

#creating a list to hold hospital coordinates
hospital_coordinates = []
#creating a list to hold string for marker descriptions
hospital_info = []

#looping through the rows of the df to create a tuple using the lat and long columns
#and add them to the list above
for index, row in top_hospital_df.iterrows():
    hospital_coordinates.append((row["Lat"],row["Lng"]))
    #ATTEMPT at html to make a Description list for hospital information. 
    #triple quote to allow a string to span multiple lines instead of one giant long line
    hospital_info.append(f"""
                        <dl>
                        <dt>Hospital:</dt><dd>{row['Hospital Name']}</dd>
                        <dt>Rating:</dt><dd>{row['Hospital overall rating']}</dd>
                        </dl>""")

In [11]:
#Creating a gmaps marker layer using hospital coordinates list and hospital info list
marker_layer = gmaps.marker_layer(hospital_coordinates,info_box_content=hospital_info)

#Creating a gmaps figure base layer map
fig=gmaps.figure()

#adding the marker layer to the figure base layer map
fig.add_layer(marker_layer)

#show the map
fig

Figure(layout=FigureLayout(height='420px'))

In [12]:
#MAPING OUT CA HOSPITALS W/ 4+ RATING

california_df = ratings_df.loc[ratings_df['State']=="CA"]
california_df = california_df[california_df['Hospital overall rating']>3]

Latitudes = []
Longitudes = []

In [13]:
for index, row in california_df.iterrows():
    base_url = "https://maps.googleapis.com/maps/api/geocode/json?"

    
    #Pull Address from the dataframe and add to params for api call
    params['address'] = f"{row['Address']},{row['City']},{row['State']}"
    
    #Make request to google api and convert to json
    hospital_geodata = requests.get(base_url, params=params).json()
    
    #Add Longitude and Latitude to Dataframe
   # hospital_df.loc[index,"Lat"] = hospital_geodata["results"][0]["geometry"]["location"]["lat"]
    #hospital_df.loc[index,"Lng"] = hospital_geodata["results"][0]["geometry"]["location"]["lng"]
    #List instead
    Latitudes.append(hospital_geodata["results"][0]["geometry"]["location"]["lat"])
    Longitudes.append(hospital_geodata["results"][0]["geometry"]["location"]["lng"])
    
california_df['Lat']=Latitudes
california_df['Lng']=Longitudes
california_df.head()

Unnamed: 0,Hospital Name,Address,City,State,ZIP Code,County Name,Hospital Type,Hospital Ownership,Hospital overall rating,Mortality national comparison,Safety of care national comparison,Readmission national comparison,Patient experience national comparison,Effectiveness of care national comparison,Timeliness of care national comparison,Efficient use of medical imaging national comparison,Lat,Lng
261,MILLS-PENINSULA MEDICAL CENTER,1501 TROUSDALE DRIVE,BURLINGAME,CA,94010,SAN MATEO,Acute Care Hospitals,Voluntary non-profit - Private,4,Above the national average,Same as the national average,Above the national average,Above the national average,Same as the national average,Same as the national average,Same as the national average,37.591609,-122.383717
285,KAWEAH DELTA MEDICAL CENTER,400 W MINERAL KING AVE,VISALIA,CA,93291,TULARE,Acute Care Hospitals,Government - Hospital District or Authority,4,Same as the national average,Above the national average,Above the national average,Below the national average,Same as the national average,Below the national average,Same as the national average,36.328572,-119.294958
296,KAISER FOUNDATION HOSPITAL - SAN FRANCISCO,2425 GEARY BLVD,SAN FRANCISCO,CA,94115,SAN FRANCISCO,Acute Care Hospitals,Voluntary non-profit - Other,4,Not Available,Same as the national average,Not Available,Above the national average,Same as the national average,Not Available,Not Available,37.782459,-122.443097
302,SONOMA VALLEY HOSPITAL,347 ANDRIEUX ST,SONOMA,CA,95476,SONOMA,Acute Care Hospitals,Government - Hospital District or Authority,4,Same as the national average,Same as the national average,Above the national average,Same as the national average,Same as the national average,Same as the national average,Same as the national average,38.28815,-122.465688
307,SHARP MEMORIAL HOSPITAL,7901 FROST ST,SAN DIEGO,CA,92123,SAN DIEGO,Acute Care Hospitals,Voluntary non-profit - Other,4,Same as the national average,Above the national average,Above the national average,Above the national average,Above the national average,Same as the national average,Above the national average,32.799676,-117.1546


In [14]:
#creating a list to hold hospital coordinates
hospital_coordinates = []
#creating a list to hold string for marker descriptions
hospital_info = []

#looping through the rows of the df to create a tuple using the lat and long columns
#and add them to the list above
for index, row in california_df.iterrows():
    hospital_coordinates.append((row["Lat"],row["Lng"]))
    #ATTEMPT at html to make a Description list for hospital information. 
    #triple quote to allow a string to span multiple lines instead of one giant long line
    hospital_info.append(f"""
                        <dl>
                        <dt>Hospital:</dt><dd>{row['Hospital Name']}</dd>
                        <dt>Rating:</dt><dd>{row['Hospital overall rating']}</dd>
                        </dl>""")

In [15]:
#Creating a gmaps marker layer using hospital coordinates list and hospital info list
marker_layer = gmaps.marker_layer(hospital_coordinates,info_box_content=hospital_info)

#Creating a gmaps figure base layer map
fig=gmaps.figure()

#adding the marker layer to the figure base layer map
fig.add_layer(marker_layer)

#show the map
fig

Figure(layout=FigureLayout(height='420px'))