# Medicare Stars Project

In [24]:
# I imported the data from Data.CMS.gov -- the Provider Information dataset. 
# Objective: to discover any existing trends in star ratings (overall quality) over time and investigate factors that affect the ratings-- in particular,
# health inspections, quality measures, and staffing.

### Data Analysis

#### Data Cleaning

In [59]:
%pip install altair


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [26]:
import pandas as pd
import numpy as np
import altair as alt
import plotly.express as px
from vega_datasets import data
provider_info = pd.read_csv('provider_info.csv')
provider_info.head()

Unnamed: 0,CMS Certification Number (CCN),Provider Name,Provider Address,City/Town,State,ZIP Code,Telephone Number,Provider SSA County Code,County/Parish,Ownership Type,...,Number of Citations from Infection Control Inspections,Number of Fines,Total Amount of Fines in Dollars,Number of Payment Denials,Total Number of Penalties,Location,Latitude,Longitude,Geocoding Footnote,Processing Date
0,15009,"BURNS NURSING HOME, INC.",701 MONROE STREET NW,RUSSELLVILLE,AL,35653,2563324110,290,Franklin,For profit - Corporation,...,,2,24644.14,0,2,"701 MONROE STREET NW,RUSSELLVILLE,AL,35653",34.5149,-87.736,,2024-07-01
1,15010,COOSA VALLEY HEALTHCARE CENTER,260 WEST WALNUT STREET,SYLACAUGA,AL,35150,2562495604,600,Talladega,For profit - Corporation,...,0.0,0,0.0,0,0,"260 WEST WALNUT STREET,SYLACAUGA,AL,35150",33.1637,-86.254,,2024-07-01
2,15012,HIGHLANDS HEALTH AND REHAB,380 WOODS COVE ROAD,SCOTTSBORO,AL,35768,2562183708,350,Jackson,Government - County,...,,0,0.0,0,0,"380 WOODS COVE ROAD,SCOTTSBORO,AL,35768",34.6611,-86.047,,2024-07-01
3,15014,EASTVIEW REHABILITATION & HEALTHCARE CENTER,7755 FOURTH AVENUE SOUTH,BIRMINGHAM,AL,35206,2058330146,360,Jefferson,For profit - Individual,...,0.0,0,0.0,0,0,"7755 FOURTH AVENUE SOUTH,BIRMINGHAM,AL,35206",33.5595,-86.722,,2024-07-01
4,15015,PLANTATION MANOR NURSING HOME,6450 OLD TUSCALOOSA HIGHWAY,MC CALLA,AL,35111,2054776161,360,Jefferson,For profit - Individual,...,,0,0.0,0,0,"6450 OLD TUSCALOOSA HIGHWAY,MC CALLA,AL,35111",33.3221,-87.034,,2024-07-01


In [27]:
provider_info[provider_info['Staffing Rating'].isna()]

Unnamed: 0,CMS Certification Number (CCN),Provider Name,Provider Address,City/Town,State,ZIP Code,Telephone Number,Provider SSA County Code,County/Parish,Ownership Type,...,Number of Citations from Infection Control Inspections,Number of Fines,Total Amount of Fines in Dollars,Number of Payment Denials,Total Number of Penalties,Location,Latitude,Longitude,Geocoding Footnote,Processing Date
133,015217,BIRMINGHAM NURSING AND REHABILITATION CTR LLC,1000 DUGAN AVENUE,BIRMINGHAM,AL,35214,2057988780,360,Jefferson,For profit - Limited Liability company,...,0.0,2,323706.50,0,2,"1000 DUGAN AVENUE,BIRMINGHAM,AL,35214",33.5460,-86.881,,2024-07-01
227,025019,PETERSBURG MEDICAL CENTER LTC,103 FRAM STREET,PETERSBURG,AK,99833,9077724291,195,Petersburg Borough,Government - City/county,...,,0,0.00,0,0,"103 FRAM STREET,PETERSBURG,AK,99833",56.8125,-132.956,,2024-07-01
232,025026,QUYANNA CARE CENTER,1100 GREG KRUSCHEK AVENUE,NOME,AK,99762,9074433311,180,Nome,Non profit - Corporation,...,,1,975.00,0,1,"1100 GREG KRUSCHEK AVENUE,NOME,AK,99762",64.7874,-165.197,22.0,2024-07-01
235,025030,PROVIDENCE KODIAK ISLAND MED LTC,1915 E REZANOF DRIVE,KODIAK,AK,99615,9074867800,150,Kodiak Island Borough,Non profit - Corporation,...,,1,15000.00,0,1,"1915 E REZANOF DRIVE,KODIAK,AK,99615",57.8010,-152.374,,2024-07-01
303,035146,LIFE CARE CENTER OF PARADISE VALLEY,4065 EAST BELL ROAD,PHOENIX,AZ,85032,6028670212,60,Maricopa,For profit - Corporation,...,0.0,1,72146.75,0,1,"4065 EAST BELL ROAD,PHOENIX,AZ,85032",33.6407,-111.994,,2024-07-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14491,676132,TRAIL LAKE NURSING & REHABILITATION,7100 TRAIL LAKE DR,FORT WORTH,TX,76133,8172632224,910,Tarrant,For profit - Limited Liability company,...,9.0,9,178390.46,0,9,"7100 TRAIL LAKE DR,FORT WORTH,TX,76133",32.6418,-97.401,,2024-07-01
14791,676476,THE HEIGHTS ON VALLEY RANCH,23200 VALLEY RANCH PARKWAY,PORTER,TX,77365,3463265085,801,Montgomery,For profit - Limited Liability company,...,0.0,0,0.00,0,0,"23200 VALLEY RANCH PARKWAY,PORTER,TX,77365",30.1157,-95.251,,2024-07-01
14818,686127,ADVENTHEALTH DELAND,701 W PLYMOUTH AVE,DELAND,FL,32720,3869434522,630,Volusia,Non profit - Corporation,...,,0,0.00,0,0,"701 W PLYMOUTH AVE,DELAND,FL,32720",29.0426,-81.316,,2024-07-01
14831,745038,TIERRA ESTE NURSING AND REHABILITATION CENTER,14300 PEBBLE HILLS BLVD,EL PASO,TX,79938,9159559998,480,El Paso,For profit - Corporation,...,,7,31574.79,0,7,"14300 PEBBLE HILLS BLVD,EL PASO,TX,79938",31.7822,-106.230,,2024-07-01


In [28]:
# We have a lot of missing data! I will impute the NaN values by calculating the mean values for each state and filling them in respectively. 
def removeNA(col):
    citations_present = provider_info[~provider_info[col].isna()]
    mean_citations_state = citations_present[['State', col]].groupby('State').mean()
    mean_citations_state_dict = mean_citations_state[col].to_dict()
    return mean_citations_state_dict

infection_control_dict = removeNA('Number of Citations from Infection Control Inspections')
health_inspection_dict = removeNA('Health Inspection Rating')
average_resident_num_dict = removeNA('Average Number of Residents per Day')
overall_rating_dict = removeNA('Overall Rating')
qm_rating_dict = removeNA('QM Rating')
staffing_rating_dict = removeNA('Staffing Rating')

In [29]:
def fillNA(col, state_mean_dct):
    provider_info[col] = provider_info[col].fillna(provider_info['State'].map(state_mean_dct))
    return provider_info
provider_info = fillNA('Number of Citations from Infection Control Inspections', infection_control_dict)
provider_info = fillNA('Average Number of Residents per Day', average_resident_num_dict)
provider_info = fillNA('Overall Rating', overall_rating_dict)
provider_info = fillNA('QM Rating', qm_rating_dict)
provider_info = fillNA('Staffing Rating', staffing_rating_dict)
provider_info = fillNA('Health Inspection Rating', health_inspection_dict)

smaller_provider_info = provider_info[['CMS Certification Number (CCN)', 'State', 'Ownership Type', 'Number of Certified Beds', 'Average Number of Residents per Day', \
                                       'Provider Type', 'Special Focus Status', 'Abuse Icon', 'Overall Rating', 'Health Inspection Rating',\
                                        'QM Rating', 'Staffing Rating', 'Number of Citations from Infection Control Inspections', 'Total Amount of Fines in Dollars']]
smaller_provider_info.head()

Unnamed: 0,CMS Certification Number (CCN),State,Ownership Type,Number of Certified Beds,Average Number of Residents per Day,Provider Type,Special Focus Status,Abuse Icon,Overall Rating,Health Inspection Rating,QM Rating,Staffing Rating,Number of Citations from Infection Control Inspections,Total Amount of Fines in Dollars
0,15009,AL,For profit - Corporation,57,50.0,Medicare and Medicaid,,N,2.0,2.0,4.0,4.0,0.717391,24644.14
1,15010,AL,For profit - Corporation,85,76.9,Medicare and Medicaid,,N,4.0,4.0,4.0,3.0,0.0,0.0
2,15012,AL,Government - County,50,45.1,Medicare and Medicaid,,N,4.0,4.0,2.0,3.0,0.717391,0.0
3,15014,AL,For profit - Individual,92,76.9,Medicare and Medicaid,,N,2.0,3.0,3.0,1.0,0.0,0.0
4,15015,AL,For profit - Individual,103,86.6,Medicare and Medicaid,,N,2.0,2.0,2.0,4.0,0.717391,0.0


In [30]:
# Let's replace all the 'N' in the 'Abuse Icon' with 0. We will also replace all the 'Y' in this column with 1 to make statistical analysis easier.
smaller_provider_info['Abuse Icon'] = smaller_provider_info['Abuse Icon'].replace('N', 0)
smaller_provider_info['Abuse Icon'] = smaller_provider_info['Abuse Icon'].replace('Y', 1)
smaller_provider_info.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,CMS Certification Number (CCN),State,Ownership Type,Number of Certified Beds,Average Number of Residents per Day,Provider Type,Special Focus Status,Abuse Icon,Overall Rating,Health Inspection Rating,QM Rating,Staffing Rating,Number of Citations from Infection Control Inspections,Total Amount of Fines in Dollars
0,15009,AL,For profit - Corporation,57,50.0,Medicare and Medicaid,,0,2.0,2.0,4.0,4.0,0.717391,24644.14
1,15010,AL,For profit - Corporation,85,76.9,Medicare and Medicaid,,0,4.0,4.0,4.0,3.0,0.0,0.0
2,15012,AL,Government - County,50,45.1,Medicare and Medicaid,,0,4.0,4.0,2.0,3.0,0.717391,0.0
3,15014,AL,For profit - Individual,92,76.9,Medicare and Medicaid,,0,2.0,3.0,3.0,1.0,0.0,0.0
4,15015,AL,For profit - Individual,103,86.6,Medicare and Medicaid,,0,2.0,2.0,2.0,4.0,0.717391,0.0


#### Exploratory Data Analysis (EDA)

In [31]:
# QM (quality measure) map on USA
qm_rating = smaller_provider_info.groupby('State')[['QM Rating']].mean().sort_values(by = 'QM Rating', ascending = False).reset_index()
qm_rating.head()

Unnamed: 0,State,QM Rating
0,HI,4.658537
1,NJ,4.438235
2,UT,4.371134
3,CA,4.328371
4,ID,4.282051


In [32]:
qm_county_graph = px.choropleth(
    qm_rating,
    locations = 'State',
    locationmode= 'USA-states',
    color = 'QM Rating',
    color_continuous_scale= 'RdBu',
    scope = 'usa',
    title = 'Average QM Rating by State'
)
qm_county_graph

In [33]:
provider_info.head()

Unnamed: 0,CMS Certification Number (CCN),Provider Name,Provider Address,City/Town,State,ZIP Code,Telephone Number,Provider SSA County Code,County/Parish,Ownership Type,...,Number of Citations from Infection Control Inspections,Number of Fines,Total Amount of Fines in Dollars,Number of Payment Denials,Total Number of Penalties,Location,Latitude,Longitude,Geocoding Footnote,Processing Date
0,15009,"BURNS NURSING HOME, INC.",701 MONROE STREET NW,RUSSELLVILLE,AL,35653,2563324110,290,Franklin,For profit - Corporation,...,0.717391,2,24644.14,0,2,"701 MONROE STREET NW,RUSSELLVILLE,AL,35653",34.5149,-87.736,,2024-07-01
1,15010,COOSA VALLEY HEALTHCARE CENTER,260 WEST WALNUT STREET,SYLACAUGA,AL,35150,2562495604,600,Talladega,For profit - Corporation,...,0.0,0,0.0,0,0,"260 WEST WALNUT STREET,SYLACAUGA,AL,35150",33.1637,-86.254,,2024-07-01
2,15012,HIGHLANDS HEALTH AND REHAB,380 WOODS COVE ROAD,SCOTTSBORO,AL,35768,2562183708,350,Jackson,Government - County,...,0.717391,0,0.0,0,0,"380 WOODS COVE ROAD,SCOTTSBORO,AL,35768",34.6611,-86.047,,2024-07-01
3,15014,EASTVIEW REHABILITATION & HEALTHCARE CENTER,7755 FOURTH AVENUE SOUTH,BIRMINGHAM,AL,35206,2058330146,360,Jefferson,For profit - Individual,...,0.0,0,0.0,0,0,"7755 FOURTH AVENUE SOUTH,BIRMINGHAM,AL,35206",33.5595,-86.722,,2024-07-01
4,15015,PLANTATION MANOR NURSING HOME,6450 OLD TUSCALOOSA HIGHWAY,MC CALLA,AL,35111,2054776161,360,Jefferson,For profit - Individual,...,0.717391,0,0.0,0,0,"6450 OLD TUSCALOOSA HIGHWAY,MC CALLA,AL,35111",33.3221,-87.034,,2024-07-01


In [45]:
qm_rating_counties = provider_info.groupby(['State', 'Latitude', 'Longitude', 'County/Parish'])[['QM Rating']].mean().reset_index()
qm_rating_csv = qm_rating_counties.to_csv()
qm_rating_csv

',State,Latitude,Longitude,County/Parish,QM Rating\n0,AK,55.3119,-131.587,Ketchikan Gateway,5.0\n1,AK,56.4575,-132.369,Wrangell,4.0\n2,AK,56.8125,-132.956,Petersburg Borough,4.0\n3,AK,57.0594,-135.344,Sitka Borough,4.0\n4,AK,57.801,-152.374,Kodiak Island Borough,4.0\n5,AK,58.3311,-134.466,Juneau,4.0\n6,AK,59.6516,-151.55,Kenai Peninsula,5.0\n7,AK,60.1328,-149.442,Kenai Peninsula,1.0\n8,AK,60.4932,-151.081,Kenai Peninsula,5.0\n9,AK,60.5416,-145.75,Chugach,5.0\n10,AK,60.7858,-161.786,Bethel,3.0\n11,AK,61.1363,-146.35,Chugach,5.0\n12,AK,61.2045,-149.748,Anchorage,4.5\n13,AK,61.2257,-149.714,Anchorage,4.0\n14,AK,61.5879,-149.36,Matanuska-Susitna,5.0\n15,AK,61.6167,-149.116,Matanuska-Susitna,3.0\n16,AK,64.7874,-165.197,Nome,2.0\n17,AK,64.8304,-147.738,Fairbanks North Star,5.0\n18,AK,66.8953,-162.589,Northwest Arctic,2.0\n19,AL,30.4269,-87.685,Baldwin,5.0\n20,AL,30.4906,-88.331,Mobile,4.0\n21,AL,30.5216,-87.905,Baldwin,2.0\n22,AL,30.5547,-87.9,Baldwin,3.0\n23,AL,30.5694,-87.631,Baldwin,3.0\n

In [35]:
counties = alt.topo_feature(data.us_10m.url, 'counties')
source = data.unemployment.url
source

'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/unemployment.tsv'