In [1]:
# Select matplotlib's interactive "notebook" backend for figures drawn in this notebook.
# NOTE(review): this backend is tied to the classic Notebook front end, as far as I know —
# confirm it is available where this notebook is run.
%matplotlib notebook

In [2]:
# Dependencies
import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# JSON endpoint on data.cdc.gov that every request in this notebook targets.
url = "https://data.cdc.gov/resource/pp7x-dyj2.json"
# Print the response object to the console (shows the HTTP status).
# The timeout makes a stalled connection raise instead of hanging the kernel.
print(requests.get(url, timeout=30))

<Response [200]>


In [4]:
# Show the decoded JSON payload exactly as returned (a list of dicts, one per record).
# The timeout makes a stalled connection raise instead of hanging the kernel.
print(requests.get(url, timeout=30).json())

[{'geoid': 'State', 'state': 'New Hampshire', 'age': 'All', 'season': '2017-18', 'mmwr_year_week': '201805', 'deaths_from_pneumonia_and_influenza': '28', 'all_deaths': '286', 'pecent_of_deaths_due_to_pneumonia_or_influenza': '9.7902097902', 'pecent_complete': '143.27764328'}, {'geoid': 'State', 'state': 'New York City', 'age': 'All', 'season': '2015-16', 'mmwr_year_week': '201629', 'deaths_from_pneumonia_and_influenza': '64', 'all_deaths': '972', 'pecent_of_deaths_due_to_pneumonia_or_influenza': '6.5843621399', 'pecent_complete': '97.618498443'}, {'geoid': 'State', 'state': 'Missouri', 'age': 'All', 'season': '2009-10', 'mmwr_year_week': '201020', 'deaths_from_pneumonia_and_influenza': '84', 'all_deaths': '984', 'pecent_of_deaths_due_to_pneumonia_or_influenza': '8.5365853659', 'pecent_complete': '93.231242095'}, {'geoid': 'Region', 'region': '3', 'age': 'All', 'season': '2010-11', 'mmwr_year_week': '201134', 'deaths_from_influenza': '0', 'deaths_from_pneumonia': '280', 'deaths_from_pne

In [5]:
# Fetch the records and keep the decoded payload in `response` for the cells below.
# - timeout: a stalled connection raises instead of hanging the kernel.
# - raise_for_status: an HTTP error becomes an exception here rather than an
#   error body being parsed and treated as data further down.
# NOTE(review): this looks like a Socrata-style endpoint, which presumably caps
# unpaginated responses (1,000 rows by default, I believe) — confirm whether
# `$limit`/`$offset` paging is needed before trusting the per-state totals.
api_reply = requests.get(url, timeout=30)
api_reply.raise_for_status()
response = api_reply.json()

# Pretty-print the payload with keys sorted so records are easy to compare.
print(json.dumps(response, indent=4, sort_keys=True))

[
    {
        "age": "All",
        "all_deaths": "286",
        "deaths_from_pneumonia_and_influenza": "28",
        "geoid": "State",
        "mmwr_year_week": "201805",
        "pecent_complete": "143.27764328",
        "pecent_of_deaths_due_to_pneumonia_or_influenza": "9.7902097902",
        "season": "2017-18",
        "state": "New Hampshire"
    },
    {
        "age": "All",
        "all_deaths": "972",
        "deaths_from_pneumonia_and_influenza": "64",
        "geoid": "State",
        "mmwr_year_week": "201629",
        "pecent_complete": "97.618498443",
        "pecent_of_deaths_due_to_pneumonia_or_influenza": "6.5843621399",
        "season": "2015-16",
        "state": "New York City"
    },
    {
        "age": "All",
        "all_deaths": "984",
        "deaths_from_pneumonia_and_influenza": "84",
        "geoid": "State",
        "mmwr_year_week": "201020",
        "pecent_complete": "93.231242095",
        "pecent_of_deaths_due_to_pneumonia_or_influenza": "8.536585

In [6]:
# Load the list of JSON records into a DataFrame; keys missing from a record
# show up as NaN in that row. Preview the first five rows.
df = pd.DataFrame(data=response)
df.head(n=5)

Unnamed: 0,geoid,state,age,season,mmwr_year_week,deaths_from_pneumonia_and_influenza,all_deaths,pecent_of_deaths_due_to_pneumonia_or_influenza,pecent_complete,region,deaths_from_influenza,deaths_from_pneumonia
0,State,New Hampshire,All,2017-18,201805,28,286,9.7902097902,143.27764328,,,
1,State,New York City,All,2015-16,201629,64,972,6.5843621399,97.618498443,,,
2,State,Missouri,All,2009-10,201020,84,984,8.5365853659,93.231242095,,,
3,Region,,All,2010-11,201134,280,4662,6.006006006,92.565116153,3.0,0.0,280.0
4,State,Kansas,All,2017-18,201820,19,496,3.8306451613,105.19458116,,,


In [7]:
# List every column label present in the freshly built frame.
df.columns

Index(['geoid', 'state', 'age', 'season', 'mmwr_year_week',
       'deaths_from_pneumonia_and_influenza', 'all_deaths',
       'pecent_of_deaths_due_to_pneumonia_or_influenza', 'pecent_complete',
       'region', 'deaths_from_influenza', 'deaths_from_pneumonia'],
      dtype='object')

In [8]:
# Keep only the geography level, state name, season and combined death count.
# ('pecent' is the API's own spelling of those column names.)
# errors='ignore' lets this cell be re-run, and tolerates a payload in which the
# columns that only some records carry (region, deaths_from_influenza,
# deaths_from_pneumonia) never appear, without raising KeyError.
df = df.drop(
    columns=[
        'mmwr_year_week', 'all_deaths', 'age', 'region',
        'pecent_of_deaths_due_to_pneumonia_or_influenza',
        'pecent_complete', 'deaths_from_influenza', 'deaths_from_pneumonia',
    ],
    errors='ignore',
)


In [9]:
# Confirm which columns remain after the drop.
df.columns

Index(['geoid', 'state', 'season', 'deaths_from_pneumonia_and_influenza'], dtype='object')

In [10]:
# Replace the API's snake_case field names with human-readable column labels.
df = df.rename(
    {
        'geoid': 'Geo ID',
        'state': 'State',
        'season': 'Season',
        'deaths_from_pneumonia_and_influenza': 'Deaths by Pneumonia & Influenza',
    },
    axis='columns',
)
df.head(n=5)

Unnamed: 0,Geo ID,State,Season,Deaths by Pneumonia & Influenza
0,State,New Hampshire,2017-18,28
1,State,New York City,2015-16,64
2,State,Missouri,2009-10,84
3,Region,,2010-11,280
4,State,Kansas,2017-18,19


In [11]:
# The API delivers the counts as strings; convert the column to a numeric dtype.
df['Deaths by Pneumonia & Influenza'] = (
    df['Deaths by Pneumonia & Influenza'].pipe(pd.to_numeric)
)

In [12]:
# Check the dtype of the death-count column after the numeric conversion.
df['Deaths by Pneumonia & Influenza'].dtype

dtype('int64')

In [13]:
# Keep only the rows reported at state level (this drops the 'Region' rows).
df = df[df['Geo ID'].eq('State')]
df.head(n=5)

Unnamed: 0,Geo ID,State,Season,Deaths by Pneumonia & Influenza
0,State,New Hampshire,2017-18,28
1,State,New York City,2015-16,64
2,State,Missouri,2009-10,84
4,State,Kansas,2017-18,19
5,State,Georgia,2009-10,89


In [14]:
# 'Geo ID' is the same value on every remaining row, so remove the column.
df = df.drop('Geo ID', axis='columns')
df.head(n=5)

Unnamed: 0,State,Season,Deaths by Pneumonia & Influenza
0,New Hampshire,2017-18,28
1,New York City,2015-16,64
2,Missouri,2009-10,84
4,Kansas,2017-18,19
5,Georgia,2009-10,89


In [15]:
# Show the dtype of every remaining column.
df.dtypes

State                              object
Season                             object
Deaths by Pneumonia & Influenza     int64
dtype: object

In [16]:
# Move the state name out of the columns and make it the row index.
df = df.set_index(keys='State', drop=True)
df.head(n=5)

Unnamed: 0_level_0,Season,Deaths by Pneumonia & Influenza
State,Unnamed: 1_level_1,Unnamed: 2_level_1
New Hampshire,2017-18,28
New York City,2015-16,64
Missouri,2009-10,84
Kansas,2017-18,19
Georgia,2009-10,89


In [18]:
# Total pneumonia & influenza deaths per state for the 2017-18 season, largest first.
# The death-count column is selected before aggregating so the text 'Season'
# column is never passed through sum(). Despite the `_df` suffix, the result
# is a Series indexed by state.
seventeen_eighteen_df = (
    df.loc[df['Season'] == "2017-18"]
    .groupby('State')['Deaths by Pneumonia & Influenza']
    .sum()
    .sort_values(ascending=False)
)
seventeen_eighteen_df.head()

State
Maryland     461
Minnesota    269
Alabama      264
Texas        238
New York     205
Name: Deaths by Pneumonia & Influenza, dtype: int64

In [42]:
# y_axis = [461, 269, 264, 238, 205]
# x_axis = np.arange(len(y_axis))
# plt.bar(x_axis, y_axis, color='r', alpha=0.5, align="center")
# tick_locations = [value for value in x_axis]
# plt.xticks(tick_locations, ['Maryland','Minnesota','Alabama','Texas','New York'])
# plt.xlim(-0.75, len(x_axis)-0.25)
# plt.ylim(0, max(y_axis)+100)
# plt.title("Top Five States for Pneumonia & Influenza Deaths of 2017-18")
# plt.xlabel("State")
# plt.ylabel("Number of Deaths")
# plt.tight_layout()
# plt.savefig("Top Five States for Pneumonia & Influenza Deaths of 2017-18.png")
# plt.show()

In [43]:
# Total pneumonia & influenza deaths per state for the 2016-17 season, largest first.
# The death-count column is selected before aggregating so the text 'Season'
# column is never passed through sum(). Despite the `_df` suffix, the result
# is a Series indexed by state.
sixteen_seventeen_df = (
    df.loc[df['Season'] == "2016-17"]
    .groupby('State')['Deaths by Pneumonia & Influenza']
    .sum()
    .sort_values(ascending=False)
)
sixteen_seventeen_df.head()

State
Alabama          315
New Jersey       274
Pennsylvania     269
Massachusetts    228
Maryland         219
Name: Deaths by Pneumonia & Influenza, dtype: int64

In [45]:
# y_axis = [315,274,269,228,219]
# x_axis = np.arange(len(y_axis))

# plt.bar(x_axis, y_axis, color='b', alpha=0.5, align="center")
# tick_locations = [value for value in x_axis]
# plt.xticks(tick_locations, ['Alabama','New Jersey','Pennsylvania','Massachusetts','Maryland'])
# plt.xlim(-0.75, len(x_axis)-0.25)
# plt.ylim(0, max(y_axis)+100)
# plt.title("Top Five States for Pneumonia & Influenza Deaths of 2016-17")
# plt.xlabel("State")
# plt.ylabel("Number of Deaths")
# plt.tight_layout()
# plt.savefig("Top Five States for Pneumonia & Influenza Deaths of 2016-17.png")
# plt.show()

In [46]:
# Total pneumonia & influenza deaths per state for the 2015-16 season, largest first.
# The death-count column is selected before aggregating so the text 'Season'
# column is never passed through sum(). Despite the `_df` suffix, the result
# is a Series indexed by state.
fifteen_sixteen_df = (
    df.loc[df['Season'] == "2015-16"]
    .groupby('State')['Deaths by Pneumonia & Influenza']
    .sum()
    .sort_values(ascending=False)
)
fifteen_sixteen_df.head()

State
Texas         502
California    427
Ohio          375
Illinois      279
Georgia       274
Name: Deaths by Pneumonia & Influenza, dtype: int64

In [48]:
# y_axis = [502, 427, 375, 279, 274]
# x_axis = np.arange(len(y_axis))

# plt.bar(x_axis, y_axis, color='g', alpha=0.5, align="center")
# tick_locations = [value for value in x_axis]
# plt.xticks(tick_locations, ['Texas','California','Ohio','Illinois','Georgia'])
# plt.xlim(-0.75, len(x_axis)-0.25)
# plt.ylim(0, max(y_axis)+100)
# plt.title("Top Five States for Pneumonia & Influenza Deaths of 2015-16")
# plt.xlabel("State")
# plt.ylabel("Number of Deaths")
# plt.tight_layout()
# plt.savefig("Top Five States for Pneumonia & Influenza Deaths of 2015-16.png")
# plt.show()

In [49]:
# Total pneumonia & influenza deaths per state for the 2014-15 season, largest first.
# The death-count column is selected before aggregating so the text 'Season'
# column is never passed through sum(). Despite the `_df` suffix, the result
# is a Series indexed by state.
fourteen_fifteen_df = (
    df.loc[df['Season'] == "2014-15"]
    .groupby('State')['Deaths by Pneumonia & Influenza']
    .sum()
    .sort_values(ascending=False)
)
fourteen_fifteen_df.head()

State
California      1841
Texas           1222
Florida          702
Ohio             472
Pennsylvania     340
Name: Deaths by Pneumonia & Influenza, dtype: int64

In [52]:
# y_axis = [1841, 1222, 702, 472, 340]
# x_axis = np.arange(len(y_axis))

# plt.bar(x_axis, y_axis, color='y', alpha=0.5, align="center")
# tick_locations = [value for value in x_axis]
# plt.xticks(tick_locations, ['California','Texas','Florida','Ohio','Pennsylvania'])
# plt.xlim(-0.75, len(x_axis)-0.25)
# plt.ylim(0, max(y_axis)+100)
# plt.title("Top Five States for Pneumonia & Influenza Deaths of 2014-15")
# plt.xlabel("State")
# plt.ylabel("Number of Deaths")
# plt.tight_layout()
# plt.savefig("Top Five States for Pneumonia & Influenza Deaths of 2014-15.png")
# plt.show()

In [53]:
# Total pneumonia & influenza deaths per state for the 2013-14 season, largest first.
# The death-count column is selected before aggregating so the text 'Season'
# column is never passed through sum(). Despite the `_df` suffix, the result
# is a Series indexed by state.
thirteen_fourteen_df = (
    df.loc[df['Season'] == "2013-14"]
    .groupby('State')['Deaths by Pneumonia & Influenza']
    .sum()
    .sort_values(ascending=False)
)
thirteen_fourteen_df.head()

State
California        596
Illinois          429
Tennessee         347
North Carolina    335
Indiana           241
Name: Deaths by Pneumonia & Influenza, dtype: int64

In [60]:
# y_axis = [596, 429, 347, 335, 241]
# x_axis = np.arange(len(y_axis))

# plt.bar(x_axis, y_axis, color='teal',alpha=0.5, align="center")
# tick_locations = [value for value in x_axis]
# plt.xticks(tick_locations, ['California','Illinois','Tennessee','North Carolina','Indiana'])
# plt.xlim(-0.75, len(x_axis)-0.25)
# plt.ylim(0, max(y_axis)+100)
# plt.title("Top Five States for Pneumonia & Influenza Deaths of 2013-14")
# plt.xlabel("State")
# plt.ylabel("Number of Deaths")
# plt.tight_layout()
# plt.savefig("Top Five States for Pneumonia & Influenza Deaths of 2013-14.png")
# plt.show()

In [56]:
# Total pneumonia & influenza deaths per state for the 2012-13 season, largest first.
# The death-count column is selected before aggregating so the text 'Season'
# column is never passed through sum(). Despite the `_df` suffix, the result
# is a Series indexed by state.
twelfth_thirteen_df = (
    df.loc[df['Season'] == "2012-13"]
    .groupby('State')['Deaths by Pneumonia & Influenza']
    .sum()
    .sort_values(ascending=False)
)
twelfth_thirteen_df.head()

State
California      2091
Pennsylvania     649
Georgia          575
Ohio             538
Illinois         532
Name: Deaths by Pneumonia & Influenza, dtype: int64

In [58]:
# y_axis = [2091, 649, 575, 538, 532]
# x_axis = np.arange(len(y_axis))

# plt.bar(x_axis, y_axis, color='orange',alpha=0.5, align="center")
# tick_locations = [value for value in x_axis]
# plt.xticks(tick_locations, ['California','Pennsylvania','Georgia','Ohio','Illinois'])
# plt.xlim(-0.75, len(x_axis)-0.25)
# plt.ylim(0, max(y_axis)+100)
# plt.title("Top Five States for Pneumonia & Influenza Deaths of 2012-13")
# plt.xlabel("State")
# plt.ylabel("Number of Deaths")
# plt.tight_layout()
# plt.savefig("Top Five States for Pneumonia & Influenza Deaths of 2012-13.png")
# plt.show()

In [61]:
# Total pneumonia & influenza deaths per state for the 2011-12 season, largest first.
# The death-count column is selected before aggregating so the text 'Season'
# column is never passed through sum(). Despite the `_df` suffix, the result
# is a Series indexed by state.
eleventh_twelfth_df = (
    df.loc[df['Season'] == "2011-12"]
    .groupby('State')['Deaths by Pneumonia & Influenza']
    .sum()
    .sort_values(ascending=False)
)
eleventh_twelfth_df.head()

State
Tennessee     709
Michigan      434
New Jersey    404
California    386
Indiana       350
Name: Deaths by Pneumonia & Influenza, dtype: int64

In [65]:
# y_axis = [709, 434, 404, 386, 350]
# x_axis = np.arange(len(y_axis))

# plt.bar(x_axis, y_axis, color='purple',alpha=0.5, align="center")
# tick_locations = [value for value in x_axis]
# plt.xticks(tick_locations, ['Tennessee','Michigan','New Jersey','California','Indiana'])
# plt.xlim(-0.75, len(x_axis)-0.25)
# plt.ylim(0, max(y_axis)+100)
# plt.title("Top Five States for Pneumonia & Influenza Deaths of 2011-12")
# plt.xlabel("State")
# plt.ylabel("Number of Deaths")
# plt.tight_layout()
# plt.savefig("Top Five States for Pneumonia & Influenza Deaths of 2011-12.png")
# plt.show()

In [62]:
# Total pneumonia & influenza deaths per state for the 2010-11 season, largest first.
# The death-count column is selected before aggregating so the text 'Season'
# column is never passed through sum(). Despite the `_df` suffix, the result
# is a Series indexed by state.
tenth_eleventh_df = (
    df.loc[df['Season'] == "2010-11"]
    .groupby('State')['Deaths by Pneumonia & Influenza']
    .sum()
    .sort_values(ascending=False)
)
tenth_eleventh_df.head()

State
California    1541
Illinois       625
Ohio           509
Michigan       385
Maryland       379
Name: Deaths by Pneumonia & Influenza, dtype: int64

In [67]:
# y_axis = [1541, 625, 509, 385, 379]
# x_axis = np.arange(len(y_axis))

# plt.bar(x_axis, y_axis, color='pink',alpha=0.5, align="center")
# tick_locations = [value for value in x_axis]
# plt.xticks(tick_locations, ['California','Illinois','Ohio','Michigan','Maryland'])
# plt.xlim(-0.75, len(x_axis)-0.25)
# plt.ylim(0, max(y_axis)+100)
# plt.title("Top Five States for Pneumonia & Influenza Deaths of 2010-11")
# plt.xlabel("State")
# plt.ylabel("Number of Deaths")
# plt.tight_layout()
# plt.savefig("Top Five States for Pneumonia & Influenza Deaths of 2010-11.png")
# plt.show()

In [63]:
# Total pneumonia & influenza deaths per state for the 2009-10 season, largest first.
# The death-count column is selected before aggregating so the text 'Season'
# column is never passed through sum(). Despite the `_df` suffix, the result
# is a Series indexed by state.
ninth_tenth_df = (
    df.loc[df['Season'] == "2009-10"]
    .groupby('State')['Deaths by Pneumonia & Influenza']
    .sum()
    .sort_values(ascending=False)
)
ninth_tenth_df.head()

State
Michigan        500
Texas           460
Missouri        368
Georgia         351
Pennsylvania    345
Name: Deaths by Pneumonia & Influenza, dtype: int64

In [76]:
# y_axis = [500, 460, 368, 351, 345]
# x_axis = np.arange(len(y_axis))

# plt.bar(x_axis, y_axis, color='aqua',alpha=0.5, align="center")
# tick_locations = [value for value in x_axis]
# plt.xticks(tick_locations, ['Michigan','Texas','Missouri','Georgia','Pennsylvania'])
# plt.xlim(-0.75, len(x_axis)-0.25)
# plt.ylim(0, max(y_axis)+100)
# plt.title("Top Five States for Pneumonia & Influenza Deaths of 2009-10")
# plt.xlabel("State")
# plt.ylabel("Number of Deaths")
# plt.tight_layout()
# plt.savefig("Top Five States for Pneumonia & Influenza Deaths of 2009-10.png")
# plt.show()