In [1]:
#Exploring data from the Provisional COVID-19 death counts in the US, by county.

# This dataset includes a snapshot of total deaths recorded between  01/01/2020 and 10/16/2021, as measured in 10/20/2021. 
# It covers data from all counties in the US.

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns

In [3]:
df = pd.read_csv("../data_sources/Provisional_COVID-19_Death_Counts_in_the_United_States_by_County.csv")

In [4]:
df.head()

Unnamed: 0,Date as of,Start Date,End Date,State,County name,FIPS County Code,Urban Rural Code,Deaths involving COVID-19,Deaths from All Causes,Footnote
0,10/20/2021,01/01/2020,10/16/2021,AK,Aleutians East Borough,2013,Noncore,,14.0,One or more data cells have counts between 1-9...
1,10/20/2021,01/01/2020,10/16/2021,AK,Anchorage Municipality,2020,Medium metro,366.0,4133.0,
2,10/20/2021,01/01/2020,10/16/2021,AK,Bethel Census Area,2050,Noncore,18.0,194.0,
3,10/20/2021,01/01/2020,10/16/2021,AK,Denali Borough,2068,Noncore,,11.0,One or more data cells have counts between 1-9...
4,10/20/2021,01/01/2020,10/16/2021,AK,Dillingham Census Area,2070,Noncore,,45.0,One or more data cells have counts between 1-9...


In [5]:
df.count()

Date as of                   3030
Start Date                   3030
End Date                     3030
State                        3030
County name                  3030
FIPS County Code             3030
Urban Rural Code             3030
Deaths involving COVID-19    2439
Deaths from All Causes       3029
Footnote                      591
dtype: int64

In [6]:
df.dtypes

Date as of                    object
Start Date                    object
End Date                      object
State                         object
County name                   object
FIPS County Code               int64
Urban Rural Code              object
Deaths involving COVID-19    float64
Deaths from All Causes       float64
Footnote                      object
dtype: object

In [7]:
# Noting: CLEAN UP TASK:
# Dates are objects, and not date types.

In [8]:
df_new = df.rename(columns={'Date as of':'Date-as-of', 'Start Date':'Start_date', 'End Date': 'End_date',
                            'County name':'county', 'FIPS County Code':'FIPS_code', 'Urban Rural Code':'Urban-Rural_code',
                           'Deaths involving COVID-19':'Deaths_COVID', 'Deaths from All Causes':'Deaths_All'})

In [9]:
df_new.head()

Unnamed: 0,Date-as-of,Start_date,End_date,State,county,FIPS_code,Urban-Rural_code,Deaths_COVID,Deaths_All,Footnote
0,10/20/2021,01/01/2020,10/16/2021,AK,Aleutians East Borough,2013,Noncore,,14.0,One or more data cells have counts between 1-9...
1,10/20/2021,01/01/2020,10/16/2021,AK,Anchorage Municipality,2020,Medium metro,366.0,4133.0,
2,10/20/2021,01/01/2020,10/16/2021,AK,Bethel Census Area,2050,Noncore,18.0,194.0,
3,10/20/2021,01/01/2020,10/16/2021,AK,Denali Borough,2068,Noncore,,11.0,One or more data cells have counts between 1-9...
4,10/20/2021,01/01/2020,10/16/2021,AK,Dillingham Census Area,2070,Noncore,,45.0,One or more data cells have counts between 1-9...


In [10]:
df_new.dtypes

Date-as-of           object
Start_date           object
End_date             object
State                object
county               object
FIPS_code             int64
Urban-Rural_code     object
Deaths_COVID        float64
Deaths_All          float64
Footnote             object
dtype: object

In [11]:
df_new['Date-as-of']=pd.to_datetime(df_new['Date-as-of'])

In [12]:
df_new.head()

Unnamed: 0,Date-as-of,Start_date,End_date,State,county,FIPS_code,Urban-Rural_code,Deaths_COVID,Deaths_All,Footnote
0,2021-10-20,01/01/2020,10/16/2021,AK,Aleutians East Borough,2013,Noncore,,14.0,One or more data cells have counts between 1-9...
1,2021-10-20,01/01/2020,10/16/2021,AK,Anchorage Municipality,2020,Medium metro,366.0,4133.0,
2,2021-10-20,01/01/2020,10/16/2021,AK,Bethel Census Area,2050,Noncore,18.0,194.0,
3,2021-10-20,01/01/2020,10/16/2021,AK,Denali Borough,2068,Noncore,,11.0,One or more data cells have counts between 1-9...
4,2021-10-20,01/01/2020,10/16/2021,AK,Dillingham Census Area,2070,Noncore,,45.0,One or more data cells have counts between 1-9...


In [13]:
df_new.dtypes

Date-as-of          datetime64[ns]
Start_date                  object
End_date                    object
State                       object
county                      object
FIPS_code                    int64
Urban-Rural_code            object
Deaths_COVID               float64
Deaths_All                 float64
Footnote                    object
dtype: object

In [14]:
df_new['Start_date']=pd.to_datetime(df_new['Start_date'])

In [15]:
df_new['End_date']=pd.to_datetime(df_new['End_date'])

In [16]:
df_new.head()

Unnamed: 0,Date-as-of,Start_date,End_date,State,county,FIPS_code,Urban-Rural_code,Deaths_COVID,Deaths_All,Footnote
0,2021-10-20,2020-01-01,2021-10-16,AK,Aleutians East Borough,2013,Noncore,,14.0,One or more data cells have counts between 1-9...
1,2021-10-20,2020-01-01,2021-10-16,AK,Anchorage Municipality,2020,Medium metro,366.0,4133.0,
2,2021-10-20,2020-01-01,2021-10-16,AK,Bethel Census Area,2050,Noncore,18.0,194.0,
3,2021-10-20,2020-01-01,2021-10-16,AK,Denali Borough,2068,Noncore,,11.0,One or more data cells have counts between 1-9...
4,2021-10-20,2020-01-01,2021-10-16,AK,Dillingham Census Area,2070,Noncore,,45.0,One or more data cells have counts between 1-9...


In [17]:
df_new.dtypes

Date-as-of          datetime64[ns]
Start_date          datetime64[ns]
End_date            datetime64[ns]
State                       object
county                      object
FIPS_code                    int64
Urban-Rural_code            object
Deaths_COVID               float64
Deaths_All                 float64
Footnote                    object
dtype: object

In [27]:
# check spelling of county names 
# FREQUENCEY = the frequency of the name across all states (ex/ 30 states have a "Washington County")


df_new['county'].value_counts()

Washington County    30
Jefferson County     25
Franklin County      23
Lincoln County       22
Jackson County       22
                     ..
Hampden County        1
Dukes County          1
Berkshire County      1
Barnstable County     1
Weston County         1
Name: county, Length: 1811, dtype: int64

In [28]:
#Isolate unique county names into its own dataframe:

df_new.county.unique()

array(['Aleutians East Borough', 'Anchorage Municipality',
       'Bethel Census Area', ..., 'Uinta County', 'Washakie County',
       'Weston County'], dtype=object)

In [20]:
county_unique = df_new.County_name.unique()

AttributeError: 'DataFrame' object has no attribute 'County_name'

In [21]:
county_unique

NameError: name 'county_unique' is not defined

In [22]:
#Isolate unique county names into its own dataframe, take two:
df_counties = pd. DataFrame(county_unique, columns=['County-name'])

NameError: name 'county_unique' is not defined

In [23]:
# MAX Deaths (COVID)
df_new['Deaths_COVID'].max()



24768.0

In [24]:
# MAX Deaths (ALL)
df_new['Deaths_All'].max()

146639.0

In [25]:
# Confirming all measures ended on the same date, 10-16-2021 (Confirmed)

df_new['End_date'].value_counts()

2021-10-16    3030
Name: End_date, dtype: int64

In [26]:
# SLICE - OREGON Data
oregon_data_df=df_new.loc[df_new["State"]=="OR"]
oregon_data_df

Unnamed: 0,Date-as-of,Start_date,End_date,State,county,FIPS_code,Urban-Rural_code,Deaths_COVID,Deaths_All,Footnote
2128,2021-10-20,2020-01-01,2021-10-16,OR,Baker County,41001,Noncore,,390.0,One or more data cells have counts between 1-9...
2129,2021-10-20,2020-01-01,2021-10-16,OR,Benton County,41003,Small metro,69.0,1300.0,
2130,2021-10-20,2020-01-01,2021-10-16,OR,Clackamas County,41005,Large fringe metro,321.0,6544.0,
2131,2021-10-20,2020-01-01,2021-10-16,OR,Clatsop County,41007,Micropolitan,20.0,724.0,
2132,2021-10-20,2020-01-01,2021-10-16,OR,Columbia County,41009,Large fringe metro,15.0,507.0,
2133,2021-10-20,2020-01-01,2021-10-16,OR,Coos County,41011,Micropolitan,55.0,1603.0,
2134,2021-10-20,2020-01-01,2021-10-16,OR,Crook County,41013,Micropolitan,16.0,401.0,
2135,2021-10-20,2020-01-01,2021-10-16,OR,Curry County,41015,Micropolitan,18.0,667.0,
2136,2021-10-20,2020-01-01,2021-10-16,OR,Deschutes County,41017,Small metro,244.0,3328.0,
2137,2021-10-20,2020-01-01,2021-10-16,OR,Douglas County,41019,Micropolitan,183.0,2924.0,


In [30]:
oregon_data_df['new column']=(oregon_data_df['Deaths_COVID']/oregon_data_df['Deaths_All'])*100

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  oregon_data_df['new column']=(oregon_data_df['Deaths_COVID']/oregon_data_df['Deaths_All'])*100


In [31]:
df_new2 = df_new

In [34]:
df_new2['%Deaths_Covid']=(df_new2['Deaths_COVID']/df_new2['Deaths_All'])*100

In [35]:
df_new2.head()

Unnamed: 0,Date-as-of,Start_date,End_date,State,county,FIPS_code,Urban-Rural_code,Deaths_COVID,Deaths_All,Footnote,new column,%Deaths_Covid
0,2021-10-20,2020-01-01,2021-10-16,AK,Aleutians East Borough,2013,Noncore,,14.0,One or more data cells have counts between 1-9...,,
1,2021-10-20,2020-01-01,2021-10-16,AK,Anchorage Municipality,2020,Medium metro,366.0,4133.0,,8.855553,8.855553
2,2021-10-20,2020-01-01,2021-10-16,AK,Bethel Census Area,2050,Noncore,18.0,194.0,,9.278351,9.278351
3,2021-10-20,2020-01-01,2021-10-16,AK,Denali Borough,2068,Noncore,,11.0,One or more data cells have counts between 1-9...,,
4,2021-10-20,2020-01-01,2021-10-16,AK,Dillingham Census Area,2070,Noncore,,45.0,One or more data cells have counts between 1-9...,,


In [47]:
df_new2=df_new2.drop(columns=['new column'])

KeyError: "['new column'] not found in axis"

In [48]:
df_new2.head()

Unnamed: 0,Date-as-of,Start_date,End_date,State,county,FIPS_code,Urban-Rural_code,Deaths_COVID,Deaths_All,Footnote,%Deaths_Covid
0,2021-10-20,2020-01-01,2021-10-16,AK,Aleutians East Borough,2013,Noncore,,14.0,One or more data cells have counts between 1-9...,
1,2021-10-20,2020-01-01,2021-10-16,AK,Anchorage Municipality,2020,Medium metro,366.0,4133.0,,8.855553
2,2021-10-20,2020-01-01,2021-10-16,AK,Bethel Census Area,2050,Noncore,18.0,194.0,,9.278351
3,2021-10-20,2020-01-01,2021-10-16,AK,Denali Borough,2068,Noncore,,11.0,One or more data cells have counts between 1-9...,
4,2021-10-20,2020-01-01,2021-10-16,AK,Dillingham Census Area,2070,Noncore,,45.0,One or more data cells have counts between 1-9...,


In [51]:
# SLICE - OREGON Data take two
oregon_data_2=df_new2.loc[df_new["State"]=="OR"]
oregon_data_2

Unnamed: 0,Date-as-of,Start_date,End_date,State,county,FIPS_code,Urban-Rural_code,Deaths_COVID,Deaths_All,Footnote,%Deaths_Covid
2128,2021-10-20,2020-01-01,2021-10-16,OR,Baker County,41001,Noncore,,390.0,One or more data cells have counts between 1-9...,
2129,2021-10-20,2020-01-01,2021-10-16,OR,Benton County,41003,Small metro,69.0,1300.0,,5.307692
2130,2021-10-20,2020-01-01,2021-10-16,OR,Clackamas County,41005,Large fringe metro,321.0,6544.0,,4.905257
2131,2021-10-20,2020-01-01,2021-10-16,OR,Clatsop County,41007,Micropolitan,20.0,724.0,,2.762431
2132,2021-10-20,2020-01-01,2021-10-16,OR,Columbia County,41009,Large fringe metro,15.0,507.0,,2.95858
2133,2021-10-20,2020-01-01,2021-10-16,OR,Coos County,41011,Micropolitan,55.0,1603.0,,3.431067
2134,2021-10-20,2020-01-01,2021-10-16,OR,Crook County,41013,Micropolitan,16.0,401.0,,3.990025
2135,2021-10-20,2020-01-01,2021-10-16,OR,Curry County,41015,Micropolitan,18.0,667.0,,2.698651
2136,2021-10-20,2020-01-01,2021-10-16,OR,Deschutes County,41017,Small metro,244.0,3328.0,,7.331731
2137,2021-10-20,2020-01-01,2021-10-16,OR,Douglas County,41019,Micropolitan,183.0,2924.0,,6.25855
