# COVID Cases in the US

In [None]:
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from bokeh.plotting import figure, show

In [None]:
# Reporting window for the daily report files.
# start_date is included; end_date itself is excluded (one day is subtracted
# below), so the last file requested is for 2021-04-13.
start_date = datetime.date(2021, 1, 1)
end_date = datetime.date(2021, 4, 14)

# One timestamp per calendar day. 'D' is the canonical daily alias; the
# lowercase spelling is deprecated in recent pandas releases.
dates = pd.date_range(start_date, end_date - datetime.timedelta(days=1), freq='D')

# The daily report files are named MM-DD-YYYY.csv.
dates_str = dates.strftime("%m-%d-%Y")

In [None]:
# One raw-GitHub URL per selected date, pointing at that day's CSV in the
# CSSEGISandData COVID-19 daily reports folder.

base_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/'
file_list = [base_url + day_label + '.csv' for day_label in dates_str]

In [None]:
%%time

# List of DataFrames - each one covers one day of global data
df_list = [ pd.read_csv(file) for file in file_list ]

Wall time: 46.2 s


In [None]:
# Stack the per-day frames row-wise into a single table.
# Each frame keeps its own row labels, so index values repeat across days.

super_cases = pd.concat(df_list, axis=0)
super_cases

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio
0,,,,Afghanistan,2021-01-02 05:22:33,33.939110,67.709953,51526,2191,41727.0,0.0,Afghanistan,0.000000,4.252222
1,,,,Albania,2021-01-02 05:22:33,41.153300,20.168300,58316,1181,33634.0,23501.0,Albania,2026.409062,2.025173
2,,,,Algeria,2021-01-02 05:22:33,28.033900,1.659600,99897,2762,67395.0,29740.0,Algeria,227.809861,2.764848
3,,,,Andorra,2021-01-02 05:22:33,42.506300,1.521800,8117,84,7463.0,570.0,Andorra,10505.403482,1.034865
4,,,,Angola,2021-01-02 05:22:33,-11.202700,17.873900,17568,405,11146.0,6017.0,Angola,53.452981,2.305328
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3978,,,,Vietnam,2021-04-14 04:20:41,14.058324,108.277199,2714,35,2445.0,234.0,Vietnam,2.788206,1.289609
3979,,,,West Bank and Gaza,2021-04-14 04:20:41,31.952200,35.233200,272767,2901,237046.0,32820.0,West Bank and Gaza,5346.888001,1.063545
3980,,,,Yemen,2021-04-14 04:20:41,15.552727,48.516388,5507,1073,2070.0,2364.0,Yemen,18.463776,19.484293
3981,,,,Zambia,2021-04-14 04:20:41,-13.133897,27.849332,90218,1229,88144.0,845.0,Zambia,490.743124,1.362256


In [None]:
# Keep only the US rows (county names are stored in Admin2).
# .copy() makes US_cases an independent frame, so the columns added to it in
# later cells do not trigger pandas' SettingWithCopyWarning.

US_cases = super_cases[super_cases['Country_Region'] == 'US'].copy()
US_cases

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio
649,1001.0,Autauga,Alabama,US,2021-01-02 05:22:33,32.539527,-86.644082,4239,50,0.0,4189.0,"Autauga, Alabama, US",7587.391935,1.179523
650,1003.0,Baldwin,Alabama,US,2021-01-02 05:22:33,30.727750,-87.722071,13823,169,0.0,13654.0,"Baldwin, Alabama, US",6192.157109,1.2226
651,1005.0,Barbour,Alabama,US,2021-01-02 05:22:33,31.868263,-85.387129,1517,33,0.0,1484.0,"Barbour, Alabama, US",6145.183505,2.175346
652,1007.0,Bibb,Alabama,US,2021-01-02 05:22:33,32.996421,-87.125115,1854,46,0.0,1808.0,"Bibb, Alabama, US",8279.003304,2.481122
653,1009.0,Blount,Alabama,US,2021-01-02 05:22:33,33.982109,-86.567906,4693,63,0.0,4630.0,"Blount, Alabama, US",8115.726490,1.342425
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3924,56039.0,Teton,Wyoming,US,2021-04-14 04:20:41,43.935225,-110.589080,3675,9,,,"Teton, Wyoming, US",15662.291169,0.244898
3925,56041.0,Uinta,Wyoming,US,2021-04-14 04:20:41,41.287818,-110.547578,2153,12,,,"Uinta, Wyoming, US",10644.714724,0.557362
3926,90056.0,Unassigned,Wyoming,US,2021-04-14 04:20:41,,,0,0,,,"Unassigned, Wyoming, US",,
3927,56043.0,Washakie,Wyoming,US,2021-04-14 04:20:41,43.904516,-107.680187,898,26,,,"Washakie, Wyoming, US",11505.445227,2.895323


In [None]:
# Last_Update value of the first US row (positional row 0).
US_cases['Last_Update'].iloc[0]

'2021-01-02 05:22:33'

In [None]:
%%time
states2 = US_cases['Province_State'].to_list()
states2 = list(set(states2))
states2.sort()

Wall time: 11.9 ms


In [None]:
# Show the sorted, de-duplicated Province_State values found in the US rows.
states2

['Alabama',
 'Alaska',
 'Arizona',
 'Arkansas',
 'California',
 'Colorado',
 'Connecticut',
 'Delaware',
 'Diamond Princess',
 'District of Columbia',
 'Florida',
 'Georgia',
 'Grand Princess',
 'Guam',
 'Hawaii',
 'Idaho',
 'Illinois',
 'Indiana',
 'Iowa',
 'Kansas',
 'Kentucky',
 'Louisiana',
 'Maine',
 'Maryland',
 'Massachusetts',
 'Michigan',
 'Minnesota',
 'Mississippi',
 'Missouri',
 'Montana',
 'Nebraska',
 'Nevada',
 'New Hampshire',
 'New Jersey',
 'New Mexico',
 'New York',
 'North Carolina',
 'North Dakota',
 'Northern Mariana Islands',
 'Ohio',
 'Oklahoma',
 'Oregon',
 'Pennsylvania',
 'Puerto Rico',
 'Recovered',
 'Rhode Island',
 'South Carolina',
 'South Dakota',
 'Tennessee',
 'Texas',
 'Utah',
 'Vermont',
 'Virgin Islands',
 'Virginia',
 'Washington',
 'West Virginia',
 'Wisconsin',
 'Wyoming']

In [None]:
%%time
US_cases.loc[:, 'DateTime'] = pd.to_datetime(US_cases.loc[:, 'Last_Update'], format='%Y-%m-%d')
US_cases[['year','month','day']] = US_cases.DateTime.apply(lambda x: pd.Series(x.strftime("%Y,%m,%d").split(",")))#.astype(int)

Wall time: 1min 37s


In [None]:
# Build a sortable 'YYYYMMDD' key from the zero-padded year/month/day parts.
# assign() returns a new frame rather than writing a column into a slice,
# which avoids pandas' SettingWithCopyWarning.
US_cases = US_cases.assign(
    YearMonthDay=US_cases['year'].astype(str)
    + US_cases['month'].astype(str)
    + US_cases['day'].astype(str)
)
US_cases



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,...,Combined_Key,Incident_Rate,Case_Fatality_Ratio,DateTime,year,month,day,MonthYear,MonthDayYear,YearMonthDay
649,1001.0,Autauga,Alabama,US,2021-01-02 05:22:33,32.539527,-86.644082,4239,50,0.0,...,"Autauga, Alabama, US",7587.391935,1.179523,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
650,1003.0,Baldwin,Alabama,US,2021-01-02 05:22:33,30.727750,-87.722071,13823,169,0.0,...,"Baldwin, Alabama, US",6192.157109,1.2226,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
651,1005.0,Barbour,Alabama,US,2021-01-02 05:22:33,31.868263,-85.387129,1517,33,0.0,...,"Barbour, Alabama, US",6145.183505,2.175346,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
652,1007.0,Bibb,Alabama,US,2021-01-02 05:22:33,32.996421,-87.125115,1854,46,0.0,...,"Bibb, Alabama, US",8279.003304,2.481122,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
653,1009.0,Blount,Alabama,US,2021-01-02 05:22:33,33.982109,-86.567906,4693,63,0.0,...,"Blount, Alabama, US",8115.726490,1.342425,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3924,56039.0,Teton,Wyoming,US,2021-04-14 04:20:41,43.935225,-110.589080,3675,9,,...,"Teton, Wyoming, US",15662.291169,0.244898,2021-04-14 04:20:41,2021,04,14,42021,04142021,20210414
3925,56041.0,Uinta,Wyoming,US,2021-04-14 04:20:41,41.287818,-110.547578,2153,12,,...,"Uinta, Wyoming, US",10644.714724,0.557362,2021-04-14 04:20:41,2021,04,14,42021,04142021,20210414
3926,90056.0,Unassigned,Wyoming,US,2021-04-14 04:20:41,,,0,0,,...,"Unassigned, Wyoming, US",,,2021-04-14 04:20:41,2021,04,14,42021,04142021,20210414
3927,56043.0,Washakie,Wyoming,US,2021-04-14 04:20:41,43.904516,-107.680187,898,26,,...,"Washakie, Wyoming, US",11505.445227,2.895323,2021-04-14 04:20:41,2021,04,14,42021,04142021,20210414


In [None]:
# Nationwide Confirmed/Deaths totals per YearMonthDay key.
# The key is also kept as a regular column ('first') because the plotting
# cells read it by column name.
# String aggregation names are used because passing np.sum to agg() is
# deprecated in recent pandas.
# NOTE(review): the key comes from Last_Update, so rows whose update stamp is
# older than their report file form their own small groups -- confirm whether
# grouping by report date was intended.
US_cases_grouped = US_cases.groupby('YearMonthDay').agg(
    {'Confirmed': 'sum', 'Deaths': 'sum', 'YearMonthDay': 'first'}
)

In [None]:
X = US_cases_grouped['YearMonthDay']
y = US_cases_grouped.loc[:, 'Confirmed']

# X holds 'YYYYMMDD' strings; parse them to real timestamps and ask bokeh for
# a datetime axis so the points are placed on a proper time scale.
X_dates = pd.to_datetime(X, format='%Y%m%d')

p = figure(
    title='USA Cases',
    x_axis_label='Time',
    y_axis_label='Cases',
    x_axis_type='datetime',
)
p.line(X_dates, y, legend_label='USA')
show(p)

In [None]:
# Inspect the per-date Confirmed/Deaths totals for the whole US.
US_cases_grouped

Unnamed: 0_level_0,Confirmed,Deaths,YearMonthDay
YearMonthDay,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
20200804,15656,309,20200804
20200807,0,0,20200807
20201221,74580,256,20201221
20201229,9144,72,20201229
20210102,20249514,354247,20210102
...,...,...,...
20210410,31084810,561465,20210410
20210411,31151345,562178,20210411
20210412,31197725,562474,20210412
20210413,31267955,562947,20210413


In [None]:
# Hoverable line chart of nationwide confirmed cases.
# The index is dropped (it has the same name as the YearMonthDay column) and
# the 'YYYYMMDD' strings are parsed to timestamps so the x axis is a real
# time axis.
usa_plot_data = US_cases_grouped.reset_index(drop=True)
usa_plot_data['Date'] = pd.to_datetime(usa_plot_data['YearMonthDay'], format='%Y%m%d')

fig = px.line(usa_plot_data, x="Date", y="Confirmed", title="US confirmed COVID-19 cases")
fig.update_traces(mode="markers+lines")
fig.show()

## COVID cases by state

In [None]:
# First state to compare. Surrounding whitespace is stripped because the value
# is later matched with == against Province_State, where a stray space would
# select no rows.
state1 = input('What state?').strip()

What state?Texas


In [None]:
# Second state to compare. Surrounding whitespace is stripped because the
# value is later matched with == against Province_State, where a stray space
# would select no rows.
state2 = input('What state?').strip()

What state?Ohio


In [None]:
# Row labels of the US rows belonging to each selected state.
# The mask has to be applied before reading .index: the .index of the boolean
# comparison itself is the full US_cases index, whatever the state.
index1 = US_cases.loc[US_cases['Province_State'] == state1].index
index2 = US_cases.loc[US_cases['Province_State'] == state2].index

In [None]:
# All US rows (every date) whose Province_State equals the first chosen state.
is_state1 = US_cases['Province_State'].eq(state1)
state_cases1 = US_cases.loc[is_state1]
state_cases1

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,...,Combined_Key,Incident_Rate,Case_Fatality_Ratio,DateTime,year,month,day,MonthYear,MonthDayYear,YearMonthDay
3311,48001.0,Anderson,Texas,US,2021-01-02 05:22:33,31.815347,-95.653548,4814,65,0.0,...,"Anderson, Texas, US",8338.096475,1.350229,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
3312,48003.0,Andrews,Texas,US,2021-01-02 05:22:33,32.304686,-102.637655,1433,25,0.0,...,"Andrews, Texas, US",7661.053194,1.744592,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
3313,48005.0,Angelina,Texas,US,2021-01-02 05:22:33,31.254573,-94.609015,5696,149,0.0,...,"Angelina, Texas, US",6568.644410,2.615871,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
3314,48007.0,Aransas,Texas,US,2021-01-02 05:22:33,28.105562,-96.999505,737,23,0.0,...,"Aransas, Texas, US",3134.836240,3.12076,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
3315,48009.0,Archer,Texas,US,2021-01-02 05:22:33,33.615700,-98.687546,562,6,0.0,...,"Archer, Texas, US",6570.793873,1.067616,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3567,48499.0,Wood,Texas,US,2021-04-14 04:20:41,32.787224,-95.382364,3368,125,,...,"Wood, Texas, US",7395.858495,3.711401,2021-04-14 04:20:41,2021,04,14,42021,04142021,20210414
3568,48501.0,Yoakum,Texas,US,2021-04-14 04:20:41,33.173202,-102.827643,868,27,,...,"Yoakum, Texas, US",9962.125560,3.110599,2021-04-14 04:20:41,2021,04,14,42021,04142021,20210414
3569,48503.0,Young,Texas,US,2021-04-14 04:20:41,33.176597,-98.687909,2141,44,,...,"Young, Texas, US",11887.840089,2.055114,2021-04-14 04:20:41,2021,04,14,42021,04142021,20210414
3570,48505.0,Zapata,Texas,US,2021-04-14 04:20:41,27.001564,-99.169872,1762,34,,...,"Zapata, Texas, US",12426.828408,1.929625,2021-04-14 04:20:41,2021,04,14,42021,04142021,20210414


In [None]:
# All US rows (every date) whose Province_State equals the second chosen state.
is_state2 = US_cases['Province_State'].eq(state2)
state_cases2 = US_cases.loc[is_state2]
state_cases2

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,...,Combined_Key,Incident_Rate,Case_Fatality_Ratio,DateTime,year,month,day,MonthYear,MonthDayYear,YearMonthDay
2740,39001.0,Adams,Ohio,US,2021-01-02 05:22:33,38.845411,-83.471896,1367,28,0.0,...,"Adams, Ohio, US",4935.374395,1.02414,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
2741,39003.0,Allen,Ohio,US,2021-01-02 05:22:33,40.772852,-84.108023,8308,179,0.0,...,"Allen, Ohio, US",8117.165440,1.336062,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
2742,39005.0,Ashland,Ohio,US,2021-01-02 05:22:33,40.847723,-82.272808,2729,71,0.0,...,"Ashland, Ohio, US",5102.460549,1.355808,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
2743,39007.0,Ashtabula,Ohio,US,2021-01-02 05:22:33,41.708603,-80.748302,3829,127,0.0,...,"Ashtabula, Ohio, US",3937.639473,1.723688,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
2744,39009.0,Athens,Ohio,US,2021-01-02 05:22:33,39.334256,-82.042786,3101,25,0.0,...,"Athens, Ohio, US",4746.888729,0.290229,2021-01-02 05:22:33,2021,01,02,12021,01022021,20210102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2830,39167.0,Washington,Ohio,US,2021-04-14 04:20:41,39.456906,-81.491214,5053,108,,...,"Washington, Ohio, US",8434.177363,2.117554,2021-04-14 04:20:41,2021,04,14,42021,04142021,20210414
2831,39169.0,Wayne,Ohio,US,2021-04-14 04:20:41,40.829259,-81.888448,8506,210,,...,"Wayne, Ohio, US",7351.136462,2.457089,2021-04-14 04:20:41,2021,04,14,42021,04142021,20210414
2832,39171.0,Williams,Ohio,US,2021-04-14 04:20:41,41.560520,-84.584296,3205,74,,...,"Williams, Ohio, US",8734.874087,2.308892,2021-04-14 04:20:41,2021,04,14,42021,04142021,20210414
2833,39173.0,Wood,Ohio,US,2021-04-14 04:20:41,41.362248,-83.622851,12626,187,,...,"Wood, Ohio, US",9651.650779,1.45731,2021-04-14 04:20:41,2021,04,14,42021,04142021,20210414


In [None]:
def _daily_totals(cases):
    """Sum Confirmed and Deaths per YearMonthDay key.

    The key is also kept as a regular column ('first') because the plotting
    cells read it by column name. String aggregation names are used because
    passing np.sum to agg() is deprecated in recent pandas.
    """
    return cases.groupby('YearMonthDay').agg(
        {'Confirmed': 'sum', 'Deaths': 'sum', 'YearMonthDay': 'first'}
    )


# Per-date totals for each of the two selected states.
state_cases1_grouped = _daily_totals(state_cases1)
state_cases2_grouped = _daily_totals(state_cases2)

In [None]:
# Per-date totals for the first selected state.
state_cases1_grouped

Unnamed: 0_level_0,Confirmed,Deaths,YearMonthDay
YearMonthDay,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
20210102,1782956,28359,20210102
20210103,1801181,28442,20210103
20210104,1816886,28496,20210104
20210105,1831146,28570,20210105
20210106,1864173,28808,20210106
...,...,...,...
20210410,2822889,49121,20210410
20210411,2825551,49188,20210411
20210412,2826977,49212,20210412
20210413,2831972,49231,20210413


In [None]:
# Hoverable comparison of confirmed cases for the two selected states.
# Both states go into one long frame with a 'State' label so that a single
# px.line call draws one coloured line per state; building two separate
# figures and reusing the name `fig` would keep only the second one.
state_comparison = pd.concat(
    [
        state_cases1_grouped.reset_index(drop=True).assign(State=state1),
        state_cases2_grouped.reset_index(drop=True).assign(State=state2),
    ],
    ignore_index=True,
)
# Parse the 'YYYYMMDD' strings so the x axis is a real time axis.
state_comparison['Date'] = pd.to_datetime(state_comparison['YearMonthDay'], format='%Y%m%d')

fig = px.line(
    state_comparison,
    x="Date",
    y="Confirmed",
    color="State",
    title="Confirmed COVID-19 cases: " + state1 + " vs " + state2,
)
fig.update_traces(mode="markers+lines")
fig.show()

In [None]:
# Same two-state comparison built with graph_objects, using a unified hover
# label that lists both states' values for the hovered date.
fig = go.Figure()
for state_name, state_totals in ((state1, state_cases1_grouped), (state2, state_cases2_grouped)):
    fig.add_trace(
        go.Scatter(
            # Pass the actual data, not the column names: parsed dates on x,
            # summed confirmed cases on y.
            x=pd.to_datetime(state_totals['YearMonthDay'], format='%Y%m%d'),
            y=state_totals['Confirmed'],
            name=state_name,
        )
    )
fig.update_layout(
    hovermode='x unified',
    title='Confirmed COVID-19 cases by state',
    xaxis_title='Date',
    yaxis_title='Confirmed',
)
fig.show()

SyntaxError: invalid syntax (<ipython-input-147-f69a05e92f52>, line 7)