# COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at John Hopkins University


This is the data repository for the 2019 Novel Coronavirus Visual Dashboard operated by the Johns Hopkins University Center for Systems Science and Engineering (JHU CSSE). Also, Supported by ESRI Living Atlas Team and the Johns Hopkins University Applied Physics Lab (JHU APL).

<b> Data Source </b>

https://github.com/CSSEGISandData/COVID-19 <br>
https://github.com/imdevskp/covid_19_jhu_data_web_scrap_and_cleaning 

# Import Libraries


In [1]:
# for date and time operations
from datetime import datetime, timedelta
# for file and folder operations
import os
# for getting web contents
import requests 
# storing and analysing data
import pandas as pd
# numerical analysis
import numpy as np

# Download USA specific data
>Two time series tables are for the US confirmed cases and deaths, reported at the county level. They are named time_series_covid19_confirmed_US.csv, time_series_covid19_deaths_US.csv, respectively.

In [17]:
# download data
# =============

# urls of the files
urls = ['https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv', 
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv']

# directory for data
path = 'COVID Data/'

try:
    os.mkdir(path)
except OSError:
    print ("Creation of the directory %s failed" % path)
else:
    print ("Successfully created the directory %s " % path)
        
# download files to path (Files will download to local system)
print('Beginning file download with requests')
for url in urls:
    r = requests.get(url)
    with open(path+url.split('/')[-1], 'wb') as f:
        f.write(r.content)
    # Retrieve HTTP meta-data (200 = request successful)
    print(r.status_code)
    print(r.headers['content-type'])
    print(r.encoding)

Beginning file download with requests
Successfully created the directory COVID Data/ 
200
text/plain; charset=utf-8
utf-8
Creation of the directory COVID Data/ failed
200
text/plain; charset=utf-8
utf-8


In [38]:
# read data into dataframes
usa_confirmed_df = pd.read_csv(path+'time_series_covid19_confirmed_US.csv')
usa_deaths_df = pd.read_csv(path+'time_series_covid19_deaths_US.csv')

In [37]:
usa_confirmed_df.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,6/18/20,6/19/20,6/20/20,6/21/20,6/22/20,6/23/20,6/24/20,6/25/20,6/26/20,6/27/20
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.271,-170.132,...,0,0,0,0,0,0,0,0,0,0
1,316,GU,GUM,316,66.0,,Guam,US,13.4443,144.7937,...,192,200,222,222,222,225,226,231,247,247
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.0979,145.6739,...,30,30,30,30,30,30,30,30,30,30
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,...,6111,6195,6463,6525,6564,6685,6820,6877,6922,7066
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,...,73,73,73,76,76,76,76,80,81,81


In [36]:
usa_deaths_df.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,6/18/20,6/19/20,6/20/20,6/21/20,6/22/20,6/23/20,6/24/20,6/25/20,6/26/20,6/27/20
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.271,-170.132,...,0,0,0,0,0,0,0,0,0,0
1,316,GU,GUM,316,66.0,,Guam,US,13.4443,144.7937,...,5,5,5,5,5,5,5,5,5,5
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.0979,145.6739,...,2,2,2,2,2,2,2,2,2,2
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,...,147,147,147,149,149,149,149,151,151,152
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,...,6,6,6,6,6,6,6,6,6,6


# Tidy Data

- Confirmed and Deaths dataframs can be merged into a single data frame.  
- The dates can be pivoted into a single column (melt into long format)

In [30]:
# both data frames contain at least 600 rows and have many columns because of the trending dates
print(usa_confirmed_df.info())
print(usa_deaths_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3261 entries, 0 to 3260
Columns: 169 entries, UID to 6/27/20
dtypes: float64(3), int64(160), object(6)
memory usage: 4.2+ MB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3261 entries, 0 to 3260
Columns: 170 entries, UID to 6/27/20
dtypes: float64(3), int64(161), object(6)
memory usage: 4.2+ MB
None


In [32]:
# ids
ids = usa_confirmed_df.columns[0:11]
# dates
us_dates = usa_confirmed_df.columns[11:]

# melt to longer format
usa_confirmed_df_long = usa_confirmed_df.melt(id_vars=ids, value_vars=us_dates, var_name='Date', value_name='Confirmed')
usa_deaths_df_long = usa_deaths_df.melt(id_vars=ids, value_vars=us_dates, var_name='Date', value_name='Deaths')

In [43]:
# first few rows
usa_confirmed_df_long.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Date,Confirmed
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.271,-170.132,"American Samoa, US",1/22/20,0
1,316,GU,GUM,316,66.0,,Guam,US,13.4443,144.7937,"Guam, US",1/22/20,0
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.0979,145.6739,"Northern Mariana Islands, US",1/22/20,0
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,"Puerto Rico, US",1/22/20,0
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,"Virgin Islands, US",1/22/20,0


In [39]:
# first few rows
usa_deaths_df_long.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Date,Deaths
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.271,-170.132,"American Samoa, US",1/22/20,0
1,316,GU,GUM,316,66.0,,Guam,US,13.4443,144.7937,"Guam, US",1/22/20,0
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.0979,145.6739,"Northern Mariana Islands, US",1/22/20,0
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,"Puerto Rico, US",1/22/20,0
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,"Virgin Islands, US",1/22/20,0


In [44]:
print(usa_confirmed_df_long.info())
print(usa_deaths_df_long.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 515238 entries, 0 to 515237
Data columns (total 13 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   UID             515238 non-null  int64  
 1   iso2            515238 non-null  object 
 2   iso3            515238 non-null  object 
 3   code3           515238 non-null  int64  
 4   FIPS            513658 non-null  float64
 5   Admin2          514132 non-null  object 
 6   Province_State  515238 non-null  object 
 7   Country_Region  515238 non-null  object 
 8   Lat             515238 non-null  float64
 9   Long_           515238 non-null  float64
 10  Combined_Key    515238 non-null  object 
 11  Date            515238 non-null  object 
 12  Confirmed       515238 non-null  int64  
dtypes: float64(3), int64(3), object(7)
memory usage: 51.1+ MB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 515238 entries, 0 to 515237
Data columns (total 13 columns):
 #   Column         

In [54]:
# Merge dataframes
usa_full_table = usa_confirmed_df_long.merge(usa_deaths_df_long, how='outer',left_on=['UID','Date']
                            ,right_on=['UID','Date']
                            ,suffixes=['','_DROP']).filter(regex='^(?!.*_DROP)')
#first few rows
usa_full_table.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Date,Confirmed,Deaths
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.271,-170.132,"American Samoa, US",1/22/20,0,0
1,316,GU,GUM,316,66.0,,Guam,US,13.4443,144.7937,"Guam, US",1/22/20,0,0
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.0979,145.6739,"Northern Mariana Islands, US",1/22/20,0,0
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,"Puerto Rico, US",1/22/20,0,0
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,"Virgin Islands, US",1/22/20,0,0


In [55]:
# save as .csv file
usa_full_table.to_csv(path+'usa_county_wise.csv', index=False)

# Download Global data
> Three time series tables are for the global confirmed cases, recovered cases and deaths. Australia, Canada and China are reported at the province/state level. Dependencies of the Netherlands, the UK, France and Denmark are listed under the province/state level. The US and other countries are at the country level. The tables are renamed time_series_covid19_confirmed_global.csv and time_series_covid19_deaths_global.csv, and time_series_covid19_recovered_global.csv, respectively.

In [56]:
# download data
# =============

# urls of the files
urls = ['https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv', 
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv',
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv']
# directory for data
path = 'COVID Data/'

try:
    os.mkdir(path)
except OSError:
    print ("Creation of the directory %s failed" % path)
else:
    print ("Successfully created the directory %s " % path)
        
# download files to path (Files will download to local system)
print('Beginning file download with requests')
for url in urls:
    r = requests.get(url)
    with open(path+url.split('/')[-1], 'wb') as f:
        f.write(r.content)
    # Retrieve HTTP meta-data (200 = request successful)
    print(r.status_code)
    print(r.headers['content-type'])
    print(r.encoding)    

Creation of the directory COVID Data/ failed
Beginning file download with requests
200
text/plain; charset=utf-8
utf-8
200
text/plain; charset=utf-8
utf-8
200
text/plain; charset=utf-8
utf-8


In [58]:
# Dataframes
conf_df = pd.read_csv(path+'time_series_covid19_confirmed_global.csv')
deaths_df = pd.read_csv(path+'time_series_covid19_deaths_global.csv')
recv_df = pd.read_csv(path+'time_series_covid19_recovered_global.csv')

# Tidy and Cleanse
1.  Merge Dataframes
2.  Date format needs to be converted
3.  Fill any missing Recovered values with 0
4.  Fix country names
5.  Remove county data to avoid double counting (will not use county data in our analysis)

#### Merge Dataframes

In [60]:
# extract dates
dates = conf_df.columns[4:]

# melt dataframes into long format
conf_df_long = conf_df.melt(id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
                            value_vars=dates, var_name='Date', value_name='Confirmed')

deaths_df_long = deaths_df.melt(id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
                            value_vars=dates, var_name='Date', value_name='Deaths')

recv_df_long = recv_df.melt(id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
                            value_vars=dates, var_name='Date', value_name='Recovered')

recv_df_long = recv_df_long[recv_df_long['Country/Region']!='Canada']

print(conf_df_long.shape)
print(deaths_df_long.shape)
print(recv_df_long.shape)

(42028, 6)
(42028, 6)
(39816, 6)


Start with the confirmation and deaths dataframes because they have the same number of records. 
They likely have the same number of Dates and unique KEYIDs (state, country, date, lat, long)

In [61]:
# merge dataframes
full_table = pd.merge(left=conf_df_long, right=deaths_df_long, how='left',
                      on=['Province/State', 'Country/Region', 'Date', 'Lat', 'Long'])
full_table = pd.merge(left=full_table, right=recv_df_long, how='left',
                      on=['Province/State', 'Country/Region', 'Date', 'Lat', 'Long'])

full_table.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Afghanistan,33.0,65.0,1/22/20,0,0,0.0
1,,Albania,41.1533,20.1683,1/22/20,0,0,0.0
2,,Algeria,28.0339,1.6596,1/22/20,0,0,0.0
3,,Andorra,42.5063,1.5218,1/22/20,0,0,0.0
4,,Angola,-11.2027,17.8739,1/22/20,0,0,0.0


In [62]:
# Convert to proper date format
full_table['Date'] = pd.to_datetime(full_table['Date'])

# fill na with 0
full_table['Recovered'] = full_table['Recovered'].fillna(0)

# convert to int datatype
full_table['Recovered'] = full_table['Recovered'].astype('int')

In [63]:
# fixing Country names
# ====================

# renaming countries, regions, provinces
full_table['Country/Region'] = full_table['Country/Region'].replace('Korea, South', 'South Korea')

# Greenland
full_table.loc[full_table['Province/State']=='Greenland', 'Country/Region'] = 'Greenland'

# Mainland china to China
full_table['Country/Region'] = full_table['Country/Region'].replace('Mainland China', 'China')

In [64]:
# removing
# =======

# removing canada's recovered values
full_table = full_table[full_table['Province/State'].str.contains('Recovered')!=True]

# removing county wise data to avoid double counting
full_table = full_table[full_table['Province/State'].str.contains(',')!=True]

In [65]:
# Active Case = confirmed - deaths - recovered
full_table['Active'] = full_table['Confirmed'] - full_table['Deaths'] - full_table['Recovered']

# filling missing values 
# ======================
# fill missing province/state value with ''
full_table[['Province/State']] = full_table[['Province/State']].fillna('')
# fill missing numerical values with 0
cols = ['Confirmed', 'Deaths', 'Recovered', 'Active']
full_table[cols] = full_table[cols].fillna(0)

# fixing datatypes
full_table['Recovered'] = full_table['Recovered'].astype(int)

# random rows
full_table.sample(6)

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active
33193,,Thailand,15.0,101.0,2020-05-25,3042,57,2928,57
7854,,Jordan,31.24,36.51,2020-02-20,0,0,0,0
32192,,Argentina,-38.4161,-63.6167,2020-05-22,10649,433,3062,7154
41603,French Guiana,France,3.9339,-53.1258,2020-06-26,3270,11,1166,2093
19390,,Laos,19.85627,102.495496,2020-04-03,10,0,0,10
38505,,Spain,40.0,-4.0,2020-06-14,243928,27136,150376,66416


#### Add WHO Region Field
https://en.wikipedia.org/wiki/WHO_regions

In [68]:
who_region = {}

# African Region AFRO
afro = "Algeria, Angola, Cabo Verde, Eswatini, Sao Tome and Principe, Benin, South Sudan, Western Sahara, Congo (Brazzaville), Congo (Kinshasa), Cote d'Ivoire, Botswana, Burkina Faso, Burundi, Cameroon, Cape Verde, Central African Republic, Chad, Comoros, Ivory Coast, Democratic Republic of the Congo, Equatorial Guinea, Eritrea, Ethiopia, Gabon, Gambia, Ghana, Guinea, Guinea-Bissau, Kenya, Lesotho, Liberia, Madagascar, Malawi, Mali, Mauritania, Mauritius, Mozambique, Namibia, Niger, Nigeria, Republic of the Congo, Rwanda, São Tomé and Príncipe, Senegal, Seychelles, Sierra Leone, Somalia, South Africa, Swaziland, Togo, Uganda, Tanzania, Zambia, Zimbabwe"
afro = [i.strip() for i in afro.split(',')]
for i in afro:
    who_region[i] = 'Africa'
    
# Region of the Americas PAHO
paho = 'Antigua and Barbuda, Argentina, Bahamas, Barbados, Belize, Bolivia, Brazil, Canada, Chile, Colombia, Costa Rica, Cuba, Dominica, Dominican Republic, Ecuador, El Salvador, Grenada, Guatemala, Guyana, Haiti, Honduras, Jamaica, Mexico, Nicaragua, Panama, Paraguay, Peru, Saint Kitts and Nevis, Saint Lucia, Saint Vincent and the Grenadines, Suriname, Trinidad and Tobago, United States, US, Uruguay, Venezuela'
paho = [i.strip() for i in paho.split(',')]
for i in paho:
    who_region[i] = 'Americas'

# South-East Asia Region SEARO
searo = 'Bangladesh, Bhutan, North Korea, India, Indonesia, Maldives, Myanmar, Burma, Nepal, Sri Lanka, Thailand, Timor-Leste'
searo = [i.strip() for i in searo.split(',')]
for i in searo:
    who_region[i] = 'South-East Asia'

# European Region EURO
euro = 'Albania, Andorra, Greenland, Kosovo, Holy See, Liechtenstein, Armenia, Czechia, Austria, Azerbaijan, Belarus, Belgium, Bosnia and Herzegovina, Bulgaria, Croatia, Cyprus, Czech Republic, Denmark, Estonia, Finland, France, Georgia, Germany, Greece, Hungary, Iceland, Ireland, Israel, Italy, Kazakhstan, Kyrgyzstan, Latvia, Lithuania, Luxembourg, Malta, Monaco, Montenegro, Netherlands, North Macedonia, Norway, Poland, Portugal, Moldova, Romania, Russia, San Marino, Serbia, Slovakia, Slovenia, Spain, Sweden, Switzerland, Tajikistan, Turkey, Turkmenistan, Ukraine, United Kingdom, Uzbekistan'
euro = [i.strip() for i in euro.split(',')]
for i in euro:
    who_region[i] = 'Europe'

# Eastern Mediterranean Region EMRO
emro = 'Afghanistan, Bahrain, Djibouti, Egypt, Iran, Iraq, Jordan, Kuwait, Lebanon, Libya, Morocco, Oman, Pakistan, Palestine, West Bank and Gaza, Qatar, Saudi Arabia, Somalia, Sudan, Syria, Tunisia, United Arab Emirates, Yemen'
emro = [i.strip() for i in emro.split(',')]
for i in emro:
    who_region[i] = 'Eastern Mediterranean'

# Western Pacific Region WPRO
wpro = 'Australia, Brunei, Cambodia, China, Cook Islands, Fiji, Japan, Kiribati, Laos, Malaysia, Marshall Islands, Micronesia, Mongolia, Nauru, New Zealand, Niue, Palau, Papua New Guinea, Philippines, South Korea, Samoa, Singapore, Solomon Islands, Taiwan, Taiwan*, Tonga, Tuvalu, Vanuatu, Vietnam'
wpro = [i.strip() for i in wpro.split(',')]
for i in wpro:
    who_region[i] = 'Western Pacific'

In [69]:
# add 'WHO Region' column
full_table['WHO Region'] = full_table['Country/Region'].map(who_region)

# find missing values
full_table[full_table['WHO Region'].isna()]['Country/Region'].unique()

array(['Diamond Princess', 'MS Zaandam'], dtype=object)

In [70]:
# =============

# fixing Country values
full_table.loc[full_table['Province/State']=='Greenland', 'Country/Region'] = 'Greenland'

# Active Case = confirmed - deaths - recovered
full_table['Active'] = full_table['Confirmed'] - full_table['Deaths'] - full_table['Recovered']

# replacing Mainland china with just China
full_table['Country/Region'] = full_table['Country/Region'].replace('Mainland China', 'China')

# filling missing values 
full_table[['Province/State']] = full_table[['Province/State']].fillna('')
full_table[['Confirmed', 'Deaths', 'Recovered', 'Active']] = full_table[['Confirmed', 'Deaths', 'Recovered', 'Active']].fillna(0)

# fixing datatypes
full_table['Recovered'] = full_table['Recovered'].astype(int)

full_table.sample(6)

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,WHO Region
3657,,Somalia,5.1521,46.1996,2020-02-04,0,0,0,0,Eastern Mediterranean
3775,Chongqing,China,30.0572,107.874,2020-02-05,389,2,15,372,Western Pacific
9048,,Angola,-11.2027,17.8739,2020-02-25,0,0,0,0,Africa
40471,New Brunswick,Canada,46.5653,-66.4619,2020-06-22,164,2,0,162,Americas
13505,,Sweden,63.0,16.0,2020-03-12,599,1,0,598,Europe
14925,,Brunei,4.5353,114.7277,2020-03-18,68,0,0,68,Western Pacific


In [71]:
# save data
full_table.to_csv(path+'covid_19_clean_complete.csv', index=False)

In [72]:
# we don't have recovered counts for USA (won't be able to merge USA)

usa_full_table[['Province_State','Country_Region','Lat','Long_','Date','Confirmed','Deaths','']]

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Date,Confirmed,Deaths
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.271000,-170.132000,"American Samoa, US",1/22/20,0,0
1,316,GU,GUM,316,66.0,,Guam,US,13.444300,144.793700,"Guam, US",1/22/20,0,0
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.097900,145.673900,"Northern Mariana Islands, US",1/22/20,0,0
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.220800,-66.590100,"Puerto Rico, US",1/22/20,0,0
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.335800,-64.896300,"Virgin Islands, US",1/22/20,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515233,84070016,US,USA,840,,Central Utah,Utah,US,39.372319,-111.575868,"Central Utah, Utah, US",6/27/20,134,0
515234,84070017,US,USA,840,,Southeast Utah,Utah,US,38.996171,-110.701396,"Southeast Utah, Utah, US",6/27/20,34,0
515235,84070018,US,USA,840,,Southwest Utah,Utah,US,37.854472,-111.441876,"Southwest Utah, Utah, US",6/27/20,1361,10
515236,84070019,US,USA,840,,TriCounty,Utah,US,40.124915,-109.517442,"TriCounty, Utah, US",6/27/20,46,0


In [1]:
# US is confirmed in the full table to be aggregated at the national level
full_table[full_table['Country/Region']=='US']

NameError: name 'full_table' is not defined

In [None]:

# Grouped by day, country
# =======================

full_grouped = full_table.groupby(['Date', 'Country/Region'])['Confirmed', 'Deaths', 'Recovered', 'Active'].sum().reset_index()

# new cases ======================================================
temp = full_grouped.groupby(['Country/Region', 'Date', ])['Confirmed', 'Deaths', 'Recovered']
temp = temp.sum().diff().reset_index()

mask = temp['Country/Region'] != temp['Country/Region'].shift(1)

temp.loc[mask, 'Confirmed'] = np.nan
temp.loc[mask, 'Deaths'] = np.nan
temp.loc[mask, 'Recovered'] = np.nan

# renaming columns
temp.columns = ['Country/Region', 'Date', 'New cases', 'New deaths', 'New recovered']
# =================================================================

# merging new values
full_grouped = pd.merge(full_grouped, temp, on=['Country/Region', 'Date'])

# filling na with 0
full_grouped = full_grouped.fillna(0)

# fixing data types
cols = ['New cases', 'New deaths', 'New recovered']
full_grouped[cols] = full_grouped[cols].astype('int')

full_grouped['New cases'] = full_grouped['New cases'].apply(lambda x: 0 if x<0 else x)

full_grouped.head()