In [2]:
## System requirements
import pandas as pd
import numpy as np
import json
import urllib.request
import requests

In [3]:
df = pd.read_csv('covid-19-data/public/data/owid-covid-data.csv')

In [4]:
df.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy
0,AFG,Asia,Afghanistan,2019-12-31,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
1,AFG,Asia,Afghanistan,2020-01-01,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
2,AFG,Asia,Afghanistan,2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
3,AFG,Asia,Afghanistan,2020-01-03,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
4,AFG,Asia,Afghanistan,2020-01-04,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83


In [5]:
df.shape

(36984, 36)

In [6]:
df.date.nunique()

229

In [7]:
df.location.nunique()

212

In [8]:
df.columns

Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'total_deaths', 'new_deaths', 'total_cases_per_million',
       'new_cases_per_million', 'total_deaths_per_million',
       'new_deaths_per_million', 'new_tests', 'total_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'tests_per_case', 'positive_rate', 'tests_units', 'stringency_index',
       'population', 'population_density', 'median_age', 'aged_65_older',
       'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
       'cardiovasc_death_rate', 'diabetes_prevalence', 'female_smokers',
       'male_smokers', 'handwashing_facilities', 'hospital_beds_per_thousand',
       'life_expectancy'],
      dtype='object')

#####  Data Information:
We have daily data for 212 locations.
Variables: 
['iso_code', 'location', 'date', 'total_cases', 'new_cases',
       'total_deaths', 'new_deaths', 'total_cases_per_million',
       'new_cases_per_million', 'total_deaths_per_million',
       'new_deaths_per_million', 'total_tests', 'new_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand', 'tests_units',
       'stringency_index', 'population', 'population_density', 'median_age',
       'aged_65_older', 'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
       'cvd_death_rate', 'diabetes_prevalence', 'female_smokers',
       'male_smokers', 'handwashing_facilities', 'hospital_beds_per_100k']

In [9]:
df_latest = df.groupby('location').agg({'date': 'max', 'total_cases_per_million': 'max', 'new_cases_per_million': 'max', 'total_deaths_per_million': 'max', 'total_tests_per_thousand': 'max', 'population_density': 'max', 'total_cases': 'max', 'total_deaths': 'max'}).reset_index()

In [11]:
df_latest.describe([0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]).round(2)

Unnamed: 0,total_cases_per_million,new_cases_per_million,total_deaths_per_million,total_tests_per_thousand,population_density,total_cases,total_deaths
count,210.0,210.0,210.0,89.0,200.0,211.0,211.0
mean,3440.26,208.1,94.55,92.41,360.37,201077.28,7233.77
std,5129.39,450.05,173.75,123.75,1573.68,1527542.38,54338.14
min,2.75,0.42,0.0,1.66,0.14,3.0,0.0
10%,86.63,5.66,0.08,5.73,15.31,133.0,1.0
25%,303.14,19.99,4.58,13.57,37.62,824.5,11.5
50%,1297.73,66.78,22.51,55.9,87.25,4988.0,93.0
75%,4580.75,226.07,97.68,117.38,213.88,41336.0,665.0
90%,8524.37,506.71,282.5,202.23,438.66,168290.0,6021.0
95%,14429.93,852.4,485.25,281.12,799.75,340819.0,17414.5


In [12]:
## Plotting
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource
from bokeh.models.tools import HoverTool

In [13]:
source = ColumnDataSource(df_latest)
p = figure()
p.circle(x='total_cases', y='population_density',
         source=source,
         size=10, color='green')
hover = HoverTool()
hover.tooltips=[
    ('total cases', '@total_cases'),
    ('population_density', '@population_density'),
    ('total deaths', '@total_deaths')
]

p.add_tools(hover)

show(p)

In [33]:
df_latest.sort_values(['total_cases_per_million'], ascending=[1])

Unnamed: 0,location,date,total_cases_per_million,new_cases_per_million,total_deaths_per_million,total_tests_per_thousand,population_density,total_cases,total_deaths
149,Papua New Guinea,2020-05-28,0.894,0.559,0.000,,18.220,8,0
111,Lesotho,2020-05-28,0.934,0.467,0.000,,73.562,2,0
4,Angola,2020-05-28,2.160,0.274,0.122,,23.890,71,4
108,Laos,2020-05-28,2.611,0.550,0.000,,29.715,19,0
206,Vietnam,2020-05-28,3.359,0.267,0.000,2.681,308.127,327,0
32,Burundi,2020-05-28,3.532,1.261,0.084,,423.062,42,1
134,Myanmar,2020-05-28,3.786,0.386,0.110,0.419,81.721,206,6
119,Malawi,2020-05-28,5.280,0.941,0.209,,197.519,101,4
196,Uganda,2020-05-28,6.143,0.940,0.000,1.965,213.759,281,0
63,Ethiopia,2020-05-28,6.359,0.765,0.052,0.840,104.957,731,6


#####  Indian Regression Model

In [35]:
df_ind = df[df.location=='India']

In [38]:
df_ind_1 = df_ind[['date', 'total_cases', 'new_cases', 'total_deaths', 'new_deaths', 'total_cases_per_million', 'new_cases_per_million',  'total_deaths_per_million',
       'new_deaths_per_million', 'total_tests', 'new_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand', 'tests_units']]

In [39]:
df_ind_1

Unnamed: 0,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,total_deaths_per_million,new_deaths_per_million,total_tests,new_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,tests_units
8748,2019-12-31,0,0,0,0,0.000,0.000,0.000,0.000,,,,,,,
8749,2020-01-01,0,0,0,0,0.000,0.000,0.000,0.000,,,,,,,
8750,2020-01-02,0,0,0,0,0.000,0.000,0.000,0.000,,,,,,,
8751,2020-01-03,0,0,0,0,0.000,0.000,0.000,0.000,,,,,,,
8752,2020-01-04,0,0,0,0,0.000,0.000,0.000,0.000,,,,,,,
8753,2020-01-05,0,0,0,0,0.000,0.000,0.000,0.000,,,,,,,
8754,2020-01-06,0,0,0,0,0.000,0.000,0.000,0.000,,,,,,,
8755,2020-01-07,0,0,0,0,0.000,0.000,0.000,0.000,,,,,,,
8756,2020-01-08,0,0,0,0,0.000,0.000,0.000,0.000,,,,,,,
8757,2020-01-09,0,0,0,0,0.000,0.000,0.000,0.000,,,,,,,


### Linear Regression on Corona Cases for India

In [15]:
df.columns

Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'total_deaths', 'new_deaths', 'total_cases_per_million',
       'new_cases_per_million', 'total_deaths_per_million',
       'new_deaths_per_million', 'new_tests', 'total_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'tests_per_case', 'positive_rate', 'tests_units', 'stringency_index',
       'population', 'population_density', 'median_age', 'aged_65_older',
       'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
       'cardiovasc_death_rate', 'diabetes_prevalence', 'female_smokers',
       'male_smokers', 'handwashing_facilities', 'hospital_beds_per_thousand',
       'life_expectancy'],
      dtype='object')

In [78]:
df_ind = df[df.iso_code == 'IND']

In [79]:
## Finding date with first case and filtering the data from that point
df_ind = df_ind[df_ind.total_cases > 0]

In [80]:
## first case reported on 30th January 2020
df_ind.location.unique()

array(['India'], dtype=object)

In [81]:
df_ind = df_ind[['date', 'total_cases', 'new_cases', 'total_deaths', 
                 'new_deaths',  'new_tests', 'total_tests', 'tests_per_case', 
                 'positive_rate', 'population', 'population_density', 
                'median_age', 'aged_65_older','stringency_index',
       'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
       'cardiovasc_death_rate', 'diabetes_prevalence', 'female_smokers',
       'male_smokers', 'handwashing_facilities', 'hospital_beds_per_thousand',
       'life_expectancy']]

In [50]:
df_ind.life_expectancy.unique()

array([450.419])

In [31]:
df_var = df[['location', 'date', 'total_cases', 'new_cases', 'total_deaths', 
                 'new_deaths',  'new_tests', 'total_tests', 'tests_per_case', 
                 'positive_rate', 'population', 'population_density', 
                'median_age', 'aged_65_older',
       'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
       'cardiovasc_death_rate', 'diabetes_prevalence', 'female_smokers',
       'male_smokers', 'handwashing_facilities', 'hospital_beds_per_thousand',
       'life_expectancy']]

In [32]:
df_var.head()

Unnamed: 0,location,date,total_cases,new_cases,total_deaths,new_deaths,new_tests,total_tests,tests_per_case,positive_rate,...,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy
0,Afghanistan,2019-12-31,0.0,0.0,0.0,0.0,,,,,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
1,Afghanistan,2020-01-01,0.0,0.0,0.0,0.0,,,,,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
2,Afghanistan,2020-01-02,0.0,0.0,0.0,0.0,,,,,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
3,Afghanistan,2020-01-03,0.0,0.0,0.0,0.0,,,,,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
4,Afghanistan,2020-01-04,0.0,0.0,0.0,0.0,,,,,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83


In [52]:
df_cp = df_var.dropna()

In [55]:
df_cp.location.unique()

array(['Bangladesh', 'Colombia', 'Costa Rica', 'El Salvador', 'Ethiopia',
       'Ghana', 'India', 'Indonesia', 'Kazakhstan', 'Kenya', 'Mexico',
       'Myanmar', 'Nepal', 'Pakistan', 'Paraguay', 'South Africa',
       'Thailand', 'Togo', 'Tunisia', 'Uganda', 'Vietnam', 'Zimbabwe'],
      dtype=object)

In [59]:
df_continent = df.groupby(['continent', 'date']).agg({'new_cases': 'sum', 'total_cases': 'sum', 
                                                      'total_deaths': 'sum', 'new_deaths': 'sum', 
                                                      'new_tests': 'sum', 
                                                      'total_tests': 'sum', 'positive_rate': 'mean', 
                                                     'stringency_index': 'mean', 'population_density': 'mean',
                                                     'median_age': 'mean', 'aged_70_older': 'mean', 
                                                      'gdp_per_capita': 'mean', 'extreme_poverty': 'mean',
                                                      'cardiovasc_death_rate': 'mean', 'diabetes_prevalence':'mean'}).reset_index()

In [67]:
df_continent[df_continent.date == '2020-08-15'].sort_values(by = 'new_cases', ascending = False)

Unnamed: 0,continent,date,new_cases,total_cases,total_deaths,new_deaths,new_tests,total_tests,positive_rate,stringency_index,population_density,median_age,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence
457,Asia,2020-08-15,95916.0,5512064.0,117229.0,1519.0,0.0,0.0,,,445.049136,30.158696,3.942739,22185.363295,5.767857,290.287261,9.351818
1373,South America,2020-08-15,84099.0,5136357.0,173028.0,1935.0,0.0,0.0,,,24.392,30.216667,5.186,13840.78525,2.866667,187.323917,7.810833
915,North America,2020-08-15,76850.0,6316513.0,242102.0,2067.0,0.0,0.0,,,270.565545,33.128,6.22796,21655.243704,5.718182,197.066885,10.880968
686,Europe,2020-08-15,18686.0,2781889.0,173928.0,287.0,0.0,0.0,,37.96,638.055255,41.982051,11.717763,33338.380756,0.9,233.428325,6.544186
228,Africa,2020-08-15,12689.0,1098277.0,25102.0,422.0,0.0,0.0,,,103.492453,21.210909,2.167527,5443.837132,34.1,287.297426,5.629444
1144,Oceania,2020-08-15,434.0,25040.0,408.0,14.0,0.0,0.0,,,75.73275,32.071429,5.978571,23315.1805,0.95,286.065333,16.114286


In [68]:
df.location.unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Anguilla', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bermuda', 'Bhutan', 'Bolivia', 'Bonaire Sint Eustatius and Saba',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso',
       'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde',
       'Cayman Islands', 'Central African Republic', 'Chad', 'Chile',
       'China', 'Colombia', 'Comoros', 'Congo', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cuba', 'Curacao', 'Cyprus',
       'Czech Republic', 'Democratic Republic of Congo', 'Denmark',
       'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Ethiopia', 'Faeroe Islands', 'Falkland Isla

##### Visualizations for India

In [134]:
## Plotting
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource
from bokeh.models.tools import HoverTool

In [75]:
## Filtering the data for suitable data matches and across variables
df_ind[df_ind.new_tests.isnull()].date.unique()

array(['2020-01-30', '2020-01-31', '2020-02-01', '2020-02-02',
       '2020-02-03', '2020-02-04', '2020-02-05', '2020-02-06',
       '2020-02-07', '2020-02-08', '2020-02-09', '2020-02-10',
       '2020-02-11', '2020-02-12', '2020-02-13', '2020-02-14',
       '2020-02-15', '2020-02-16', '2020-02-17', '2020-02-18',
       '2020-02-19', '2020-02-20', '2020-02-21', '2020-02-22',
       '2020-02-23', '2020-02-24', '2020-02-25', '2020-02-26',
       '2020-02-27', '2020-02-28', '2020-02-29', '2020-03-01',
       '2020-03-02', '2020-03-03', '2020-03-04', '2020-03-05',
       '2020-03-06', '2020-03-07', '2020-03-08', '2020-03-10',
       '2020-03-11', '2020-03-12', '2020-03-13', '2020-03-14',
       '2020-03-15', '2020-03-16', '2020-03-17', '2020-03-25',
       '2020-03-26', '2020-03-27', '2020-03-28', '2020-03-29',
       '2020-03-30', '2020-03-31', '2020-04-01', '2020-04-19',
       '2020-04-20', '2020-04-21', '2020-04-22', '2020-08-14',
       '2020-08-15'], dtype=object)

In [87]:
df_ind.columns

Index(['date', 'total_cases', 'new_cases', 'total_deaths', 'new_deaths',
       'new_tests', 'total_tests', 'tests_per_case', 'positive_rate',
       'population', 'population_density', 'median_age', 'aged_65_older',
       'stringency_index', 'aged_70_older', 'gdp_per_capita',
       'extreme_poverty', 'cardiovasc_death_rate', 'diabetes_prevalence',
       'female_smokers', 'male_smokers', 'handwashing_facilities',
       'hospital_beds_per_thousand', 'life_expectancy'],
      dtype='object')

In [86]:
df_ind['stringency_index'].fillna(79.63, inplace=True)

In [97]:
## For each variable give null dates
col_null = []
for col in df_ind.columns:
    if len(list(df_ind[df_ind[col].isnull()]['date'])) > 0:
        col_null.append(col)
        print (col, " null date values for dates: ", set(list(pd.to_datetime(df_ind[df_ind[col].isnull()]['date']).dt.month)))

new_tests  null date values for dates:  {1, 2, 3, 4, 8}
total_tests  null date values for dates:  {1, 2, 3, 4, 8}
tests_per_case  null date values for dates:  {8, 1, 2, 3}
positive_rate  null date values for dates:  {8, 1, 2, 3}


In [99]:
for col in df_ind.columns:
    if len(list(df_ind[df_ind[col].isnull()]['date'])) > 0:
        print (col, " null date values for dates: ", set(list(df_ind[df_ind[col].isnull()]['date'])))

new_tests  null date values for dates:  {'2020-02-07', '2020-02-20', '2020-03-27', '2020-03-01', '2020-03-06', '2020-02-22', '2020-03-08', '2020-03-16', '2020-02-08', '2020-03-07', '2020-03-04', '2020-04-01', '2020-02-06', '2020-03-03', '2020-08-15', '2020-03-11', '2020-03-05', '2020-03-02', '2020-02-09', '2020-02-21', '2020-02-27', '2020-02-15', '2020-02-13', '2020-03-14', '2020-01-30', '2020-04-22', '2020-02-17', '2020-03-26', '2020-08-14', '2020-03-10', '2020-02-18', '2020-02-01', '2020-02-05', '2020-02-11', '2020-02-29', '2020-02-14', '2020-02-25', '2020-02-28', '2020-03-13', '2020-03-28', '2020-03-12', '2020-03-15', '2020-02-26', '2020-03-31', '2020-02-03', '2020-02-16', '2020-01-31', '2020-02-23', '2020-02-19', '2020-03-30', '2020-04-21', '2020-02-12', '2020-02-02', '2020-03-25', '2020-02-10', '2020-03-29', '2020-04-20', '2020-04-19', '2020-02-24', '2020-03-17', '2020-02-04'}
total_tests  null date values for dates:  {'2020-02-07', '2020-02-20', '2020-03-27', '2020-03-01', '2020-

In [100]:
col_null

['new_tests', 'total_tests', 'tests_per_case', 'positive_rate']

In [101]:
df_impute = df_ind[['date','new_tests', 'total_tests', 'tests_per_case', 'positive_rate']]

In [103]:
df_impute['datetime'] = pd.to_datetime(df_ind['date'])
df_impute['month'] = df_impute['datetime'].dt.month

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [110]:
df_imp_vals = df_impute.groupby('month')['new_tests', 'total_tests', 'tests_per_case', 'positive_rate'].mean().reset_index()

In [111]:
df_imp_vals

Unnamed: 0,month,new_tests,total_tests,tests_per_case,positive_rate
0,1,,,,
1,2,,,,
2,3,1717.0,17442.0,39.078538,0.029692
3,4,30229.88,359152.4,25.854933,0.0391
4,5,94663.0,2288044.0,22.962323,0.045903
5,6,166312.6,6169970.0,13.705267,0.073567
6,7,339744.322581,13432630.0,10.07329,0.1
7,8,641212.076923,23478930.0,10.465077,0.095923


In [107]:
df_impute['weekofyear'] = df_impute['datetime'].dt.week

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [108]:
df_impute.head()

Unnamed: 0,date,new_tests,total_tests,tests_per_case,positive_rate,datetime,month,weekofyear
15527,2020-01-30,,,,,2020-01-30,1,5
15528,2020-01-31,,,,,2020-01-31,1,5
15529,2020-02-01,,,,,2020-02-01,2,5
15530,2020-02-02,,,,,2020-02-02,2,5
15531,2020-02-03,,,,,2020-02-03,2,6


In [112]:
df_imp_vals_week = df_impute.groupby('weekofyear')['new_tests', 'total_tests', 'tests_per_case', 'positive_rate'].mean().reset_index()

In [114]:
for col in col_null:
    df_imp_vals_week.rename(columns = {col: col+'_imp'}, inplace = True)

In [118]:
df_imp_vals_week
df_ind['weekofyear'] = pd.to_datetime(df_ind['date']).dt.week

In [120]:
df_ind_2 = pd.merge(df_ind, df_imp_vals_week, on=['weekofyear'], how = 'left')

In [123]:
df_ind_2.new_tests.fillna(df_ind_2.new_tests_imp, inplace= True)

In [126]:
df_ind_2.tests_per_case.fillna(df_ind_2.tests_per_case_imp, inplace= True)
df_ind_2.positive_rate.fillna(df_ind_2.positive_rate_imp, inplace= True)

In [127]:
df_ind_2.drop(columns=['total_tests'], inplace=True)

In [130]:
df_ind_2.shape

(198, 28)

In [132]:
df_ind_3 = df_ind_2.dropna()

In [133]:
df_ind_3.head()

Unnamed: 0,date,total_cases,new_cases,total_deaths,new_deaths,new_tests,tests_per_case,positive_rate,population,population_density,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,weekofyear,new_tests_imp,total_tests_imp,tests_per_case_imp,positive_rate_imp
45,2020-03-16,93.0,3.0,2.0,0.0,1516.4,62.7505,0.017,1380004000.0,450.419,...,1.9,20.6,59.55,0.53,69.66,12,1516.4,15704.0,62.7505,0.017
46,2020-03-17,125.0,32.0,3.0,1.0,1516.4,62.7505,0.017,1380004000.0,450.419,...,1.9,20.6,59.55,0.53,69.66,12,1516.4,15704.0,62.7505,0.017
47,2020-03-18,137.0,12.0,3.0,0.0,191.0,62.7505,0.017,1380004000.0,450.419,...,1.9,20.6,59.55,0.53,69.66,12,1516.4,15704.0,62.7505,0.017
48,2020-03-19,165.0,28.0,3.0,0.0,1060.0,85.598,0.012,1380004000.0,450.419,...,1.9,20.6,59.55,0.53,69.66,12,1516.4,15704.0,62.7505,0.017
49,2020-03-20,191.0,26.0,4.0,1.0,1325.0,67.888,0.015,1380004000.0,450.419,...,1.9,20.6,59.55,0.53,69.66,12,1516.4,15704.0,62.7505,0.017


##### Positive Rate Change across Weeks

In [149]:
weekly = df_ind_3.groupby('weekofyear')['positive_rate', 'new_cases', 'new_deaths', 'new_tests'].mean().reset_index()

In [142]:
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral3
output_file('simple_timeseries_plot.html')

#make sure MSNDATE is a datetime format
df_ind_3['date'] = pd.to_datetime(df_ind_3['date'])

grouped = df_ind_3.groupby('date')['new_cases', 'new_deaths'].sum()

source = ColumnDataSource(grouped)

p = figure(x_axis_type='datetime')

# p.line(x='date', y='new_tests', line_width=2, source=source, legend='new tests')
p.line(x='date', y='new_cases', line_width=2, source=source, color=Spectral3[1], legend='New Cases')
p.line(x='date', y='new_deaths', line_width=2, source=source, color=Spectral3[2], legend='New Deaths')

p.yaxis.axis_label = 'new cases and deaths with positive rate'

show(p)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [146]:
# source = ColumnDataSource(weekly)
import pandas_bokeh
weekly.plot_bokeh(
    kind='line',
    x='weekofyear',
    y=['positive_rate'],
    xlabel='week',
    ylabel='positive rate',
    title='WoW change in Positive Rate'
)

In [148]:
weekly.plot_bokeh(
    kind='line',
    x='weekofyear',
    y=['new_cases'],
    xlabel='week',
    ylabel='new cases',
    title='WoW change in New Cases'
)

In [150]:
weekly.plot_bokeh(
    kind='line',
    x='weekofyear',
    y=['new_tests'],
    xlabel='week',
    ylabel='new tests',
    title='WoW change in New Test'
)

In [153]:
df_us = df[df.iso_code == 'USA']

In [154]:
df_us['date'] = pd.to_datetime(df_us['date'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [155]:
df_us['weekofyear'] = df_us.date.dt.week

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [156]:
weekly_us = df_us.groupby(['weekofyear'])['positive_rate', 'new_cases', 'new_deaths', 'new_tests'].mean().reset_index()

In [157]:
weekly_us.plot_bokeh(
    kind='line',
    x='weekofyear',
    y=['positive_rate'],
    xlabel='week',
    ylabel='positive rate',
    title='WoW change in Positive Rate'
)

In [158]:
weekly_us.plot_bokeh(
    kind='line',
    x='weekofyear',
    y=['new_cases'],
    xlabel='week',
    ylabel='new cases',
    title='WoW change in New Cases'
)

In [159]:
weekly_us.plot_bokeh(
    kind='line',
    x='weekofyear',
    y=['new_tests'],
    xlabel='week',
    ylabel='new tests',
    title='WoW change in New Test'
)

In [160]:
weekly_us.plot_bokeh(
    kind='line',
    x='weekofyear',
    y=['new_deaths'],
    xlabel='week',
    ylabel='new deaths',
    title='WoW change in New Deaths'
)