#### BigQuery magic in Jupyter notebooks

In [None]:
%load_ext google.cloud.bigquery

import matplotlib.pyplot  as plt
import pandas as pd

#### Some pandas display settings

In [None]:
# display.max_rows defaults to 60; frames longer than that are truncated in
# the repr (only display.min_rows rows, 10 by default, are then shown).
# Set it to None to display all rows in the dataframe.
pd.set_option('display.max_rows', None)

# Set it to None to display all columns in the dataframe.
pd.set_option('display.max_columns', None)

# Width of the display in characters. When set to None, pandas auto-detects
# the width if it is running in a terminal; a notebook is not a terminal, so
# the width cannot be detected reliably there.
pd.set_option('display.width', None)

# The maximum width in characters of a column in the repr of a pandas data
# structure. None means unlimited, i.e. cell contents are never shortened.
pd.set_option('display.max_colwidth', None)

#### Pull data from BigQuery and load it into a dataframe

In [None]:
%%bigquery df_countries

-- Select every column of the public ECDC COVID-19 table for seven countries
-- (identified by their three-letter territory codes), restricted to rows
-- dated 2020-01-01 or later and returned in date order.
-- The result is loaded into the variable named on the %%bigquery line.
SELECT
  *
FROM
  `bigquery-public-data.covid19_ecdc.covid_19_geographic_distribution_worldwide`
WHERE
  date >= "2020-01-01"
  AND country_territory_code in ("USA", "FRA", "GBR", "ITA", "ESP", "DEU", "CAN")
ORDER BY date

In [None]:
# Display the dataframe populated by the %%bigquery cell.
df_countries

#### Look at daily cases in the United States only

In [None]:
# Keep only the rows whose territory code is "USA", then display them.
df_usa = df_countries.loc[df_countries["country_territory_code"] == "USA"]
df_usa

In [None]:
# Bar chart of daily confirmed cases over the whole date range.
df_usa.plot.bar(x="date", y="daily_confirmed_cases", figsize=(18, 12))

In [None]:
# Same chart, restricted to month 9 (September) and later.
df_usa.loc[df_usa["month"] >= 9].plot.bar(x="date", y="daily_confirmed_cases", figsize=(18, 12))

In [None]:
# Same chart, restricted to months 4-6 (April through June).
df_usa.loc[(df_usa["month"] >= 4) & (df_usa["month"] < 7)].plot.bar(x="date", y="daily_confirmed_cases", figsize=(18, 12))

#### Look at both daily cases and daily deaths

In [None]:
# Cases and deaths side by side. The filter keeps month 9 onwards, so the
# trailing note now says September rather than the stale "April - June".
df_usa[df_usa.month >= 9].plot(kind="bar", x="date", y=["daily_confirmed_cases", "daily_deaths"], figsize=(18, 12))  # only from September on

In [None]:
# Cases and deaths stacked in one bar per date. The filter keeps month 9
# onwards, so the trailing note now says September rather than "April - June".
df_usa[df_usa.month >= 9].plot(kind="bar", x="date", y=["daily_confirmed_cases", "daily_deaths"], stacked=True, figsize=(18, 12))  # only from September on

#### Now, look at all countries but focus on only a few columns

In [None]:
# Narrow the frame to the date parts, the two daily counts, the territory
# code and the population column.
df_countries2 = df_countries[[
    'date', 'day', 'month', 'year',
    'daily_confirmed_cases', 'daily_deaths',
    'country_territory_code', 'pop_data_2019',
]]

In [None]:
# Display the reduced-column frame.
df_countries2

#### Group data by month

In [None]:
# Sum cases and deaths per month over every country in the frame.
# Aggregations are given by name ('sum', 'max') instead of the Python
# builtins: recent pandas versions warn when builtins are passed to agg().
# NOTE(review): with month as the only key, 'max' on pop_data_2019 returns the
# largest single-country population in each month, not a combined population.
df_by_month = df_countries2.groupby(['month']).agg(
    {'daily_confirmed_cases': 'sum', 'daily_deaths': 'sum', 'pop_data_2019': 'max'}
)
df_by_month

In [None]:
# Stacked bars: monthly case and death totals, one bar per month.
df_by_month.plot.bar(y=["daily_confirmed_cases", "daily_deaths"], stacked=True, figsize=(18, 12))

#### Group data by month and country

In [None]:
# Sum cases and deaths per (month, country) pair and keep each group's
# population figure. Aggregations are given by name ('sum', 'max') instead of
# the Python builtins: recent pandas versions warn when builtins are passed
# to agg().
df_by_month_country = df_countries2.groupby(['month', 'country_territory_code']).agg(
    {'daily_confirmed_cases': 'sum', 'daily_deaths': 'sum', 'pop_data_2019': 'max'}
)
df_by_month_country

In [None]:
# Inspect the index produced by grouping on month and country_territory_code.
df_by_month_country.index

#### Retrieve data for different cases

In [None]:
# Select on the first index level (month) only: every country's row for month 1.
df_by_month_country.loc[(1,), ]   # data for January

In [None]:
# Select by (month, country). range() excludes its stop value, so covering
# months 1-11 (January - November) requires range(1, 12).
df_by_month_country.loc[(range(1, 12), "USA"), ]    # data for USA from January - November

In [None]:
# Pivot the innermost index level (the country code) into the columns, so
# each row is a month and each metric has one column per country.
df_by_month_country_unstack = df_by_month_country.unstack(level=-1)
df_by_month_country_unstack

In [None]:
# Grouped bars per month: cases and deaths for every country.
df_by_month_country_unstack.plot.bar(y=['daily_confirmed_cases', 'daily_deaths'], figsize=(18, 12))

In [None]:
# Same grouped bars, with the y axis of the resulting plot switched to a
# log scale.
df_by_month_country_unstack.plot.bar(
    y=['daily_confirmed_cases', 'daily_deaths'], figsize=(18, 12)
).set_yscale('log')

#### Make 2 subplots instead

In [None]:
# One figure with two panels side by side: cases on the left, deaths on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(18, 12))
df_by_month_country_unstack['daily_confirmed_cases'].plot.bar(ax=axes[0])
df_by_month_country_unstack['daily_deaths'].plot.bar(ax=axes[1])

#### Add a couple of computed quantities - population-normalized daily cases & deaths

In [None]:
# Scale the aggregated case and death counts to a rate per one million
# inhabitants, using each row's pop_data_2019 value as the denominator.
df_by_month_country['daily_cases_by_1M'] = (
    df_by_month_country['daily_confirmed_cases'].div(df_by_month_country['pop_data_2019']).mul(1e6)
)
df_by_month_country['daily_death_by_1M'] = (
    df_by_month_country['daily_deaths'].div(df_by_month_country['pop_data_2019']).mul(1e6)
)

In [None]:
# Display the grouped frame, including the per-million columns.
df_by_month_country

#### Rearrange columns...

In [None]:
# Reorder the columns: raw counts first, then the per-million rates, with
# the population column last.
df_by_month_country = df_by_month_country.reindex(columns=[
    "daily_confirmed_cases",
    "daily_deaths",
    "daily_cases_by_1M",
    "daily_death_by_1M",
    "pop_data_2019",
])

df_by_month_country