# Our World in Data analysis notebook

This notebook contains analysis done largely for CBC News and the COVID Brief newsletter. It uses [one dataset from Our World in Data](https://ourworldindata.org/explorers/coronavirus-data-explorer?zoomToSelection=true&time=2020-03-01..latest&country=USA~GBR~CAN~DEU~ITA~IND&region=World&pickerMetric=location&pickerSort=asc&Interval=7-day+rolling+average&Relative+to+Population=true&Metric=Confirmed+cases&Color+by+test+positivity=false).

First, we'll import pandas and numpy, both of which I use in the analysis below. Then, we'll read in the data we're using straight from OWID's servers.

In [4]:
import pandas as pd
import numpy as  np

# Continent names; used further down to pick rows out of `raw` by `location`.
list_of_continents = [
    "Africa",
    "Europe",
    "Asia",
    "North America",
    "South America",
    "Oceania",
]

# Load the full OWID COVID-19 CSV over HTTP; every later cell filters this frame.
raw = pd.read_csv(
    'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
)

### 2021/11/15 - Germany deaths

In [None]:
# Germany's rows only, narrowed to the country code, date and `total_deaths`.
germany = raw.loc[
    raw["location"] == "Germany",
    ["iso_code", "date", "total_deaths"],
]

### 2021/11/15 - Which countries have surpassed 100K deaths?

Find the date on which each country passed the 100,000-deaths mark.

In [19]:
# For every country, the earliest row whose `total_deaths` is at least 100,000:
# keep qualifying rows, sort by date and keep the first row per `iso_code`.
thousand_deaths = (raw
                   .loc[raw["total_deaths"] >= 100000]
                   .sort_values("date", ascending=True)
                   .drop_duplicates('iso_code')
                   # Drop codes containing "OWID" (presumably OWID's own aggregate
                   # rows such as "World" -- TODO confirm). The mask is computed on the
                   # frame being filtered, and na=False keeps a missing iso_code from
                   # breaking the `~`.
                   .loc[lambda frame: ~frame["iso_code"].str.contains("OWID", na=False)]
                   .loc[:, ["iso_code", "location", "date", "total_deaths"]]
                   )

# The crossing dates have to exist before `world_deaths` filters on them;
# otherwise this cell raises NameError on a fresh kernel.
dates = thousand_deaths['date'].to_list()

# World totals on those same dates.
world_deaths = (raw[(raw["location"] == "World") & (raw["date"].isin(dates))]
                .loc[:, ["date", "total_deaths"]]
                )

# After the merge, `total_deaths_x` is the country's figure and `total_deaths_y`
# is the world's figure on that date.
export = thousand_deaths.merge(world_deaths, on="date")

display(export)

Unnamed: 0,iso_code,location,date,total_deaths_x,total_deaths_y
0,USA,United States,2020-05-23,100524.0,358817.0
1,BRA,Brazil,2020-08-08,100657.0,761943.0
2,IND,India,2020-10-02,100842.0,1079012.0
3,MEX,Mexico,2020-11-19,100104.0,1416252.0
4,PER,Peru,2021-01-25,100377.0,2209583.0
5,GBR,United Kingdom,2021-01-26,100241.0,2226994.0
6,ITA,Italy,2021-03-08,100103.0,2681960.0
7,RUS,Russia,2021-04-08,100158.0,3000548.0
8,FRA,France,2021-04-15,100087.0,3088430.0
9,COL,Colombia,2021-06-21,100582.0,3877879.0


### 2021/11/15 - Deaths around the world

In [None]:
# World-level `total_deaths` by date, as whole numbers.
# Rows without a death count are dropped from the frame itself before the cast.
# Assigning a dropna()'d column back re-aligns on the index, which restores the
# NaN rows and leaves the column as float, so the int conversion never sticks.
world_timeline = (
    raw.loc[raw["location"] == "World", ["date", "total_deaths"]]
    .dropna(subset=["total_deaths"])
    .astype({"total_deaths": int})
)

### 2021/11/25 - PAHO countries

A look at how PAHO countries are faring.

In [None]:
# Locations included in the PAHO comparison. Names must match the `location`
# column exactly: a misspelled entry is silently dropped by `isin`.
paho_countries = [
    "United States",
    "Brazil",
    "Argentina",
    "Colombia",
    "Mexico",
    "Peru",
    "Canada",
    "Chile",
    "Cuba",
    "Guatemala",  # was misspelled "Guatamala", so it never matched
    "Costa Rica",
    "Bolivia",
    "Ecuador",
    "Panama",
    "Paraguay",
    "Venezuela",
    "Dominican Republic",
    "Uruguay",
    "Honduras",
    "Puerto Rico"
]

paho = raw[raw["location"].isin(paho_countries)]

# Most recent row per location (descending sort, keep first). The case-fatality
# rate is total deaths as a percentage of total cases; rank from highest to lowest.
paho = (paho[['iso_code', 'location', 'date', 'total_cases', 'total_deaths']]
        .sort_values(["location", "date"], ascending=False)
        .drop_duplicates("location")
        .assign(**{"CFR %": lambda frame: frame["total_deaths"] / frame["total_cases"] * 100})
        .sort_values('CFR %', ascending=False)
        )

### 2021/12/14 - Belgium and the Netherlands

In [None]:
def _seven_day_summary(country):
    """Return one country's rows with 7-row rolling means of new cases and deaths.

    Parameters
    ----------
    country : pandas.DataFrame
        Rows of `raw` for a single location.

    Returns
    -------
    pandas.DataFrame
        A new frame with the index reset, only the reporting columns kept, and
        any row with a missing value in those columns dropped. The input frame
        is not modified.
    """
    keep = ['location', 'date', 'new_cases', 'new_cases_7day', 'new_deaths',
            'new_deaths_7day', 'total_cases_per_million', "total_deaths_per_million"]
    return (country
            .assign(new_deaths_7day=lambda frame: frame["new_deaths"].rolling(7).mean(),
                    new_cases_7day=lambda frame: frame["new_cases"].rolling(7).mean())
            .reset_index(drop=True)
            .loc[:, keep]
            .dropna()
            )


# The helper's result is bound back to each name. Rebinding a loop variable
# (`country = ...`) would leave `belgium` and `netherlands` untouched and throw
# the reset/selection/dropna steps away.
netherlands = _seven_day_summary(raw[raw["location"] == "Netherlands"])
belgium = _seven_day_summary(raw[raw["location"] == "Belgium"])

countries = [belgium, netherlands]

### 2021/12/16 - Canada compared to world

In [None]:
# Rows for the seven listed countries from 2021-06-01 onward (dates are compared
# as strings).
top_data = raw.loc[
    raw["location"].isin([
        "Canada",
        "United States",
        "United Kingdom",
        "France",
        "Italy",
        "Japan",
        "Germany",
    ])
    & (raw["date"] >= "2021-06-01")
]

# One column per country, one row per date, smoothed with a 7-row rolling mean.
pivot = (
    pd.pivot(top_data, index="date", columns="location", values="new_cases_per_million")
    .rolling(7)
    .mean()
)

### 2021/12/17 - Canada new cases and deaths

In [None]:
# Canada's new cases, new deaths and hospital patients, indexed by date and
# smoothed with a 7-row rolling mean.
canada = (
    raw.loc[raw["location"] == "Canada", ["date", "new_cases", "new_deaths", "hosp_patients"]]
    .set_index("date")
    .rolling(7)
    .mean()
)

# Peak of each smoothed series.
max_cases = canada["new_cases"].max()
max_hosps = canada["hosp_patients"].max()
max_deaths = canada["new_deaths"].max()

# Re-express every series as a percentage of its own peak.
canada = canada.assign(
    new_cases=canada["new_cases"] / max_cases * 100,
    hosp_patients=canada["hosp_patients"] / max_hosps * 100,
    new_deaths=canada["new_deaths"] / max_deaths * 100,
)

### 2022/01/06 - Worldwide new case rates

In [None]:
# For each location keep the row with its highest `new_cases_per_million`
# (descending sort, then keep the first row per location). Rows without a
# `continent` and locations of one million people or fewer are excluded.
today = (raw
         .sort_values("new_cases_per_million", ascending=False)
         .drop_duplicates("location")
         .dropna(subset=["continent"])
         )
today = today[today["population"] > 1000000]
# The frame is still in descending order, so a 1-based index doubles as the rank.
today.index = np.arange(1, len(today) + 1)

today = today[["location", "date", "new_cases_per_million"]]
canada = today[today["location"] == "Canada"]

# Top 50 plus Canada for reference. When Canada already ranks in the top 50 the
# appended row would be an exact repeat, so keep only one row per location.
# NOTE(review): `all` shadows the Python builtin for the rest of the session;
# the name is kept so existing references keep working.
all = pd.concat([today.head(50), canada]).drop_duplicates("location")

Unnamed: 0,location,date,new_cases_per_million
1,Spain,2022-01-03,7974.421
2,Denmark,2021-12-27,7058.811
3,Ireland,2022-01-03,6834.769
4,Palestine,2021-10-06,5812.257
5,France,2022-01-05,4917.571
6,Greece,2022-01-04,4838.803
7,Switzerland,2022-01-03,4411.339
8,Sweden,2022-01-04,4229.166
9,Belgium,2021-11-29,4112.33
10,Portugal,2022-01-05,3891.65


In [None]:
# Rows for the seven listed countries.
g7 = raw.loc[raw["location"].isin([
    "Canada",
    "United States",
    "United Kingdom",
    "France",
    "Italy",
    "Japan",
    "Germany",
])]

# Per country, the row with the highest `people_vaccinated_per_hundred`, ordered
# from highest to lowest and renumbered from zero.
today = (
    g7
    .sort_values("people_vaccinated_per_hundred", ascending=False)
    .drop_duplicates("location")
    .reset_index(drop=True)
    .loc[:, ["location", "date", "people_vaccinated_per_hundred"]]
)

Unnamed: 0,location,date,people_vaccinated_per_hundred
0,Canada,2021-12-22,82.96
1,Italy,2021-12-22,79.56
2,Japan,2021-12-22,79.53
3,France,2021-12-21,77.85
4,United Kingdom,2021-12-21,75.62
5,Germany,2021-12-21,72.95
6,United States,2021-12-22,72.76


### 2022/01/04 - Positive test rate

In [None]:
# Latest row per location that has both a `continent` and a `positive_rate`,
# ordered from the highest positive rate to the lowest and indexed by location.
positivity = (
    raw
    .dropna(subset=["continent", "positive_rate"])
    .sort_values("date", ascending=False)
    .drop_duplicates("location")
    .loc[:, ["location", "date", "positive_rate"]]
    .sort_values("positive_rate", ascending=False)
    .set_index("location")
)

### 2022/01/06 - Sweden

In [None]:
# For each location keep the row with its highest `new_deaths_per_million`
# (descending sort, then keep the first row per location). Rows without a
# `continent` and locations of one million people or fewer are excluded.
today = (raw
         .sort_values("new_deaths_per_million", ascending=False)
         .drop_duplicates("location")
         .dropna(subset=["continent"])
         )
today = today[today["population"] > 1000000]
# Still in descending order, so the 1-based index is the rank.
today.index = np.arange(1, len(today) + 1)

today = today[["location", "date", "new_deaths_per_million"]]
canada = today[today["location"] == "Canada"]

# Top 50 plus Canada for reference. If Canada is already in the top 50 the
# appended row is an exact repeat, so keep only one row per location.
# NOTE(review): `all` shadows the Python builtin for the rest of the session;
# the name is kept so existing references keep working.
all = pd.concat([today.head(50), canada]).drop_duplicates("location")

### 2022/01/06 - Continent/income comparison

In [None]:
# Row with the highest `new_cases_per_million` for every location.
continents = (raw
              .sort_values("new_cases_per_million", ascending=False)
              .drop_duplicates("location")
              )

# Canada has to be picked out *before* the frame is narrowed to continent rows;
# looked up afterwards it can never match and the comparison row is always empty.
# NOTE(review): Canada's row keeps its original index label from `raw`, not a rank.
canada = continents.loc[continents["location"] == "Canada",
                        ["location", "date", "new_cases_per_million"]]

# Continent rows only, using the shared list from the first cell.
continents = continents[continents["location"].isin(list_of_continents)]
continents = continents[continents["population"] > 1000000]
# Still in descending order, so the 1-based index is the rank.
continents.index = np.arange(1, len(continents) + 1)

continents = continents[["location", "date", "new_cases_per_million"]]

# NOTE(review): `all` shadows the Python builtin for the rest of the session;
# the name is kept so existing references keep working.
all = pd.concat([continents.head(50), canada])

### 2022/01/12 - Commonwealth countries

In [None]:
countries = [
    "Australia",
    "New Zealand",
    "United Kingdom",
    "United States",
    "Canada",
]

# Rows for those countries from 2021-01-13 onward (dates are compared as strings).
subset = raw.loc[raw["location"].isin(countries) & (raw["date"] >= "2021-01-13")]

# One column per country, one row per date, smoothed with a 7-row rolling mean.
pivot = (
    subset
    .pivot(index="date", columns="location", values="new_cases_per_million")
    .rolling(7)
    .mean()
)

### 2022/01/14 - Canada

In [None]:
# Canada's new cases, new deaths and hospital patients, indexed by date.
canada = (
    raw.loc[raw["location"] == "Canada"]
    .set_index("date")
    .loc[:, ["new_cases", "new_deaths", "hosp_patients"]]
)

### 2022/01/20 - Austria

In [None]:
# `hosp_patients_per_million` for Austria and the "World" row, side by side:
# one row per date, one column per location.
austria = (
    raw.loc[
        raw["location"].isin(["Austria", "World"]),
        ["location", "date", "hosp_patients_per_million"],
    ]
    .pivot(index="date", columns="location", values="hosp_patients_per_million")
)

### 2022/01/28 - Sweden and the world over time

Sweden's new cases over time.

In [None]:
# Sweden's `new_cases`, indexed by date and smoothed with a 7-row rolling mean.
# Built as a single chain so nothing is assigned into a filtered slice of `raw`
# (the pattern behind pandas' SettingWithCopyWarning).
sweden = (
    raw.loc[raw["location"] == "Sweden", ["date", "new_cases"]]
    .set_index("date")
    .rolling(7)
    .mean()
)

The world, new cases over time.

In [None]:
# `new_cases` for the "World" row, indexed by date and smoothed with a 7-row
# rolling mean.
world = (
    raw.loc[raw["location"] == "World", ["date", "new_cases"]]
    .set_index("date")
    .rolling(7)
    .mean()
)

### 2022/01/27 - Ranking continents

Resulted in [this](https://www.datawrapper.de/_/UpKbt/) visualization.

In [None]:
# One column per continent, one row per date; dates where any continent has no
# value are dropped.
continents = (raw[raw["location"].isin(list_of_continents)]
              .pivot_table(columns="location", index="date", values="new_cases_per_million")
              .dropna()
              )
continents.index = pd.to_datetime(continents.index)

# Sum each continent's daily values per calendar month, then reshape to long
# form (month, location, value). Months are labelled "YYYY-MM" with a zero-padded
# month so the labels sort chronologically. Unpadded labels such as "2020-2" and
# "2020-10" sort as text and scramble the column order of the final pivot.
continents = (continents
              .groupby(continents.index.strftime("%Y-%m"))
              .sum()
              .rename_axis("month")
              .reset_index()
              .melt(id_vars="month")
              )

dates = continents["month"].unique()

# For every month, order the continents from highest to lowest monthly total and
# number them 1..n.
ranked = [
    (continents[continents["month"] == month]
     .sort_values('value', ascending=False)
     .assign(rank=lambda frame: range(1, len(frame) + 1)))
    for month in dates
]

# Continents as rows, months as columns, rank as the cell value.
all_ranked = (pd
              .concat(ranked)
              .pivot(columns="month", index="location", values="rank")
              )

### 2022/01/28 - Reuters arrows

[This](https://www.datawrapper.de/_/nDOb1/) is the result of the following analysis.

In [None]:
# Compare `new_cases_per_million` on two dates for every location that has a
# `continent` and more than one million people.
# NOTE(review): this section is dated 2022/01/28 but both snapshot dates are in
# January 2021 -- confirm the intended year.
arrows = raw[raw["date"].isin(["2021-01-28", "2021-01-14"])]
arrows = (arrows[arrows["population"] > 1000000]
          .dropna(subset=["continent"])
          .pivot_table(index=["location", "continent"], columns="date", values="new_cases_per_million")
          # A location with a value on only one of the two dates has no defined
          # change, and its NaN would make the integer cast below raise.
          .dropna(subset=["2021-01-28", "2021-01-14"])
          )
# Change between the two dates, truncated to a whole number.
arrows["diff"] = (arrows["2021-01-28"] - arrows["2021-01-14"]).astype(int)
arrows = arrows.reset_index()

# One row per direction of change, each holding a comma-separated country list.
data = pd.DataFrame({"countries": ["", "", ""]}, index=["Positives", "Negatives", "No change"])

# `.copy()` makes each subset its own frame, so adding the `text` column does
# not write into a slice of `arrows`.
arrows_pos = arrows[arrows['diff'] > 0].copy()
arrows_pos["text"] = arrows_pos["location"] + " (+" + arrows_pos["diff"].astype(str) + ")"
data.at["Positives", "countries"] = ', '.join(arrows_pos["text"])

arrows_neg = arrows[arrows['diff'] < 0].copy()
# Negative values already carry their own minus sign.
arrows_neg["text"] = arrows_neg["location"] + " (" + arrows_neg["diff"].astype(str) + ")"
data.at["Negatives", "countries"] = ', '.join(arrows_neg["text"])

arrows_none = arrows[arrows['diff'] == 0]
data.at["No change", "countries"] = ', '.join(arrows_none["location"])

### 2022/02/07 -  COVID brief, booster rates

In [17]:
countries = [
    "Canada",
    "United States",
    "Italy",
    "France",
    "Germany",
    "Spain",
    "United Kingdom",
    "Japan",
    "Israel",
    "World",
    "Chile",
]

# Per location, the row with the highest `total_boosters_per_hundred`, ordered
# from highest to lowest and indexed by location.
target = (
    raw.loc[raw["location"].isin(countries)]
    .sort_values("total_boosters_per_hundred", ascending=False)
    .drop_duplicates("location")
    .set_index("location")
    .loc[:, ["date", "total_boosters_per_hundred"]]
)

### 2022/03/10 - COVID brief, 6 countries


In [None]:
six = raw[raw["location"].isin(["South Korea", "Hong Kong", "Singapore", "Vietnam", "Malaysia", "Japan"])]

# Smooth `new_cases_per_million` with a 7-row rolling mean computed separately
# for each location. Rolling over the stacked rows of all six would let a window
# span the boundary between two countries. `assign` returns a new frame, so
# nothing is written into the slice of `raw`.
# Then keep 2022-01-01 onward and spread the countries into columns.
six = (six
       .assign(new_cases_per_million=lambda frame: (frame
                                                    .groupby("location")["new_cases_per_million"]
                                                    .transform(lambda series: series.rolling(7).mean())))
       .loc[lambda frame: frame["date"] >= "2022-01-01"]
       .pivot(index="date", columns="location", values="new_cases_per_million")
       )