This notebook scrapes tables from Wikipedia articles, performs some cleaning, and saves each table as a CSV file.

In [None]:
library(tidyverse)
library(rvest)

# Scrape the first ".wikitable" from the Wikipedia article on COVID-19 vaccine
# deployment, clean it, preview it, and save it as vaccine_deployment.csv.

# URL of the Wikipedia page
url <- "https://en.wikipedia.org/wiki/Deployment_of_COVID-19_vaccines#covid-19-vaccine-distribution-by-country"

# Read the HTML content of the page
page <- read_html(url)

# html_node() returns only the first element matching the selector
vaccine_deployment_df <- page %>%
  html_node(".wikitable") %>%
  html_table()

# Drop every column whose header is empty. This filters on the names directly
# instead of `select(-one_of(""))`: one_of() is superseded, only warns when the
# column is absent, and matches just the first empty-named column.
has_header <- nzchar(colnames(vaccine_deployment_df))
vaccine_deployment_df <- vaccine_deployment_df[, has_header, drop = FALSE]

# Strip footnote markers: everything from the first '[' onwards in each name
cleaned_colnames <- gsub("\\[.*", "", colnames(vaccine_deployment_df))
colnames(vaccine_deployment_df) <- cleaned_colnames

# Remove the last record
vaccine_deployment_df <- head(vaccine_deployment_df, -1)

# Remove the percent sign from Percent and footnote markers from Location
vaccine_deployment_df <- vaccine_deployment_df %>%
  mutate(
    Percent = str_replace_all(Percent, "%", ""),
    Location = gsub("\\[.*", "", Location)
  )

head(vaccine_deployment_df, 10)

write_csv(vaccine_deployment_df, "vaccine_deployment.csv")

Location,Vaccinated,Percent
<chr>,<chr>,<chr>
World,5625493786,70.54
China,1310292000,91.89
India,1027418330,72.5
European Union,338046600,75.1
United States,270227181,81.39
Indonesia,203845350,73.99
Brazil,189643431,88.08
Pakistan,165567890,70.21
Bangladesh,151504341,88.5
Japan,104705133,84.47


In [None]:
# Scrape the first ".wikitable" from the Wikipedia article on COVID-19 death
# rates by country, clean it, preview it, and save it as covid_deathrate.csv.

# URL of the Wikipedia page
url <- "https://en.wikipedia.org/wiki/COVID-19_pandemic_death_rates_by_country#covid-19-pandemic-cases-and-mortality-by-country"

# Read the HTML content of the page
page <- read_html(url)

# Extract the first table with class "wikitable"
covid_deathrate_df <- page %>%
  html_node(".wikitable") %>%
  html_table()

# Remove the last record
covid_deathrate_df <- head(covid_deathrate_df, -1)

# Remove footnote markers from Country and turn the em-dash placeholder into
# NA. The placeholder replacement is limited to character columns: na_if()
# compares against a string and, in recent dplyr versions, errors when the
# column is numeric, so `everything()` would fail as soon as any column is
# parsed as a number.
covid_deathrate_df <- covid_deathrate_df %>%
  mutate(Country = gsub("\\[.*", "", Country)) %>%
  mutate(across(where(is.character), ~ na_if(.x, "—")))

head(covid_deathrate_df, 10)

write_csv(covid_deathrate_df, "covid_deathrate.csv")

Country,Deaths / million,Deaths,Cases
<chr>,<chr>,<chr>,<chr>
World,872,6960770,771150460
Peru,6511,221704,4520727
Bulgaria,5664,38414,1302188
Bosnia and Herzegovina,5057,16354,403155
Hungary,4896,48807,2206311
North Macedonia,4750,9946,349104
Georgia,4575,17132,1855289
Croatia,4553,18351,1275337
Slovenia,4456,9448,1345384
Montenegro,4232,2654,251280


In [None]:
# Scrape the first ".wikitable" from the Wikipedia list of countries by
# obesity rate, preview it, and save it as a CSV file.
# NOTE(review): "obsesity" looks like a misspelling of "obesity"; the variable
# and file names are kept as they are because other code may rely on them.

# Wikipedia article to scrape
url <- "https://en.wikipedia.org/wiki/List_of_countries_by_obesity_rate"

# Download and parse the page
page <- read_html(url)

# Take the first element with class "wikitable" and parse it into a table
obsesity_rate_df <- html_table(html_node(page, ".wikitable"))

# Preview the first five rows
head(obsesity_rate_df, n = 5)

# Save the table to disk
write_csv(obsesity_rate_df, file = "obsesity_rate.csv")

Country,Obesity rate (%)
<chr>,<dbl>
Cook Islands,55.9
Palau,55.3
Marshall Islands,52.9
Tuvalu,51.6
Tonga,48.2


In [None]:
# Scrape the first ".wikitable" from the Wikipedia list of countries by
# hospital beds, rename its columns, clean the cells, preview it, and save it
# as hospital_beds.csv.

# Wikipedia article to scrape
url <- "https://en.wikipedia.org/wiki/List_of_countries_by_hospital_beds"

# Download and parse the page
page <- read_html(url)

# Take the first element with class "wikitable" and parse it into a table
beds_df <- html_table(html_node(page, ".wikitable"))

# Drop the first record
beds_df <- tail(beds_df, -1)

# Replacement column names, one per column of the scraped table
column_names <- c(
  "Country (or territory)",
  "Region",
  "Hospital beds per 1,000 people 2013",
  "Hospital beds per 1,000 people 2014",
  "Hospital beds per 1,000 people 2015",
  "Hospital beds per 1,000 people 2016",
  "Hospital beds per 1,000 people 2017",
  "Occupancy (%)",
  "ICU-CCB beds/100,000 inhabitants",
  "Ventilators"
)

colnames(beds_df) <- column_names

# In every column, delete bracketed footnote markers such as "[1]" and then
# turn cells that are exactly the em-dash placeholder into NA
beds_df <- beds_df %>%
  mutate(across(
    everything(),
    ~ na_if(gsub("\\[.*?\\]", "", .x), "—")
  ))

# Preview the first five rows
head(beds_df, n = 5)

# Save the table to disk
write_csv(beds_df, file = "hospital_beds.csv")

Country (or territory),Region,"Hospital beds per 1,000 people 2013","Hospital beds per 1,000 people 2014","Hospital beds per 1,000 people 2015","Hospital beds per 1,000 people 2016","Hospital beds per 1,000 people 2017",Occupancy (%),"ICU-CCB beds/100,000 inhabitants",Ventilators
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
Japan,Asia,13.3,13.21,13.17,13.11,13.05,75.5,13.5,45293
South Korea,Asia,10.92,11.59,11.61,11.98,12.27,,10.6,9795
Ukraine,Europe,,,,,8.8,,,3600
Russia,Europe,9.07,8.81,8.35,8.16,8.05,,8.3,"40,000 (27.3 per 100,000 inhabitants)"
Germany,Europe,8.28,8.23,8.13,8.06,8.0,62.1,38.7,25000
