In [None]:
# Load libraries for webscraping, data wrangling, and mapping
library(xml2)
library(rvest)
library(stringr)
library(tidyverse)
library(leaflet)
library(htmlwidgets)

In [None]:
# Address of the page that lists the past congresses
url <- 'https://ispgr.org/past-congresses/'

# Fetch that address and parse the response into an HTML document
webpage <- url %>% read_html()

In [None]:
# Scrape congress years, locations and flag image URLs from `webpage` and
# combine them into `ispgr_data`, one row per congress.

# Congress years: text of every element with class `et_pb_module_header`
year_node <- html_nodes(webpage, '.et_pb_module_header')
year <- html_text(year_node)
year_list <- as.list(year)  # list form kept for any later code that uses it

# Congress locations: text of every element with class
# `et_pb_blurb_description`, cleaned up in three steps
place_node <- html_nodes(webpage, '.et_pb_blurb_description')
place <- html_text(place_node)
# 1. drop tabs and newlines left over from the HTML layout
place <- str_replace_all(place, '[\t\n]', '')
# 2. drop "<letters> <digits> - <letters> <digits>" runs
#    (presumably the congress date range -- confirm against the live page)
dates <- '[:alpha:]*[\\s]*+[:digit:]*[\\s]*[-][\\s]*[:alpha:]*[\\s]*+[:digit:]*[\\s]*'
place <- str_replace_all(place, dates, '')
# 3. insert a space wherever a lower-case letter is directly followed by an
#    upper-case one (words that were glued together by the removals above)
place <- gsub("([a-z])([A-Z])", "\\1 \\2", place)
place_list <- as.list(place)  # list form kept for any later code that uses it

# Flag images: `src` attribute of each child of the `et_pb_image_wrap`
# elements. Reading the attribute directly replaces the previous regex
# extraction over serialized HTML, which yielded strings such as
# "character(0)" or "c(...)" when a node had zero or several matches.
flag_node <- html_nodes(webpage, '.et_pb_image_wrap')
flag <- html_attr(html_children(flag_node), 'src')
flag_list <- as.list(flag)  # list form kept for any later code that uses it

# The three vectors are matched by position, so refuse to continue if the
# page layout produced different counts (data.frame would otherwise recycle).
if (length(place) != length(year) || length(flag) != length(year)) {
  stop(
    "Scraped vectors differ in length: ",
    length(year), " years, ",
    length(place), " locations, ",
    length(flag), " flags",
    call. = FALSE
  )
}

# Create dataframe with plain character columns (previously Location and Flag
# were list columns)
ispgr_data <- data.frame(
  Year = year,
  Location = place,
  Flag = flag,
  stringsAsFactors = FALSE
)

# Remove cancelled ISPGR from dataframe
ispgr_data <- filter(ispgr_data, Year != "2011")
ispgr_data

In [None]:
# Nodes with class `et_pb_toggle_content`, and their text with tabs and
# newlines stripped out
content_node <- webpage %>% html_nodes('.et_pb_toggle_content')
content <- content_node %>%
  html_text() %>%
  str_replace_all('[\t\n]', '')

# Placeholder data frame: a single all-NA column with one row per content node.
# The matrix() call is left as written because data.frame() derives the column
# name from this expression.
extra_data <- data.frame(matrix(nrow = length(content_node)))

In [None]:
library(leaflet)
# Show an empty leaflet map widget with the default tile layer added
addTiles(leaflet())