Skip to content

Commit

Permalink
Re-download and wrangle GoT data
Browse files Browse the repository at this point in the history
  • Loading branch information
jennybc committed Sep 7, 2017
1 parent 1e67c41 commit fd35319
Show file tree
Hide file tree
Showing 54 changed files with 7,733 additions and 7,427 deletions.
29 changes: 18 additions & 11 deletions data-raw/got_pov-chars-download.R
@@ -1,11 +1,9 @@
## https://anapioficeandfire.com

## devtools::install_github("krlmlr/here")
library(here)
library(httr)
library(tidyverse)
library(stringr)
library(listviewer)
library(jsonlite)

## determines number of pages implied by link header in httr response
Expand Down Expand Up @@ -46,21 +44,26 @@ resp <- GET("http://www.anapioficeandfire.com/api/houses?pageSize=1")
iceandfire <- "http://www.anapioficeandfire.com"

## get all books
books_json <- here("data-raw", "iceandfire-json", "books.json")
iceandfire %>%
modify_url(path = c("api", "books"), query = list(pageSize = n_books)) %>%
download.file(here("data-raw", "iceandfire-json", "books.json"))
download.file(books_json)
books_json %>%
readLines(warn = FALSE) %>%
prettify() %>%
writeLines(books_json)

books <-
fromJSON(here("data-raw", "iceandfire-json", "books.json"),
simplifyDataFrame = FALSE)
books <- books_json %>%
fromJSON(simplifyDataFrame = FALSE)

books_df <- tibble(
book = books %>% map_chr("name"),
book_id = books %>% map_chr("url") %>% map_int(get_id),
pov = books %>% map("povCharacters"),
n_pov = lengths(pov)
)
books_df
books_df %>%
arrange(desc(n_pov))
## Confirmed: I can concentrate on only the 5 main books
## A Game of Thrones, A Clash of Kings, A Storm of Swords,
## A Feast for Crows, and A Dance with Dragons
Expand All @@ -87,7 +90,7 @@ pov_df <- tibble(
paste0("character-", character_id, ".json"))
)
walk2(pov_df$url, pov_df$fname,
function(url, file) GET(url) %>% write_lines(file))
function(url, file) GET(url) %>% prettify() %>% write_lines(file))

## give POV character JSON files better names
pov_df <- pov_df %>%
Expand All @@ -96,7 +99,7 @@ pov_df <- pov_df %>%
fname2 = str_replace(fname, ".json$",
paste0("-", str_replace_all(name, "\\s+", "-"), ".json")))
walk2(pov_df$fname, pov_df$fname2, file.rename)
jsonedit(pov_df$from_api)
## View(pov_df$from_api)

## download JSON for the houses found in allegiances of POV characters
houses_df <- tibble(
Expand All @@ -105,8 +108,13 @@ houses_df <- tibble(
fname = here("data-raw", "iceandfire-json",
paste0("house-", house_id, ".json"))
)
## guard against this form of duplication:
## https://www.anapioficeandfire.com/api/houses/362
## https://anapioficeandfire.com/api/houses/362
houses_df <- houses_df %>%
filter(!duplicated(houses_df$house_id))
walk2(houses_df$url, houses_df$fname,
function(url, file) GET(url) %>% write_lines(file))
function(url, file) GET(url) %>% prettify() %>% write_lines(file))

## give house JSON files better names
houses_df <- houses_df %>%
Expand All @@ -120,4 +128,3 @@ houses_df <- houses_df %>%
fname, ".json$",
paste0("-",str_replace_all(house_name, "\\s+", "-"), ".json")))
walk2(houses_df$fname, houses_df$fname2, file.rename)

33 changes: 19 additions & 14 deletions data-raw/got_pov-chars-wrangle.R
@@ -1,6 +1,4 @@
## devtools::install_github("krlmlr/here")
library(here)
library(listviewer)
library(jsonlite)
library(assertthat)
library(tidyverse)
Expand All @@ -11,11 +9,13 @@ library(xml2)
## get resource id from URL
## Takes the last path component of each URL (via basename()) and coerces it
## to integer, e.g. ".../api/characters/583" -> 583L.
## Defined as a magrittr functional sequence (`. %>% ...`), so it is called
## like an ordinary one-argument function: get_id(url).
get_id <- . %>% basename() %>% as.integer()

books <- fromJSON(here("data-raw", "iceandfire-json", "books.json"),
simplifyDataFrame = FALSE)
books <- fromJSON(
here("data-raw", "iceandfire-json", "books.json"),
simplifyDataFrame = FALSE
)
assert_that(length(books) == 12)
#str(books, max.level = 1)
#jsonedit(books)
#View(books)

books_df <- tibble(
book = books %>% map_chr("name"),
Expand All @@ -25,18 +25,24 @@ books_df <- tibble(
)
books_df

pov_json_files <- list.files(here("data-raw", "iceandfire-json"),
pattern = "^character", full.names = TRUE)
assert_that(length(pov_json_files) == 29)
pov_json_files <- list.files(
here("data-raw", "iceandfire-json"),
pattern = "^character",
full.names = TRUE
)
assert_that(length(pov_json_files) == 30)

pov_df <- tibble(
from_api = map(pov_json_files, fromJSON),
name = from_api %>% map_chr("name")
)

houses_json_files <- list.files(here("data-raw", "iceandfire-json"),
pattern = "^house", full.names = TRUE)
assert_that(length(houses_json_files) == 16)
houses_json_files <- list.files(
here("data-raw", "iceandfire-json"),
pattern = "^house",
full.names = TRUE
)
assert_that(length(houses_json_files) == 17)
houses_df <- tibble(
from_api = map(houses_json_files, fromJSON),
house = from_api %>% map_chr("name"),
Expand Down Expand Up @@ -72,8 +78,7 @@ pov_df <- pov_df %>%
}
chr_list
}))
jsonedit(pov_df)
pov_df %>% View()
View(pov_df)

## create integer and logical elements for each character for pedagogy
pov_df <- pov_df %>%
Expand All @@ -89,7 +94,7 @@ pov_df <- pov_df %>%
"allegiances", "books", "povBooks", "tvSeries", "playedBy")
chr_list[nms]
}))
jsonedit(pov_df)
View(pov_df)

## this is basically the list that will go in the package
got_chars <- pov_df$from_api
Expand Down

0 comments on commit fd35319

Please sign in to comment.