library(tidyverse)
library(httr)
library(jsonlite)
library(ggmap)
library(osmdata)
library(sqldf)
library(googleway)
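#note: ggmap and googleway both call Google APIs, so each needs an API key (set further down)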
#---------------function to get UK health data----------------
#-----------from here: https://www.trafforddatalab.io/open_data_companion/----------
get_health <- function(BusinessTypeId, Name, pageNumber) {
  path <- "http://api.ratings.food.gov.uk/Establishments"
  #the API pages its results, and every request needs the x-api-version header
  if (Name == "") {
    request <- GET(url = path,
                   query = list(
                     BusinessTypeId = BusinessTypeId,
                     pageNumber = pageNumber,
                     pageSize = 5000),
                   add_headers("x-api-version" = "2"))
  } else {
    request <- GET(url = path,
                   query = list(
                     BusinessTypeId = BusinessTypeId,
                     Name = Name,
                     pageNumber = pageNumber,
                     pageSize = 5000),
                   add_headers("x-api-version" = "2"))
  }
  response <- content(request, as = "text", encoding = "UTF-8") %>%
    fromJSON(flatten = TRUE) %>%
    pluck("establishments") %>%
    as_tibble()
  # tidy the data
  df <- response %>%
    mutate(across(everything(), ~ replace(.x, .x == '', NA))) %>% #funs() is deprecated, so use across()
    select(name = BusinessName,
           type = BusinessType,
           rating = RatingValue,
           address1 = AddressLine1,
           address2 = AddressLine2,
           address3 = AddressLine3,
           address4 = AddressLine4,
           postcode = PostCode,
           long = geocode.longitude,
           lat = geocode.latitude) %>%
    unite(address, address1, address2, address3, address4, remove = TRUE, sep = ", ") %>%
    mutate(address = str_replace_all(address, "NA,", ""),
           address = str_replace_all(address, ", NA", ""),
           long = as.numeric(long),
           lat = as.numeric(lat))
  return(df)
}
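#example: one page of results; BusinessTypeId 7843 is the pub/bar/nightclub category used below
#sample <- get_health(7843, "", 1)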
#----------------uk---------------
#first, get all pub/bar/nightclub
uk <- NULL
for (i in 1:11) {
  df <- get_health(7843, "", i)
  uk <- rbind(uk, df)
}
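#(pageSize is 5000, and 11 pages covered every pub/bar/nightclub record when this was written)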
#then, get restaurants with bar or pub in the name. Some pubs are coded as restaurants, unfortunately.
df_pub <- get_health(1, "pub", 1)
df_bar <- get_health(1, "bar", 1)
#exclude things that don't seem like bars.
exclude <- c('burger', 'coffee', 'bagel', 'espresso', 'chocolat', 'chocolate', 'noodle', 'snack', 'meat', 'tea', 'fish',
             'milk', 'sandwich', 'brunch', 'burrito', 'chicken', 'pasta', 'juice', 'cafe', 'deli', 'energy', 'salad',
             'salsa', 'shake', 'snax', 'Hummus', 'oyster', 'breakfast', 'Bar B Q', 'tapas', 'Caffe', 'Baguette',
             'Cappuccino', 'cake', 'brownie', 'dessert', 'cupcake', 'Expresso', 'pizza', 'Food Bar', 'cream', 'Mozzarella',
             'meze', 'sushi', 'Sundae', 'garden bar')
df_bar <- df_bar[!grepl(paste(exclude, collapse = "|"), df_bar$name, ignore.case = TRUE), ]
uk <- rbind(uk, df_pub)
uk <- rbind(uk, df_bar)
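#optional safeguard, not part of the original workflow: the name searches can
#re-return establishments already fetched above, so exact duplicates could be
#dropped with uk <- distinct(uk)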
#get regions by post code
scotland <- c("AB", "DD", "KW", "DG", "KY", "EH", "ML", "FK", "PA", "G", "PH", "TD", "IV", "ZE", "KA", "HS")
northireland <- c("BT")
wales <- c("CF", "NP", "LD", "SY", "LL", "SA")
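#the postcode area is the leading letter or two of the postcode, so strip any
#digit from the first two characters (e.g. "G1" -> "G", "EH8" -> "EH")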
uk$postcode <- gsub("\\d", "", substr(uk$postcode, 1, 2))
uk$region <- "england"
uk$region[uk$postcode %in% scotland] <- "scotland"
uk$region[uk$postcode %in% northireland] <- "northireland"
uk$region[uk$postcode %in% wales] <- "wales"
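#e.g. an "EH" (Edinburgh) postcode is tagged scotland, a "CF" (Cardiff) one wales; anything unmatched stays england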
#--------------ireland--------------
pubsir <- read.csv("https://www.revenue.ie/en/corporate/documents/statistics/excise/liquor-licences.csv")
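#keep only publican's (i.e. pub) licences: the rows whose description starts "Publican's Licence (6..." or "...(7..."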
pubsir <- pubsir[grep("^Publican's Licence \\([6-7]", pubsir$description), ]
pubsir <- subset(pubsir, !is.na(trading_name))
pubsir$geocodename <- gsub(" ", "+", paste0(pubsir$trading_name, ",+", pubsir$address_1, ",+", pubsir$address_2, ",+Ireland"))
colnames(pubsir)[1] <- "ID"
#geocode the pubs
ire <- NULL
key <- "my google api key here"
register_google(key = key)
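#register_google() stores the key so ggmap's geocode() calls below can use it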
for (i in 1:nrow(pubsir)) {
  geo <- geocode(pubsir$geocodename[i])
  temp <- data.frame(pubsir$ID[i], pubsir$trading_name[i], geo$lat, geo$lon)
  ire <- rbind(temp, ire)
  if (i %% 20 == 0) {
    Sys.sleep(1) #sleep a bit so Google doesn't cut you off
    print(i)     #progress marker
  }
}
colnames(ire) <- c("ID", "name", "lat", "long")
ire$region <- "ireland"
#get rid of cruddy geocodes
ire <- subset(ire, long > -9.978)
ire <- subset(ire, long < -6.033)
ire <- subset(ire, lat < 55.132)
ire <- subset(ire, lat > 51.670)
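#(these long/lat bounds roughly box Ireland, so anything outside them was geocoded wrong)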
#merge uk and ireland
t1 <- data.frame(uk$name, uk$long, uk$lat, uk$region)
t2 <- data.frame(ire$name, ire$long, ire$lat, ire$region)
colnames(t1) <- c("name", "long", "lat", "region")
colnames(t2) <- c("name", "long", "lat", "region")
allpubs <- rbind(t1, t2)
#remove missing geocodes
allpubs <- subset(allpubs, !(is.na(lat)))
#add an id
allpubs$ID <- seq.int(nrow(allpubs))
#---------------selective google maps queries-------------
#---these spots seemed a bit blank on the map so I double checked them on Google
key <- "my google distinaces api key here"
check <- data.frame(x = c(-6.26, -7.353, -4.7, -6.146, -6.92, -4.78, -4.625, -4.375, -7, -6.84, -4.21, -3.07, -6.87, -4.63, -4.53),
                    y = c(57.335, 57.264, 58.297, 55.823, 56.493, 56.496, 56.625, 58.1875, 58, 57.97, 58.234, 56.818, 57.867, 56.206, 54.22))
results <- NULL
for (i in 1:nrow(check)) {
  x <- check$x[i]
  y <- check$y[i]
  temp <- google_places(location = c(y, x), radius = 30000, place_type = 'bar', key = key)
  results <- data.frame(temp$results$name, temp$results$geometry$location$lat, temp$results$geometry$location$lng) %>% rbind(results)
  Sys.sleep(1)
  #keep following next_page_token until Google stops returning one
  while (!is.null(temp$next_page_token)) {
    Sys.sleep(1) #the token takes a moment to become valid, so pause before using it
    nextpage <- temp$next_page_token
    temp <- google_places(location = c(y, x), radius = 30000, place_type = 'bar', key = key, page_token = nextpage)
    results <- data.frame(temp$results$name, temp$results$geometry$location$lat, temp$results$geometry$location$lng) %>% rbind(results)
  }
  Sys.sleep(5)
}
##!!!!####
#I merged the other results with these new Google results by hand. Not all of the results were actually pubs, and some were already in the dataset.
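#a minimal sketch of how that merge could be scripted instead, assuming the
#column order of the data.frame() calls above; the Google hits still need
#vetting by hand because not everything returned for place_type = 'bar' is a pub:
#colnames(results) <- c("name", "lat", "long")
#results <- distinct(results) #the 30km search circles overlap, so drop repeats
#results <- subset(results, !(name %in% allpubs$name)) #drop names already in the dataset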
#----final----
write.csv(allpubs, "ALLPUBS.csv", row.names = FALSE)