# 2. Request New Data via API
Requesting all of the data every time can be impractical, especially when the amount of data grows every day. Instead, it makes sense to request only the data that has not been requested already. This notebook goes over the system I devised to request only the necessary data.

# 2.1 Import Data

In [None]:
library(tidyverse)
library(httr)
library(jsonlite)
library(anytime)
library(googlesheets4)
library(here)

source(here("R", "00_source.R"))

In [None]:
# IDs of the two Google Sheets used by this notebook: one holding the saved
# scrobbles, one holding the saved artist tags.
scrobbleDataID <- "1-hx0pjREakkdFIQZWJJ5-KZbSQNTEdX99GQ0HM8u6MM"
artistTagID <- "1lZLASIwaiXjQ8JBQRwnIH9LLrqijM68hGKAyrOdfdgU"

# Authenticate against Google Sheets using the token cache under R/.secrets.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned elsewhere in the session.
gs4_auth(cache = here("R", ".secrets"), email = TRUE)

In [None]:
# Load the first worksheet of each Google Sheet: the previously saved
# scrobbles and the previously saved artist tags.
scrobbleData <- read_sheet(scrobbleDataID, sheet = 1)
artistTagInfo <- read_sheet(artistTagID, sheet = 1)

# 2.2 Request New Data
To request only the new data, I request the first page of data, i.e. the 200 most recent songs. These 200 songs from the API are compared with the 200 most recently saved songs in the Google Sheet containing all of the previously requested data. If all of these songs are the same, then there is no new data to request. If there are any differences, then the new data is requested and added to the saved data. In the event that all 200 songs are different, the next page needs to be checked as well. This process is repeated until all of the new songs are collected.

In [None]:
# Last.fm API settings.
#
# The API key can be supplied through the LASTFM_API_KEY environment variable
# so that it does not have to live in the notebook. The literal below is kept
# only as a fallback so existing runs behave exactly as before.
# NOTE(review): a key committed to source should be treated as exposed --
# consider rotating it and dropping the fallback.
# NOTE(review): APIKey is not referenced in this notebook; presumably
# get.lastfm() in R/00_source.R reads it -- confirm.
APIKey <- Sys.getenv("LASTFM_API_KEY", unset = NA)
if (is.na(APIKey) || !nzchar(APIKey)) {
  APIKey <- "2eb9b95751e6a5cd1da827604bf51906"
}

# Last.fm user name; passed both as the `user` request parameter and as the
# second argument of get.lastfm().
userAgent <- "CACTUS__PLANT"

# Base endpoint of the Last.fm web service.
# NOTE(review): this name masks base::url() for unqualified calls; it is kept
# because code outside this notebook may read it -- confirm before renaming.
url <- "https://ws.audioscrobbler.com/2.0/"

In [None]:
# Build one comparison key per scrobble from artist, track name and timestamp.
# The timestamp is formatted with "%.0f" so that an integer and a double
# holding the same value produce the same key.
scrobbleKey <- function(tracks) {
  paste(
    tracks[["artistName"]],
    tracks[["trackName"]],
    sprintf("%.0f", as.numeric(tracks[["date"]])),
    sep = "\u001f"
  )
}

# Number of tracks requested per page.
pageLimit <- 200

# Start from the first page on every run so the cell can be re-executed;
# totalPages is replaced by the value reported in the first response.
pageNum <- 1
totalPages <- 1

# New scrobbles push the saved ones down the listing, so the first page that
# contains known scrobbles can only contain the most recent `pageLimit` saved
# rows. Comparing against that fixed slice is therefore sufficient.
savedKeys <- scrobbleKey(head(scrobbleData, pageLimit))

# New rows are collected page by page and merged once at the end, so that
# multi-page updates keep the newest-first order and a failed request cannot
# leave a partially updated data set.
newPages <- list()
requestFailed <- FALSE

while (pageNum <= totalPages) {
  # request data
  payload <- list(
    method = "user.getRecentTracks",
    user = userAgent,
    extended = 1,
    limit = pageLimit,
    page = pageNum
  )
  res <- get.lastfm(payload, userAgent)

  # stop if the request is unsuccessful
  if (res$status_code != 200) {
    print(paste("ERROR:", res$status_code))
    requestFailed <- TRUE
    break
  }

  # convert data
  dataJSON <- fromJSON(rawToChar(res$content))
  tracks <- dataJSON$recenttracks$track

  # save new data to dataframe
  newTracks <- data.frame(
    artistName = tracks$artist$name,
    artistURL = tracks$artist$url,
    albumName = tracks$album$`#text`,
    trackName = tracks$name,
    trackURL = tracks$url,
    # second column, fourth entry of each image table
    # (presumably the URL of the largest image -- confirm against the API)
    trackImage = unlist(lapply(tracks$image, function(img) img[[2]][[4]])),
    date = as.numeric(tracks$date$uts)
  )

  # Drop entries without a timestamp.
  # NOTE(review): Last.fm is expected to return a "now playing" track without
  # a date; such an entry is not a finished scrobble -- confirm.
  newTracks <- newTracks[!is.na(newTracks$date), , drop = FALSE]

  pageNum <- as.numeric(dataJSON$recenttracks$`@attr`$page)
  totalPages <- as.numeric(dataJSON$recenttracks$`@attr`$totalPages)

  ## compare new data with saved data, is there new data available?
  isNew <- !(scrobbleKey(newTracks) %in% savedKeys)
  nNew <- sum(isNew)

  if (nNew == 0) { # nothing new on this page, stop paging
    if (length(newPages) == 0) {
      print("No new data detected")
    }
    break
  }

  newPages[[length(newPages) + 1]] <- newTracks[isNew, , drop = FALSE]

  if (nNew < pageLimit) { # page reached the saved data, no need to check more
    print(paste("New data found. Adding", nNew, "new scrobble(s) to data set. Breaking loop."))
    break
  }

  # every track on this page is new, so the next page has to be checked too
  print(paste("New data found. Adding ", nNew, " new scrobble(s) to data set."))
  print(paste("Moving to page", pageNum + 1))

  Sys.sleep(0.25) # reduce frequency of requests to not overload server

  pageNum <- pageNum + 1
}

if (requestFailed) {
  # A partial update would leave a gap between new and saved scrobbles.
  print("Request failed before all new pages were read; saved data left unchanged.")
} else if (length(newPages) > 0) {
  # Pages were collected newest-first, so binding them in order and placing
  # them above the saved rows keeps the whole data set newest-first.
  scrobbleData <- rbind(do.call(rbind, newPages), scrobbleData)
}

# 2.3 Request New Artist Tags
A similar process is used to request the new artist tags. If any new songs were added in the previous step, the artists of those songs are compared with the artists already collected. The tags for the artists not already collected are requested.

In [None]:
# Artists that already have a saved tag vs. every artist in the scrobble data.
savedArtists <- artistTagInfo$artistName
allArtists <- unique(scrobbleData$artistName)

# Positions (in allArtists) of artists without a saved tag.
diffInd <- which(!(allArtists %in% savedArtists))

if (length(diffInd) > 0) {
  newArtists <- allArtists[diffInd]
  print(paste("New artist(s) found:", newArtists))

  # Tags default to NA, which is also the value stored for an artist whose
  # response contains no tag.
  newTagInfo <- data.frame(artistName = newArtists,
                           artistTag = rep(NA_character_, length(newArtists)))

  # Number of artists whose request completed. Only these rows are saved, so
  # that artists skipped after a failed request are requested again on the
  # next run instead of being stored with an empty tag.
  nFetched <- 0

  for (i in seq_along(newArtists)) {

    # request data
    payload <- list(method = "artist.getTopTags", artist = newArtists[i])
    res <- get.lastfm(payload, userAgent)

    # stop if the request is unsuccessful
    if (res$status_code != 200) {
      print(paste("ERROR:", res$status_code))
      break
    }

    # convert data
    dataJSON <- fromJSON(rawToChar(res$content))

    # add the first listed tag to the data frame (left as NA when absent)
    tagName <- dataJSON$toptags$tag$name[1]
    if (!is.null(tagName)) {
      newTagInfo$artistTag[i] <- tagName
    }

    nFetched <- i

    Sys.sleep(0.25) # reduce frequency of requests to not overload server

  }

  # combine data, keeping only the artists that were actually requested
  if (nFetched > 0) {
    artistTagInfo <- rbind(newTagInfo[seq_len(nFetched), , drop = FALSE],
                           artistTagInfo)
  }
} else {
  print("No new artist(s) found.")
}

# 2.4 Save Data to Google Sheet

In [None]:
# Write both data frames back to the first worksheet of their Google Sheets.
write_sheet(scrobbleData, ss = scrobbleDataID, sheet = 1)
write_sheet(artistTagInfo, ss = artistTagID, sheet = 1)