# Get geographical information from lat/long in data

**_Objective:_** Using the lat/long values in the data, obtain the state and county names.

## Required libraries

In [1]:
library(sp)
library(maps)
library(rgeos)
library(maptools)

"package 'rgeos' was built under R version 3.6.3"rgeos version: 0.5-5, (SVN revision 640)
 GEOS runtime version: 3.8.0-CAPI-1.13.1 
 Linking to sp version: 1.4-5 
 Polygon checking: TRUE 

"package 'maptools' was built under R version 3.6.3"Checking rgeos availability: TRUE


## Required function

In [2]:
latlong2county <- function(pointsDF) {
    # Adapted from:
    #     https://stackoverflow.com/questions/13316185/r-convert-zipcode-or-lat-long-to-county
    # Prepare SpatialPolygons object with one SpatialPolygon
    # per county
    counties <- map('county', fill=TRUE, col="transparent", plot=FALSE)
    IDs <- sapply(strsplit(counties$names, ":"), function(x) x[1])
    counties_sp <- map2SpatialPolygons(counties, IDs=IDs,
                     proj4string=CRS("+proj=longlat +datum=WGS84"))

    # Convert pointsDF to a SpatialPoints object 
    pointsSP <- SpatialPoints(pointsDF, 
                    proj4string=CRS("+proj=longlat +datum=WGS84"))

    # Use 'over' to get _indices_ of the Polygons object containing each point 
    indices <- over(pointsSP, counties_sp)

    # Return the county names of the Polygons object containing each point
    countyNames <- sapply(counties_sp@polygons, function(x) x@ID)
    countyNames[indices]
}

## Bring the data in

In [3]:
setwd("../data/interim/")

In [4]:
dat <- read.csv("interim_no_county_raw_ufo_data_20230429_2035.csv")

## Get the state and county names

In [5]:
obs_points <-data.frame(long=dat$city_longitude, lat=dat$city_latitude)
state_county <- latlong2county(obs_points)
start_time <- Sys.time()
num_elements <- length(state_county)
state <- NULL
county <- NULL
for (i in 1:num_elements) {
    if (grepl(",", state_county[i])) {
        obs <- strsplit(state_county, split=",")[[i]]
        state[i] <- obs[1]
        county[i] <- obs[2]
    } else {
        state[i] <- "NA"
        county[i] <- "NA"
    }
}
print(Sys.time() - start_time)

Time difference of 1.591171 hours


In [6]:
dat <- cbind(dat, state, county)
rm(state_county, start_time, num_elements, state, county, i, obs)

## Write the new dataframe to file

In [7]:
file_name_root <- "interim_with_county_info_ufo_"
date_time_vector <- strsplit(as.character(Sys.time()), " ")
date_clean <- gsub("-", "", date_time_vector[[1]][1])
time_clean <- substr(gsub(":", "", date_time_vector[[1]][2]), 1, 4)
file_ext <- ".csv"
file_name <- paste(file_name_root, date_clean, "_", time_clean, file_ext, sep="")
file_name
rm(file_name_root, date_time_vector, date_clean, time_clean, file_ext)

In [8]:
write.csv(dat, file_name, row.names=FALSE)