In [1]:
# Set the global `warn` option to -1, which makes R ignore every warning for
# the rest of the session.
# NOTE(review): this also hides warnings raised by the code that follows
# (for example from date parsing or CRS handling) -- confirm that silencing
# them session-wide is intended rather than fixing or locally suppressing
# specific warnings.
options(warn = -1)

In [2]:
library(readxl)
library(lubridate)
library(rgdal)


Attaching package: 'lubridate'


The following objects are masked from 'package:base':

    date, intersect, setdiff, union


Loading required package: sp

rgdal: version: 1.5-23, (SVN revision 1121)
Geospatial Data Abstraction Library extensions to R successfully loaded
Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
Path to GDAL shared files: C:/Users/clid1852/Documents/R/win-library/4.0/rgdal/gdal
GDAL binary built with GEOS: TRUE 
Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
Path to PROJ shared files: C:/Users/clid1852/Documents/R/win-library/4.0/rgdal/proj
PROJ CDN enabled: TRUE
Linking to sp version:1.4-5
Overwritten PROJ_LIB was C:/Users/clid1852/Documents/R/win-library/4.0/rgdal/proj



In [3]:
# Folder that holds the bicycle summary tables. The trailing slash matters:
# file names are appended to it with paste0().
inpath <- "T:/Data/COUNTS/Nonmotorized Counts/Summary Tables/Bicycle/"

In [4]:
# Read the hourly bicycle count table.
hourly_csv <- paste0(inpath, "Bicycle_HourlyForTableau.csv")
data <- read.csv(hourly_csv)

In [5]:
colnames(data)

In [6]:
# Parse the ISO-formatted date text into Date objects.
data[["Date"]] <- as.Date(data[["Date"]], format = "%Y-%m-%d")

In [7]:
c(nrow(data), ncol(data))

In [8]:
# Number of distinct UniqueId values.
sum(!duplicated(data$UniqueId))

In [9]:
# Read the count-location lookup table.
locdata_csv <- "T:/Data/COUNTS/Nonmotorized Counts/Supporting Data/Supporting Bicycle Data/CountLocationInformation.csv"
locdata <- read.csv(locdata_csv)

In [10]:
colnames(locdata)

In [11]:
# Number of distinct Location values in the hourly table.
sum(!duplicated(data$Location))

In [12]:
# use only the total direction
# which() drops rows whose Direction is NA; plain logical subsetting would
# instead turn each of them into an all-NA row.
data1 <- data[which(data$Direction == "Total"), ]

In [13]:
# if the most recent year is not complete, remove it first
# (which() again keeps rows with a missing Year from becoming all-NA rows)
data1 <- data1[which(data1$Year != 2022), ]

In [14]:
# Keep only rows where ObsHours equals 24.
data1 <- data1[which(data1$ObsHours == 24), ]

In [15]:
# Relabel September rows as "Fall". Assigning through an index leaves every
# other Season value untouched, including rows where MonthDesc is NA
# (ifelse() would have set Season to NA there).
data1$Season[which(data1$MonthDesc == "September")] <- "Fall"

In [16]:
# Drop the hourly columns. A logical mask is used because `-which(...)`
# selects zero columns when none of the names is present.
data2 <- data1[, !(names(data1) %in% c("Hour", "Hourly_Count")), drop = FALSE]

In [17]:
# Inspect how many rows have a missing Date.
dim(data2[is.na(data2$Date), ])

In [18]:
# Keep the first row seen for each Date.
# NOTE(review): this de-duplicates on Date only, so rows from different
# locations on the same date collapse to one -- confirm that is intended.
data3 <- data2[!duplicated(data2$Date), ]

In [19]:
dim(data2)

In [20]:
dim(data3)

In [21]:
# Columns of the location table that get attached to the aggregated output.
locvars <- c(
  "Location", "Latitude", "Longitude", "Site_Name",
  "DoubleCountLocation", "IsOneway", "OnewayDirection",
  "IsSidewalk"
)

In [22]:
# Load the MPO_Bound layer; its CRS is the reprojection target in df2spdf().
MPOBound <- rgdal::readOGR(dsn = "V:/Data/Transportation", layer = "MPO_Bound")

OGR data source with driver: ESRI Shapefile 
Source: "V:\Data\Transportation", layer: "MPO_Bound"
with 1 features
It has 3 fields


In [23]:
# Convert a data frame with longitude/latitude columns into a
# SpatialPointsDataFrame.
#
# Args:
#   df:           data frame holding the point attributes and coordinates.
#   lon_col_name: name of the longitude column in `df`.
#   lat_col_name: name of the latitude column in `df`.
#   trans:        if TRUE, reproject the points from WGS84 lon/lat to the
#                 target CRS.
#   target_crs:   optional sp CRS object to reproject to. When NULL (the
#                 default) the CRS of the global `MPOBound` layer is used,
#                 so `MPOBound` must exist in that case.
#
# Returns:
#   A SpatialPointsDataFrame carrying every column of `df` as attributes.
df2spdf <- function(df, lon_col_name, lat_col_name, trans = TRUE,
                    target_crs = NULL){
  # Fail early with a readable message instead of an obscure subsetting error.
  missing_cols <- setdiff(c(lon_col_name, lat_col_name), names(df))
  if (length(missing_cols) > 0) {
    stop("df2spdf: column(s) not found in df: ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  lonlat <- sp::CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
  xy <- data.frame(df[, c(lon_col_name, lat_col_name)])
  coordinates(xy) <- c(lon_col_name, lat_col_name)
  proj4string(xy) <- lonlat
  spdf <- sp::SpatialPointsDataFrame(coords = xy, data = df)

  if (trans) {
    if (is.null(target_crs)) {
      # Reuse the layer's CRS object directly rather than rebuilding it from
      # its proj4 text, so any WKT definition stored with it is kept.
      target_crs <- slot(MPOBound, "proj4string")
    }
    spdf <- spTransform(spdf, target_crs)
  }
  return(spdf)
}

In [24]:
# Output folder for the aggregated CSV files and shapefile layers.
path <- "T:/DCProjects/StoryMap/BikeCounting/BikeCounts/Output"

In [25]:
head(data$Period)

In [26]:
# Daily totals: sum the hourly counts for every Date/Location pair,
# ignoring missing hourly values.
aggdata <- aggregate(x = list(DailyCounts = data1$Hourly_Count),
                     by = list(Date = data1$Date, Location = data1$Location),
                     FUN = sum, na.rm = TRUE)

In [27]:
# Calendar attributes, one row per distinct combination found in data1.
datedata <- unique(data1[, c("Date", "Year", "Month", "MonthDesc", "Season",
                             "Weekday", "IsHoliday", "UoInSession",
                             "IsSpecialEvent")])
# The merge below joins on Date alone, so a date that appears with more than
# one attribute combination would duplicate its daily-count rows. Keep the
# first attribute row per date and report when anything had to be dropped.
extra_date_rows <- duplicated(datedata$Date)
if (any(extra_date_rows)) {
  message(sum(extra_date_rows),
          " conflicting date attribute row(s) dropped before merging")
  datedata <- datedata[!extra_date_rows, ]
}

In [28]:
# Attach the calendar attributes to every daily total.
aggdata <- merge(aggdata, datedata, by = "Date")

In [29]:
# Average the daily counts per location within each level of `var`, attach
# the location attributes, and write the result as a CSV and a shapefile.
#
# Reads the globals `aggdata`, `locdata`, `locvars` and `path`, and calls
# df2spdf() to build the spatial layer.
#
# Args:
#   var:    name of the aggdata column to group by (e.g. "Weekday").
#   year:   year to keep when `byyear` is TRUE.
#   byyear: if TRUE, only rows of aggdata whose Year equals `year` are used.
#
# Output files are named "DailyCounts_<var>" and do not include the year, so
# repeated calls with the same `var` overwrite each other.
# NOTE(review): shapefile attribute names are limited in length, so longer
# column names are presumably shortened by the driver -- confirm in the output.
agg_data <- function(var="Weekday", year=2021, byyear=TRUE){
    if (!is.character(var) || length(var) != 1L || !(var %in% names(aggdata))) {
        stop("agg_data: `var` must name a single column of aggdata",
             call. = FALSE)
    }
    if(byyear){
        # which() drops rows with a missing Year instead of producing NA rows.
        aggdata <- aggdata[which(aggdata$Year == year), ]
    }
    if (nrow(aggdata) == 0L) {
        stop("agg_data: no daily counts left to aggregate", call. = FALSE)
    }

    groups <- list(Category = aggdata[[var]], Location = aggdata$Location)
    outdata <- aggregate(x = list(DailyCounts = aggdata$DailyCounts),
                         by = groups, FUN = mean)
    # N = number of daily totals behind each mean. Computed with the same
    # grouping, so the rows line up with `outdata` one-to-one; this replaces
    # a nested loop whose logical masks miscounted when a key was NA.
    n_days <- aggregate(x = list(N = aggdata$DailyCounts),
                        by = groups, FUN = length)
    stopifnot(identical(outdata$Category, n_days$Category),
              identical(outdata$Location, n_days$Location))
    outdata$N <- n_days$N

    outdata <- merge(outdata, locdata[, locvars], by = 'Location')
    # Keep N as the last column, as in the previously written files.
    outdata <- outdata[, c(setdiff(names(outdata), "N"), "N")]
    names(outdata)[which(names(outdata)=='Category')] <- var

    write.csv(outdata, paste0(path, "/DailyCounts_", var,".csv"), row.names = FALSE)
    print(paste("Got the aggregated data by", var))
    outspdf <- df2spdf(outdata, 'Longitude', 'Latitude')
    writeOGR(outspdf, dsn=path, layer=paste0("DailyCounts_", var),
         driver="ESRI Shapefile", overwrite_layer=TRUE)
    print(paste("Got the spatial aggregated data by", var))
}

In [30]:
# Single run with the function's default arguments spelled out.
agg_data(var = "Weekday", year = 2021, byyear = TRUE)

[1] "Got the aggregated data by Weekday"
[1] "Got the spatial aggregated data by Weekday"


In [31]:
# Write one CSV/shapefile pair per grouping variable.
group_vars <- c("Weekday", "Month", "Season")
for (var in group_vars) {
    agg_data(var = var)
}

[1] "Got the aggregated data by Weekday"
[1] "Got the spatial aggregated data by Weekday"
[1] "Got the aggregated data by Month"
[1] "Got the spatial aggregated data by Month"
[1] "Got the aggregated data by Season"
[1] "Got the spatial aggregated data by Season"
