In [115]:
options(warn = -1)
library(stringr)
library(rgdal)
library(lubridate)
library(dplyr)
library(tidyverse)

-- Attaching packages ------------------------------------------------------------------------------- tidyverse 1.3.1 --

v ggplot2 3.3.3     v readr   1.4.0
v tibble  3.0.4     v purrr   0.3.4
v tidyr   1.1.3     v forcats 0.5.1

-- Conflicts ---------------------------------------------------------------------------------- tidyverse_conflicts() --
x lubridate::as.difftime() masks base::as.difftime()
x lubridate::date()        masks base::date()
x dplyr::filter()          masks stats::filter()
x lubridate::intersect()   masks base::intersect()
x dplyr::lag()             masks stats::lag()
x lubridate::setdiff()     masks base::setdiff()

In [19]:
# Directory whose entries are listed and then read with read.csv() below.
path <- "T:/MPO/Bike&Ped/BikeCounting/StoryMap/BikeShare/Data/Output/review"

In [20]:
# Every entry in `path` (file names only, no directory prefix). Each one is
# later handed to read.csv().
# NOTE(review): no pattern filter is applied, so a non-CSV file or a
# sub-directory in this folder would break the load -- confirm folder contents.
files <- list.files(path)

In [14]:
# Columns kept from every trip export, in the order they appear in `df`.
selected_vars <- c(
  # who / which trip
  'User.ID', 'Route.ID',
  # trip start
  'Start.Hub', 'Start.Latitude', 'Start.Longitude', 'Start.Date', 'Start.Time',
  # trip end
  'End.Hub', 'End.Latitude', 'End.Longitude', 'End.Date', 'End.Time',
  # bike and trip measures
  'Bike.ID', 'Bike.Name', 'Distance..Miles.', 'Duration'
)

In [22]:
# Read every file in `files`, keep only `selected_vars`, and stack the pieces
# into one data frame. The pieces are collected first and bound once, so the
# accumulated frame is not re-copied on every iteration and the first file
# needs no special case.
df <- do.call(
  rbind,
  lapply(files, function(file){
    #print(file)
    read.csv(file.path(path, file))[, selected_vars]
  })
)

In [24]:
# Origin-destination label of the form "<Start.Hub> - <End.Hub>".
# Passing the separator through `sep` keeps the result zero-length when `df`
# has no rows (a literal "-" argument would be recycled into a single " - ").
df$Path.ID <- paste(df$Start.Hub, df$End.Hub, sep = " - ")

In [25]:
# Convert "H:M:S" duration strings to minutes (h*60 + m + s/60).
#
# x: character vector (factors are coerced) of colon-separated durations.
# Returns a numeric vector the same length as x. A missing component (NA
# input or fewer than three fields) yields NA for that element; empty input
# yields numeric(0).
toMinutes <- function(x){
  # Split each string once instead of once per component.
  parts <- strsplit(as.character(x), ":", fixed = TRUE)

  vapply(parts, function(p){
    h <- as.numeric(p[1])
    m <- as.numeric(p[2])
    s <- as.numeric(p[3])
    h*60 + m + s/60
  }, numeric(1), USE.NAMES = FALSE)
}

In [26]:
# Trip duration in minutes, one value per row. vapply() guarantees a numeric
# vector even when `df` has no rows (unlist(lapply()) would give NULL there
# and the column would not be created).
df$Minutes <- vapply(df$Duration, toMinutes, numeric(1), USE.NAMES = FALSE)

In [27]:
# Reshape a one-row-per-trip frame into one row per trip end.
#
# df: data frame holding the Start.* / End.* columns plus Route.ID, Bike.ID,
#     User.ID, Path.ID, Distance..Miles. and Minutes.
# Returns a data frame with twice as many rows: the start records first
# (OrgDst = "Origin"), followed by the end records (OrgDst = "Destination"),
# both under a common set of column names.
organize_data <- function(df){
  # Column names shared by both halves of the result.
  long_names <- c("RouteID", "BikeID", 'UserID',
                  "Location", "Latitude", "Longitude",
                  "Date", "Time", 'PathID',
                  'Distance', 'Minutes')

  start_cols <- c('Route.ID', 'Bike.ID', 'User.ID',
                  'Start.Hub', 'Start.Latitude', 'Start.Longitude',
                  'Start.Date', 'Start.Time', 'Path.ID',
                  'Distance..Miles.', 'Minutes')
  end_cols <- c('Route.ID', 'Bike.ID', 'User.ID', 'End.Hub',
                'End.Latitude', 'End.Longitude',
                'End.Date','End.Time', 'Path.ID',
                'Distance..Miles.', 'Minutes')

  # Pull one side of the trip, rename it, and tag its role.
  take_side <- function(cols, role){
    side <- df[, cols]
    names(side) <- long_names
    side$OrgDst <- rep(role, nrow(side))
    side
  }

  rbind(take_side(start_cols, "Origin"),
        take_side(end_cols, "Destination"))
}

In [28]:
# focus on CLMPO
# Keep trips whose start AND end both fall inside the bounding box, then drop
# trips that start and end at exactly the same coordinates.
# dplyr::filter() drops rows whose condition is NA, so trips with missing
# coordinates no longer turn into all-NA rows (base logical indexing with NA
# keeps them as empty records).
mdf <- df %>%
  filter(
    Start.Latitude >= 43.97865 & Start.Latitude <= 44.16123,
    Start.Longitude >= -123.2321 & Start.Longitude <= -122.8281,
    End.Latitude >= 43.97865 & End.Latitude <= 44.16123,
    End.Longitude >= -123.2321 & End.Longitude <= -122.8281
  ) %>%
  filter(!(Start.Longitude == End.Longitude & Start.Latitude == End.Latitude))

In [29]:
# Long format: one "Origin" and one "Destination" record per filtered trip.
ndf <- organize_data(mdf)

In [30]:
# Parse the ISO-style date strings into Date objects.
ndf$Date <- as.Date(ndf$Date, format = "%Y-%m-%d")

In [31]:
# Full month name for each record.
# NOTE(review): months() returns names in the session locale; the later
# comparisons against English names ("December", ...) assume an English locale.
ndf$Month <- months(ndf$Date)

In [32]:
# Meteorological season of each record (Dec-Feb Winter, Mar-May Spring,
# Jun-Aug Summer, otherwise Fall).
# Derived from the month number of `Date` rather than the month name, so it
# does not depend on the session locale, and a missing date stays NA instead
# of falling through to "Fall".
ndf$Season <- case_when(
  is.na(ndf$Date) ~ NA_character_,
  month(ndf$Date) %in% c(12, 1, 2) ~ "Winter",
  month(ndf$Date) %in% c(3, 4, 5) ~ "Spring",
  month(ndf$Date) %in% c(6, 7, 8) ~ "Summer",
  TRUE ~ "Fall"
)

In [4]:
# Convert "H:M[:S]" clock strings to a decimal hour (hours + minutes/60);
# any seconds field is ignored.
#
# x: character vector (factors are coerced) of colon-separated times.
# Returns a numeric vector the same length as x; NA input gives NA and empty
# input gives numeric(0).
convert_time_to_hour <- function(x){
    parts <- strsplit(as.character(x), ":", fixed = TRUE)
    vapply(parts, function(p){
        as.numeric(p[1]) + as.numeric(p[2])/60
    }, numeric(1), USE.NAMES = FALSE)
}

In [43]:
# User IDs whose records are removed from the analysis below.
excludedIDs <- c(
  717565, 742339, 764038,
  819845, 1228447, 1354709,
  1897910, 2184703, 2207685
)

In [49]:
# Remove every record that belongs to an excluded user.
ndf <- ndf[is.na(match(ndf$UserID, excludedIDs)), ]

In [50]:
# Working copy of the long-format records; the Hour column is added to this
# copy, not to `ndf`.
data <- ndf

In [51]:
# Decimal hour of day for each record. vapply() pins the result to an unnamed
# numeric vector (sapply() names it after the time strings and returns a list
# when there are no rows).
data$Hour <- vapply(data$Time, convert_time_to_hour, numeric(1), USE.NAMES = FALSE)

In [52]:
# nt - night time
# Records with a decimal hour in [19, 24] or [0, 2]. dplyr::filter() drops
# rows whose Hour is NA rather than keeping them as all-NA rows.
ntdata <- data %>%
  filter((Hour >= 19 & Hour <= 24) | (Hour >= 0 & Hour <= 2))

In [53]:
# Calendar year of each night-time record (lubridate::year).
ntdata$Year <- year(ntdata$Date)

In [54]:
# Earliest and latest non-missing date among the night-time records.
range(ntdata$Date, na.rm = TRUE)

In [55]:
# Row/column count before the date-window filter.
dim(ntdata)

In [56]:
# Restrict to 2018-06-01 .. 2022-05-31 (inclusive). The bounds are explicit
# Date values, and filter() drops rows with a missing Date instead of keeping
# them as all-NA rows.
ntdata <- ntdata %>%
  filter(Date >= as.Date("2018-06-01"), Date <= as.Date("2022-05-31"))

In [57]:
# Row/column count after the date-window filter.
dim(ntdata)

In [67]:
# "<Year>-<Month>-<Season>" key; split again by create_year_season().
ntdata$YearMonth <- with(ntdata, paste0(Year, "-", Month, "-", Season))

In [68]:
# Sample key used to try out the split below.
x <- ntdata$YearMonth[1]

In [69]:
# Split the sample key on "-" into its parts.
s <- str_split(x, "-")[[1]]

In [70]:
# Show the split parts of the sample key.
s

In [71]:
# Build a "<year><season>" label from "<year>-<month>-<season>" keys.
#
# December is counted with the following year, so that a winter spanning
# December through February gets a single label.
#
# x: character vector of keys as produced by paste0(Year, "-", Month, "-", Season).
# Returns a character vector the same length as x; a key without a month
# field (e.g. NA) yields NA instead of raising an error.
# NOTE(review): the month is matched against the English name "December".
create_year_season <- function(x){
    parts <- strsplit(as.character(x), "-", fixed = TRUE)
    vapply(parts, function(p){
        if(is.na(p[2])){
            return(NA_character_)
        }
        if(p[2] == 'December'){
            paste0(as.numeric(p[1]) + 1, p[3])
        }else{
            paste0(p[1], p[3])
        }
    }, character(1), USE.NAMES = FALSE)
}

In [73]:
# Season label with December rolled into the following year. vapply() pins
# the result to an unnamed character vector (sapply() names it after the keys
# and returns a list when there are no rows).
ntdata$YearSeason <- vapply(ntdata$YearMonth, create_year_season, character(1), USE.NAMES = FALSE)

In [89]:
# Number of distinct RouteIDs per (YearSeason, Season) among night-time records.
nt_by_year_season <- aggregate(
  x = list(NoTrips = ntdata[["RouteID"]]),
  by = list(YearSeason = ntdata[["YearSeason"]],
            Season = ntdata[["Season"]]),
  FUN = function(ids) length(unique(ids))
)

In [90]:
# First four characters of the label, i.e. the (season-adjusted) year.
# substr() is already vectorised, so no per-element apply is needed.
nt_by_year_season$Year <- substr(nt_by_year_season$YearSeason, 1, 4)

In [91]:
# Display the per-year-season trip counts.
nt_by_year_season

YearSeason,Season,NoTrips,Year
<chr>,<chr>,<int>,<chr>
2018Fall,Fall,8973,2018
2019Fall,Fall,7857,2019
2020Fall,Fall,2557,2020
2021Fall,Fall,6520,2021
2019Spring,Spring,4675,2019
2020Spring,Spring,3722,2020
2021Spring,Spring,6670,2021
2022Spring,Spring,10862,2022
2018Summer,Summer,14010,2018
2019Summer,Summer,11215,2019


In [80]:
# Mean of the per-year-season trip counts within each season.
nt_by_season <- aggregate(
  x = list(AvgNoTrips = nt_by_year_season[["NoTrips"]]),
  by = list(Season = nt_by_year_season[["Season"]]),
  FUN = mean
)

In [81]:
# Display the seasonal averages.
nt_by_season

Season,AvgNoTrips
<chr>,<dbl>
Fall,6476.75
Spring,6482.25
Summer,8933.75
Winter,3800.0


In [92]:
# Year-seasons treated as the Covid period: spring 2020 through winter 2021.
Covid_dt <- filter(
  nt_by_year_season,
  YearSeason %in% c('2020Spring', '2020Summer', '2020Fall', '2021Winter')
)

In [96]:
# Every year-season outside the Covid period (complement of Covid_dt).
ExCovid_dt <- filter(
  nt_by_year_season,
  !(YearSeason %in% c('2020Spring', '2020Summer', '2020Fall', '2021Winter'))
)

In [94]:
# Seasonal mean trip count within the Covid period, then display it.
nt_by_season_covid <- aggregate(
  x = list(AvgNoTrips = Covid_dt[["NoTrips"]]),
  by = list(Season = Covid_dt[["Season"]]),
  FUN = mean
)
nt_by_season_covid

Season,AvgNoTrips
<chr>,<dbl>
Fall,2557
Spring,3722
Summer,3811
Winter,1526


In [97]:
# Seasonal mean trip count outside the Covid period, then display it.
nt_by_season_Excovid <- aggregate(
  x = list(AvgNoTrips = ExCovid_dt[["NoTrips"]]),
  by = list(Season = ExCovid_dt[["Season"]]),
  FUN = mean
)
nt_by_season_Excovid

Season,AvgNoTrips
<chr>,<dbl>
Fall,7783.333
Spring,7402.333
Summer,10641.333
Winter,4558.0


In [98]:
# Distinct RouteIDs per (YearSeason, Season, Longitude, Latitude).
nt_by_year_season_loc <- aggregate(
  x = list(NoTrips = ntdata[["RouteID"]]),
  by = list(YearSeason = ntdata[["YearSeason"]],
            Season = ntdata[["Season"]],
            Longitude = ntdata[["Longitude"]],
            Latitude = ntdata[["Latitude"]]),
  FUN = function(ids) length(unique(ids))
)

In [99]:
# Mean of the per-year-season counts for each (Season, Longitude, Latitude).
nt_by_season_loc <- aggregate(
  x = list(AvgNoTrips = nt_by_year_season_loc[["NoTrips"]]),
  by = list(Season = nt_by_year_season_loc[["Season"]],
            Longitude = nt_by_year_season_loc[["Longitude"]],
            Latitude = nt_by_year_season_loc[["Latitude"]]),
  FUN = mean
)

In [100]:
# Turn a data frame with longitude/latitude columns into an sp
# SpatialPointsDataFrame.
#
# df:           data frame; all of its columns become the attribute table.
# lon_col_name: name of the longitude column (string).
# lat_col_name: name of the latitude column (string).
# trans:        if TRUE (default), reproject from the WGS84 lon/lat CRS to
#               the spherical Mercator CRS defined below.
# Returns the SpatialPointsDataFrame. Stops with an error if either named
# column is absent from `df`.
df2spdf <- function(df, lon_col_name, lat_col_name, trans = TRUE){
  # Fail early with a clear message instead of an obscure sp error.
  coord_cols <- c(lon_col_name, lat_col_name)
  missing_cols <- coord_cols[!(coord_cols %in% names(df))]
  if(length(missing_cols) > 0){
    stop("Column(s) not found in df: ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  lonlat <- sp::CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
  xy <- data.frame(df[, coord_cols])
  sp::coordinates(xy) <- coord_cols
  sp::proj4string(xy) <- lonlat
  spdf <- sp::SpatialPointsDataFrame(coords = xy, data = df)
  if(trans){
    # NOTE(review): this PROJ string looks like Web Mercator (EPSG:3857) -- confirm.
    spdf <- sp::spTransform(spdf, sp::CRS("+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs +type=crs"))
  }
  spdf
}

In [102]:
# List the columns available in the night-time records.
names(ntdata)

In [116]:
# Both coordinates are needed to build points, so drop rows missing either one.
ntdata <- ntdata %>% drop_na(Longitude, Latitude)

In [121]:
# Point layer of the individual night-time records (reprojected, trans = TRUE).
ntdata.spdf <- df2spdf(ntdata, "Longitude", "Latitude")

In [123]:
# Both coordinates are needed to build points, so drop rows missing either one.
nt_by_season_loc <- nt_by_season_loc %>% drop_na(Longitude, Latitude)

In [125]:
# Point layer of the seasonal averages per location (reprojected, trans = TRUE).
nt_by_season.spdf <- df2spdf(nt_by_season_loc, "Longitude", "Latitude")

In [126]:
# Directory the shapefile layers are written to.
outpath <- "T:/MPO/Bike&Ped/BikeCounting/StoryMap/BikeShare/Output"

In [128]:
# Write the detailed night-time points as an ESRI Shapefile layer in
# `outpath`, replacing any existing layer of the same name.
# NOTE(review): rgdal has been retired from CRAN; consider migrating to
# sf::st_write() -- confirm against the deployed R environment.
writeOGR(ntdata.spdf, dsn=outpath, layer="Nighttime_Bike_Share_Detailed", 
         driver="ESRI Shapefile", overwrite_layer=TRUE)

In [129]:
# Write the per-location seasonal averages as an ESRI Shapefile layer in
# `outpath`, replacing any existing layer of the same name.
writeOGR(nt_by_season.spdf, dsn=outpath, layer="Nighttime_Bike_Share_by_Season", 
         driver="ESRI Shapefile", overwrite_layer=TRUE)