In [None]:
#load the necessary packages
library(tidyverse)
library(ggrepel)
library(forcats)

In [None]:
# Read the September 2022 Bluebikes trip records from the local CSV export
september_bluebikes <- read_csv(file = "~/hodp/202209-bluebikes-tripdata.csv")

In [None]:
# IDs of the Bluebikes stations treated as "near Harvard" in this analysis.
# Stations covered: Graham and Parks, Quad, Law School, SEAS Cruft, Gund Hall,
# Verizon Innovation, Cambridge Main Library, Harvard Housing, Dewolfe,
# HS Mass Ave, HS Brattle, HKS.
# NOTE(review): the names above presumably correspond to the IDs in order;
# verify against the station list before relying on that mapping.
# Kept in a single vector so the filters below can never drift apart.
harvard_station_ids <- c(456, 104, 89, 108, 110, 221, 96, 87, 97, 74, 73, 70)

# Trips that start OR end at one of the Harvard-area stations
harvard_only <- september_bluebikes %>%
  filter(
    `start station id` %in% harvard_station_ids |
      `end station id` %in% harvard_station_ids
  )

# Subset of the above: trips that start at a Harvard-area station
harvard_start_only <- harvard_only %>%
  filter(`start station id` %in% harvard_station_ids)

# Drop long-duration outliers: keep only trips with tripduration below 3000
# (the plots label this column in seconds, i.e. 3000 s = 50 minutes), and give
# the start-station column a syntactic name for easier use in aes().
harvard_start_only_n_o <- harvard_start_only %>%
  rename(start_name = `start station name`) %>%
  filter(tripduration < 3000)

# HARVARD START ONLY
# Box plot of trip duration per start station, with stations ordered by their
# median trip duration. coord_flip() draws the station names on the vertical
# axis so the long labels stay readable.
ggplot(data = harvard_start_only_n_o,
       mapping = aes(x = reorder(start_name, tripduration,
                                 FUN = median, na.rm = TRUE),
                     y = tripduration, color = start_name)) +
  geom_boxplot() +
  coord_flip() +
  # Wrap long station names so they do not squeeze the plotting area
  scale_x_discrete(labels = function(x) str_wrap(x, width = 37)) +
  # Colour only distinguishes the boxes; the axis already names each station
  guides(color = "none") +
  labs(x = "Start Station Name",
       y = "Trip Duration (sec.)",
       title = "Duration of Bluebike Trips Originating Near Harvard Yard, September 2022",
       caption = "Ordered by median trip duration;\nIncludes only trips with durations of 50 minutes or less") +
  # theme_bw() is a complete theme and replaces any theme() settings added
  # before it, so it must come first; the overrides below are layered on top.
  theme_bw() +
  theme(text = element_text(size = 8),
        axis.text.y = element_text(size = 6))

In [None]:
# One duration histogram per Harvard-area start station (outliers already
# removed), laid out in a three-column grid of facets.
harvard_start_only_n_o %>%
  ggplot(aes(x = tripduration, fill = start_name)) +
  geom_histogram(bins = 19) +
  facet_wrap(vars(start_name), ncol = 3) +
  # Fill colour is redundant with the facet strip labels, so hide its legend
  guides(fill = "none") +
  theme_bw() +
  theme(
    strip.text = element_text(size = 5),
    plot.title = element_text(size = 12),
    axis.text.y = element_text(size = 6),
    axis.text.x = element_text(size = 6)
  ) +
  labs(
    title = "Bluebike Trips of Various Durations Originating Near Harvard Yard, September 2022",
    caption = "Includes only trips with durations of 50 minutes or less",
    x = "Trip Duration (sec.)",
    y = "# of Bluebike Trips"
  )

In [None]:
# overall durations, outliers removed, starting at Harv only
# Single histogram of trip durations for all trips that start at a
# Harvard-area station.
ggplot(data = harvard_start_only_n_o,
       mapping = aes(x = tripduration)) +
  # bins = 30 is the value geom_histogram() falls back to when neither `bins`
  # nor `binwidth` is given; stating it explicitly keeps the plot unchanged
  # while silencing the "using `bins = 30`" message.
  geom_histogram(bins = 30, fill = "deepskyblue2", color = "orange") +
  labs(y = "# of Bluebike Trips",
       x = "Trip Duration (sec.)",
       caption = "Includes only trips with durations of 50 minutes or less",
       title = "Summary of Durations of All Bluebike Trips Originating Near Harvard Yard, September 2022") +
  # Custom blue panel with orange grid lines.
  # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` in element_rect() /
  # element_line() in favour of `linewidth`; switch once the minimum ggplot2
  # version for this notebook is confirmed.
  theme(panel.background = element_rect(fill = "dodgerblue3",
                                colour = "dodgerblue3",
                                size = 0.5, linetype = "solid"),
        panel.grid.major = element_line(size = 0.5, linetype = 'solid',
                                colour = "orange"),
        panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
                                colour = "orange"),
        plot.title = element_text(size = 11))

In [None]:
# HARVARD ONLY
# removing outliers: keep trips with tripduration below 3000 (the axis label
# below gives the unit as seconds, i.e. 3000 s = 50 minutes)
harvard_only_n_o <- harvard_only %>%
  filter(tripduration < 3000)

# overall durations, outliers removed, starting OR ending near Harv
ggplot(data = harvard_only_n_o,
       mapping = aes(x = tripduration)) +
  # bins = 30 is the value geom_histogram() falls back to when neither `bins`
  # nor `binwidth` is given; stating it explicitly keeps the plot unchanged
  # while silencing the "using `bins = 30`" message.
  geom_histogram(bins = 30, fill = "orange", color = "deepskyblue2") +
  labs(y = "# of Bluebike Trips",
       x = "Trip Duration (sec.)",
       caption = "Includes only trips with durations of 50 minutes or less",
       title = "Durations of All Bluebike Trips Originating Or Ending Near Harvard Yard, September 2022") +
  # Custom blue panel with light-blue grid lines.
  # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` in element_rect() /
  # element_line() in favour of `linewidth`; switch once the minimum ggplot2
  # version for this notebook is confirmed.
  theme(panel.background = element_rect(fill = "dodgerblue3",
                                colour = "dodgerblue3",
                                size = 0.5, linetype = "solid"),
        panel.grid.major = element_line(size = 0.5, linetype = 'solid',
                                colour = "deepskyblue2"),
        panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
                                colour = "deepskyblue2"),
        plot.title = element_text(size = 11))

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=c30e3ece-dcda-46f9-a70f-078ca3c3dec4' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>