generated from chris-s-friedman/template-repo
/
data_exploration.Rmd
55 lines (47 loc) · 1.37 KB
/
data_exploration.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
---
title: "Exploration of Car Crash Data"
output: html_notebook
---
# Load the data
```{r}
library(readr)
# Show the working directory so the relative data path below can be checked.
print(getwd())

# Folder holding the crash CSV exports, relative to the working directory.
crash_data_dir <- "../source_data/crash_data"

# Names of all files ending in "csv" anywhere beneath the data folder.
all_files <- list.files(
  path = crash_data_dir,
  pattern = "csv$",
  recursive = TRUE
)
#' Read all statewide CSV files of one crash-data table.
#'
#' Searches `data_dir` recursively for files named
#' `<file_type>_<4 digits>_Statewide.csv` and passes every match to
#' `read_csv()` in a single call (readr >= 2.0 reads and combines a vector
#' of file paths).
#'
#' @param file_type Prefix of the file names to load, e.g. `"CRASH"`.
#' @param data_dir Directory to search. Defaults to the notebook-level
#'   `crash_data_dir`, so existing one-argument calls behave as before.
#' @return Whatever `read_csv()` returns for the matched files.
read_crash_file <- function(file_type, data_dir = crash_data_dir) {
  # Anchor both ends and escape the dot so that only exact file names match;
  # the unanchored form would also accept e.g. "..._Statewide.csv.bak".
  file_pattern <- paste0("^", file_type, "_[[:digit:]]{4}_Statewide\\.csv$")

  matched_files <- list.files(
    path = data_dir,
    pattern = file_pattern,
    recursive = TRUE,
    full.names = TRUE
  )

  # Fail early with a clear message instead of handing read_csv() an empty
  # vector of paths.
  if (length(matched_files) == 0) {
    stop(
      "No files matching '", file_pattern, "' found under '", data_dir, "'",
      call. = FALSE
    )
  }

  read_csv(matched_files)
}
# Only the CRASH table is loaded at the moment. Each of the other tables can
# be loaded by uncommenting its line below.
# commveh <- read_crash_file("COMMVEH")
crash <- read_crash_file("CRASH")
# cycle <- read_crash_file("CYCLE")
# flag <- read_crash_file("FLAG")
# person <- read_crash_file("PERSON")
# roadway <- read_crash_file("ROADWAY")
# trailveh <- read_crash_file("TRAILVEH")
# vehicle <- read_crash_file("VEHICLE")
```
# Visualize the data with regression line
```{r}
library(ggplot2)
library(dplyr)
library(lubridate)
# Count crashes per calendar month and draw a smoothed trend through the
# monthly counts, then save the figure as a PNG.
crash_trend_plot <- crash %>%
  # Build the month's date from its integer parts. Pasting year and month
  # without a separator gives strings such as "20201" when the month is not
  # zero-padded, which a year-month parser may fail to read.
  mutate(
    CRASH_DATE = make_date(
      year = as.integer(CRASH_YEAR),
      month = as.integer(CRASH_MONTH)
    )
  ) %>%
  count(CRASH_DATE) %>%
  ggplot(aes(x = CRASH_DATE, y = n)) +
  geom_smooth()

# Display the plot in the notebook output.
crash_trend_plot

# Pass the plot explicitly rather than relying on ggsave()'s last_plot()
# default, so the saved file cannot silently be some other figure.
ggsave("pandemic_crash_regression.png", plot = crash_trend_plot)
```
# Visualize data as a line plot
```{r}
# Count crashes per calendar month and draw the monthly counts as a line,
# then save the figure as a PNG.
crash_line_plot <- crash %>%
  # Build the month's date from its integer parts. Pasting year and month
  # without a separator gives strings such as "20201" when the month is not
  # zero-padded, which a year-month parser may fail to read.
  mutate(
    CRASH_DATE = make_date(
      year = as.integer(CRASH_YEAR),
      month = as.integer(CRASH_MONTH)
    )
  ) %>%
  count(CRASH_DATE) %>%
  ggplot(aes(x = CRASH_DATE, y = n)) +
  geom_line()

# Display the plot in the notebook output.
crash_line_plot

# Pass the plot explicitly rather than relying on ggsave()'s last_plot()
# default, so the saved file cannot silently be some other figure.
ggsave("pandemic_crash_line.png", plot = crash_line_plot)
```