-
Notifications
You must be signed in to change notification settings - Fork 1
/
PhilaFire_ExploratoryAnalysis_BK.Rmd
159 lines (123 loc) · 5.2 KB
/
PhilaFire_ExploratoryAnalysis_BK.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
---
title: "PhilaFire Exploratory Analysis"
output: html_document
date: '2023-02-17'
---
# Exploratory Analysis (work in progress: do not run these chunks yet; not needed for now)
## Fire by Neighborhood
```{r Neighborhood Data}
# Read the neighborhood shapefile, reproject it to the CRS of `dat`, and keep
# only the MAPNAME, Shape_Leng, Shape_Area, and geometry columns.
# `dat` is defined outside this section (presumably the fire records — confirm).
neighborhoods <- st_read(
  "Neighborhoods_Philadelphia/Neighborhoods_Philadelphia.shp"
) %>%
  st_transform(st_crs(dat)) %>%
  dplyr::select(MAPNAME, Shape_Leng, Shape_Area, geometry)

# Spatial join of the neighborhood attributes onto `dat`.
# This may be the dataset used for spatial analysis.
fire_neighborhood <- st_join(dat, neighborhoods)
```
```{r}
# Count fires per neighborhood, chart the ten neighborhoods with the most
# fires, and map the counts on the neighborhood polygons.

# Work on a copy because fire_neighborhood may want to be preserved.
fire_neighborhood_1 <- fire_neighborhood

# Constant helper column; it is carried through count() so the result keeps a
# `fire` column.
fire_neighborhood_1$fire <- 1

fire_neighborhood_1 <- fire_neighborhood_1 %>%
  count(fire, MAPNAME, name = "count")

# Percentage of all joined records per neighborhood. The denominator is
# computed from the data rather than hard-coded.
# NOTE(review): the previous constant 20947 was presumably the number of fire
# records — confirm sum(count) matches the intended total.
fire_neighborhood_1 <- fire_neighborhood_1 %>%
  mutate(percent = (count / sum(count)) * 100)

# Top 10 neighborhoods by count. head() avoids the all-NA rows that `[1:10]`
# creates when there are fewer than 10 groups.
top_idx <- head(order(fire_neighborhood_1$count, decreasing = TRUE), 10)
top_neighborhoods <- fire_neighborhood_1[top_idx, ]

# Bar chart of top 10 neighborhoods. No fill aesthetic is mapped, so no fill
# scale is added.
ggplot(top_neighborhoods, aes(x = reorder(MAPNAME, count), y = count)) +
  geom_col() +
  labs(
    x = "Neighborhood",
    y = "Count",
    title = "Philadelphia Neighborhoods Experiencing The Most Fires"
  ) +
  coord_flip()

# Map of fire counts by neighborhood.
# fire_neighborhood_1 still carries the fire geometries (the earlier note
# reported MULTIPOINT), so the counts are detached from that geometry and
# joined back onto the `neighborhoods` polygons by name before plotting.
neighborhood_counts <- fire_neighborhood_1 %>%
  st_drop_geometry() %>%
  dplyr::select(MAPNAME, count, percent)

neighborhood_fire_map <- neighborhoods %>%
  left_join(neighborhood_counts, by = "MAPNAME")

# NOTE(review): `mapTheme` is added as an object here, as in the original; if
# it is defined as a function elsewhere this needs to be `mapTheme()` — confirm.
ggplot() +
  geom_sf(
    data = neighborhood_fire_map,
    aes(fill = count),
    colour = NA
  ) +
  scale_fill_viridis(
    option = "mako", trans = "reverse",
    name = "Count"
  ) +
  labs(title = "") +
  mapTheme

# Notes / ideas (kept as comments so the chunk parses):
# property | year | #311 calls
# - could start with a neighborhood first
# - timelag
# - check guillford project
```
## Fire by Type
```{r}
# Count fires by cause (code_description), chart the ten most frequent causes,
# and find the most frequent cause within each neighborhood.

# Creating the data set (copies, so fire_neighborhood is left untouched)
fire_type <- fire_neighborhood
fire_type_1 <- fire_type

# Constant helper column; it is carried through count() so the results keep a
# `type` column.
fire_type_1$type <- 1

# Number of fires per cause and neighborhood
fire_type_2 <- fire_type_1 %>%
  count(type, code_description, MAPNAME, name = "cause_by_nhood")

# Number of fires per cause; the count column is named `cause`
fire_type_1 <- fire_type_1 %>%
  count(type, code_description, name = "cause")

# Top 10 causes. head() avoids the all-NA rows that `[1:10]` creates when there
# are fewer than 10 causes.
top_idx <- head(order(fire_type_1$cause, decreasing = TRUE), 10)
top_causes <- fire_type_1[top_idx, ]

# Bar chart of top 10 causes. The count column is `cause` (see above), so that
# is what is plotted; no fill aesthetic is mapped, so no fill scale is added.
ggplot(top_causes, aes(x = reorder(code_description, cause), y = cause)) +
  geom_col() +
  labs(
    x = "Cause",
    y = "Count",
    title = "Top 10 Causes of Fire in Philadelphia"
  ) +
  coord_flip()

# Top cause of fire by neighborhood: one row per MAPNAME, the cause with the
# highest cause_by_nhood.
# NOTE(review): the previous version of this step was flagged as not working.
# Geometry is dropped here so the step runs on a plain table and the result is
# ungrouped — confirm a non-spatial table is what is needed.
top_causes_nhood <- fire_type_2 %>%
  st_drop_geometry() %>%
  arrange(MAPNAME, desc(cause_by_nhood)) %>%
  group_by(MAPNAME) %>%
  slice(1) %>%
  ungroup()
```
## Fire by Land Use
```{r}
# Read the land-use shapefile and reproject it to the CRS of `dat`.
lu <- st_read("Land_Use/Land_Use.shp") %>%
  st_transform(st_crs(dat))

# Only viewing 100 for now before processing the full data set.
# Both samples are random and unseeded, so results differ between runs.
lu_test <- lu %>% sample_n(100)
fire_lu <- st_join(dat %>% sample_n(100), lu)

# Ideas for improving the match:
# - be specific about the join predicate (within, intersection, etc.)
# - try a tabular join on addresses, then a probabilistic match
# - fire nearest-neighbor centroid, then do a matching

# Constant helper column, then count records per C_DIG1 value.
# The column is created as lowercase `fire`, so count() must use the same name
# (it previously referenced `Fire`, which does not exist).
fire_lu$fire <- 1
fire_lu <- fire_lu %>% count(fire, C_DIG1, name = "Count")
```
# Do not run the code below: it is still a work in progress
## Fire + Permit
```{r Permit}
# Load the two permit extracts, stack them, derive month/year from the issue
# date, and drop rows without coordinates.

# Loading Permit Data. These are plain tables read from CSV; they are only
# turned into sf objects later with st_as_sf().
permit1 <- read_csv("permits1.csv")
permit2 <- read_csv("permits2.csv")

# Combining Datasets
permit <- rbind(permit1, permit2)

# Parsing Month and Year.
# as.data.frame() is called without extra arguments: the date vector that used
# to be passed here landed in the `row.names` argument. mutate() now uses the
# piped permitissuedate column instead of reaching back to `permit$...`.
permit <- permit %>%
  as.data.frame() %>%
  mutate(
    Month = lubridate::month(permitissuedate, label = FALSE),
    Year = lubridate::year(permitissuedate)
  )

# Removing NA in location
permit <- permit %>% drop_na(lng, lat)
```
## Fire + 311 Calls
```{r 311 Calls}
# Load the 311 requests and keep only rows that have lat, lon, and address.
all311 <- read_csv("OpenDataPhilly-311Calls.csv") %>%
  drop_na(lat, lon, address)
```
```{r}
# Join permits to 311 requests on the address text.
# NOTE(review): phila_data is not used again in this chunk, and an address that
# repeats in both tables yields every pairing of its rows — confirm this join
# is wanted.
phila_data <- merge(permit, all311, by = "address")

# Keep the property- and fire-related 311 requests, trim to the columns of
# interest, and convert to point features from lon/lat (EPSG:4326).
# NOTE(review): "Dangerous Building Complaint " ends with a space; it is kept
# exactly as written because it may mirror the raw data — confirm.
property311 <- all311 %>%
  filter(service_name %in% c(
    "Building Dangerous",
    "Dangerous Building Complaint ",
    "Fire Safety Complaint",
    "Maintenance Complaint",
    "Maintenance Residential or Commercial",
    "Vacant House or Commercial",
    "Fire Residential or Commercial",
    "Complaints against Fire or EMS",
    "Vacant Lot Clean-Up"
  )) %>%
  dplyr::select(
    objectid, service_request_id, status, service_name, service_code,
    requested_datetime, agency_responsible, address, zipcode, lat, lon
  ) %>%
  st_as_sf(coords = c("lon", "lat"), crs = "EPSG:4326")

# Permits as point features from lng/lat (EPSG:4326).
permit_sf <- st_as_sf(permit, coords = c("lng", "lat"), crs = "EPSG:4326")
```