PhilaFire_ExploratoryAnalysis_BK.Rmd

---
title: "PhilaFire Exploratory Analysis"
output: html_document
date: '2023-02-17'
---

# Exploratory Analysis (do not run these chunks yet; they are not needed for now)

## Fire by Neighborhood
```{r Neighborhood Data}
# Load the Philadelphia neighborhood polygons, reproject them to the CRS of
# `dat` (defined elsewhere; presumably the fire incidents), and keep only the
# name, size, and geometry columns.
neighborhoods <- st_read("Neighborhoods_Philadelphia/Neighborhoods_Philadelphia.shp") %>%
  st_transform(st_crs(dat)) %>%
  dplyr::select(MAPNAME, Shape_Leng, Shape_Area, geometry)

# Spatially join the neighborhood attributes onto every row of `dat`.
# This may be the dataset used for spatial analysis.
fire_neighborhood <- st_join(dat, neighborhoods)
```

```{r}
# Fires per neighborhood: counts, share of all fires, a top-10 bar chart, and
# a choropleth map. Relies on `fire_neighborhood` and `neighborhoods` from the
# "Neighborhood Data" chunk and on `mapTheme`, which is defined elsewhere.

# Work on a copy so that fire_neighborhood itself is preserved
fire_neighborhood_1 <- fire_neighborhood

# Constant column of 1s carried through the count as a grouping key
fire_neighborhood_1$fire <- 1
fire_neighborhood_1 <- fire_neighborhood_1 %>%
  count(fire, MAPNAME, name = "count")

# Share of all fires per neighborhood. The denominator is computed from the
# data (it was hard-coded as 20947) so it stays correct when the input changes.
fire_neighborhood_1 <- fire_neighborhood_1 %>%
  mutate(percent = (count / sum(count)) * 100)

# Top 10 neighborhoods by fire count. head() returns fewer rows when fewer
# than 10 groups exist, instead of padding the result with NA rows.
# NOTE(review): fires that matched no neighborhood form an NA `MAPNAME` group
# that can appear here -- confirm whether it should be excluded.
top_neighborhoods <- head(
  fire_neighborhood_1[order(fire_neighborhood_1$count, decreasing = TRUE), ],
  10
)

# Bar chart of top 10 neighborhoods (no fill aesthetic is mapped, so no fill
# scale is needed)
ggplot(top_neighborhoods, aes(x = reorder(MAPNAME, count), y = count)) +
  geom_col() +
  labs(x = "Neighborhood",
       y = "Count",
       title = "Philadelphia Neighborhoods Experiencing The Most Fires") +
  coord_flip()

# Map of fire counts by neighborhood.
# count() on the point layer leaves point/multipoint geometry, which cannot be
# drawn as filled areas. Drop that geometry and attach the counts to the
# neighborhood polygons instead.
neighborhood_counts <- fire_neighborhood_1 %>%
  st_drop_geometry() %>%
  dplyr::select(MAPNAME, count, percent)

fire_neighborhood_map <- neighborhoods %>%
  left_join(neighborhood_counts, by = "MAPNAME")

ggplot() +
  geom_sf(data = fire_neighborhood_map,
          aes(fill = count),
          colour = NA) +
  scale_fill_viridis(option = "mako", trans = "reverse",
                     name = "Count") +
  labs(title = "") + mapTheme

# Notes / next steps:
# property | year | #311 calls
#   - could start with a neighborhood first
# - timelag
# - check guillford project
```

## Fire by Type
```{r}
# Fire causes: overall counts, a top-10 bar chart, and the most frequent cause
# within each neighborhood. Uses `fire_neighborhood` from the neighborhood
# join chunk.

# Creating the data set
fire_type <- fire_neighborhood
fire_type_1 <- fire_type

# Constant column of 1s carried through the counts as a grouping key
fire_type_1$type <- 1

# Number of fires per cause within each neighborhood
fire_type_2 <- fire_type_1 %>%
  count(type, code_description, MAPNAME, name = "cause_by_nhood")

# Number of fires per cause overall
fire_type_1 <- fire_type_1 %>%
  count(type, code_description, name = "cause")

# Top 10 causes. head() returns fewer rows when fewer than 10 causes exist,
# instead of padding the result with NA rows.
top_causes <- head(
  fire_type_1[order(fire_type_1$cause, decreasing = TRUE), ],
  10
)

# Bar chart of top 10 causes. The count column is named `cause`; the earlier
# reference to `count` resolved to the dplyr function and raised an error.
ggplot(top_causes, aes(x = reorder(code_description, cause), y = cause)) +
  geom_col() +
  labs(x = "Cause",
       y = "Count",
       title = "Top 10 Causes of Fire in Philadelphia") +
  coord_flip()

# Top cause of fire by neighborhood.
# Geometry is dropped first so the per-group selection runs on a plain table;
# slice_max() keeps the single most frequent cause in each neighborhood and
# ungroup() avoids leaking the grouping into later steps.
top_causes_nhood <- fire_type_2 %>%
  st_drop_geometry() %>%
  group_by(MAPNAME) %>%
  slice_max(cause_by_nhood, n = 1, with_ties = FALSE) %>%
  ungroup()


```

## Fire by Land Use
```{r}
# Land-use polygons, reprojected to the CRS of `dat` (defined elsewhere)
lu <- st_read("Land_Use/Land_Use.shp") %>% st_transform(st_crs(dat))

# Only viewing 100 for now before processing the full data set
lu_test <- lu %>% sample_n(100)

# Spatially join a random sample of 100 rows of `dat` to the full land-use layer
fire_lu <- st_join(dat %>% sample_n(100), lu)
# specific within, intersection, etc
# try to do a tabular join with addresses then try a probabilistic match
# fire nn centroid then do a matching

# Count rows per land-use code. The helper column is created as `fire`, so
# count() must reference `fire` as well: R names are case-sensitive and `Fire`
# does not exist.
fire_lu$fire <- 1
fire_lu <- fire_lu %>% count(fire, C_DIG1, name = "Count")

```

# Do not run the code below yet: it is still a work in progress
## Fire + Permit
```{r Permit}
# Loading Permit Data
permit1 <- read_csv("permits1.csv") # %>% st_transform(crs = 4326)
permit2 <- read_csv("permits2.csv") # %>% st_transform(crs = 4326)

# Combining Datasets (both files are expected to share the same columns)
permit <- rbind(permit1, permit2)

# Parsing Month and Year from the permit issue date.
# as.data.frame() is called without extra arguments: the date column used to
# be passed as a second positional argument, which lands in `row.names`.
# Columns are referenced by bare name inside mutate() so they always come
# from the data being piped rather than from the outer `permit` object.
permit <- permit %>%
  as.data.frame() %>%
  mutate(Month = lubridate::month(permitissuedate, label = FALSE),
         Year = lubridate::year(permitissuedate))

# Removing rows with missing coordinates
permit <- permit %>% drop_na(lng, lat)
```

## Fire + 311 Calls
```{r 311 Calls}
# Load the 311 call records and keep only rows that have coordinates and an
# address.
all311 <- read_csv("OpenDataPhilly-311Calls.csv") %>%
  drop_na(lat, lon, address)
```

```{r}
# Join permits to 311 calls on the address string.
# NOTE(review): addresses can repeat on both sides, so this join may fan out
# into many rows -- confirm that is intended.
phila_data <- merge(permit, all311, by = "address")

# 311 service categories treated as property / fire related.
# NOTE(review): "Dangerous Building Complaint " is kept with its trailing
# space exactly as originally written -- confirm it matches the raw data.
property_services <- c(
  "Building Dangerous",
  "Dangerous Building Complaint ",
  "Fire Safety Complaint",
  "Maintenance Complaint",
  "Maintenance Residential or Commercial",
  "Vacant House or Commercial",
  "Fire Residential or Commercial",
  "Complaints against Fire or EMS",
  "Vacant Lot Clean-Up"
)

# Keep only those categories, trim to the columns of interest, and convert to
# an sf point layer in WGS84 built from the lon/lat columns.
property311 <- all311 %>%
  filter(service_name %in% property_services) %>%
  dplyr::select(objectid, service_request_id, status, service_name,
                service_code, requested_datetime, agency_responsible,
                address, zipcode, lat, lon) %>%
  st_as_sf(coords = c("lon", "lat"),
           crs = "EPSG:4326")

# Permits as an sf point layer in WGS84 built from the lng/lat columns
permit_sf <- permit %>%
  st_as_sf(coords = c("lng", "lat"),
           crs = "EPSG:4326")


```