INSERT INTRO HERE

In [1]:
library(tidyverse)
library(tidymodels)

#setting seed
set.seed(9876)

## 2023 CoV Property Tax Report Dataset
ptax_url <- "https://opendata.vancouver.ca/api/explore/v2.1/catalog/datasets/property-tax-report/exports/csv?lang=en&refine=report_year%3A%222023%22&timezone=America%2FLos_Angeles&use_labels=true&delimiter=%3B"
ptax_df <- read_delim(ptax_url, delim = ";") |>
            # Select relevant features for analysis
            select(LEGAL_TYPE,
                   LAND_COORDINATE, 
                   ZONING_CLASSIFICATION,
                   CURRENT_LAND_VALUE,
                   CURRENT_IMPROVEMENT_VALUE,
                   YEAR_BUILT,
                   BIG_IMPROVEMENT_YEAR,
                   TAX_LEVY)

## CoV Property Parcel Polygons Dataset
# For use with 2023 CoV Property Tax Report Dataset to merge geographic location values

parcel_url <- "https://opendata.vancouver.ca/api/explore/v2.1/catalog/datasets/property-parcel-polygons/exports/csv?lang=en&timezone=America%2FLos_Angeles&use_labels=true&delimiter=%3B"
parcel_df <- read_delim(parcel_url, delim = ";") |>
            # Select relevant features for analysis
            select(TAX_COORD, geo_point_2d) |>
            rename(LAND_COORDINATE = TAX_COORD)

# Merge CoV Property Parcel Polygons Dataset to attain geographic (lat,long) location of property
ptax_parcel_merged <- inner_join(ptax_df, parcel_df, by = "LAND_COORDINATE", relationship = "many-to-many") |>
# Separate (lat,long) into respective columns
            separate(col = geo_point_2d,
                     into = c("lat", "long"),
                     sep = ",")

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.6     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.7     [32m✔[39m [34mdplyr  [39m 1.0.9
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.2     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

── [1mAttaching packages[22m ────────────────────────────────────── tidymodels 1.0.0 ──

[32m✔[39m [34mbroom       [39m 1.0.0     [32m✔[39m [34mrsample     [39m 1.0.0
[32m✔[39m [34mdials       [39m 1.0.0     [32m✔[39m [34mtune        [39m 1.0.0
[32m✔[39m [34minfer       [39m 1.0.2     [32m✔[39m [34mworkflows   [39m 1.0.0
[32m✔

In [10]:
head(ptax_parcel_merged)

LEGAL_TYPE,LAND_COORDINATE,ZONING_CLASSIFICATION,CURRENT_LAND_VALUE,CURRENT_IMPROVEMENT_VALUE,YEAR_BUILT,BIG_IMPROVEMENT_YEAR,TAX_LEVY,lat,long
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
STRATA,77019103,Commercial,496282,240000,1996,1996,2047.38,49.26333024794667,-123.09938095630145
STRATA,59623596,Multiple Dwelling,553000,49800,1977,1977,1676.2,49.27822295440411,-123.07086779777212
STRATA,59225072,Multiple Dwelling,280000,40800,1981,1981,892.04,49.28000735957126,-123.06899069724636
STRATA,59711728,Comprehensive Development,610000,277000,1995,1995,2466.49,49.28823994952549,-123.12534105465522
STRATA,57917821,Historical Area,933000,356000,1997,1997,3584.32,49.28370640951815,-123.10369848504855
STRATA,59011894,Comprehensive Development,285000,76800,1999,2000,3372.27,49.28795342891571,-123.12082069797776


In [27]:
#filter for housing properties 
property_housing_only <- ptax_parcel_merged |>
    filter(ZONING_CLASSIFICATION == c("Comprehensive Development",
                                      "Single Detached House",
                                      "Multiple Dwelling"))

#making the training and testing sets
property_split <- initial_split(property_housing_only, prop = 0.75, strata = CURRENT_LAND_VALUE)
property_train <- training(property_split)
property_test <- testing(property_split)

#summary table counting each housing type
property_summ_table <- property_train |>
    group_by(ZONING_CLASSIFICATION) |>
    summarize(COUNT = n())
property_summ_table

“longer object length is not a multiple of shorter object length”


ZONING_CLASSIFICATION,COUNT
<chr>,<int>
Comprehensive Development,20572
Multiple Dwelling,7952
Single Detached House,17056
