In [1]:
setwd("~/GitHub/ripa-analysis/lapd")

In [8]:
setwd("~/opp/lib")
source('../lib/opp.R')
source('../lib/threshold_test.R')
source('../lib/disparity.R')

In [10]:
# Load Data
ripa <- read_csv("RIPA_MASTER_July_April.csv",
                 col_types = cols(Basis_for_Search = "c"))
colnames(ripa) <- make_ergonomic(colnames(ripa))
lapd <- read_csv("LAPD_online_data_since_July_2018.csv")
colnames(lapd) <- make_ergonomic(colnames(lapd))

In [4]:
# Filter table to just vehicle stops and divisions
veh_frns <- lapd %>% 
  filter(stop_type == "VEH") %>%
  mutate(division = if_else(
    officer_1_division_number > 0 & officer_1_division_number <= 27,
    division_description_1,
    "OTHER")
  ) %>% 
  select(frn, division, division_description_1, officer_1_division_number) %>% 
  unique()

# The model groups the racial categories into Hispanic, black, white and other
tr_race <- c(
  Latino = "hispanic",
  Black = "black",
  White = "white",
  Asian = "other",
  MiddleEastSouthAsian = "other",
  multiracial = "other",
  `Pacific Islander` = "other",
  `Native American` = "other"
)

In [6]:
# Create Discretionary Search Heirarchy 
ripa_veh <- ripa %>% 
  select(frn, race, search, basis_for_search, contraband) %>% 
  filter(frn %in% veh_frns$frn) %>% 
  left_join(veh_frns, by = "frn") %>% 
  mutate(
    search_conducted = search == "TRUE", 
    basis_for_search_single = case_when(
      # Plain view (visible contraband) = 6
      str_detect(basis_for_search, "6") ~ 6,
      # Plain smell (odor of contraband) = 7
      str_detect(basis_for_search, "7") ~ 7,
      # Consent = 1
      str_detect(basis_for_search, "(^1$)|(^1,)|(,1,)|(,1$)") ~ 1,
      # Safety = 2
      str_detect(basis_for_search, "(^2$)|(^2,)|(,2,)|(,2$)") ~ 2,
      # Suspected weapon = 5
      str_detect(basis_for_search, "5") ~ 5,
      # Evidence of crime = 9
      str_detect(basis_for_search, "9") ~ 9,
      # Suspected violation of school policy = 13
      str_detect(basis_for_search, "13") ~ 13,
      # Exigent circumstances/emergency = 11
      str_detect(basis_for_search, "11") ~ 11,
      # K9 detection = 8
      str_detect(basis_for_search, "8") ~ 8,
      # Warrant = 3
      str_detect(basis_for_search, "(^3$)|(^3,)|(,3,)|(,3$)") ~ 3,
      # Probation/parole = 4
      str_detect(basis_for_search, "4") ~ 4,
      # Incident to arrest = 10
      str_detect(basis_for_search, "10") ~ 10,
      # Vehicle inventory = 12
      str_detect(basis_for_search, "12") ~ 12,
      TRUE ~ NA_real_
    ),
    # Non-discretionary searches:
    # 3 = warrant, 4 = as condition of probation/parole, 10 = incident to arrest, 12 = vehicle inventory)
    non_discretionary_search = basis_for_search_single %in% c(3, 4, 10, 12),
    contraband_found = contraband == "TRUE",
    contraband_found = if_else(!search_conducted, FALSE, contraband_found),
    subject_race = as.factor(tr_race[race]),
    sub_geography = division,
    geography = "LA"
  ) 

In [7]:
# Run threshold test for all searches and for just discretionary searches
tt_results_all_searches <- threshold_test(
  ripa_veh,
  sub_geography,
  geography_col = geography
)
write_rds(tt_results_all_searches, "tt_results_all_searches.rds")

tt_results_discretionary_searches <- threshold_test(
  ripa_veh %>% filter(!non_discretionary_search),
  sub_geography,
  geography_col = geography
)
write_rds(
  tt_results_discretionary_searches, 
  "tt_results_discretionary_searches.rds"
)


# Function wrapper for convergence checks and ppcs
model_checks <- function(model_result) {
  fit <- model_result$metadata$fit
  summary <- summary(fit)$summary
  # Want this to be < 1.05
  print("max Rhat")
  print(summary[,'Rhat'] %>% max(na.rm = T))
  # Want this to be > 0.001
  print("min n_eff")
  print(summary[,'n_eff'] %>% min(na.rm = T) / nrow(tbl))

  search_rate_ppc <- plt_ppc_rates(
    model_result$results$thresholds,
    rstan::extract(model_result$metadata$fit),
    "search_rate",
    numerator_col = n_action,
    denominator_col = n,
    title = str_c("LA threshold ppc - search rates")
  )

  hit_rate_ppc <- plt_ppc_rates(
    model_result$results$thresholds,
    rstan::extract(model_result$metadata$fit),
    "hit_rate",
    numerator_col = n_outcome,
    denominator_col = n_action,
    title = str_c("LA threshold ppc - hit rates")
  )

  list(
    search_rate_ppc = search_rate_ppc,
    hit_rate_ppc = hit_rate_ppc
  )
}

all_search_checks <- model_checks(tt_results_all_searches)
disc_search_checks <- model_checks(tt_results_discretionary_searches)

tt_results_all_searches$results$aggregate_thresholds
tt_results_discretionary_searches$results$aggregate_thresholds

"2.38% of data was null for required columns and removed"recompiling to avoid crashing R session
"number of items to replace is not a multiple of replacement length"

[1] "max Rhat"
[1] 1.007569
[1] "min n_eff"
numeric(0)
[1] "Weighted RMS prediction error: 0.35%"
[1] "Weighted RMS prediction error: NaN%"
[1] "max Rhat"
[1] 1.006518
[1] "min n_eff"
numeric(0)
[1] "Weighted RMS prediction error: 0.26%"
[1] "Weighted RMS prediction error: NaN%"


race,avg_threshold,threshold_ci,threshold_diff,diff_ci
black,15.70%,"(15.07%, 16.31%)",-1.96%,"(-3.41%, -0.55%)"
hispanic,14.97%,"(14.38%, 15.55%)",-2.68%,"(-4.13%, -1.30%)"
other,14.63%,"(13.12%, 16.24%)",-3.02%,"(-5.02%, -0.97%)"
white,17.65%,"(16.39%, 18.97%)",,


race,avg_threshold,threshold_ci,threshold_diff,diff_ci
black,18.32%,"(17.33%, 19.31%)",-6.44%,"(-9.63%, -3.56%)"
hispanic,18.85%,"(17.83%, 19.94%)",-5.91%,"(-9.02%, -2.98%)"
other,20.76%,"(18.01%, 23.88%)",-4.00%,"(-8.12%, 0.10%)"
white,24.76%,"(22.02%, 27.77%)",,
