# Threshold Test 

# Amy Shoemaker Code:

In [1]:
setwd("~/GitHub/ripa-analysis/lapd")

In [2]:
#setwd("~/opp/lib")
source('../lib/opp.R')
source('../lib/threshold_test.R')
source('../lib/disparity.R')

here() starts at C:/Users/bposton/Documents/GitHub/ripa-analysis

Attaching package: 'lubridate'

The following object is masked from 'package:here':

    here

The following object is masked from 'package:base':

    date


Attaching package: 'purrr'

The following object is masked from 'package:maps':

    map

The following object is masked from 'package:jsonlite':

    flatten


Attaching package: 'rlang'

The following objects are masked from 'package:purrr':

    %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
    flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
    splice

The following objects are masked from 'package:jsonlite':

    flatten, unbox

Loading required package: sp
rgdal: version: 1.4-3, (SVN revision 828)
 Geospatial Data Abstraction Library extensions to R successfully loaded
 Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
 Path to GDAL shared files: C:/Program Files/R/R-3.4.4/library/rgdal/gdal
 GDAL binary built with GEOS: 

In [3]:
# LOAD DATA
ripa <- read_csv("RIPA_MASTER_July_April.csv")
colnames(ripa) <- make_ergonomic(colnames(ripa))
lapd <- read_csv("LAPD_online_data_since_July_2018.csv")
colnames(lapd) <- make_ergonomic(colnames(lapd))

Parsed with column specification:
cols(
  .default = col_integer(),
  `Agency ORI` = col_character(),
  `Transaction Type` = col_character(),
  NFIA = col_character(),
  `Date of Stop` = col_character(),
  `Time of Stop` = col_time(format = ""),
  `Response to Call for Service` = col_character(),
  Location = col_character(),
  City = col_character(),
  `K12 School Code` = col_character(),
  Race = col_character(),
  `Is LGBT` = col_character(),
  `Person had Limited or No English Fluency` = col_character(),
  `Perceived or Known Disability` = col_number(),
  `If K-12 School Is Stop of a Student` = col_character(),
  `Reason for Stop Narrative` = col_character(),
  `Education Code Section` = col_character(),
  `Education Code Subdivision` = col_character(),
  `Suspicion Sub-Type` = col_number(),
  `Actions Taken` = col_character(),
  Search = col_logical()
  # ... with 3 more columns
)
See spec(...) for full column specifications.
"67797 parsing failures.
row # A tibble: 5 x 5 col     

In [4]:
# Get table of just vehicle stops and divisions
veh_frns <- lapd %>% 
  filter(stop_type == "VEH") %>%
  mutate(division = if_else(
    officer_1_division_number > 0 & officer_1_division_number <= 27,
    division_description_1,
    "OTHER")
  ) %>% 
  select(frn, division, division_description_1, officer_1_division_number) %>% 
  unique()

tr_race <- c(
  Latino = "hispanic",
  Black = "black",
  White = "white",
  Asian = "other",
  MiddleEastSouthAsian = "other",
  multiracial = "other",
  `Pacific Islander` = "other",
  `Native American` = "other"
)

In [5]:
# Get RIPA vehicle stops
ripa_veh <- ripa %>% 
  select(frn, race, search, basis_for_search, contraband) %>% 
  filter(frn %in% veh_frns$frn) %>% 
  left_join(veh_frns, by = "frn") %>% 
  mutate(
    search_conducted = search == "TRUE", 
    # Non-discretionary searches:
    # 4 = probation/parole, 10 = incident to arrest, 
    # 12 = vehicle inventory (impound?)
    non_discretionary_search = basis_for_search %in% c(4, 10, 12),
    contraband_found = contraband == "TRUE",
    contraband_found = if_else(!search_conducted, FALSE, contraband_found),
    subject_race = as.factor(tr_race[race]),
    sub_geography = division,
    geography = "LA"
  ) 

In [6]:
# Run threshold test
tt_results_all_searches <- threshold_test(
  ripa_veh,
  sub_geography,
  geography_col = geography
)

tt_results_discretionary_searches <- threshold_test(
  ripa_veh %>% filter(!non_discretionary_search),
  sub_geography,
  geography_col = geography
)

"2.38% of data was null for required columns and removed"recompiling to avoid crashing R session
"number of items to replace is not a multiple of replacement length"

In [47]:
#tt_results_all_searches$results$aggregate_thresholds
#tt_results_discretionary_searches$results$aggregate_thresholds

race,avg_threshold,threshold_ci,threshold_diff,diff_ci
black,15.92%,"(15.35%, 16.50%)",-2.05%,"(-3.50%, -0.59%)"
hispanic,15.39%,"(14.86%, 15.92%)",-2.58%,"(-4.02%, -1.19%)"
other,15.00%,"(13.47%, 16.60%)",-2.96%,"(-5.01%, -0.90%)"
white,17.97%,"(16.66%, 19.31%)",,


race,avg_threshold,threshold_ci,threshold_diff,diff_ci
black,18.59%,"(17.71%, 19.44%)",-4.30%,"(-6.77%, -2.01%)"
hispanic,18.42%,"(17.65%, 19.22%)",-4.47%,"(-6.90%, -2.25%)"
other,19.12%,"(16.90%, 21.59%)",-3.77%,"(-6.98%, -0.52%)"
white,22.89%,"(20.81%, 25.22%)",,


In [7]:
# Function wrapper for convergence checks and ppcs
model_checks <- function(model_result) {
  fit <- model_result$metadata$fit
  summary <- summary(fit)$summary
  # Want this to be < 1.05
  print("max Rhat")
  print(summary[,'Rhat'] %>% max(na.rm = T))
  # Want this to be > 0.001
  print("min n_eff")
  print(summary[,'n_eff'] %>% min(na.rm = T) / nrow(tbl))

  search_rate_ppc <- plt_ppc_rates(
    model_result$results$thresholds,
    rstan::extract(model_result$metadata$fit),
    "search_rate", 
    numerator_col = n_action,
    denominator_col = n,
    title = str_c("LA threshold ppc - search rates")
  )
  
  hit_rate_ppc <- plt_ppc_rates(
    model_result$results$thresholds,
    rstan::extract(model_result$metadata$fit),
    "hit_rate", 
    numerator_col = n_outcome,
    denominator_col = n_action,
    title = str_c("LA threshold ppc - hit rates")
  )
  
  list(
    search_rate_ppc = search_rate_ppc,
    hit_rate_ppc = hit_rate_ppc
  )
}

all_search_checks <- model_checks(tt_results_all_searches)
non_disc_search_checks <- model_checks(tt_results_discretionary_searches)

tt_results_all_searches$results$aggregate_thresholds
tt_results_discretionary_searches$results$aggregate_thresholds

[1] "max Rhat"
[1] 1.005374
[1] "min n_eff"
numeric(0)
[1] "Weighted RMS prediction error: 0.04%"
[1] "Weighted RMS prediction error: 4.99%"
[1] "max Rhat"
[1] 1.003136
[1] "min n_eff"
numeric(0)
[1] "Weighted RMS prediction error: 0.04%"
[1] "Weighted RMS prediction error: 6.26%"


race,avg_threshold,threshold_ci,threshold_diff,diff_ci
black,15.91%,"(15.34%, 16.50%)",-2.06%,"(-3.52%, -0.65%)"
hispanic,15.38%,"(14.86%, 15.93%)",-2.58%,"(-4.00%, -1.19%)"
other,15.00%,"(13.45%, 16.60%)",-2.97%,"(-5.05%, -0.91%)"
white,17.97%,"(16.69%, 19.31%)",,


race,avg_threshold,threshold_ci,threshold_diff,diff_ci
black,18.59%,"(17.74%, 19.45%)",-4.29%,"(-6.75%, -1.98%)"
hispanic,18.42%,"(17.64%, 19.23%)",-4.46%,"(-6.89%, -2.20%)"
other,19.12%,"(16.83%, 21.65%)",-3.76%,"(-7.02%, -0.50%)"
white,22.88%,"(20.76%, 25.19%)",,


# Previous Ryan + Ben Code

In [None]:
exclude_probation = read_csv('ripa_tt_data_5_15_2019_No_Probation_Parole_Exclude_test.csv')

In [53]:
source('../lib/opp.R')

In [3]:
source('../lib/threshold_test.R')

In [30]:
exclude_probation = read_csv('ripa_tt_data_5_15_2019_No_Probation_Parole_Exclude_test.csv')

Parsed with column specification:
cols(
  Race = col_character(),
  Search = col_logical(),
  Contraband = col_logical(),
  Division = col_character(),
  geography = col_character()
)


In [35]:
names(exclude_probation) <- c('subject_race', 'search_conducted','contraband_found', 'division', 'geography')

In [36]:
glimpse(exclude_probation)

Observations: 362,321
Variables: 5
$ subject_race     <chr> "Asian", "Asian", "Asian", "Asian", "Asian", "Asia...
$ search_conducted <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ contraband_found <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ division         <chr> "blank", "blank", "blank", "blank", "blank", "blan...
$ geography        <chr> "Los Angeles", "Los Angeles", "Los Angeles", "Los ...


In [20]:
#tt.exclude_probation4 <- threshold_test(exclude_probation4 %>% mutate(city ='Los Angeles'))

In [37]:
#Get RIPA vehicle stops
exclude_probation <- exclude_probation %>% mutate(sub_geography = division, geography = "Los Angeles") 

In [39]:
#tt.exclude_probation <- threshold_test(
 # exclude_probation,
  #sub_geography,
  #geography_col = geography
#)