# Oakland Police Department Charges

In [1]:
import sys

# append the directory of law module to sys.path list
sys.path.append('../../../modules')

In [2]:
import random
import re

import arrest
import law
import numpy as np
import pandas as pd
import requests

In [3]:
df = pd.read_csv('../04_outputs/c00_arrest_data.csv',
                 dtype=str,
                 keep_default_na=False)

In [4]:
# Give the two raw charge columns the normalized names used by the rest of
# the notebook.
charge_column_names = {
    'PerSta_Statute_Desc': '_original_charge_description',
    'PerSta_Statute_Cd': '_original_charge_code',
}
df = df.rename(columns=charge_column_names)

In [5]:
df['Arr_DateTime'] = pd.to_datetime(df['Arr_DateTime'])

## Parse charge code field

In [6]:
# Run the project parser over every raw charge code; its expanded result is
# stored as the `_code_type` and `_code` columns.
parsed_code_types = df.apply(
    lambda row: arrest.Charge.parse_code_type(
        row['_original_charge_code'],
        other_codes=['OM', 'B/W', 'O/W'],
    ),
    axis=1,
    result_type='expand',
)
df[['_code_type', '_code']] = parsed_code_types

### Code Type and Code

In [7]:
df[df['_code_type'] == ''].groupby(
    ['_code', '_original_charge_description', ])['_arrest_id'].nunique()

_code      _original_charge_description               
ATTSUI     ATTEMPT SUICIDE                                11
CP1209     CONTEMPT OF COURT                               1
DD         DOMESTIC DISPUTE                                8
GC8634     CURFEW/ETC VIOLATION DURING LOCAL EMERGENCY     1
HN305      SINK/ETC VESSEL/ETC                             1
HN655 (B)  USE WATERCRAFT WHILE UNDER INFLUENCE            1
OAR        ASSIST OUTSIDE AGENCY                           8
OP         OPERTATIONS PLAN                                1
PROB       PROBATION HOLD                                  1
SC207      SC KIDNAPPING                                   1
SC211      SC ROBBERY - OTHER DANGEROUS WEAPON             1
ZZ65000    LOCAL ORDINANCE VIOL                            1
Name: _arrest_id, dtype: int64

### Code and 'meta,' e.g. 'attempted'

In [8]:
# Re-parse `_code` together with its code type; the expanded result replaces
# `_code` and populates `_meta_code` (e.g. the 664 / 666 parts shown below).
parsed_meta = df.apply(
    lambda row: arrest.Charge.parse_meta(row['_code'], row['_code_type']),
    axis=1,
    result_type='expand',
)
df[['_code', '_meta_code']] = parsed_meta

In [9]:
df[df['_meta_code'] != ''][['_original_charge_code',
                            '_original_charge_description', '_code', '_meta_code']].head()

Unnamed: 0,_original_charge_code,_original_charge_description,_code,_meta_code
351,PC666,PETTY THEFT W/PRIOR JAIL TERM FOR SPECIFIC OFF...,,666
362,PC484 (A)/666,THEFT/PETTY THEFT W/PRIOR,484 (A),666
434,PC666,PETTY THEFT W/PRIOR JAIL TERM FOR SPECIFIC OFF...,,666
559,PC664 /484,ATTEMPT THEFT,484,664
671,PC484 (A)/666,THEFT/PETTY THEFT W/PRIOR,484 (A),666


### Idiosyncratic modification

#### `PC` transposed

In [10]:
# 'CP1209' (CONTEMPT OF COURT) is a transposed 'PC1209': the code-type parser
# left its `_code_type` blank and kept the whole string as `_code`.
transposed_pc = df['_original_charge_code'] == 'CP1209'
df.loc[transposed_pc, '_code_type'] = 'PC'
# Correct `_code` as well. The section/subpart parsing later in the notebook
# rebuilds `_section` for every row from `_code`, which overwrites the manual
# `_section` value below, so fixing `_section` alone does not survive.
df.loc[transposed_pc, '_code'] = '1209'
df.loc[transposed_pc, '_section'] = '1209'

### Section and subparts

Reference for determining section subpart nomenclature: https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?lawCode=PEN&sectionNum=667 and https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?lawCode=PEN&sectionNum=490.2.&highlight=true&keyword=clause+subparagraph

Unfortunately, this is sometimes inaccurate, but it's the best system I could come up with!

PC 490.2:

> Notwithstanding Section 487 or any other provision of law defining grand theft, obtaining any property by theft where the value [...] does not exceed nine hundred fifty dollars ($950) shall be considered petty theft and shall be punished as a misdemeanor, except that such person may instead be punished pursuant to subdivision (h) of Section 1170 if that person has one or more prior convictions for an offense specified in **clause (iv) of subparagraph (C) of paragraph (2) of subdivision (e) of Section 667** or for an offense requiring registration pursuant to subdivision (c) of Section 290.

> clause (iv) of subparagraph (C) of paragraph (2) of subdivision (e) of Section 667 translates to:

| section | subdivision | paragraph | subparagraph | clause |
|---------|-------------|-----------|--------------|--------|
| 667     | (e)         | (2)       | (C)          | (iv)   |

From California Style Manual 4e, p. 50:

> A potential for citational imprecision results from the various styles used over the years in numbering or designating code sections and subdivisions. Subdivisions are generally designated alphabetically by lowercase letters in parentheses, but numerical designations are sometimes used, and some designations may use words rather than figures for numbers. Likewise, the Legislature has not always used parentheses in designating subdivisions. And not all alphabetically or numerically designated enumerations within sections are characterized as subdivisions. For example, enumerations of material encompassing only a portion of the substantive content are not subdivisions (see, e.g., Civ. Code, § 47, subd. (b)).

In [11]:
# Remove all whitespace from the code (e.g. '484 (A)' -> '484(A)').
# A raw string is used because '\s' in a plain string literal is an invalid
# escape sequence.
df['_code'] = df['_code'].str.replace(r'\s+', '', regex=True)

In [None]:
# Break each whitespace-free `_code` into section, subdivision, paragraph,
# subparagraph and clause columns using the project parser.
subpart_columns = ['_section', '_subdivision', '_paragraph', '_subparagraph', '_clause']
parsed_subparts = df.apply(
    lambda row: arrest.Charge.parse_section_and_subparts(row['_code'], arrests=True),
    axis=1,
    result_type='expand',
)
df[subpart_columns] = parsed_subparts

#### Reconstruct charge code from normalized data

In [None]:
df['_charge_reconstructed'] = df['_code_type'].str.cat(
    df[['_section', '_subdivision', '_paragraph', '_subparagraph', '_clause']], sep='', na_rep='')

#### Normalize offense level (`_offense_level`) data

In [None]:
# Abbreviate the recorded severity to a one-letter offense level; values not
# listed in the mapping pass through unchanged.
severity_to_level = {'FELONY': 'F', 'MISDEMEANOR': 'M', 'INFRACTION': 'I'}
df['_offense_level'] = df['PerSta_Severity'].replace(severity_to_level)

## Separate charges by `_code_type` for processing

In [None]:
# Code-type prefixes grouped by how their charges are processed below.
municipal_codes = {'OM'}
statute_codes = {'HS', 'PC', 'BP', 'VC', 'WI', 'PU', 'FG', 'US'}
warrant_codes = {'O/W', 'B/W'}

# Every code type the notebook knows how to handle; anything else is routed
# to the "ambiguous" frame.
expected_codes = municipal_codes.union(statute_codes, warrant_codes)
expected_codes

{'B/W', 'BP', 'FG', 'HS', 'O/W', 'OM', 'PC', 'PU', 'US', 'VC', 'WI'}

### Warrant charges without codes


In [None]:
warrant_charge_df = df[df['_code_type'].isin(warrant_codes)].copy()

In [None]:
# Put the warrant code type back in front of the code (e.g. 'B/W' + '-FEL').
# Note: re-running this cell prepends the prefix a second time.
warrant_charge_df['_code'] = (
    warrant_charge_df['_code_type'] + warrant_charge_df['_code']
)

In [None]:
warrant_charge_df[['_code_type', '_code', 'PerSta_Severity']].drop_duplicates()

Unnamed: 0,_code_type,_code,PerSta_Severity
0,B/W,B/W-FEL,FELONY
1,B/W,B/W-MISD,MISDEMEANOR
280,O/W,O/W-FEL,FELONY
543,O/W,O/W-MISD,MISDEMEANOR
18225,B/W,B/W-FEL,MISDEMEANOR
61593,B/W,B/W-MISD,FELONY
70058,B/W,B/W-MISD,INFRACTION
82313,O/W,O/W-MISD,FELONY
87382,O/W,O/W-MISD,INFRACTION


### Municipal charges

In [None]:
municipal_charge_df = df[df['_code_type'].isin(municipal_codes)].copy()

### State and federal charges

In [None]:
statute_charge_df = df[df['_code_type'].isin(statute_codes)].copy()

### Charges without `_code_type`

See [Ambiguous](#Ambiguous)

In [None]:
ambiguous_charge_df = df[~df['_code_type'].isin(expected_codes)].copy()

### Check length of individual charge `df`s against length of original df

In [None]:
# The four partitions should account for every row of `df`; a difference of 0
# means no row was lost or duplicated by the split.
split_frames = (
    statute_charge_df,
    ambiguous_charge_df,
    municipal_charge_df,
    warrant_charge_df,
)
sum(len(frame) for frame in split_frames) - len(df)

0

## Process charges

### Municipal

Are `_original_charge_description` (originally `PerSta_Statute_Desc`) values unique to `_charge_reconstructed` values?

In [None]:
municipal_charge_df[municipal_charge_df.groupby('_charge_reconstructed', dropna=False)[
    '_original_charge_description'].transform('nunique') > 1]

Unnamed: 0,_person_id,_arrest_id,Arr_DateTime,Arr_Event_Nbr,Arr_Beat,EvnPer_Assoc,PerSta_Severity,_original_charge_code,_original_charge_description,_code_type,_code,_meta_code,_section,_subdivision,_paragraph,_subparagraph,_clause,_charge_reconstructed,_offense_level


Because they are, I don't need to retrieve descriptions for the charges.

In [None]:
municipal_charge_df['_municipal'] = True

### State and federal statute

In [None]:
statute_data = pd.read_json(
    '../../01_inputs/processed/c01_chsoff.json')

In [None]:
statute_data.head(2)

Unnamed: 0,_original_code,_code_type,_offense_level,_section,_charge_reconstructed,_chsoff_violent,_enactment_data,StatuteLiteral,EnactDate,RepealAmendDate
0,104495(D),HS,I,104495,HS104495(D),False,"[{'EnactDate': '2017-01-01T00:00:00Z', 'Repeal...","NO TOBACCO:250FT YTH SPRT, RETALIATE:FOR COMPL...",,
1,11175,HS,M,11175,HS11175,False,"[{'EnactDate': '1977-01-01T00:00:00Z', 'Repeal...","POSS/ETC NONCONFOR PRESC, ILL C/SUB PRESC:OBT ...",,


In [None]:
# First pass: attach statute reference data by exact match on code type,
# section, reconstructed charge and offense level. `validate='m:1'` makes the
# merge fail if the reference data has duplicate keys.
statute_join_keys = [
    '_code_type',
    '_section',
    '_charge_reconstructed',
    '_offense_level',
]
df_with_statute = statute_charge_df.merge(
    statute_data,
    how='left',
    on=statute_join_keys,
    validate='m:1',
)

In [None]:
matched_df_with_statute = df_with_statute[df_with_statute['StatuteLiteral'].notnull(
)].copy()

#### Separate unmatched for joining irrespective of `_offense_level`

Sometimes a person is charged with e.g. a felony per a code that is only chargeable as a misdemeanor. I want to match the charge code on the descriptions per the Office of the Attorney General regardless, and keep track of incongruities between the recorded offense level and the offense levels chargeable.

In [None]:
unmatched_df_with_statute = df_with_statute[
    df_with_statute['StatuteLiteral'].isnull()
][statute_charge_df.columns].copy()

In [None]:
unmatched_df_with_statute.groupby(['_code_type', '_section', '_charge_reconstructed'], dropna=False)[
    '_arrest_id'].count().sort_values().reset_index()

Unnamed: 0,_code_type,_section,_charge_reconstructed,_arrest_id
0,BP,4326,BP4326(A),1
1,PC,667.5,PC667.5(A),1
2,PC,241,PC241(243),1
3,PC,22610,PC22610(A),1
4,PC,853.8,PC853.8,1
...,...,...,...,...
215,PC,602,PC602(L),292
216,PC,484,PC484(A),392
217,PC,148,PC148(A),813
218,PC,1203.2,PC1203.2(A),2623


In [None]:
statute_data_ignoring_level = pd.read_json(
    '../../01_inputs/processed/c01_chsoff_ignoring_level.json')

In [None]:
unmatched_df_with_statute.columns.intersection(
    statute_data_ignoring_level.columns)

Index(['_code_type', '_section', '_charge_reconstructed'], dtype='object')

In [None]:
# Second pass: match the still-unmatched charges without regard to offense
# level. The join keys are spelled out (they are the three columns the two
# frames share, per the intersection check above) so that a column added to
# either frame later cannot silently change what the merge joins on.
df_with_statute_without_level = pd.merge(
    unmatched_df_with_statute,
    statute_data_ignoring_level,
    how='left',
    on=['_code_type', '_section', '_charge_reconstructed'],
    validate='m:1',
)

In [None]:
matched_df_with_statute_without_level = df_with_statute_without_level[
    df_with_statute_without_level['StatuteLiteral'].notnull()
].copy()

In [None]:
matched_df_with_statute_without_level['_incongruity'] = 'offense level'

#### Separate unmatched for joining irrespective of both level and subparts

In [None]:
unmatched_df_with_statute_without_level = df_with_statute_without_level[
    df_with_statute_without_level['StatuteLiteral'].isnull()
][statute_charge_df.columns].copy()

There are also sometimes typos or absences of subparts for some charge codes. In the latter case, it's often because a charge corresponds to the first subpart of a particular section. Here are the most common examples:

In [None]:
unmatched_df_with_statute_without_level.groupby(['_code_type', '_code'])[
    '_arrest_id'].nunique().sort_values()

_code_type  _code      
BP          4326(A)          1
PC          12031.5(A)       1
            12101(A)         1
            12552            1
            12553(A)(1)      1
                          ... 
            594(B)(4)      120
                           147
            166.4          192
            602(L)         289
            148(A)         793
Name: _arrest_id, Length: 137, dtype: int64

***
PC 148 is the most common charge that didn't match on subparts.

Directly from [https://leginfo.legislature.ca.gov](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?lawCode=PEN&sectionNum=148):

> **148.**
>
> (a) (1) Every person who willfully resists, delays, or obstructs any public officer, peace officer, or an emergency medical technician, as defined in Division 2.5 (commencing with Section 1797) of the Health and Safety Code, in the discharge or attempt to discharge any duty of his or her office or employment, when no other punishment is prescribed, shall be punished by a fine not exceeding one thousand dollars ($1,000), or by imprisonment in a county jail not to exceed one year, or by both that fine and imprisonment.

In this case, the subdivision was present, but the code itself has no "148(A)" alone; it also always has a paragraph code ("(1)" or "(2)" according to the OAG data):

In [None]:
statute_data[
    (statute_data['_original_code'].str.contains('148(A)', regex=False))
    & (statute_data['_code_type'] == 'PC')
][
    [
        '_original_code',
        '_code_type',
        '_offense_level',
        '_section',
        'StatuteLiteral',
    ]
]

Unnamed: 0,_original_code,_code_type,_offense_level,_section,StatuteLiteral
1991,148(A)(1),PC,M,148,OBSTRUCT/ETC PUB OFCR/ETC
1992,148(A)(2),PC,M,148,OBSTR/ETC PUB OFC:RADIO


In [None]:
statute_data_ignoring_subparts = pd.read_json(
    '../../01_inputs/processed/c01_chsoff_ignoring_subparts.json')

In [None]:
df_with_statute_without_subparts = pd.merge(
    unmatched_df_with_statute_without_level,
    statute_data_ignoring_subparts,
    how='left',
    on=['_code_type', '_section', '_offense_level'],
    suffixes=('', '_chsoff'),
    validate='m:1',
)

In [None]:
df_with_statute_without_subparts.columns

Index(['_person_id', '_arrest_id', 'Arr_DateTime', 'Arr_Event_Nbr', 'Arr_Beat',
       'EvnPer_Assoc', 'PerSta_Severity', '_original_charge_code',
       '_original_charge_description', '_code_type', '_code', '_meta_code',
       '_section', '_subdivision', '_paragraph', '_subparagraph', '_clause',
       '_charge_reconstructed', '_offense_level', '_original_code',
       '_subdivision_chsoff', '_paragraph_chsoff', '_subparagraph_chsoff',
       '_clause_chsoff', '_charge_reconstructed_chsoff', '_chsoff_violent',
       '_enactment_data', 'StatuteLiteral', 'EnactDate', 'RepealAmendDate',
       'statute_object', '_level_data', '_level_and_subpart_data',
       '_level_quantified'],
      dtype='object')

In [None]:
# Rows that found a statute only after subparts were ignored are flagged as
# having a subpart incongruity.
matched_on_section = df_with_statute_without_subparts['StatuteLiteral'].notnull()
df_with_statute_without_subparts.loc[matched_on_section, '_incongruity'] = 'subparts'

In [None]:
# Matched rows whose offense level is blank are flagged as incongruent on both
# subparts and offense level.
# NOTE(review): the label is now spelled 'offense' (it was misspelled
# 'offennse'); update any downstream filter that relied on the old value.
matched_without_level = (
    df_with_statute_without_subparts['StatuteLiteral'].notnull()
    & (df_with_statute_without_subparts['_offense_level'] == '')
)
df_with_statute_without_subparts.loc[
    matched_without_level, '_incongruity'] = 'subparts and offense level'

In [None]:
processed_statute_charge_df = pd.concat([matched_df_with_statute_without_level, matched_df_with_statute,
                                        df_with_statute_without_subparts], ignore_index=True)

In [None]:
set(processed_statute_charge_df['_arrest_id']) == set(
    statute_charge_df['_arrest_id'])

True

In [None]:
len(processed_statute_charge_df['_arrest_id']) == len(
    statute_charge_df['_arrest_id'])

True

### Ambiguous

In [None]:
ambiguous_charge_df.groupby(['_code_type', '_section', '_original_charge_description'], dropna=False)[
    '_arrest_id'].nunique().sort_values()

_code_type  _section  _original_charge_description               
                      CURFEW/ETC VIOLATION DURING LOCAL EMERGENCY     1
                      LOCAL ORDINANCE VIOL                            1
                      OPERTATIONS PLAN                                1
                      PROBATION HOLD                                  1
                      SC KIDNAPPING                                   1
                      SC ROBBERY - OTHER DANGEROUS WEAPON             1
                      SINK/ETC VESSEL/ETC                             1
                      USE WATERCRAFT WHILE UNDER INFLUENCE            1
                      ASSIST OUTSIDE AGENCY                           8
                      DOMESTIC DISPUTE                                8
                      ATTEMPT SUICIDE                                11
Name: _arrest_id, dtype: int64

## Combine processed `df`s

In [48]:
processed_df = pd.concat(
    [municipal_charge_df, processed_statute_charge_df,
        warrant_charge_df, ambiguous_charge_df],
    ignore_index=True,
)

In [49]:
set(processed_df['_arrest_id']) == set(df['_arrest_id'])

True

In [50]:
len(processed_df) == len(df)

True

In [51]:
# Only the municipal frame set `_municipal` (to True); every other row is NaN
# after the concat. Comparing with True turns those NaNs into False and yields
# a real boolean column. The result is assigned back rather than calling
# `fillna(..., inplace=True)` on the selected column, which operates on a
# temporary under pandas copy-on-write and would leave the frame unchanged.
processed_df['_municipal'] = processed_df['_municipal'].eq(True)

In [52]:
processed_df['_charge_description'] = processed_df['StatuteLiteral'].fillna(
    processed_df['_original_charge_description'])

In [53]:
# Upper-case every string description; non-string values (e.g. NaN) are left
# untouched.
processed_df['_charge_description'] = processed_df['_charge_description'].map(
    lambda text: text.upper() if type(text) is str else text
)

In [54]:
# Replace missing reconstructed charges with '' so later string operations on
# this column work. Assign the result back instead of using
# `fillna(..., inplace=True)` on the selected column, which operates on a
# temporary under pandas copy-on-write and would leave the frame unchanged.
processed_df['_charge_reconstructed'] = processed_df['_charge_reconstructed'].fillna('')

## Violent

In [55]:
def generate_expression(code):
    """Build a regex fragment that matches the section number `code` on its own.

    The fragment matches `code` literally, but only where it is not preceded
    by a digit or a period and not followed by a digit, so that e.g. '243'
    is not found inside '1243', '2430' or '1.243'.

    Parameters
    ----------
    code : str
        Section number to match, e.g. '211' or '243.1'.

    Returns
    -------
    str
        Regex source text suitable for joining with '|' into a larger pattern.
    """
    # Raw strings: '\d' and '\.' are invalid escape sequences in plain literals.
    look_behind = r'(?<!\d|\.)'
    look_ahead = r'(?!\d)'
    # re.escape neutralises every regex metacharacter in the code, not only '.'.
    escaped = re.escape(code)
    return look_behind + escaped + look_ahead

In [56]:
# One guarded regex fragment per person-crime section number.
expression_list = list(
    map(generate_expression, law.California.person_crime_sections))

In [57]:
expression = '|'.join(expression_list)

In [58]:
def detect_violent_crime(offense, pattern):
    """Return the first match of `pattern` in `offense`, or '' if there is none.

    Parameters
    ----------
    offense : str
        Charge code text to search.
    pattern : str
        Regular expression to look for.

    Returns
    -------
    The first element of `re.findall(pattern, offense)`, or the empty string
    when nothing matches.
    """
    matches = re.findall(pattern, offense)
    return matches[0] if matches else ''

In [59]:
# Record, per charge, the first person-crime section found in the raw charge
# code ('' when none is found).
processed_df['check'] = processed_df['_original_charge_code'].apply(
    detect_violent_crime, args=(expression,))

In [60]:
processed_df[processed_df['check'] != ''].groupby(['_original_charge_code'], dropna=False)[
    '_arrest_id'].nunique().sort_values()

_original_charge_code
SC211                1
PC289 (D)(3)         1
PC243.1              1
PC243.2(A)(1)        1
PC243.6              1
                 ...  
PC273.5           1510
PC211             1938
PC242             2131
PC245 (A)(1)      2967
PC243 (E)(1)     12039
Name: _arrest_id, Length: 203, dtype: int64

In [61]:
# Count distinct arrests per flagged charge and copy the table to the system
# clipboard for review outside the notebook.
# NOTE(review): `to_clipboard` needs a working system clipboard, so this cell
# may fail on a headless machine; confirm it is still wanted.
flagged_charges = processed_df[processed_df['check'] != '']
flagged_summary = flagged_charges.groupby(
    [
        '_charge_reconstructed',
        '_original_charge_code',
        '_charge_description',
        '_original_charge_description',
    ]
).agg(oakland_arrests=('_arrest_id', 'nunique'))
flagged_summary.to_clipboard()

In [62]:
processed_df['_violent'] = (processed_df['check'] != '') | (
    processed_df['_chsoff_violent'] == True)

In [63]:
processed_df[processed_df['_violent'] == True]['_chsoff_violent'].unique()

array([nan, True], dtype=object)

In [64]:
processed_df[(processed_df['_chsoff_violent'] == False)
             & (processed_df['_violent'] == True)]

Unnamed: 0,_person_id,_arrest_id,Arr_DateTime,Arr_Event_Nbr,Arr_Beat,EvnPer_Assoc,PerSta_Severity,_original_charge_code,_original_charge_description,_code_type,...,_subparagraph_chsoff,_clause_chsoff,_charge_reconstructed_chsoff,statute_object,_level_data,_level_and_subpart_data,_level_quantified,_charge_description,check,_violent


In [65]:
processed_df[(processed_df['_chsoff_violent'] == True) & (
    processed_df['_violent'] == False)]['_charge_reconstructed'].unique()

array([], dtype=object)

### Check for false negatives via `_charge_description` field

In [66]:
processed_df[
    (processed_df['_violent'] == False)
    & (processed_df['_charge_description'].str.contains('ASSAULT'))
].groupby(
    [
        '_code_type',
        '_original_charge_code',
        '_charge_reconstructed',
        '_charge_description',
    ]
)[
    '_arrest_id'
].nunique().sort_values().reset_index()

Unnamed: 0,_code_type,_original_charge_code,_charge_reconstructed,_charge_description,_arrest_id
0,PC,PC12280 (A)(1),PC12280(A)(1),MAKE ANY ASSAULT WEAPON,22
1,PC,PC12280 (B),PC12280(B),ILLEGAL POSS ASSAULT WPN,47
2,PC,PC30600 (A),PC30600(A),MFG/SELL/ETC ASSAULT WPN,75
3,PC,PC30605 (A),PC30605(A),ILL POSS ASSAULT WPN,232


### Check for false positives via `_charge_description` field

In [67]:
processed_df[processed_df['_violent'] == True].groupby(
    [
        '_code_type',
        '_original_charge_code',
        '_charge_reconstructed',
        '_charge_description',
    ]
)[
    '_arrest_id'
].nunique().sort_values().reset_index()

Unnamed: 0,_code_type,_original_charge_code,_charge_reconstructed,_charge_description,_arrest_id
0,,SC207,,SC KIDNAPPING,1
1,PC,PC26100 (D),PC26100(D),DISCHARGE F/A FROM VEH,1
2,PC,PC261.5(D),PC261.5(D),SEX W/MINOR -16:PERP 21+,1
3,PC,PC261 (A)(5),PC261(A)(5),RAPE:VICT KNOWS PERSON,1
4,PC,PC261 (A)(4)(A),PC261(A)(4)(A),RAPE:VICT UNCONSCIOUS,1
...,...,...,...,...,...
236,PC,PC273.5,PC273.5,INFLICT CRPL INJ SP/COHAB,1510
237,PC,PC211,PC211,"ROBBERY:SECOND DEGREE, ROBBERY:FIRST DEGREE, R...",1938
238,PC,PC242,PC242,"BATTERY ON PERSON, BAT:SPOUSE/EX SP/DATE/ETC, ...",2072
239,PC,PC245 (A)(1),PC245(A)(1),"FORCE/ADW NOT FIREARM:GBI, ADW NOT FIREARM",2750


In [68]:
processed_df[processed_df['_charge_reconstructed'].str.contains('^PC460', regex=True)].groupby(
    [
        '_code_type',
        '_original_charge_code',
        '_charge_reconstructed',
        '_charge_description',
        '_violent', 'check'
    ]
)[
    '_arrest_id'
].nunique().sort_values().reset_index()

Unnamed: 0,_code_type,_original_charge_code,_charge_reconstructed,_charge_description,_violent,check,_arrest_id
0,PC,PC460(B),PC460(B),BURGLARY:SECOND DEGREE,False,,1
1,PC,PC460 (B),PC460(B),BURGLARY:SECOND DEGREE,False,,7
2,PC,PC460 (A),PC460(A),BURGLARY:FIRST DEGREE,True,,8


## Warrants

Warrant-related charge codes of interest: `PC1551`, `WI663`, `PC853.8`, `PC978.5`.

In [69]:
# Charge codes treated as warrants: the project's list plus the two local
# warrant code types.
# NOTE: this rebinds `warrant_codes`, which earlier in the notebook is the set
# of warrant code types {'O/W', 'B/W'}; from here on it is this list.
warrant_codes = [*law.California.warrant_codes, 'B/W', 'O/W']

In [70]:
processed_df['_warrant'] = (
    processed_df['_charge_reconstructed'].isin(warrant_codes))

In [71]:
processed_df[processed_df['_warrant'] == True]['_code_type'].unique()

array(['PC', 'B/W', 'O/W'], dtype=object)

### Check for false negatives

In [72]:
processed_df[(processed_df['_warrant'] == False) & (
    processed_df['_original_charge_description'].str.contains('WARRANT'))].groupby(
    ['_charge_reconstructed', '_charge_description'], dropna=False)['_arrest_id'].count()

_charge_reconstructed  _charge_description    
WI663                  WARRANT OF ARREST:MINOR    2
Name: _arrest_id, dtype: int64

### Check for false positives via `_charge_description` field

In [73]:
processed_df[processed_df['_warrant'] == True].groupby(
    ['_charge_reconstructed', '_charge_description'], dropna=False)['_arrest_id'].count()

_charge_reconstructed  _charge_description              
B/W                    FELONY BENCH WARRANT - LOCAL          8852
                       MISDEMEANOR BENCH WARRANT - LOCAL    13469
O/W                    OUTSIDE WARRANT - FELONY              7147
                       OUTSIDE WARRANT - MISDEMEANOR         4674
PC1551                 FUG JUST:WARRANT ARREST                 14
PC853.8                FAILURE TO APPEAR WARRANT                1
PC978.5                BENCH WARRANT:FTA:FELONY                 1
                       BENCH WARRANT:FTA:MISD                  30
Name: _arrest_id, dtype: int64

## FTA

In [74]:
processed_df['_fta'] = processed_df['_charge_reconstructed'].isin(
    law.California.fta_codes)

### Check for false negatives

In [75]:
processed_df[(processed_df['_fta'] == False) & (
    processed_df['_original_charge_description'].str.contains('FTA|APPEAR', regex=True))].groupby(
    ['_charge_reconstructed', '_charge_description', ], dropna=False)['_arrest_id'].count()

Series([], Name: _arrest_id, dtype: int64)

Note that `40302(B)` is not a failure to appear after written promise (`VEH 40508(A)`). Per [leginfo.legislature.ca.gov](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?lawCode=VEH&sectionNum=40302):
>**40302.**
>
>Whenever any person is arrested for any violation of this code, not declared to be a felony, the arrested person shall be taken without unnecessary delay before a magistrate within the county in which the offense charged is alleged to have been committed and who has jurisdiction of the offense and is nearest or most accessible with reference to the place where the arrest is made in any of the following cases:
>
>[...]
>
>(b) When the person arrested refuses to give his or her written promise to appear in court.

### Check for false positives

In [76]:
processed_df[processed_df['_fta'] == True].groupby(
    ['_charge_reconstructed', '_charge_description'], dropna=False)['_arrest_id'].count()

_charge_reconstructed  _charge_description      
PC1320(A)              FTA:MISDEMEANOR CHARGE          1
PC1331                 FTA AS WITNESS                  1
PC166(A)(4)            CONTEMPT:DISOBEY CRT ORDR    1722
PC853.7                FTA AFTER WRITTEN PROMISE      89
PC853.8                FAILURE TO APPEAR WARRANT       1
PC978.5                BENCH WARRANT:FTA:FELONY        1
                       BENCH WARRANT:FTA:MISD         30
VC40508(A)             FAIL T/APPEAR:WRIT PROMIS     113
Name: _arrest_id, dtype: int64

## Supervision

In [77]:
processed_df['_supervision'] = (processed_df['_charge_reconstructed'].isin(
    law.California.supervision_codes)) & (processed_df['_code_type'] == 'PC')

### Check for false negatives

In [78]:
processed_df[(processed_df['_supervision'] == False) & (
    processed_df['_original_charge_description'].str.contains('PAROLE|PROB|PRCS|FLASH'))].groupby(
    ['_charge_reconstructed', '_charge_description', ], dropna=False)['_arrest_id'].nunique().sort_values().reset_index()

Unnamed: 0,_charge_reconstructed,_charge_description,_arrest_id
0,,PROBATION HOLD,1
1,PC3060,SUSPEND/REVOKE PAROLE,2
2,PC12021(D),FELN/ADD/ETC POSS FIREARM,3
3,PC1203.073,A PERSON CONVICTED OF A FELONY SPECIFIED IN SU...,5
4,WI1767.3,"ESCAPE CUSTDY JUV FACILTY, REVOKE/ETC PAROLE:CYA",5
5,PC1203.016(C),REAREST:ELEC MONITOR PROB,6
6,WI777,MOD COURT ORD:COMMIT/PROB,7
7,PC29815(A),CONV PRSN POSS/ETC F/ARM,82
8,PC1203.3,PROBATION REVOKED/ETC,243


### Check for false positives

In [79]:
processed_df[processed_df['_supervision'] == True].groupby(
    ['_charge_reconstructed', '_charge_description'], dropna=False)['_arrest_id'].count()

_charge_reconstructed  _charge_description                      
PC1203.2               PROB VIOL:REARREST/REVOKE                    7545
PC1203.2(A)            PROB VIOL:REAREST/REVOKE                     2623
PC3000.08              FLASH INCARCERATION, VIOLATION OF PAROLE       25
PC3056                 VIOLATION OF PAROLE:FEL                      2333
PC3455                 PRCS VIOLATION                                103
PC3455(A)              REVOKE/TERMINATE POST-RELEASE SUPERVISION      33
Name: _arrest_id, dtype: int64

## Felony

In [80]:
processed_df['_felony'] = processed_df['_offense_level'] == 'F'

In [81]:
processed_df.columns

Index(['_person_id', '_arrest_id', 'Arr_DateTime', 'Arr_Event_Nbr', 'Arr_Beat',
       'EvnPer_Assoc', 'PerSta_Severity', '_original_charge_code',
       '_original_charge_description', '_code_type', '_code', '_meta_code',
       '_section', '_subdivision', '_paragraph', '_subparagraph', '_clause',
       '_charge_reconstructed', '_offense_level', '_municipal',
       '_data_per_charge_level', '_potential_offense_levels', 'StatuteLiteral',
       '_incongruity', '_original_code', '_chsoff_violent', '_enactment_data',
       'EnactDate', 'RepealAmendDate', '_subdivision_chsoff',
       '_paragraph_chsoff', '_subparagraph_chsoff', '_clause_chsoff',
       '_charge_reconstructed_chsoff', 'statute_object', '_level_data',
       '_level_and_subpart_data', '_level_quantified', '_charge_description',
       'check', '_violent', '_warrant', '_fta', '_supervision', '_felony'],
      dtype='object')

In [82]:
def flag_level_incongruity(row):
    """Report whether a charge's offense level is among its potential levels.

    Note that, despite the function name, the result is True when the two
    fields *agree* (the caller stores it in ``_levels_congruent``).

    Parameters
    ----------
    row : mapping
        A DataFrame row (or any mapping) providing ``'_offense_level'`` and
        ``'_potential_offense_levels'``.

    Returns
    -------
    bool or float
        True when ``_offense_level`` occurs inside
        ``_potential_offense_levels``, False when it does not, and ``np.nan``
        when either field is not a string (no comparison is possible).
    """
    potential_levels = row['_potential_offense_levels']
    offense_level = row['_offense_level']

    # Without both values as text there is nothing to compare. Previously a
    # non-string offense level (e.g. NaN) raised TypeError inside re.search.
    if not isinstance(potential_levels, str) or not isinstance(offense_level, str):
        return np.nan

    # Literal substring test: the offense level is data, not a regex pattern,
    # so characters such as '(' or '*' must not be interpreted by `re`.
    # NOTE(review): an empty offense level still counts as congruent (as it
    # did with re.search) — confirm whether it should be NaN instead.
    return offense_level in potential_levels

In [83]:
# Row-wise comparison of the recorded offense level against the levels the
# statute allows; the function is passed directly, no wrapper lambda needed.
processed_df['_levels_congruent'] = processed_df.apply(
    flag_level_incongruity, axis=1)

In [84]:
# Copy the code type onto felony rows only; every other row is left
# without a value in '_code_type_of_felony'.
processed_df.loc[
    processed_df['_felony'], '_code_type_of_felony'
] = processed_df['_code_type']

## Federal

In [85]:
# Charges whose parsed code type is 'US' are flagged as federal.
processed_df['_federal'] = processed_df['_code_type'].eq('US')

In [86]:
# Show every charge flagged as federal.
processed_df.loc[processed_df['_federal']]

Unnamed: 0,_person_id,_arrest_id,Arr_DateTime,Arr_Event_Nbr,Arr_Beat,EvnPer_Assoc,PerSta_Severity,_original_charge_code,_original_charge_description,_code_type,...,_charge_description,check,_violent,_warrant,_fta,_supervision,_felony,_levels_congruent,_code_type_of_felony,_federal
119176,5Z2H,5Z2H_2012-03-13 13:30:00,2012-03-13 13:30:00,ARR12-002591,,ARRESTEE,FELONY,US21 841(A)(1),MANUFACTURE/DISTRIBUTE/ETC CONTROL SUBSTANCE,US,...,MANUFACTURE/DISTRIBUTE/ETC CONTROL SUBSTANCE,,False,False,False,False,True,,US,True
119177,5Z2H,5Z2H_2012-03-13 13:30:00,2012-03-13 13:30:00,ARR12-002591,,ARRESTEE,FELONY,US21 846,ATTEMPT/CONSPIRACY:DRUGS,US,...,ATTEMPT/CONSPIRACY:DRUGS,,False,False,False,False,True,,US,True
119206,5HNU,5HNU_2012-03-30 12:30:00,2012-03-30 12:30:00,ARR12-013325,11X,ARRESTEE,FELONY,US18 922 (G),FELON/ETC REC/ETC FIREARM/AMMO,US,...,FELON/ETC REC/ETC FIREARM/AMMO,,False,False,False,False,True,,US,True
119207,5HNU,5HNU_2012-03-30 12:30:00,2012-03-30 12:30:00,ARR12-013325,11X,ARRESTEE,FELONY,US18 924(C),KNOWINGLY IMPORT/BRING IN FIREARM/AMMUN IN US,US,...,KNOWINGLY IMPORT/BRING IN FIREARM/AMMUN IN US,,False,False,False,False,True,,US,True
119298,592D,592D_2012-07-08 17:33:00,2012-07-08 17:33:00,ARR12-124743,26X,ARRESTEE,FELONY,US18 472,ILLEGAL POSSESSION/ETC FALSE U S DOCUMENT,US,...,ILLEGAL POSSESSION/ETC FALSE U S DOCUMENT,,False,False,False,False,True,,US,True
119882,385J,385J_2015-08-27 11:05:00,2015-08-27 11:05:00,ARR15-043869,30X,ARRESTEE,FELONY,US21 841(A)(1),MANUFACTURE/DISTRIBUTE/ETC CONTROL SUBSTANCE,US,...,MANUFACTURE/DISTRIBUTE/ETC CONTROL SUBSTANCE,,False,False,False,False,True,,US,True
120237,5DRJ,5DRJ_2018-01-23 10:45:00,2018-01-23 10:45:00,ARR18-000894,99X,ARRESTEE,FELONY,US21 841(A)(1),MANUFACTURE/DISTRIBUTE/ETC CONTROL SUBSTANCE,US,...,MANUFACTURE/DISTRIBUTE/ETC CONTROL SUBSTANCE,,False,False,False,False,True,,US,True
120447,5U2W,5U2W_2019-08-08 15:00:00,2019-08-08 15:00:00,ARR19-005896,77X,ARRESTEE,FELONY,US21 841(A)(1),MANUFACTURE/DISTRIBUTE/ETC CONTROL SUBSTANCE,US,...,MANUFACTURE/DISTRIBUTE/ETC CONTROL SUBSTANCE,,False,False,False,False,True,,US,True
120448,5U2W,5U2W_2019-08-08 15:00:00,2019-08-08 15:00:00,ARR19-005903,77X,ARRESTEE,FELONY,US21 841(A)(1),MANUFACTURE/DISTRIBUTE/ETC CONTROL SUBSTANCE,US,...,MANUFACTURE/DISTRIBUTE/ETC CONTROL SUBSTANCE,,False,False,False,False,True,,US,True


## PCS

In [87]:
# Flag a charge as PCS when it is a Health & Safety ('HS') code whose
# reconstructed charge appears in law.California.pcs_codes.
processed_df['_pcs'] = (
    processed_df['_code_type'].eq('HS')
    & processed_df['_charge_reconstructed'].isin(law.California.pcs_codes)
)

### Check for false negatives

In [88]:
# The ten most frequent HS charges that were NOT flagged as PCS, counted by
# distinct arrests.
(
    processed_df
    .loc[~processed_df['_pcs'] & processed_df['_code_type'].eq('HS')]
    .groupby(['_section', '_charge_reconstructed', '_charge_description'])
    ['_arrest_id']
    .nunique()
    .sort_values()
    .reset_index()
    .tail(10)
)

Unnamed: 0,_section,_charge_reconstructed,_charge_description,_arrest_id
58,11366.0,HS11366,KEP PLACE:SEL/ETC N/C/SUB,41
59,11379.0,HS11379,TRANSP/ETC CNTL SUB,67
60,11359.0,HS11359(B),POSS MARIJUANA FOR SALE,114
61,11360.0,HS11360(A),"SELL MARIJUANA, SELL/TRNSP/ETC MARIJUANA",490
62,11370.1,HS11370.1(A),POSS CNTL SUB WHILE ARMED,537
63,11378.0,HS11378,POSS CNTL SUB FOR SALE,680
64,11352.0,HS11352(A),TRANSP/SELL NARC/CNTL SUB,775
65,11351.0,HS11351,POS/PUR F/SALE NARC/C/SUB,1091
66,11351.5,HS11351.5,POSS/PUR COKE BASE F/SALE,1188
67,11359.0,HS11359,POSS MARIJUANA FOR SALE,1889


In [89]:
# The twenty most frequent non-HS charges whose description contains 'POSS'
# and that were not flagged as PCS, counted by distinct arrests.
(
    processed_df
    .loc[
        ~processed_df['_pcs']
        & processed_df['_code_type'].ne('HS')
        & processed_df['_charge_description'].str.contains('POSS')
    ]
    .groupby(['_code_type', '_section', '_charge_reconstructed', '_charge_description'])
    ['_arrest_id']
    .nunique()
    .sort_values()
    .reset_index()
    .tail(20)
)

Unnamed: 0,_code_type,_section,_charge_reconstructed,_charge_description,_arrest_id
69,PC,31360,PC31360(A),VIOL FEL POSS BODY ARMOR,19
70,PC,22810,PC22810(A),UNLAWFL POSS/USE TEAR GAS,20
71,PC,311.11,PC311.11(A),POSS/ETC OBS MTR:MNR:SEX,21
72,BP,25620,BP25620,POSS OPEN ALCOHOL:PUBLIC,24
73,PC,22610,PC22610(A),FELON POSS/ETC STUN GUN,30
74,PC,32625,PC32625(A),POSSESS/ETC MACHINEGUN,42
75,PC,23920,PC23920,POSS/ETC F/ARM W/O ID MRK,47
76,PC,12280,PC12280(B),ILLEGAL POSS ASSAULT WPN,47
77,PC,245,PC245(A)(4),ADW/FORCE:POSSIBLE GBI,59
78,PC,29805,PC29805(A),POSS F/ARM BY CONV MISD,64


### Check for false positives

In [90]:
# Tabulate every charge flagged as PCS to spot codes that should not be.
# dropna=False keeps groups whose code or description is missing.
(
    processed_df
    .loc[processed_df['_pcs']]
    .groupby(['_charge_reconstructed', '_charge_description'], dropna=False)
    ['_arrest_id']
    .count()
)

_charge_reconstructed  _charge_description                                               
HS11350                POSSESS NARCOTIC CNTL SUB                                                2
                       SIMPLE POSSESSION OF HEROIN, COCAINE, LISTED CONTROLLED SUBSTANCES       1
HS11350(A)             POSSESS NARCOTIC CNTL SUB                                             3922
HS11357(A)             POS CONCENTRATED CANNABIS                                               92
                       POSS CONCENTRATE CANNABIS                                               54
                       POSS MARIJUANA 28.5- GRMS                                                1
HS11357(B)             POSS MARIJ OVER 28.5 GRAM                                              262
HS11358                PLANT/ETC MARIJUANA                                                    128
HS11362.4(B)           SMOKE MARIJ TOBACO PROHIB                                                1
HS11364                CNTL 

## Disorder

In [91]:
# A charge counts as "disorder" when its reconstructed code is one of the
# listed PC647 variants AND its original description is not one of the
# explicitly excluded descriptions below.
processed_df['_disorder'] = (
    processed_df['_charge_reconstructed'].isin([
        'PC647',
        'PC647(E)',
        'PC647(F)',
        'PC647(C)',
        'PC647C',
        'PC647(G)',
        'PC647(H)',
        'PC647(I)',
        'PC647(J)',
    ])
    & ~processed_df['_original_charge_description'].isin([
        'DISORDERLY CONDUCT:PEEK INTO INHABITED BUILDING (AMENDED)',
        'INVADE PRIVACY:PEEK THROUGH HOLE IN BATHROOM (AMENDED)',
        'FELONY PROSTITUTION',
    ])
)

### Check for false negatives

In [92]:
# Charges whose section starts with 647 but that were not flagged as
# disorder, counted by distinct arrests.
(
    processed_df
    .loc[
        processed_df['_section'].str.contains('^647', regex=True)
        & ~processed_df['_disorder']
    ]
    .groupby(['_section', '_charge_reconstructed',
              '_original_charge_code', '_original_charge_description'])
    ['_arrest_id']
    .nunique()
    .sort_values()
    .reset_index()
)

Unnamed: 0,_section,_charge_reconstructed,_original_charge_code,_original_charge_description,_arrest_id
0,647,PC647(J),PC647 (J),INVADE PRIVACY:PEEK THROUGH HOLE IN BATHROOM (...,1
1,647.6,PC647.6,PC647.6,ANNOY/ETC CHILDREN WITH SPECIFIC PRIOR FELONY ...,1
2,647.6,PC647.6,PC647.6,ANNOY/MOLESTS VIC UNDER 18,1
3,647.6,PC647.6(A)(2),PC647.6 (A)(2),ANNOY/MOLEST VICTIM BELIEVED TO BE UNDER 18 YE...,1
4,647,PC647(F),PC647 F,FELONY PROSTITUTION,2
5,647,PC647(H),PC647 (H),DISORDERLY CONDUCT:PEEK INTO INHABITED BUILDIN...,2
6,647,PC647(D),PC647 (D),DISORDERLY CONDUCT:LOITER IN OR ABOUT TOILET,3
7,647,PC647(K),PC647 (K),INVADE PRIVACY:PEEK THROUGH HOLE IN BATHROOM (...,3
8,647.6,PC647.6,PC647.6,ANNOY/MOLEST CHILDREN UNDER 18 (AMENDED),4
9,647.6,PC647.6(A),PC647.6(A),ANNOY/ETC CHILD UNDER 18,4


### Check for false positives

In [93]:
# Every charge flagged as disorder, counted by distinct arrests, for a
# manual review of what the flag captured.
(
    processed_df
    .loc[processed_df['_disorder']]
    .groupby(['_section', '_charge_reconstructed', '_original_charge_description'])
    ['_arrest_id']
    .nunique()
    .sort_values()
    .reset_index()
)

Unnamed: 0,_section,_charge_reconstructed,_original_charge_description,_arrest_id
0,647,PC647(G),DISORDERLY CONDUCT:LOITER ON PRIVATE PROPERTY ...,1
1,647,PC647(J),DISORDERLY CONDUCT:LODGE WITHOUT CONSENT,2
2,647,PC647,DISORDERLY CONDUCT:UNDER INFLUENCE OF DRUG,3
3,647,PC647(I),DISORDERLY CONDUCT:LODGE W/O CONSENT (AMENDED),4
4,647,PC647(E),DISORDERLY CONDUCT: LODGING IN PUBLIC OR PRIVA...,5
5,647,PC647(H),DISORDERLY CONDUCT:LOITER/ETC PRIVATE PROPERTY,15
6,647C,PC647C,OBSTRUCT PERSON'S MOVEMENT:PUBLIC PLACE,23
7,647,PC647(C),DISORDERLY CONDUCT:BEGGING,52
8,647,PC647(E),DISORDERLY CONDUCT:LOITER/REFUSE TO IDENTIFY SELF,104
9,647,PC647(F),DISORDERLY CONDUCT:INTOX DRUG WITH ALCOHOL,254


## Housing status

In [94]:
# Load the per-person output as strings (blank cells stay '' rather than
# NaN), asking pandas to parse only Arr_DateTime as a date.
person_data = pd.read_csv(
    '../04_outputs/c00_person_data.csv',
    dtype=str,
    parse_dates=['Arr_DateTime'],
    keep_default_na=False,
)

In [95]:
# Both the 'unhoused' and 'unhoused; unknown' categories are treated as unhoused.
person_data.loc[
    person_data['_category'].isin(['unhoused', 'unhoused; unknown']),
    '_housing_status'
] = 'unhoused'

In [96]:
# Rows with no address information at all get their own status.
person_data.loc[
    person_data['_subcategory'].eq('no address information'),
    '_housing_status'
] = 'no information'

In [97]:
# Subcategories that cannot establish a housing status are marked 'unknown'.
person_data.loc[
    person_data['_subcategory'].isin(
        ['no address information; unknown', 'unknown', 'po box']),
    '_housing_status'
] = 'unknown'

In [98]:
# Every person not already given a housing status is treated as housed.
# Assign the result back instead of calling fillna(inplace=True) on the
# column selection: that chained in-place form is deprecated and leaves the
# DataFrame unchanged once pandas Copy-on-Write is active.
person_data['_housing_status'] = person_data['_housing_status'].fillna('housed')

## Race

In [99]:
# Distinct race / ethnicity combinations present in the person data.
person_data.loc[:, ['_race', '_ethnicity']].drop_duplicates()

Unnamed: 0,_race,_ethnicity
0,BLACK,NOT OF HISPANIC ORIGIN
1,HISPANIC,HISPANIC ORIGIN
14,OTHER,NOT OF HISPANIC ORIGIN
15,WHITE,NOT OF HISPANIC ORIGIN
27,JAPANESE,NOT OF HISPANIC ORIGIN
...,...,...
48694,HAWAIIAN,
51157,LAOTIAN,
69903,CAMBODIAN,HISPANIC ORIGIN
73880,GUAMANIAN,


In [100]:
# Ethnicity values recorded for people whose race field is 'HISPANIC'.
person_data.loc[person_data['_race'].eq('HISPANIC'), '_ethnicity'].unique()

array(['HISPANIC ORIGIN', 'nan', 'NOT OF HISPANIC ORIGIN', '',
       'NOT OF HISPANIC ORIGIN; HISPANIC ORIGIN'], dtype=object)

In [101]:
# Maps the source race labels onto census-style race categories,
# grouped here by target category.
census_race_dict = {
    # Black or African American
    'BLACK': 'Black or African American',
    # White
    'WHITE': 'White',
    # Asian
    'JAPANESE': 'Asian',
    'OTHER ASIAN': 'Asian',
    'CAMBODIAN': 'Asian',
    'LAOTIAN': 'Asian',
    'VIETNAMESE': 'Asian',
    'CHINESE': 'Asian',
    'KOREAN': 'Asian',
    'ASIAN INDIAN': 'Asian',
    'FILIPINO': 'Asian',
    # Native Hawaiian and Other Pacific Islander
    'SAMOAN': 'Native Hawaiian and Other Pacific Islander',
    'PACIFIC ISLANDER': 'Native Hawaiian and Other Pacific Islander',
    'GUAMANIAN': 'Native Hawaiian and Other Pacific Islander',
    'HAWAIIAN': 'Native Hawaiian and Other Pacific Islander',
    # American Indian and Alaska Native
    'AMERICAN INDIAN': 'American Indian and Alaska Native',
    # Other/Unknown (includes the literal string 'nan' and blank values)
    'HISPANIC': 'Other/Unknown',
    'OTHER': 'Other/Unknown',
    'nan': 'Other/Unknown',
    '': 'Other/Unknown',
}

In [102]:
# Translate source race labels to census categories; labels missing from
# the mapping are carried over unchanged by replace().
person_data['_census_race'] = person_data['_race'].replace(
    to_replace=census_race_dict)

In [103]:
# People recorded with race 'HISPANIC' are assigned the Hispanic/Latino
# census ethnicity.
person_data.loc[
    person_data['_race'].eq('HISPANIC'), '_census_ethnicity'
] = 'Hispanic or Latino (of any race)'

In [104]:
# Distinct raw ethnicity values, used to build the ethnicity mapping.
person_data.loc[:, '_ethnicity'].unique()

array(['NOT OF HISPANIC ORIGIN', 'HISPANIC ORIGIN', 'nan', '',
       'NOT OF HISPANIC ORIGIN; nan',
       'NOT OF HISPANIC ORIGIN; HISPANIC ORIGIN'], dtype=object)

In [105]:
# Maps the raw ethnicity values onto census-style ethnicity categories.
# Blank and 'nan' values map to the empty string.
census_ethnicity_dict = {
    'NOT OF HISPANIC ORIGIN': 'Not Hispanic or Latino',
    'HISPANIC ORIGIN': 'Hispanic or Latino (of any race)',
    'nan': '',
    '': '',
    'NOT OF HISPANIC ORIGIN; nan': 'Not Hispanic or Latino',
    # Conflicting records. The data spells this value with
    # 'NOT OF HISPANIC ORIGIN' first, so that ordering must be a key or the
    # raw value passes through unmapped; the reversed ordering is kept too.
    'NOT OF HISPANIC ORIGIN; HISPANIC ORIGIN': 'Unknown',
    'HISPANIC ORIGIN; NOT OF HISPANIC ORIGIN': 'Unknown',
}

In [106]:
# Map the raw ethnicity values to census categories.
person_data['_census_ethnicity'] = person_data['_ethnicity'].replace(
    census_ethnicity_dict
)
# The assignment above replaces the whole column, which discards the
# race-based value set earlier for people whose race is 'HISPANIC'.
# Re-apply it so those people are reported as Hispanic or Latino even when
# their ethnicity field is blank or contradictory.
person_data.loc[person_data['_race'] == 'HISPANIC',
                '_census_ethnicity'] = 'Hispanic or Latino (of any race)'

In [107]:
# Attach person-level attributes to each charge row. No join keys are given,
# so pandas joins on the columns the two frames share; validate='m:1' raises
# if those keys are not unique in person_data.
final_df = processed_df.merge(person_data, how='left', validate='m:1')

In [108]:
# Give the arrest timestamp the same underscore-prefixed naming as the
# other derived columns.
final_df = final_df.rename(columns={'Arr_DateTime': '_arrest_date'})

In [110]:
# Write the selected person, arrest and charge columns to the final output
# CSV, without the index.
final_df.loc[:, [
    # person
    '_person_id',
    '_census_race',
    '_census_ethnicity',
    '_gender',
    '_arrest_age',
    '_housing_status',
    # arrest
    '_arrest_id',
    '_arrest_date',
    # charge as recorded and as parsed
    '_original_charge_code',
    '_original_charge_description',
    '_code_type',
    '_section',
    '_meta_code',
    '_charge_reconstructed',
    '_municipal',
    '_offense_level',
    '_charge_description',
    '_incongruity',
    # derived flags
    '_violent',
    '_warrant',
    '_fta',
    '_supervision',
    '_felony',
    '_federal',
    '_potential_offense_levels',
    '_levels_congruent',
    '_code_type_of_felony',
    '_pcs',
    '_disorder',
]].to_csv('../04_outputs/c01_final_df.csv', index=False)