In [1]:
import sys

# Add the directory that holds the project's `law` module to sys.path so
# the project-local imports in the next cell can be resolved.
# The membership check keeps this cell idempotent: re-running it does not
# stack duplicate copies of the same entry onto sys.path.
modules_dir = '../../../modules/'
if modules_dir not in sys.path:
    sys.path.append(modules_dir)

In [2]:
import random
import re

import charge
import law
import numpy as np
import pandas as pd
import requests

In [3]:
# Load the c01 output file (one row per charge, judging by the columns).
#   * parse_dates: ask pandas to parse `_arrest_date` as datetimes.
#   * keep_default_na=False: pandas' default NA markers (empty string,
#     'NA', ...) are NOT turned into NaN, so such text cells stay as-is.
df = pd.read_csv(
    '../04_outputs/c01_final_df.csv',
    keep_default_na=False,
    parse_dates=['_arrest_date'],
)

In [4]:
# Arrest-level attributes: keep only the columns describing the arrest /
# arrestee and collapse the repeated rows to the distinct combinations.
# NOTE(review): the same assignment is repeated verbatim in a later cell
# before `arrestee_df` is first read, so one of the two looks redundant.
arrestee_cols = [
    '_arrest_id',
    '_arrest_date',
    '_gender',
    '_census_race',
    '_census_ethnicity',
    '_housing_status',
]
arrestee_df = df[arrestee_cols].copy().drop_duplicates()

In [5]:
# Display the column labels of the loaded frame for reference.
df.columns

Index(['_person_id', '_census_race', '_census_ethnicity', '_gender',
       '_arrest_age', '_housing_status', '_arrest_id', '_arrest_date',
       '_original_charge_code', '_original_charge_description', '_code_type',
       '_section', '_meta_code', '_charge_reconstructed', '_municipal',
       '_offense_level', '_charge_description', '_incongruity', '_violent',
       '_warrant', '_fta', '_supervision', '_felony', '_federal',
       '_potential_offense_levels', '_levels_congruent',
       '_code_type_of_felony', '_pcs', '_disorder'],
      dtype='object')

In [6]:
# Print the docstring of the helper used below to build per-arrest charge sets.
help(charge.transform_to_sets)

Help on function transform_to_sets in module charge:

transform_to_sets(df, offense_level_available=True)
    Produce sets of unique values for charges such that each row in the
    resultant dataframe represents a single arrest.
    
    Parameters
    ----------
    df : pd.DataFrame
        DataFrame that should be read in from 04_outputs/ and begin with c01.
    
    offense_level_available : bool
        Whether charge offense level information is available (default True)
    
    Returns
    -------
    pd.DataFrame



Example:

Input

| _arrest_id | _code_type | _section | _charge_description       | _municipal | _offense_level | _felony | _violent |
|------------|------------|----------|---------------------------|------------|----------------|---------|----------|
| A1         | OM         | 9.08.180 | ALCOHOL BEV. ON PUBLIC ST | True       | Infraction     | False   | False    |
| A1         | PC         | 647(F)   | DISORDERLY CONDUCT        | False      | Misdemeanor    | False   | False    |

Output

| _arrest_id | _code_types | _sections        | _charge_descriptions                          | _arrest_type   | _felony           | _violent            |
|------------|-------------|------------------|-----------------------------------------------|----------------|-------------------|---------------------|
| A1         | OM, PC      | 9.08.180, 647(F) | ALCOHOL BEV. ON PUBLIC ST, DISORDERLY CONDUCT | low-level only | no felony charges |  no violent charges |

In [7]:
# Per the helper's docstring: produce sets of unique charge values so that
# each row of the result represents a single arrest. Offense-level
# information is declared available (this is also the helper's default).
set_df = charge.transform_to_sets(df, offense_level_available=True)

Charge sets produced. Now working on non-procedural charge sets.
Now merging procedural and non-procedural charge sets.
Now categorizing serious charges.
Done!


In [8]:
# Presumably builds per-arrest charge counts (inferred from the helper's
# name and its progress messages) — TODO confirm against the charge module.
count_df = charge.transform_to_counts(df)

Charge counts produced. Now assessing procedural and low-level charges.
Charges assessed. Now transforming dataframe.
Done!


In [9]:
# Combine the count-based and set-based charge summaries.
# No `on=` is given, so pandas joins on every column name the two frames
# have in common; `how='inner'` is the pandas default, spelled out here.
charge_df = pd.merge(left=count_df, right=set_df, how='inner')

In [10]:
# Arrest-level attributes: select the columns describing the arrest /
# arrestee, then drop repeated rows so that only distinct combinations
# remain.
arrestee_cols = [
    '_arrest_id',
    '_arrest_date',
    '_gender',
    '_census_race',
    '_census_ethnicity',
    '_housing_status',
]
arrestee_df = df[arrestee_cols].copy().drop_duplicates()

In [11]:
# Sanity check: show any row whose `_arrest_id` already appeared in an
# earlier row. An empty result means every arrest id maps to exactly one
# combination of arrestee attributes.
repeat_id_mask = arrestee_df.duplicated(subset=['_arrest_id'])
arrestee_df.loc[repeat_id_mask]

Unnamed: 0,_arrest_id,_arrest_date,_gender,_census_race,_census_ethnicity,_housing_status


In [12]:
# Coerce `_arrest_date` to datetime. read_csv was already asked to parse
# this column, but pandas returns it unaltered (object dtype) when parsing
# fails, so converting again here ensures the `.dt` accessor is usable.
arrest_dates = pd.to_datetime(arrestee_df['_arrest_date'])
arrestee_df['_arrest_date'] = arrest_dates

In [13]:
# Derive the calendar year of each arrest from its datetime value.
arrest_years = arrestee_df['_arrest_date'].dt.year
arrestee_df['_arrest_year'] = arrest_years

In [14]:
# Attach the charge summaries to the arrest-level attributes.
# No `on=` is given, so the join uses every column name shared by the two
# frames; `how='inner'` is the pandas default, spelled out here.
final_df = arrestee_df.merge(charge_df, how='inner')

In [16]:
# Persist the merged arrest-level table; index=False keeps the DataFrame
# index out of the written file.
output_path = '../04_outputs/a01_oak_charge_sets.csv'
final_df.to_csv(output_path, index=False)