# 10. Use-of-force (and other) files

This kernel takes a quick look at the use-of-force (UOF) and other data files, one per department.

In [1]:
import collections
import pandas

from cpe_help import list_departments

In [2]:
# Departments returned by cpe_help.list_departments(); the cells below rely on
# each item's .full_name, .name, .location and .external_dir attributes.
depts = list_departments()

In [3]:
# Human-readable label of every department, in listing order
[department.full_name for department in depts]

['11-00091 (Boston, MA)',
 '23-00089 (Lawrence, IN)',
 '24-00013 (Minneapolis, MN)',
 '24-00098 (St. Paul, MN)',
 '35-00016 (Orlando, FL)',
 '35-00103 (Charlotte, NC)',
 '37-00027 (Austin, TX)',
 '37-00049 (Dallas, TX)',
 '49-00009 (Seattle, WA)',
 '49-00033 (Los Angeles, CA)',
 '49-00035 (Oakland, CA)',
 '49-00081 (San Francisco, CA)']

In [4]:
# Pick one data file per department from its `external_dir`.
#
# `Path.iterdir()` yields entries in arbitrary order, so the candidates are
# sorted first: the chosen file is then the same on every run and machine.
#
# NOTE: a department without any file is skipped silently, so `files` can end
# up shorter than `depts`; the positional pairing done later in `overall`
# assumes that every department contributed exactly one file.
files = []
for dept in depts:
    uofs = sorted(x for x in dept.external_dir.iterdir() if x.is_file())
    if uofs:
        # 1 per police dept
        files.append(uofs[0])
files

[PosixPath('/amnt/code/cpe/data/departments/11-00091/external/11-00091_Field-Interviews_2011-2015.csv'),
 PosixPath('/amnt/code/cpe/data/departments/23-00089/external/23-00089_UOF-P.csv'),
 PosixPath('/amnt/code/cpe/data/departments/24-00013/external/24-00013_UOF_2008-2017_prepped.csv'),
 PosixPath('/amnt/code/cpe/data/departments/24-00098/external/24-00098_Vehicle-Stops-data.csv'),
 PosixPath('/amnt/code/cpe/data/departments/35-00016/external/35-00016_UOF-OIS-P.csv'),
 PosixPath('/amnt/code/cpe/data/departments/35-00103/external/35-00103_UOF-OIS-P_prepped.csv'),
 PosixPath('/amnt/code/cpe/data/departments/37-00027/external/37-00027_UOF-P_2014-2016_prepped.csv'),
 PosixPath('/amnt/code/cpe/data/departments/37-00049/external/37-00049_UOF-P_2016_prepped.csv'),
 PosixPath('/amnt/code/cpe/data/departments/49-00009/external/49-00009_UOF.csv'),
 PosixPath('/amnt/code/cpe/data/departments/49-00033/external/49-00033_Arrests_2015.csv'),
 PosixPath('/amnt/code/cpe/data/departments/49-00035/exter

In [5]:
# total size in MB -- okay for memory :D
total_bytes = sum(path.stat().st_size for path in files)
total_bytes / 10 ** 6

185.2544

In [6]:
# One DataFrame per selected file, in the same order as `files`.
# low_memory=False is passed because of the mixed dtype warning.
frames = [pandas.read_csv(path, low_memory=False) for path in files]

# Quick glance

In [7]:
# field interviews -- Boston (11-00091); transposed so every column reads as a
# row. Entry 0 carries the department's own labels (SEQ_NUM, FIO_ID, ...).
frames[0].head().transpose()

Unnamed: 0,0,1,2,3,4
INCIDENT_UNIQUE_IDENTIFIER,SEQ_NUM,938283,881897,882060,971623
INCIDENT_UNIQUE_IDENTIFIER.1,FIO_ID,621880,565177,565340,653895
SUBJECT_GENDER,SEX,MALE,MALE,MALE,MALE
LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION,LOCATION,NORMANDY ST at SUPPLE RD,75 BOYLSTON ST,5 TOPLIFF ST,170 MORTON ST
LOCATION_DISTRICT,DIST,B2,A1,C11,B2
LOCATION_DISTRICT.1,DIST_ID,3,1,5,3
INCIDENT_DATE,FIO_DATE,12/10/13 0:00,5/24/12 0:00,6/4/12 0:00,6/21/14 0:00
INCIDENT_TIME,FIO_TIME,12/10/13 0:00,5/24/12 0:00,6/4/12 0:00,6/21/14 0:00
SUBJECT_DETAILS,PRIORS,YES,NO,YES,YES
SUBJECT_RACE,DESCRIPTION,B(Black),W(White),B(Black),B(Black)


In [8]:
# use of force -- Minneapolis (24-00013); transposed so every column reads as
# a row. Entry 0 carries the department's own labels (X, Y, ...).
frames[2].head().transpose()

Unnamed: 0,0,1,2,3,4
LOCATION_LONGITUDE,X,-93.27169927,-93.27169927,-93.27169927,-93.24701977
LOCATION_LATITUDE,Y,45.00803595,45.00803595,45.00803595,44.95974418
INCIDENT_UNIQUE_IDENTIFIER,PoliceUseOfForceID,1,2,3,4
INCIDENT_DATE,ResponseDate,2008-01-01,2008-01-01,2008-01-01,2008-01-01
INCIDENT_TIME,,01:15:31.000Z,01:15:31.000Z,01:15:31.000Z,01:27:27.000Z
SUBJECT_INJURY,SubjectInjury,-1,-1,-1,0
TYPE_OF_FORCE_USED,ForceTypeDetail,BodilyForceType,BodilyForceType,BodilyForceType,BodilyForceType
WEAPON_OR_TOOL_USED,ForceTypeAction,Push Away,Body Weight to Pin,Punches,Joint Lock
SUBJECT_RACE,Race,Black,Black,Black,White
SUBJECT_GENDER,Sex,Female,Female,Female,Male


In [9]:
# vehicle stops -- St. Paul (24-00098); transposed so every column reads as a
# row. Entry 0 carries the department's own labels (YEAR OF STOP, ...).
frames[3].head().transpose()

Unnamed: 0,0,1,2,3,4
INCIDENT_DATE_YEAR,YEAR OF STOP,2001,2001,2001,2001
INCIDENT_DATE,DATE OF STOP,1/1/01 0:11,1/1/01 0:14,1/1/01 0:23,1/1/01 0:28
SUBJECT_RACE,RACE OF DRIVER,White,White,White,White
SUBJECT_GENDER,GENDER OF DRIVER,Female,Male,Male,Male
SEARCH_CONDUCTED_PERSON,DRIVER FRISKED?,No,No,No,No
SEARCH_CONDUCTED_VEHICLE,VEHICLE SEARCHED?,No,No,No,No
SUBJECT_ISSUED_CITATION,CITATION ISSUED?,No,No,No,No
SUBJECT_AGE,AGE OF DRIVER,,,,
INCIDENT_REASON,REASON FOR STOP,No Data,No Data,No Data,No Data
LOCATION_DISTRICT,POLICE GRID NUMBER,55,106,58,44


In [10]:
# officer-involved shootings -- Orlando (35-00016); transposed so every column
# reads as a row. Entry 0 carries the department's own labels (Case #, ...).
frames[4].head().transpose()

Unnamed: 0,0,1,2,3,4
INCIDENT_UNIQUE_IDENTIFIER,Case #,16-179658,16-242039,16-153523,16-025283
INCIDENT_DATE,Date,5/1/16,6/12/16,4/13/16,1/18/16
LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION,Address,707 S Orange Blossom Tr,1912 S. Orange Ave,Bowen DR at Folkstone,6584 Swissco Dr #734
Unnamed: 3,Incident Type,Armed Subject,Active Shooter,Battery,Well Being Check
NUMBER_OF_OFFICERS,Number of Officers Involved,2,11,1,2
OFFICER_RACE,Officer Race,"W, W","W,W, W, W, W, B, W, W, B, W, W",W,"W, B"
OFFICER_ETHNICITY,Ethnicity,"N, N","N,N, N, N, N, H, N, N, N, N, H",H,"H, N"
OFFICER_GENDER,Officer Gender,"M, M","M, M, M, M, M, M, M, M, M, M, M",M,"M, F"
OFFICER_AGE,Officer Age,"28, 28","29, 39, 48, 46, 34, 45, 47, 44, 47, 38, 31",34,"41, 37"
OFFICER_YEARS_ON_FORCE,Officer Tenure,"3, 1","4, 14, 25, 23, 11, 23, 18, 21, 21, 14, 2",8,"4, 5"


In [11]:
# arrests -- Los Angeles (49-00033); transposed so every column reads as a
# row. Entry 0 carries the department's own labels (ARST DATE, TIME, ...).
frames[9].head().transpose()

Unnamed: 0,0,1,2,3,4
INCIDENT_DATE,ARST DATE,12/31/15 0:00,12/31/15 0:00,12/31/15 0:00,12/31/15 0:00
INCIDENT_TIME,TIME,2345,2330,2320,2319
INCIDENT_UNIQUE_IDENTIFIER,RPT_ID,4538991,4538996,4539142,4538947
LOCATION_DISTRICT,AREA,14,1,6,21
LOCATION_TYPE,AREA_DESC,Pacific,Central,Hollywood,Topanga
UNKNOWN_FIELD_TYPE,RD,1435,101,646,2189
SUBJECT_AGE,AGE,43,27,31,28
SUBJECT_GENDER,SEX_CD,M,M,M,M
SUBJECT_RACE,DESCENT_CD,H,W,W,H
UNKNOWN_FIELD_TYPE.1,CHRG_GRP_CD,22,4,16,22


In [12]:
# incidents -- Oakland (49-00035); transposed so every column reads as a row.
# Entry 0 carries the department's own labels (CrimeType, DateTime, ...).
frames[10].head().transpose()

Unnamed: 0,0,1,2,3,4
CRIME_TYPE,CrimeType,DOMESTIC VIOLENCE,STOLEN VEHICLE,FELONY ASSAULT,NARCOTICS
INCIDENT_DATE,DateTime,12/6/16 16:12,12/6/16 7:20,12/6/16 19:00,12/6/16 19:20
INCIDENT_UNIQUE_IDENTIFIER,CaseNumber,16-064496,16-064497,16-064499,16-064501
INCIDENT_REASON,Description,THREAT CRIME:INT:TERRORIZE,VEHICLE THEFT - AUTO,OBSTRUCT/RESIST/ETC PUBLIC/PEACE OFFICER/EMERG...,POSSESS CONTROLLED SUBSTANCE PARAPHERNALIA
LOCATION_DISTRICT,PoliceBeat,04X,27X,08X,07X
LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION,Address,1700 MARTIN LUTHER KING JR W,1600 HIGH ST,2800 TELEGRAPH AV,34TH ST & LINDEN ST
LOCATION_CITY,City,Oakland,Oakland,Oakland,Oakland
LOCATION_STATE,State,CA,CA,CA,CA


# General info

- There are **12 departments** (related to [report][1]?)
- The number of entries varies a lot (55 to 710k)
- 15 columns (median)

# Types of data

- Use of force (UOF) - 5 departments
- Officer-involved shootings (OIS) - 2 departments
- [Incidents][12] - 2 departments
- Vehicle stops - 1 department
- [Field interviews][11] - 1 department
- Arrests - 1 department

# Some findings

- All datasets come with the `INCIDENT_DATE` column (different formats)
- The `LOCATION` indicator comes in the most varied forms
- The variables are not totally cleaned (e.g. check `SUBJECT_RACT` column in Lawrence)
- Lots of checks/preparation will still need to be made if we want to automate things (these checks and preparation themselves may be partially automated)

[1]: http://policingequity.org/wp-content/uploads/2016/07/CPE_SoJ_Race-Arrests-UoF_2016-07-08-1130.pdf
[11]: https://bizfluent.com/info-8223154-field-interview.html
[12]: https://www.policedatainitiative.org/datasets/incidents/

In [13]:
# Summary table with one row per department: its metadata, the file that was
# picked for it, the loaded frame itself and that frame's dimensions.
# `depts`, `files` and `frames` are paired by position.
overall = pandas.DataFrame(dict(
    dept=depts,
    dept_full_name=[dept.full_name for dept in depts],
    dept_name=[dept.name for dept in depts],
    dept_loc=[dept.location for dept in depts],
    file_name=[path.name for path in files],
    dframe=frames,
    ncolumns=[len(frame.columns) for frame in frames],
    nrows=[len(frame) for frame in frames],
))
overall

Unnamed: 0,dept,dept_full_name,dept_name,dept_loc,file_name,dframe,ncolumns,nrows
0,Department('11-00091'),"11-00091 (Boston, MA)",11-00091,"Boston, MA",11-00091_Field-Interviews_2011-2015.csv,INCIDENT_UNIQUE_IDENTIFIER INCIDENT_UNI...,34,152231
1,Department('23-00089'),"23-00089 (Lawrence, IN)",23-00089,"Lawrence, IN",23-00089_UOF-P.csv,INCIDENT_UNIQUE_IDENTIFIER INCID...,26,10275
2,Department('24-00013'),"24-00013 (Minneapolis, MN)",24-00013,"Minneapolis, MN",24-00013_UOF_2008-2017_prepped.csv,LOCATION_LONGITUDE LOCATION_LATITUDE INC...,13,25802
3,Department('24-00098'),"24-00098 (St. Paul, MN)",24-00098,"St. Paul, MN",24-00098_Vehicle-Stops-data.csv,INCIDENT_DATE_YEAR INCIDENT_DATE S...,12,710473
4,Department3500016('35-00016'),"35-00016 (Orlando, FL)",35-00016,"Orlando, FL",35-00016_UOF-OIS-P.csv,INCIDENT_UNIQUE_IDENTIFIER INCIDENT_DATE \...,21,55
5,Department3500103('35-00103'),"35-00103 (Charlotte, NC)",35-00103,"Charlotte, NC",35-00103_UOF-OIS-P_prepped.csv,INCIDENT_UNIQUE_IDENTIFIER INCIDENT_DATE ...,9,78
6,Department3700027('37-00027'),"37-00027 (Austin, TX)",37-00027,"Austin, TX",37-00027_UOF-P_2014-2016_prepped.csv,RIN PRIMARY_KEY INCIDENT_DATE LOCAT...,29,9483
7,Department('37-00049'),"37-00049 (Dallas, TX)",37-00049,"Dallas, TX",37-00049_UOF-P_2016_prepped.csv,INCIDENT_DATE INCIDENT_TIME \ 0 OC...,47,2384
8,Department('49-00009'),"49-00009 (Seattle, WA)",49-00009,"Seattle, WA",49-00009_UOF.csv,INCIDENT_UNIQUE_IDENTIFIER INCIDENT_UNIQU...,11,8688
9,Department('49-00033'),"49-00033 (Los Angeles, CA)",49-00033,"Los Angeles, CA",49-00033_Arrests_2015.csv,INCIDENT_DATE INCIDENT_TIME INCIDENT_U...,18,126855


## Use of force (UOF)

### General characteristics

- Present in 5 departments
  - 23-00089 (Lawrence, IN)
  - 24-00013 (Minneapolis, MN) (2008-2017)
  - 37-00027 (Austin, TX) (2014-2016)
  - 37-00049 (Dallas, TX) (2016)
  - 49-00009 (Seattle, WA)
- 11 to 47 variables
- 2384 to 25802 entries

### Normalized Variable Groups

*variables in bold appeared in at least 4 of the 5 datasets*

#### Incident

- **`INCIDENT_DATE`**
- `INCIDENT_REASON`
- `INCIDENT_TIME`
- `INCIDENT_UNIQUE)IDENTIFIER.1`
- `INCIDENT_UNIQUE_IDENTIFIER`

#### Location

- `LOCATION_CITY`
- **`LOCATION_DISTRICT`**
- `LOCATION_DISTRICT.1`
- `LOCATION_DISTRICT.2`
- `LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION`
- `LOCATION_LATITUDE`
- `LOCATION_LONGITUDE`
- `LOCATION_STATE`

#### Officer

- `OFFICER_AGE`
- `OFFICER_ASSIGNMENT`
- `OFFICER_DETAILS`
- `OFFICER_GENDER`
- `OFFICER_HIRE_DATE`
- `OFFICER_HOSPITALIZATION`
- `OFFICER_ID`
- `OFFICER_INJURY`
- `OFFICER_INJURY_TYPE`
- `OFFICER_RACE`
- `OFFICER_YEARS_ON_FORCE`

#### Street

- `STREET_DIRECTION`
- `STREET_NAME`
- `STREET_NUMBER`
- `STREET_TYPE`

#### Subject

- `SUBJECT_AGE`
- `SUBJECT_DESCRIPTION`
- `SUBJECT_DETAILS`
- `SUBJECT_DETAILS.1`
- **`SUBJECT_GENDER`**
- `SUBJECT_HOSPITALIZATION`
- `SUBJECT_ID`
- `SUBJECT_INJURY`
- `SUBJECT_INJURY_TYPE`
- `SUBJECT_OFFENSE`
- **`SUBJECT_RACE`**
- `SUBJECT_RACT`
- `SUBJECT_ROLE`
- `SUBJECT_WAS_ARRESTED`

#### Type of force used

- `TYPE_OF_FORCE_USED`
- `TYPE_OF_FORCE_USED1`
- `TYPE_OF_FORCE_USED10`
- `TYPE_OF_FORCE_USED2`
- `TYPE_OF_FORCE_USED3`
- `TYPE_OF_FORCE_USED4`
- `TYPE_OF_FORCE_USED5`
- `TYPE_OF_FORCE_USED6`
- `TYPE_OF_FORCE_USED7`
- `TYPE_OF_FORCE_USED8`
- `TYPE_OF_FORCE_USED9`

#### Others

- `BEAT`
- `CHARGE`
- `DIVISION`
- `FORCE_EFFECTIVE`
- `NUMBER_EC_CYCLES`
- `PRIMARY_KEY`
- **`REASON_FOR_FORCE`**
- `REASON_FOR_FORCE.1`
- `REPORTING_AREA`
- `RIN`
- `SECTOR`
- `SHIFT`
- `UOF_NUMBER`
- `Unnamed: 19 (Officer Organization Desc)`
- `WEAPON_OR_TOOL_USED`
- `Y_COORDINATE`
- `Y_COORDINATE.1`

In [14]:
# Summary rows of the five use-of-force departments
# (Lawrence, Minneapolis, Austin, Dallas, Seattle)
uof_rows = [1, 2, 6, 7, 8]
overall.loc[uof_rows, :]

Unnamed: 0,dept,dept_full_name,dept_name,dept_loc,file_name,dframe,ncolumns,nrows
1,Department('23-00089'),"23-00089 (Lawrence, IN)",23-00089,"Lawrence, IN",23-00089_UOF-P.csv,INCIDENT_UNIQUE_IDENTIFIER INCID...,26,10275
2,Department('24-00013'),"24-00013 (Minneapolis, MN)",24-00013,"Minneapolis, MN",24-00013_UOF_2008-2017_prepped.csv,LOCATION_LONGITUDE LOCATION_LATITUDE INC...,13,25802
6,Department3700027('37-00027'),"37-00027 (Austin, TX)",37-00027,"Austin, TX",37-00027_UOF-P_2014-2016_prepped.csv,RIN PRIMARY_KEY INCIDENT_DATE LOCAT...,29,9483
7,Department('37-00049'),"37-00049 (Dallas, TX)",37-00049,"Dallas, TX",37-00049_UOF-P_2016_prepped.csv,INCIDENT_DATE INCIDENT_TIME \ 0 OC...,47,2384
8,Department('49-00009'),"49-00009 (Seattle, WA)",49-00009,"Seattle, WA",49-00009_UOF.csv,INCIDENT_UNIQUE_IDENTIFIER INCIDENT_UNIQU...,11,8688


In [15]:
uof_frames = overall.loc[[1, 2, 6, 7, 8], 'dframe']
# Normalized column names found in any of the UOF frames.
variables1 = sorted({column for frame in uof_frames for column in frame.columns})
# Labels stored in the first row of each frame (not prepped). Float entries
# (NaN, i.e. an empty label) are left out: they cannot be ordered against
# strings.
variables2 = sorted({
    label
    for frame in uof_frames
    for label in frame.iloc[0]
    if not isinstance(label, float)
})

In [16]:
# Five most widely shared normalized column names among the UOF frames
uof_column_counts = collections.Counter(
    column for frame in uof_frames for column in frame.columns
)
uof_column_counts.most_common(5)

[('INCIDENT_DATE', 5),
 ('LOCATION_DISTRICT', 5),
 ('SUBJECT_GENDER', 5),
 ('REASON_FOR_FORCE', 4),
 ('SUBJECT_RACE', 4)]

## Officer-involved shootings

### General characteristics

- Present in 2 departments
  - 35-00016 (Orlando, FL)
  - 35-00103 (Charlotte, NC)
- 9 and 21 variables
- 55 and 78 entries

### Normalized Variable Groups

*variables in bold appeared in both datasets*

#### Incident

- **`INCIDENT_DATE`**
- **`INCIDENT_UNIQUE_IDENTIFIER`**

#### Location

- **`LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION`**
- **`LOCATION_LATITUDE`**
- **`LOCATION_LONGITUDE`**

#### Officer

- `OFFICER_AGE`
- `OFFICER_DEATH`
- `OFFICER_ETHNICITY`
- `OFFICER_GENDER`
- `OFFICER_INJURY`
- `OFFICER_RACE`
- `OFFICER_YEARS_ON_FORCE`

#### Subject

- `SUBJECT_AGE`
- `SUBJECT_AGE_IN_YEARS`
- `SUBJECT_DEATH`
- **`SUBJECT_GENDER`**
- `SUBJECT_INJURY`
- `SUBJECT_INJURY_TYPE`
- `SUBJECT_POSSESSED_WEAPON`
- **`SUBJECT_RACE`**

#### Others

- `NUMBER_OF_OFFICERS`
- `NUMBER_OF_SUBJECTS`
- `Unnamed: 3 (Incident Type)`

In [17]:
# Summary rows of the two officer-involved-shooting departments
# (Orlando, Charlotte)
ois_rows = [4, 5]
overall.loc[ois_rows, :]

Unnamed: 0,dept,dept_full_name,dept_name,dept_loc,file_name,dframe,ncolumns,nrows
4,Department3500016('35-00016'),"35-00016 (Orlando, FL)",35-00016,"Orlando, FL",35-00016_UOF-OIS-P.csv,INCIDENT_UNIQUE_IDENTIFIER INCIDENT_DATE \...,21,55
5,Department3500103('35-00103'),"35-00103 (Charlotte, NC)",35-00103,"Charlotte, NC",35-00103_UOF-OIS-P_prepped.csv,INCIDENT_UNIQUE_IDENTIFIER INCIDENT_DATE ...,9,78


In [18]:
ois_frames = overall.loc[[4, 5], 'dframe']
# Normalized column names found in any of the OIS frames.
variables1 = sorted(set(c for df in ois_frames for c in df.columns))
# Labels stored in the first row of each frame (not prepped). An empty label
# is read as NaN (a float), and sorting floats together with strings raises
# TypeError, so floats are skipped -- the same guard the UOF cell applies.
variables2 = sorted(set(c for df in ois_frames for c in df.iloc[0] if not isinstance(c, float)))  # not prepped

In [19]:
# Seven most widely shared normalized column names among the OIS frames
ois_column_counts = collections.Counter(
    column for frame in ois_frames for column in frame.columns
)
ois_column_counts.most_common(7)

[('INCIDENT_UNIQUE_IDENTIFIER', 2),
 ('INCIDENT_DATE', 2),
 ('LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION', 2),
 ('SUBJECT_RACE', 2),
 ('SUBJECT_GENDER', 2),
 ('LOCATION_LATITUDE', 2),
 ('LOCATION_LONGITUDE', 2)]

## Incidents

### General characteristics

- Present in 2 departments
  - 49-00035 (Oakland, CA) (2016)
  - 49-00081 (San Francisco, CA) (2012-2015)
- 8 and 11 variables
- 10770 and 394236 entries

### Normalized Variable Groups

*variables in bold appeared in both datasets*

#### Incident

- **`INCIDENT_DATE`**
- `INCIDENT_DAY`
- **`INCIDENT_REASON`**
- `INCIDENT_REASON_DESCRIPTION`
- `INCIDENT_TIME`
- **`INCIDENT_UNIQUE_IDENTIFIER`**

#### Location

- `LOCATION_CITY`
- **`LOCATION_DISTRICT`**
- **`LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION`**
- `LOCATION_LATITUDE`
- `LOCATION_LONGITUDE`
- `LOCATION_STATE`

#### Others

- `CRIME_TYPE`
- `DISPOSITION`

In [20]:
# Summary rows of the two incident-report departments
# (Oakland, San Francisco)
inc_rows = [10, 11]
overall.loc[inc_rows, :]

Unnamed: 0,dept,dept_full_name,dept_name,dept_loc,file_name,dframe,ncolumns,nrows
10,Department('49-00035'),"49-00035 (Oakland, CA)",49-00035,"Oakland, CA",49-00035_Incidents_2016.csv,CRIME_TYPE INCIDENT_DATE INCI...,8,10770
11,Department('49-00081'),"49-00081 (San Francisco, CA)",49-00081,"San Francisco, CA",49-00081_Incident-Reports_2012_to_May_2015.csv,INCIDENT_UNIQUE_IDENTIFIER ...,11,394236


In [21]:
inc_frames = overall.loc[[10, 11], 'dframe']
# Normalized column names found in any of the incident frames.
variables1 = sorted(set(c for df in inc_frames for c in df.columns))
# Labels stored in the first row of each frame (not prepped). An empty label
# is read as NaN (a float), and sorting floats together with strings raises
# TypeError, so floats are skipped -- the same guard the UOF cell applies.
variables2 = sorted(set(c for df in inc_frames for c in df.iloc[0] if not isinstance(c, float)))  # not prepped

In [22]:
# Five most widely shared normalized column names among the incident frames
inc_column_counts = collections.Counter(
    column for frame in inc_frames for column in frame.columns
)
inc_column_counts.most_common(5)

[('INCIDENT_DATE', 2),
 ('INCIDENT_UNIQUE_IDENTIFIER', 2),
 ('INCIDENT_REASON', 2),
 ('LOCATION_DISTRICT', 2),
 ('LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION', 2)]

## Vehicle stops

### General characteristics

- Present in department 24-00098 (St. Paul, MN)
- 12 variables
- 710473 entries

### Variable Groups

#### Incident

- `INCIDENT_DATE`
- `INCIDENT_DATE_YEAR`
- `INCIDENT_REASON`

#### Location

- `LOCATION_DISTRICT`
- `LOCATION_LATITUDE`
- `LOCATION_LONGITUDE`

#### Search conducted?

- `SEARCH_CONDUCTED_PERSON`
- `SEARCH_CONDUCTED_VEHICLE`

#### Subject

- `SUBJECT_AGE`
- `SUBJECT_GENDER`
- `SUBJECT_ISSUED_CITATION`
- `SUBJECT_RACE`

In [23]:
# Summary row of the vehicle-stops department (St. Paul), kept as a one-row
# frame rather than a Series
veh_rows = [3]
overall.loc[veh_rows, :]

Unnamed: 0,dept,dept_full_name,dept_name,dept_loc,file_name,dframe,ncolumns,nrows
3,Department('24-00098'),"24-00098 (St. Paul, MN)",24-00098,"St. Paul, MN",24-00098_Vehicle-Stops-data.csv,INCIDENT_DATE_YEAR INCIDENT_DATE S...,12,710473


In [24]:
veh_df = overall.loc[3, 'dframe']
# Normalized column names of the vehicle-stops frame.
variables1 = sorted(veh_df.columns)
# Labels stored in the first row of the frame. An empty label is read as NaN
# (a float), and sorting floats together with strings raises TypeError, so
# floats are skipped -- the same guard the UOF cell applies.
variables2 = sorted(c for c in veh_df.iloc[0] if not isinstance(c, float))

## Field interviews

### General characteristics

- Present in department 11-00091 (Boston, MA) (2011-2015)
- 34 variables
- 152231 entries

### Variable Groups

#### Incident

- `INCIDENT_DATE`
- `INCIDENT_REASON`
- `INCIDENT_REASON.1`
- `INCIDENT_TIME`
- `INCIDENT_UNIQUE_IDENTIFIER`
- `INCIDENT_UNIQUE_IDENTIFIER.1`

#### Location

- `LOCATION_CITY`
- `LOCATION_DISTRICT`
- `LOCATION_DISTRICT.1`
- `LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION`
- `LOCATION_STREET_NUMBER`

#### Officer

- `OFFICER_AGE`
- `OFFICER_ASSIGNMENT`
- `OFFICER_ASSIGNMENT.1`
- `OFFICER_ETHNICITY`
- `OFFICER_ID`
- `OFFICER_RACE`
- `OFFICER_SUPERVISOR`

#### Subject

- `SUBJECT_DETAILS`
- `SUBJECT_DETAILS.1`
- `SUBJECT_DETAILS.2`
- `SUBJECT_GENDER`
- `SUBJECT_RACE`

#### Vehicle

- `VEHICLE_COLOR`
- `VEHICLE_DETAILS`
- `VEHICLE_DETAILS.1`
- `VEHICLE_MAKE`
- `VEHICLE_MODEL`
- `VEHICLE_YEAR`

#### Others

- `DISPOSITION`
- `SEARCH_CONDUCTED`
- `SEARCH_REASON`
- `UNKNOWN_FIELD_TYPE (FIOFS_TYPE)`
- `UNKNOWN_FIELD_TYPE.1 (TERRORISM)`

In [25]:
# Summary row of the field-interviews department (Boston), kept as a one-row
# frame rather than a Series
fi_rows = [0]
overall.loc[fi_rows, :]

Unnamed: 0,dept,dept_full_name,dept_name,dept_loc,file_name,dframe,ncolumns,nrows
0,Department('11-00091'),"11-00091 (Boston, MA)",11-00091,"Boston, MA",11-00091_Field-Interviews_2011-2015.csv,INCIDENT_UNIQUE_IDENTIFIER INCIDENT_UNI...,34,152231


In [26]:
fi_df = overall.loc[0, 'dframe']
# Normalized column names of the field-interviews frame.
variables1 = sorted(fi_df.columns)
# Labels stored in the first row of the frame. An empty label is read as NaN
# (a float), and sorting floats together with strings raises TypeError, so
# floats are skipped -- the same guard the UOF cell applies.
variables2 = sorted(c for c in fi_df.iloc[0] if not isinstance(c, float))

## Arrests

### General characteristics

- Present in department 49-00033 (Los Angeles, CA) (2015)
- 18 variables
- 126855 entries

Ref (search for the var name):

https://github.com/CityOfLosAngeles/pd-geohub-migration/blob/master/arrests.py

### Variable Groups

#### Charge

- `CHARGE`
- `CHARGE_DESCRIPTION`

#### Incident

- `INCIDENT_DATE`
- `INCIDENT_REASON`
- `INCIDENT_TIME`
- `INCIDENT_UNIQUE_IDENTIFIER`

#### Location

- `LOCATION_DISTRICT`
- `LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION`
- `LOCATION_LATITUDE`
- `LOCATION_LONGITUDE`
- `LOCATION_TYPE`

#### Subject

- `SUBJECT_AGE`
- `SUBJECT_GENDER`
- `SUBJECT_RACE`

#### Others

- `CROSS_STREET`
- `UNKNOWN_FIELD_TYPE (RD)`
- `UNKNOWN_FIELD_TYPE.1 (CHRG_GRP_CD)`
- `UNKNOWN_FIELD_TYPE.2 (ARST_TYP_CD)`

In [27]:
# Summary row of the arrests department (Los Angeles), kept as a one-row
# frame rather than a Series
arr_rows = [9]
overall.loc[arr_rows, :]

Unnamed: 0,dept,dept_full_name,dept_name,dept_loc,file_name,dframe,ncolumns,nrows
9,Department('49-00033'),"49-00033 (Los Angeles, CA)",49-00033,"Los Angeles, CA",49-00033_Arrests_2015.csv,INCIDENT_DATE INCIDENT_TIME INCIDENT_U...,18,126855


In [28]:
arr_df = overall.loc[9, 'dframe']
# Normalized column names of the arrests frame.
variables1 = sorted(arr_df.columns)
# Labels stored in the first row of the frame. An empty label is read as NaN
# (a float), and sorting floats together with strings raises TypeError, so
# floats are skipped -- the same guard the UOF cell applies.
variables2 = sorted(c for c in arr_df.iloc[0] if not isinstance(c, float))