# Generating Recidivism Data

The purpose of this notebook is to transform and clean data from the North Carolina Department of Corrections into a dataset for predicting recidivism of individual inmates. The scripts to download the raw, publicly available data can be found in this repository. This notebook is best run on a server with ample memory, as the data is fairly large. At the end, it exports a pickle of a pandas DataFrame. For use in another script, the pickle is highly recommended, as import time and disk usage are much lower; the export can be changed to a CSV for cross-compatibility.

In [8]:
import pandas as pd
import numpy as np
from datetime import datetime
# Widen the notebook display limits so long and wide previews are not truncated.
for option_name, option_value in (('display.max_rows', 100), ('display.max_columns', 500)):
    pd.set_option(option_name, option_value)

# Load In Data

### Court commitments data 
- At the individual commitment level (person and commitment IDs). We select a few columns based on what might be useful in predicting recidivism, what can be gotten elsewhere, and what doesn't contain too many NaN's.

In [82]:
# Load the raw court-commitments extract from the working directory.
# NOTE(review): the recorded output under this cell is the tail of a warning raised by this
# read -- presumably a mixed-type DtypeWarning; confirm, and consider explicit `dtype=`
# for the ID columns so that later merge keys have a single type.
court_commit = pd.read_csv('OFNT3BB1.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [83]:
# Preview the last five rows to check the available columns and value formats.
# NOTE(review): the rendered rows contain offender names; consider clearing this output before sharing.
court_commit.tail(5)

Unnamed: 0,OFFENDER_NC_DOC_ID_NUMBER,COMMITMENT_PREFIX,COMMITTED_LAST_NAME,COMMITTED_FIRST_NAME,COMMITTED_MIDDLE_NAME,COMMITTED_NAME_SUFFIX,OFFENDER_ADMISSION/INTAKE_DATE,P&P_CASE_INTAKE_DATE,INMATE_COMMITMENT_STATUS_FLAG,COMMITMENT_STATUS_DATE,EARLIEST_SENTENCE_EFFECTIVE_DT,NEW_PERIOD_OF_INCARCERATION_FL,MOST_SERIOUS_OFFENSE_CODE,CO_OF_CONV_MOST_SERIOUS_OFFNSE,TOTAL_SENTENCE_LENGTH,TOTAL_JAIL_CREDITS_(IN_DAYS),NO_RESTITUTION_FLAG,P&P_COMMITMENT_STATUS_FLAG,P&P_COMMITMENT_STATUS_DATE,TOTAL_LENGTH_OF_SUPERVISION,PED_PRIOR_TO_1995_CONVERSION,DATE_OF_LAST_UPDATE,TIME_OF_LAST_UPDATE,NEW_PERIOD_OF_SUPERVISION_FLAG,TYPE_OF_OLD_PE_DATE_CODE
3445196,T659761,BA,SUTTON,JEREMY,HUNTER,,2018-04-11,0001-01-01,ACTIVE,2018-04-05,2018-04-05,Y,TRAFFICKING SCHEDULE I,WAKE,20000.0,30.0,,NORMAL NORM,0001-01-01,0.0,0001-01-01,2018-04-13,10:12:01,N,
3445197,T659808,BA,MARTIN,MICHAEL,KENNETH,,2018-04-12,0001-01-01,ACTIVE,2018-04-04,2018-04-04,Y,POSS/CONSUME FORT WINE/LIQUOR,CRAVEN,420.0,58.0,,NORMAL NORM,0001-01-01,0.0,0001-01-01,2018-04-13,09:21:56,N,
3445198,T660200,BA,CABRERA LORENZO,FLOR,ESPERA,,2018-04-17,0001-01-01,ACTIVE,2018-04-16,2018-04-16,Y,LARCENY,WAKE,10200.0,63.0,,NORMAL NORM,0001-01-01,0.0,0001-01-01,2018-04-18,13:30:16,N,
3445199,T660278,BA,SILVER,JOAQUIN,,,2018-04-18,0001-01-01,ACTIVE,2018-04-18,2018-04-18,Y,DISC/FIREARM/OCCUPIED VEHICLE,NASH,70600.0,3.0,,NORMAL NORM,0001-01-01,0.0,0001-01-01,2018-04-19,14:22:04,N,
3445200,T660328,BA,LUCAS,JEREMEY,PAUL,,2018-04-19,0001-01-01,ACTIVE,2018-04-11,2018-04-11,Y,AWDWWITKISI,ALAMANCE,80400.0,291.0,,NORMAL NORM,0001-01-01,0.0,0001-01-01,2018-04-20,07:31:51,N,


In [84]:
# Columns kept from the court-commitments table: the join keys, the intake date,
# and the incarceration / supervision flags.
court_commit_cols = [
    'OFFENDER_NC_DOC_ID_NUMBER',
    'COMMITMENT_PREFIX',
    'OFFENDER_ADMISSION/INTAKE_DATE',
    'NEW_PERIOD_OF_INCARCERATION_FL',
    'P&P_COMMITMENT_STATUS_FLAG',
    'NEW_PERIOD_OF_SUPERVISION_FLAG',
]

In [85]:
# Parse the intake date; values pandas cannot represent as a timestamp are coerced to NaT.
intake_dates = pd.to_datetime(court_commit['OFFENDER_ADMISSION/INTAKE_DATE'], errors='coerce')
court_commit['OFFENDER_ADMISSION/INTAKE_DATE'] = intake_dates

In [86]:
# Restrict the table to the columns chosen in `court_commit_cols`.
court_commit = court_commit.loc[:, court_commit_cols]

### Inmates data
 - At the person level. All the data in the inmates table is aggregated over time, and therefore it has the risk of data leakage. Eg: Total_Sentence_Count could tell the algorithm that there will be upcoming sentences. Therefore, we want only the columns that could not change over time.

In [6]:
# Load the raw inmates extract from the working directory.
# NOTE(review): the recorded output under this cell is the tail of a warning raised by this
# read -- presumably a mixed-type DtypeWarning; confirm which columns are affected.
inmates = pd.read_csv('INMT4AA1.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [7]:
# Preview the last five rows to check the available columns and value formats.
# NOTE(review): the rendered rows contain inmate names and birth dates; consider clearing this output before sharing.
inmates.tail(5)

Unnamed: 0,INMATE_DOC_NUMBER,INMATE_LAST_NAME,INMATE_FIRST_NAME,INMATE_MIDDLE_INITIAL,INMATE_NAME_SUFFIX,INMATE_NAME_SOUNDEX_CODE,INMATE_GENDER_CODE,INMATE_RACE_CODE,INMATE_BIRTH_DATE,INMATE_ETHNIC_AFFILIATION,INMATE_RECORD_STATUS_CODE,INMATE_ADMIN._STATUS_CODE,CUSTODY_CLASS_CODE,NEXT_CUSTODY_REVIEW_DATE,INMATE_CONTROL_STATUS_CODE,NEXT_SECURITY_REVIEW_DATE,INMATE_SPECIAL_CHARACTERISTICS,PAROLE_CASE_ANALYST,NEXT_PAROLE_COMM._REVIEW_DATE,INMATE_PRIMARY_ASSIGNMENT,INMATE_ADMISSION_DATE,ADMITTING_DIAGNOSTIC_CENTER,DATE_OF_LAST_INMATE_MOVEMENT,TYPE_OF_LAST_INMATE_MOVEMENT,OTHER_FACILITY_CODE,CURRENT_DOP_COMMAND_CODE,CURRENT_DOP_AREA_CODE,INMATE_FACILITY_CODE,INMATE_TIME_COMP_STATUS_CODE,OLDEST_COMMIT.OF_CURRENT_INCAR,OLDEST_SNT.CMP._OF_CURR.INCAR.,OLDEST_CONVICTION_DATE,TOTAL_SENTENCE_COUNT,MOST_SERIOUS_OFFNSE_CURR_INCAR,INMATE_IS_FELON/MISDEMEANANT,CURRENT_COMMITMENT_PREFIX,CURRENT_SENTENCE_COMPONENT,TOTAL_SENTENCE_LENGTH(IN_DAYS),LENGTH_OF_CURRENT_INCARCERATN.,TERM_OF_INCARCERATION(IN_DAYS),LENGTH_OF_RULING_SENTENCES,LAST_RULING_PRD_COMMITMENT,LAST_RULING_PRD_COMPONENT,FINAL_RULING_PED,FINAL_RULING_TRD,FINAL_RULING_PRD,FINAL_RULING_MAX_RELEASE_DATE,LAW_FOR_FINAL_RULING_DATES,DAYS_SERVED_IN_DOC_CUSTODY,DATE_TRD_&_PRD_LAST_COMPUTED,ON/OFF_GAIN_TIME_CODE,GT_RATE_/_ET_LEVEL_CODE,LAST_DATE_ON/OFF_GAIN_TIME,TOTAL_DISCIPLINE_INFRACTIONS,LATEST_DISCIPLINE_INFRACTION,LAST_DISCIPLINE_INFRACTION_DT.,DATE_OF_LAST_ARREST_ON_PAROLE,CURRENT_PENDING_REVIEWS_FLAG,ESCAPE_HISTORY_FLAG,PRIOR_INCARCERATIONS_FLAG,NEXT_PAROLE_REVIEW_TYPE_CODE,TIME_OF_LAST_MOVEMENT,POPULATION/MANAGEMENT_UNIT,INMATE_POSITIVELY_IDENTIFIED,PAROLE_AND_TERMINATE_STATUS,INMATE_LABEL_STATUS_CODE,PRIMARY_OFFENSE_QUALIFIER
453743,T660256,WILSON,ESTEPFANIA,A,,,FEMALE,BLACK,1989-10-22,,ACTIVE,ACTIVE,MEDIUM MED,0001-01-01,REGULAR POPULATION RPOP,0001-01-01,REGULAR,,0001-01-01,,2018-04-20,CORR INST-WOMEN DC,2018-04-20,NEW ADMISSION,FRANKLIN COUNTY,2075,3010,NCCI WOMEN NCCW,NEITHER PRD NOR PED COMPUTED,,,0001-01-01,0,,FELON,,,0.0,0.0,0.0,0.0,,,0001-01-01,0001-01-01,0001-01-01,0001-01-01,,0,2018-04-20,,,0001-01-01,0,,0001-01-01,0001-01-01,N,N,,,14:15:00,,NO,,,
453744,T660278,SILVER,JOAQUIN,,,,MALE,BLACK,1978-11-13,,ACTIVE,ACTIVE,MEDIUM MED,0001-01-01,REGULAR POPULATION RPOP,0001-01-01,REGULAR,JAC01,0001-01-01,,2018-04-18,CENTRAL PRISON D. C.,2018-04-18,NEW ADMISSION,NASH COUNTY,2075,3100,CENTRAL PRISON CENT,NEITHER PRD NOR PED COMPUTED,BA,1.0,0001-01-01,0,,FELON,,,0.0,0.0,0.0,0.0,,,0001-01-01,0001-01-01,0001-01-01,0001-01-01,,0,2018-04-19,,,0001-01-01,0,,0001-01-01,0001-01-01,N,N,,,17:54:00,,NO,,,
453745,T660286,JONES,DESIREE,,,,FEMALE,BLACK,1966-11-20,,ACTIVE,ACTIVE,MEDIUM MED,0001-01-01,REGULAR POPULATION RPOP,0001-01-01,REGULAR,,0001-01-01,,2018-04-20,CORR INST-WOMEN DC,2018-04-20,NEW ADMISSION,VANCE COUNTY,2075,3010,NCCI WOMEN NCCW,NEITHER PRD NOR PED COMPUTED,,,0001-01-01,0,,FELON,,,0.0,0.0,0.0,0.0,,,0001-01-01,0001-01-01,0001-01-01,0001-01-01,,0,2018-04-20,,,0001-01-01,0,,0001-01-01,0001-01-01,N,N,,,11:52:00,,NO,,,
453746,T660328,LUCAS,JEREMEY,P,,,MALE,WHITE,1999-05-18,EUROPEAN/N.AM./AUSTR,ACTIVE,ACTIVE,MEDIUM MED,0001-01-01,REGULAR POPULATION RPOP,0001-01-01,REGULAR,BNI01,0001-01-01,,2018-04-19,POLK YI DIAG CTR NEW,2018-04-19,NEW ADMISSION,ALAMANCE COUNTY,2075,3980,POLK CI POLK,NEITHER PRD NOR PED COMPUTED,BA,1.0,0001-01-01,0,,FELON,,,0.0,0.0,0.0,0.0,,,0001-01-01,0001-01-01,0001-01-01,0001-01-01,,0,2018-04-20,,,0001-01-01,0,,0001-01-01,0001-01-01,N,N,,,14:38:00,,NO,,,
453747,T660466,FERRER,JANASIA,S,,,FEMALE,BLACK,1998-03-05,,ACTIVE,ACTIVE,MEDIUM MED,0001-01-01,REGULAR POPULATION RPOP,0001-01-01,REGULAR,,0001-01-01,,2018-04-20,CORR INST-WOMEN DC,2018-04-20,NEW ADMISSION,JOHNSTON COUNTY,2075,3010,NCCI WOMEN NCCW,NEITHER PRD NOR PED COMPUTED,,,0001-01-01,0,,FELON,,,0.0,0.0,0.0,0.0,,,0001-01-01,0001-01-01,0001-01-01,0001-01-01,,0,2018-04-20,,,0001-01-01,0,,0001-01-01,0001-01-01,N,N,,,20:28:00,,NO,,,


In [8]:
# Columns kept from the inmates table: the ID plus gender, race and birth date.
inmates_cols = [
    'INMATE_DOC_NUMBER',
    'INMATE_GENDER_CODE',
    'INMATE_RACE_CODE',
    'INMATE_BIRTH_DATE',
]

In [9]:
# Parse birth dates; values pandas cannot represent as a timestamp are coerced to NaT.
birth_dates = pd.to_datetime(inmates['INMATE_BIRTH_DATE'], errors='coerce')
inmates['INMATE_BIRTH_DATE'] = birth_dates

In [10]:
# Keep only inmates with a usable (non-NaT) birth date; the original run dropped 218 rows here.
inmates = inmates.dropna(subset=['INMATE_BIRTH_DATE'])

In [11]:
# Restrict the table to the columns chosen in `inmates_cols`.
inmates = inmates.loc[:, inmates_cols]

### Sentence computation data 
- At the individual commitment level (person and commitment IDs), containing pertinent dates for the sentences. Sentences served consecutively for a given inmate will have the same COMMITMENT_PREFIX and subsequent SENTENCE_COMPONENTs (it seems). For our purposes, we need the initial beginning date of each sentence and the final end date.

In [12]:
# Load the raw sentence-computation extract from the working directory.
# NOTE(review): the recorded output under this cell is the tail of a warning raised by this
# read -- presumably a mixed-type DtypeWarning; confirm which columns are affected.
sentence_computation = pd.read_csv('INMT4BB1.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [13]:
# Preview the last five rows to check the date columns and their placeholder values.
sentence_computation.tail(5)

Unnamed: 0,INMATE_DOC_NUMBER,INMATE_COMMITMENT_PREFIX,INMATE_SENTENCE_COMPONENT,INMATE_COMPUTATION_STATUS_FLAG,SENTENCE_BEGIN_DATE_(FOR_MAX),ACTUAL_SENTENCE_END_DATE,PROJECTED_RELEASE_DATE_(PRD),PAROLE_DISCHARGE_DATE,PAROLE_SUPERVISION_BEGIN_DATE
1672080,1576411,BA,1,ACTIVE,2018-04-18,2018-04-24,2018-04-24,0001-01-01,0001-01-01
1672081,1576436,BA,1,ACTIVE,2017-07-19,2018-06-15,2018-06-15,0001-01-01,0001-01-01
1672082,1576493,BA,1,ACTIVE,2017-11-28,2019-03-25,2019-03-25,0001-01-01,0001-01-01
1672083,1576571,BA,1,ACTIVE,2018-03-28,2018-08-04,2018-08-04,0001-01-01,0001-01-01
1672084,1576602,BA,1,ACTIVE,2018-04-19,2018-04-25,2018-04-25,0001-01-01,0001-01-01


### Sentence data 
- At the individual commitment level. Contains data on the sentence. This will be the primary basis for our columns. For efficiency, I have omitted some variables that could be included as dummy variables: PUNISHMENT_TYPE_CODE, COURT_TYPE_CODE, SENTENCING_PENALTY_CLASS_CODE, PRIOR_RECORD_LEVEL_CODE, SENTENCE_TYPE_CODE, COMPONENT_DISPOSITION_CODE. Instead I will include those most applicable to the crime itself (note that MINIMUM_SENTENCE_LENGTH is kept and averaged per commitment below).

In [6]:
# Load the raw sentence-component extract from the working directory.
# NOTE(review): the recorded output under this cell is the tail of a warning raised by this
# read -- presumably a mixed-type DtypeWarning; confirm which columns are affected.
sentences = pd.read_csv('OFNT3CE1.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [7]:
# Preview the last five rows to check the available columns and value formats.
sentences.tail(5)

Unnamed: 0,OFFENDER_NC_DOC_ID_NUMBER,COMMITMENT_PREFIX,SENTENCE_COMPONENT_NUMBER,COUNTY_OF_CONVICTION_CODE,COURT_DOCKET_NUMBER,PUNISHMENT_TYPE_CODE,COURT_TYPE_CODE,COMPONENT_DISPOSITION_CODE,CMP._DISPOSITION_CODE_(2_OF_2),NUMBER_OF_COUNTS,TYPE_OF_COUNT_CODE,PRIMARY_OFFENSE_CODE,OFFENSE_QUALIFIER_CODE,DATE_OFFENSE_COMMITTED_-_BEGIN,DATE_OFFENSE_COMMITTED_-_END,NC_GENERAL_STATUTE_NUMBER,PRIMARY_FELONY/MISDEMEANOR_CD.,SENTENCING_PENALTY_CLASS_CODE,PRIOR_RCD._POINTS/CONVICTIONS,PRIOR_RECORD_LEVEL_CODE,MINIMUM_SENTENCE_LENGTH,MAXIMUM_SENTENCE_LENGTH,LENGTH_OF_SUPERVISION,SUPERVISION_TERM_EXTENSION,SUPERVISION_TO_FOLLOW_INCAR.,SPLIT_SENTENCE_ACTIVE_TERM,G.S._MAXIMUM_SENTENCE_ALLOWED,SERVING_MIN_OR_MAX_TERM_CODE,SENTENCE_TYPE_CODE,SENTENCE_TYPE_CODE.1,SENTENCE_TYPE_CODE.2,SENTENCE_TYPE_CODE.3,SENTENCE_TYPE_CODE.4,SENTENCE_TYPE_CODE.5,CREDITS_FOR_JAIL_DAYS_SERVED,ICC_JAIL_CREDITS_(IN_DAYS),SENTENCE_CHAINING_TYPE_CODE,PRIOR_COMMITMENT_PREFIX,PRIOR_COMPONENT_IDENTIFIER,P&P_SUPV.TERM_CHAIN_TYPE_CODE,P&P_PRIOR_COMMITMENT_PREFIX,PRIOR_P&P_COMMNT/COMPONENT_ID,TIME_COMPUTATION_EXCEPTION_CD.,SENTENCE_CONVICTION_DATE,SENTENCE_EFFECTIVE(BEGIN)_DATE,DELEGATED_AUTHORITY_FLAG,INMATE_SENTENCE_STATUS_CODE,INMATE_COMPONENT_STATUS_DATE,P&P_CASE_STATUS,P&P_COMPONENT_STATUS_DATE,DATE_OF_LAST_UPDATE,TIME_OF_LAST_UPDATE,ORIGINAL_DATA_ENTRY_DATE,ORIGINAL_SENTENCE_AUDIT_CODE,DATE_OF_LAST_UPDATE_TWO,TIME_OF_LAST_UPDATE_TWO
4165335,1576615,1,1,WILSON,18051429.0,COMMUNITY SS (DCC),DISTRICT,GUILTY,GUILTY,1,CONCURRENT,FOOD STAMP FRAUD > $400,PRINCIPAL,2016-02-23,2016-02-23,,MISD.,CLASS 1 MISDEMEANOR SS,0,LEVEL I,0,0,20000.0,0.0,0.0,0,,MAX.TERM:,PROBATION,SUSPENDED SENTENCE,COUNTY JAIL,,,,0,0,,,,INITIAL,,,NOT APPLICABLE,2018-04-20,2018-04-20,Y,,0001-01-01,NORMAL NORM,2018-04-20,2018-04-20,13:10:17,2018-04-20,,2018-04-20,13:10:17
4165336,1576635,1,1,WAKE,18204037.0,COMMUNITY SS (DCC),DISTRICT,GUILTY,GUILTY,1,CONCURRENT,COMMUNICATING THREATS,PRINCIPAL,2018-03-01,2018-03-01,14-277.1,MISD.,CLASS 1 MISDEMEANOR SS,0,LEVEL I,0,0,1800.0,0.0,0.0,0,,MAX.TERM:,PROBATION,SUSPENDED SENTENCE,COUNTY JAIL,,,,0,0,,,,INITIAL,,,NOT APPLICABLE,2018-04-20,2018-04-20,Y,,0001-01-01,NORMAL NORM,2018-04-20,2018-04-20,12:23:23,2018-04-20,,2018-04-20,12:23:23
4165337,1576643,1,1,GREENE,17050245.0,COMMUNITY SS (DCC),SUPERIOR,GUILTY,GUILTY,1,CONCURRENT,SECOND DEGREE TRESPASS,PRINCIPAL,2017-04-26,2017-04-26,14-159.13,MISD.,CLASS 3 MISDEMEANOR SS,0,LEVEL I,0,0,1200.0,0.0,0.0,0,,MAX.TERM:,PROBATION,SUSPENDED SENTENCE,COUNTY JAIL,,,,0,0,,,,INITIAL,,,NOT APPLICABLE,2018-04-19,2018-04-19,Y,,0001-01-01,NORMAL NORM,2018-04-19,2018-04-20,12:20:51,2018-04-20,,2018-04-20,12:20:51
4165338,1576678,1,1,BUNCOMBE,17090013.0,DWI,DISTRICT,GUILTY,GUILTY,1,CONCURRENT,DWI LEVEL 5,PRINCIPAL,2017-09-04,2017-09-04,20-138.1,MISD.,NON CLASS CODE,0,,0,0,1200.0,0.0,0.0,1,,MAX.TERM:,PROBATION,SUSPENDED SENTENCE,DWI CONVICTION,SPECIAL PROBATION (SPLIT),,,0,0,,,,INITIAL,,,NOT APPLICABLE,2018-04-20,2018-04-20,N,,0001-01-01,NORMAL NORM,2018-04-20,2018-04-20,15:55:30,2018-04-20,,2018-04-20,15:55:30
4165339,T153879,1,1,DARE,99003876.0,COMMUNITY SS (DCC),SUPERIOR,GUILTY,GUILTY,1,CONCURRENT,CONSPIRACY,PRINCIPAL,1999-07-02,1999-07-02,90 95,FELON,CLASS H,0,LEVEL I,600,800,,,,0,,MAX.TERM:,PROBATION,SUSPENDED SENTENCE,,,,,0,0,,,,INITIAL,,,NOT APPLICABLE,1999-09-27,1999-09-27,N,,0001-01-01,TOLLED TOLLD,2000-09-26,2000-10-20,11:20:45,1999-10-07,,2000-09-26,09:07:51


In [8]:
# Columns kept from the sentences table. The offense code and the county of conviction
# are high-cardinality and get grouped into coarser categories further down.
sentences_cols = [
    "OFFENDER_NC_DOC_ID_NUMBER",
    "COMMITMENT_PREFIX",
    "SENTENCE_COMPONENT_NUMBER",
    "NUMBER_OF_COUNTS",
    "PRIMARY_OFFENSE_CODE",           # to be grouped
    "PRIMARY_FELONY/MISDEMEANOR_CD.",
    "COUNTY_OF_CONVICTION_CODE",      # to be grouped
    "PRIOR_RCD._POINTS/CONVICTIONS",
    "MINIMUM_SENTENCE_LENGTH",
    "MAXIMUM_SENTENCE_LENGTH",
    "LENGTH_OF_SUPERVISION",
    "SERVING_MIN_OR_MAX_TERM_CODE",
    "SENTENCE_EFFECTIVE(BEGIN)_DATE",
]

In [9]:
# Parse the sentence effective date; values pandas cannot represent as a timestamp become NaT.
effective_dates = pd.to_datetime(sentences["SENTENCE_EFFECTIVE(BEGIN)_DATE"], errors="coerce")
sentences["SENTENCE_EFFECTIVE(BEGIN)_DATE"] = effective_dates

In [10]:
# Restrict the table to the columns chosen in `sentences_cols`.
sentences = sentences.loc[:, sentences_cols]

### Disciplinary infraction data 
- At the ID number and date level. These must be joined and filtered by date, not commitment_prefix. 

In [2]:
# Load the raw disciplinary-infractions extract from the working directory.
# NOTE(review): the recorded output under this cell is the tail of a warning raised by this
# read -- presumably a mixed-type DtypeWarning; confirm which columns are affected.
discipline = pd.read_csv('INMT9CF1.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# Preview the last rows (pandas' default of five) to check the available columns.
discipline.tail()

Unnamed: 0,INMATE_DOC_NUMBER,DISCIPLINARY_INFRACTION_DATE,DISCIPLINARY_INFRACTION_TIME,DISCIP._INFRACTION_SEQUENCE_#,DISCIPLINARY_INFRACTION_CODE,DISCIPLINARY_CHARGE_LEVEL,INMATE_PLEA_RE._INFRACTION,DISCI.INFRACTION_VERDICT_CODE,TYPE_OF_HEARING_FOR_PUNISHMENT,DISCIPLINARY_APPEAL_DECISION,DISCI._SEGREGATION_TIME_(DAYS),DSEG_DAYS_SUSPENDED(IN_MONTHS),GOOD_TIME_LOST_DUE_TO_INFRAC.,GOOD_TIME_LOST_SUSPENDED_(MOS),DISCI._VIOLATION_STATUS_CODE,EXTRA_DUTY_HOURS,EXTRA_DUTY_HOURS_SUSPEND_MNTHS,PRIVILEGE_LOST_-_1ST_TYPE_CODE,PRIVILEGE_LOST_-_2ND_TYPE_CODE,PRIVILEGE_LOST_-_3RD_TYPE_CODE,PRIVILEGES_SUSPENDED_(IN_DAYS),SUSP.PRIVILEGES_SUSPENDED(MOS),CUSTODY_DEMOTED_FROM_CODE,CUSTODY_DEMOTED_TO_CODE,CUSTODY_DEMOTION_SUSPENDED(MO),ACTIVATE_PRIOR_SUSPENSION,DATE_OF_LAST_UPDATE,TIME_OF_LAST_UPDATE,ORIGINAL_DR_CODE_FROM_INVEST.,SUSPENSION_STATUS,DATE_OF_PRIOR_SUSP_PUNISH,TIME_OF_PRIOR_SUSP_PUNISH
3166118,1574393,2018-04-04,21:45:00,2,DISOBEY ORDER,DISCP HEAR OFFC,GUILTY,GUILTY,,,10.0,,10.0,,APPLIED,30.0,,CANTEEN,,,30.0,3.0,,,,,2018-04-17,10:10:24,DISOBEY ORDER,SUSPENSIONS EXIST,0001-01-01,00:00:01
3166119,1574679,2018-04-04,07:55:00,1,PROFANE LANGUAGE,UNIT,NO PLEA,DISMISSED,,,,,,,APPLIED,,,,,,,,,,,,2018-04-11,09:23:14,PROFANE LANGUAGE,,0001-01-01,00:00:01
3166120,1574679,2018-04-04,07:55:00,1,DISOBEY ORDER,UNIT,NO PLEA,DISMISSED,,,,,,,APPLIED,,,,,,,,,,,,2018-04-11,09:23:14,DISOBEY ORDER,,0001-01-01,00:00:01
3166121,1574679,2018-04-14,02:00:00,1,WEAPON POSSESSION,UNIT,NO PLEA,REFER DHO,,,,,,,APPLIED,,,,,,,,,,,,2018-04-19,16:07:16,WEAPON POSSESSION,,0001-01-01,00:00:01
3166122,1575224,2018-04-17,07:10:00,1,WEAPON POSSESSION,UNIT,NO PLEA,REFER DHO,,,,,,,APPLIED,,,,,,,,,,,,2018-04-20,17:31:59,WEAPON POSSESSION,,0001-01-01,00:00:01


In [4]:
# Columns kept from the disciplinary table: inmate ID, infraction date and code,
# and the segregation time imposed.
discipline_cols = [
    'INMATE_DOC_NUMBER',
    'DISCIPLINARY_INFRACTION_DATE',
    'DISCIPLINARY_INFRACTION_CODE',
    'DISCI._SEGREGATION_TIME_(DAYS)',
]

In [5]:
# Parse infraction dates; values pandas cannot represent as a timestamp are coerced to NaT.
infraction_dates = pd.to_datetime(discipline['DISCIPLINARY_INFRACTION_DATE'], errors='coerce')
discipline['DISCIPLINARY_INFRACTION_DATE'] = infraction_dates

In [6]:
# Restrict the table to the columns chosen in `discipline_cols`.
discipline = discipline.loc[:, discipline_cols]

# Creating the Dataset

First, I will use sentence_computation to combine all of the different components of a single commitment, to get information at the continuous internment level.

### Get the Dates of a Single Internment

In [23]:
# Parse the sentence begin / end / projected-release dates in one pass.
# Values pandas cannot represent as a timestamp are coerced to NaT.
for sentence_date_col in ("SENTENCE_BEGIN_DATE_(FOR_MAX)",
                          "ACTUAL_SENTENCE_END_DATE",
                          "PROJECTED_RELEASE_DATE_(PRD)"):
    sentence_computation[sentence_date_col] = pd.to_datetime(
        sentence_computation[sentence_date_col], errors="coerce")

In [24]:
# Parse the parole supervision start and discharge dates; unrepresentable values become NaT.
for parole_date_col in ("PAROLE_SUPERVISION_BEGIN_DATE", "PAROLE_DISCHARGE_DATE"):
    sentence_computation[parole_date_col] = pd.to_datetime(
        sentence_computation[parole_date_col], errors="coerce")

In [25]:
# Use the actual end date where it exists, otherwise fall back to the projected release date.
actual_end = sentence_computation["ACTUAL_SENTENCE_END_DATE"]
projected_end = sentence_computation["PROJECTED_RELEASE_DATE_(PRD)"]
sentence_computation['SENTENCE_END'] = actual_end.where(actual_end.notna(), projected_end)

In [26]:
# Collapse the components of each commitment into one row: the earliest begin date and the
# latest end date of the full sentence term, plus the earliest parole-supervision start and
# the latest parole discharge.
# Aggregations are given by name ('min'/'max'): passing the Python builtins `min`/`max`
# is deprecated in recent pandas and is slated to stop dispatching to the groupby reductions.
sentence_subset = sentence_computation.groupby(['INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX'])\
        .agg({'SENTENCE_BEGIN_DATE_(FOR_MAX)': 'min',
              'SENTENCE_END': 'max',
              'PAROLE_SUPERVISION_BEGIN_DATE': 'min',
              'PAROLE_DISCHARGE_DATE': 'max'
             }
            ).reset_index()

In [27]:
# A missing end date is treated as a life sentence: substitute a far-future placeholder
# (2230-01-01, near the top of the pandas timestamp range).
life_sentence_end = pd.to_datetime('2230-1-1')
sentence_subset["SENTENCE_END"] = sentence_subset["SENTENCE_END"].fillna(life_sentence_end)

In [28]:
# Length of parole supervision in days (0 when either parole date is missing),
# after which the two raw parole date columns are no longer needed.
parole_span = sentence_subset['PAROLE_DISCHARGE_DATE'] - sentence_subset['PAROLE_SUPERVISION_BEGIN_DATE']
sentence_subset['PAROLE_DAYS'] = (parole_span / np.timedelta64(1, 'D')).fillna(0)

sentence_subset = sentence_subset.drop(
    columns=['PAROLE_SUPERVISION_BEGIN_DATE', 'PAROLE_DISCHARGE_DATE'],
    errors='ignore',
)

The minimum supported version is 2.4.6



In [30]:
# Row/column count after collapsing to one row per commitment (baseline for the joins below).
sentence_subset.shape

(868880, 5)

### Bringing In the Court Commitments Table

Now I will join the sentencing dates derived above to the courts data. The resulting table is called `admissions`.

In [31]:
# Inner-join the per-commitment dates to the court commitments on (inmate ID, commitment prefix),
# then discard the right-hand copies of the key columns.
admissions = pd.merge(
    sentence_subset,
    court_commit,
    left_on=["INMATE_DOC_NUMBER", "INMATE_COMMITMENT_PREFIX"],
    right_on=['OFFENDER_NC_DOC_ID_NUMBER', 'COMMITMENT_PREFIX'],
)
admissions = admissions.drop(columns=['OFFENDER_NC_DOC_ID_NUMBER', 'COMMITMENT_PREFIX'])

In [32]:
# Spot-check the joined table.
admissions.tail(3)

Unnamed: 0,INMATE_DOC_NUMBER,INMATE_COMMITMENT_PREFIX,SENTENCE_END,SENTENCE_BEGIN_DATE_(FOR_MAX),PAROLE_DAYS,OFFENDER_ADMISSION/INTAKE_DATE,NEW_PERIOD_OF_INCARCERATION_FL,P&P_COMMITMENT_STATUS_FLAG,NEW_PERIOD_OF_SUPERVISION_FLAG
867812,1567661,BA,2033-08-13,2018-01-23,0.0,2018-01-24,Y,NORMAL NORM,N
867813,1567662,BA,2018-05-22,2018-01-12,0.0,2018-01-24,Y,NORMAL NORM,N
867814,1567663,BA,2018-06-05,2018-01-18,0.0,2018-01-25,Y,NORMAL NORM,N


In [33]:
# Row count after the inner join; compare with sentence_subset.shape to see how many commitments were dropped.
admissions.shape

(867815, 9)

### Bringing In the Inmates Table

Now I will add the information from the inmates table. This contains demographics and date of birth. I will use this to calculate age at release.

In [34]:
# Attach demographics and birth date; commitments without a matching inmate row are dropped (inner join).
admissions = pd.merge(admissions, inmates, how='inner', on='INMATE_DOC_NUMBER')

In [35]:
# Age in completed calendar years on the sentence end date.
# Computed from date parts instead of `timedelta / np.timedelta64(1, 'Y')` because:
#   * 'Y' is an ambiguous timedelta unit that recent pandas versions refuse to convert, and
#   * subtracting a birth date from the far-future 2230-01-01 life-sentence placeholder can
#     exceed the roughly 292-year span a nanosecond timedelta can hold.
# Results can differ by one year from the old 365.2425-day approximation close to a birthday.
release_date = admissions['SENTENCE_END']
birth_date = admissions['INMATE_BIRTH_DATE']

# True when the birthday has not yet occurred in the release year (compare month/day as MMDD).
birthday_pending = (
    (release_date.dt.month * 100 + release_date.dt.day)
    < (birth_date.dt.month * 100 + birth_date.dt.day)
)
admissions['AGE_AT_RELEASE'] = (
    release_date.dt.year - birth_date.dt.year - birthday_pending.astype(int)
).astype(int)

# The birth date itself is not used as a feature.
admissions = admissions.drop(columns='INMATE_BIRTH_DATE')

In [36]:
# Spot-check the demographics and the derived AGE_AT_RELEASE column.
admissions.tail(3)

Unnamed: 0,INMATE_DOC_NUMBER,INMATE_COMMITMENT_PREFIX,SENTENCE_END,SENTENCE_BEGIN_DATE_(FOR_MAX),PAROLE_DAYS,OFFENDER_ADMISSION/INTAKE_DATE,NEW_PERIOD_OF_INCARCERATION_FL,P&P_COMMITMENT_STATUS_FLAG,NEW_PERIOD_OF_SUPERVISION_FLAG,INMATE_GENDER_CODE,INMATE_RACE_CODE,AGE_AT_RELEASE
857603,1523385,BA,2018-05-10,2016-10-05,0.0,2016-11-04,Y,NORMAL NORM,N,MALE,OTHER,28
857604,1523389,BA,2019-02-28,2016-11-07,0.0,2016-12-01,Y,NORMAL NORM,N,MALE,WHITE,34
857605,1523408,BA,2026-08-18,2016-11-07,0.0,2016-11-17,Y,NORMAL NORM,N,MALE,WHITE,49


In [37]:
# Row count after joining inmates; the drop versus the previous shape is commitments with no matching inmate row.
admissions.shape

(857606, 12)

In [38]:
# Checkpoint: persist the merged admissions table so a later session can reload it
# instead of re-running the loads and merges above.
admissions.to_pickle('admissions.pkl')

In [5]:
# Reload the admissions checkpoint written by the previous cell.
# NOTE(review): only unpickle files you created yourself -- loading an untrusted pickle can run arbitrary code.
admissions = pd.read_pickle('admissions.pkl')

### Bringing In the Sentences Table

Now I will add the information from the sentences table. This contains the information most pertinent: the crime, punishment, etc. This new table will be called `sentence_info`. Recall that the `sentences` table contains all information for every component of each sentences.

In [16]:
# Inner-join every sentence component to its admission record on (inmate ID, commitment prefix),
# then discard the left-hand copies of the key columns.
sentence_info = pd.merge(
    sentences,
    admissions,
    left_on=['OFFENDER_NC_DOC_ID_NUMBER', 'COMMITMENT_PREFIX'],
    right_on=["INMATE_DOC_NUMBER", "INMATE_COMMITMENT_PREFIX"],
)
sentence_info = sentence_info.drop(columns=['OFFENDER_NC_DOC_ID_NUMBER', 'COMMITMENT_PREFIX'])

In [17]:
# List the columns available after the join.
sentence_info.columns

Index(['SENTENCE_COMPONENT_NUMBER', 'NUMBER_OF_COUNTS', 'PRIMARY_OFFENSE_CODE',
       'PRIMARY_FELONY/MISDEMEANOR_CD.', 'COUNTY_OF_CONVICTION_CODE',
       'PRIOR_RCD._POINTS/CONVICTIONS', 'MINIMUM_SENTENCE_LENGTH',
       'MAXIMUM_SENTENCE_LENGTH', 'LENGTH_OF_SUPERVISION',
       'SERVING_MIN_OR_MAX_TERM_CODE', 'SENTENCE_EFFECTIVE(BEGIN)_DATE',
       'INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX', 'SENTENCE_END',
       'SENTENCE_BEGIN_DATE_(FOR_MAX)', 'PAROLE_DAYS',
       'OFFENDER_ADMISSION/INTAKE_DATE', 'NEW_PERIOD_OF_INCARCERATION_FL',
       'P&P_COMMITMENT_STATUS_FLAG', 'NEW_PERIOD_OF_SUPERVISION_FLAG',
       'INMATE_GENDER_CODE', 'INMATE_RACE_CODE', 'AGE_AT_RELEASE'],
      dtype='object')

 We want to turn the categorical variables into counts/percentages.

For PRIMARY_OFFENSE_CODE and COUNTY_OF_CONVICTION_CODE, I will reduce the number of categories by grouping together anything representing less than 0.1% and 0.5% of the offenses and counties respectively into "OTHER". This gives us 125 offense groups and 58 county groups.

In [18]:
# Offense grouping: map every offense code to itself, except codes that account for
# less than 0.1% of rows, which are mapped to "OTHER".
offense_percentages = sentence_info["PRIMARY_OFFENSE_CODE"].value_counts(normalize=True, dropna=False)
offense_groupings = {code: code for code in sentence_info["PRIMARY_OFFENSE_CODE"].unique()}

# Iterate the index directly: Series.iteritems() no longer exists in pandas 2.x and the
# frequency values themselves are not needed here.
for rare_offense in offense_percentages[offense_percentages < 0.001].index:
    offense_groupings[rare_offense] = "OTHER"

The minimum supported version is 2.4.6



In [19]:
# County grouping: map every county to itself, except counties that account for
# less than 0.5% of rows, which are mapped to "OTHER".
county_percentages = sentence_info["COUNTY_OF_CONVICTION_CODE"].value_counts(normalize=True, dropna=False)
county_groupings = {county: county for county in sentence_info["COUNTY_OF_CONVICTION_CODE"].unique()}

# Iterate the index directly: Series.iteritems() no longer exists in pandas 2.x and the
# frequency values themselves are not needed here.
for rare_county in county_percentages[county_percentages < 0.005].index:
    county_groupings[rare_county] = "OTHER"

In [20]:
# Replace each offense code by its group. Iterating the Series yields its values directly,
# which avoids Series.iteritems() (removed in pandas 2.x) without changing the lookups.
sentence_info["PRIMARY_OFFENSE_CODE"] = [offense_groupings[code] for code in sentence_info['PRIMARY_OFFENSE_CODE']]

In [21]:
# Replace each county by its group. Iterating the Series yields its values directly,
# which avoids Series.iteritems() (removed in pandas 2.x) without changing the lookups.
sentence_info["COUNTY_OF_CONVICTION_CODE"] = [county_groupings[county] for county in sentence_info['COUNTY_OF_CONVICTION_CODE']]

Getting count of rows for each commitment, to normalize some of the variables below. 

In [22]:
# Number of sentence rows per commitment, used below as the denominator when turning
# per-category row counts into proportions. `.size()` counts every row; `.count()` on
# NUMBER_OF_COUNTS would skip rows where that column is null, so the denominator could
# disagree with the `.size()`-based numerators it divides.
num_distinct_rows = sentence_info.groupby(['INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX']).size()

Getting total number of counts per commitment, percentages by county groups defined above, percentages by offense groups defined above, percentages by misdemeanor/felony, percentages by the flag 'serving min or max sentence', and the means of the minimum sentence lengths, maximum sentence lengths, and length of supervision.

In [23]:
# Total number of counts per commitment, kept as a one-column DataFrame for the concat below.
counts = (
    sentence_info
    .groupby(['INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX'])['NUMBER_OF_COUNTS']
    .sum()
    .to_frame()
)
counts.shape

(847475, 1)

In [24]:
# Share of each commitment's sentence rows that falls in each (grouped) county of conviction.
county_keys = ['INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX', 'COUNTY_OF_CONVICTION_CODE']
counties = sentence_info.groupby(county_keys).size().unstack(fill_value=0)
counties.columns = [f"COUNTY_{label}" for label in counties.columns]

# Row counts -> proportions: divide every column by the commitment's row count
# (the divisor is reindexed so the result keeps exactly the rows of `counties`).
counties = counties.div(num_distinct_rows.reindex(counties.index), axis=0)

counties.shape

(847475, 58)

In [25]:
# Share of each commitment's sentence rows that falls in each (grouped) primary offense.
offense_keys = ['INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX', 'PRIMARY_OFFENSE_CODE']
offenses = sentence_info.groupby(offense_keys).size().unstack(fill_value=0)
offenses.columns = [f"OFFENSE_{label}" for label in offenses.columns]

# Row counts -> proportions: divide every column by the commitment's row count
# (the divisor is reindexed so the result keeps exactly the rows of `offenses`).
offenses = offenses.div(num_distinct_rows.reindex(offenses.index), axis=0)

offenses.shape

(847475, 125)

In [26]:
# Share of each commitment's sentence rows per felony/misdemeanor code.
misd_felon_keys = ['INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX', 'PRIMARY_FELONY/MISDEMEANOR_CD.']
misd_felon = sentence_info.groupby(misd_felon_keys).size().unstack(fill_value=0)

# Row counts -> proportions: divide every column by the commitment's row count
# (the divisor is reindexed so the result keeps exactly the rows of `misd_felon`).
misd_felon = misd_felon.div(num_distinct_rows.reindex(misd_felon.index), axis=0)

misd_felon.shape

(847475, 2)

In [27]:
# Share of each commitment's sentence rows per 'serving min or max term' code.
min_max_keys = ['INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX', 'SERVING_MIN_OR_MAX_TERM_CODE']
min_max = sentence_info.groupby(min_max_keys).size().unstack(fill_value=0)

# Row counts -> proportions: divide every column by the commitment's row count
# (the divisor is reindexed so the result keeps exactly the rows of `min_max`).
min_max = min_max.div(num_distinct_rows.reindex(min_max.index), axis=0)

min_max.shape

(847475, 2)

In [28]:
# Per-commitment means of the numeric sentence attributes.
# MAXIMUM_SENTENCE_LENGTH is forced to numeric first (unparseable entries become NaN),
# then missing lengths / supervision terms are counted as 0 before averaging.
sentence_info['MAXIMUM_SENTENCE_LENGTH'] = pd.to_numeric(sentence_info['MAXIMUM_SENTENCE_LENGTH'], errors='coerce')
sentence_info = sentence_info.fillna({'MAXIMUM_SENTENCE_LENGTH': 0, 'MINIMUM_SENTENCE_LENGTH': 0,
                                      'LENGTH_OF_SUPERVISION': 0})

# Select the columns with a list: indexing a groupby with a bare tuple of several column
# names was deprecated in pandas 1.0 and is rejected by recent versions.
others = sentence_info.groupby(['INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX'])[
    ['MINIMUM_SENTENCE_LENGTH', 'MAXIMUM_SENTENCE_LENGTH',
     'LENGTH_OF_SUPERVISION', 'PRIOR_RCD._POINTS/CONVICTIONS']
].mean()

others.shape

(847475, 4)

In [29]:
# Put all per-commitment feature blocks side by side (aligned on the commitment index),
# then turn the (inmate ID, commitment prefix) index back into ordinary columns.
# NOTE(review): the recorded preview of `sentence_and_admissions` further down also shows a
# SENTENCE_EFFECTIVE(BEGIN)_DATE column that none of these frames contains -- confirm
# whether a per-commitment aggregate of that date is missing here.
sentence_feature_frames = [counts, counties, offenses, misd_felon, min_max, others]
all_sentence_vars = pd.concat(sentence_feature_frames, axis=1)
all_sentence_vars = all_sentence_vars.reset_index()

Now we want to rejoin these columns onto the admissions data.

In [30]:
# Inner-join the aggregated sentence features back onto the one-row-per-commitment admissions table.
sentence_and_admissions = pd.merge(
    admissions,
    all_sentence_vars,
    how='inner',
    on=["INMATE_DOC_NUMBER", "INMATE_COMMITMENT_PREFIX"],
)

In [5]:
# Spot-check the first rows of the combined admissions + sentence-feature table.
sentence_and_admissions.head(3)

Unnamed: 0,INMATE_DOC_NUMBER,INMATE_COMMITMENT_PREFIX,SENTENCE_END,SENTENCE_BEGIN_DATE_(FOR_MAX),PAROLE_DAYS,OFFENDER_ADMISSION/INTAKE_DATE,NEW_PERIOD_OF_INCARCERATION_FL,P&P_COMMITMENT_STATUS_FLAG,NEW_PERIOD_OF_SUPERVISION_FLAG,INMATE_GENDER_CODE,INMATE_RACE_CODE,AGE_AT_RELEASE,NUMBER_OF_COUNTS,COUNTY_ALAMANCE,COUNTY_BEAUFORT,COUNTY_BRUNSWICK,COUNTY_BUNCOMBE,COUNTY_BURKE,COUNTY_CABARRUS,COUNTY_CALDWELL,COUNTY_CARTERET,COUNTY_CATAWBA,COUNTY_CLEVELAND,COUNTY_COLUMBUS,COUNTY_CRAVEN,COUNTY_CUMBERLAND,COUNTY_DAVIDSON,COUNTY_DUPLIN,COUNTY_DURHAM,COUNTY_EDGECOMBE,COUNTY_FORSYTH,COUNTY_FRANKLIN,COUNTY_GASTON,COUNTY_GRANVILLE,COUNTY_GUILFORD,COUNTY_HALIFAX,COUNTY_HARNETT,COUNTY_HAYWOOD,COUNTY_HENDERSON,COUNTY_HOKE,COUNTY_IREDELL,COUNTY_JOHNSTON,COUNTY_LEE,COUNTY_LENOIR,COUNTY_LINCOLN,COUNTY_MCDOWELL,COUNTY_MECKLENBURG,COUNTY_MOORE,COUNTY_NASH,COUNTY_NEW HANOVER,COUNTY_ONSLOW,COUNTY_ORANGE,COUNTY_OTHER,COUNTY_PERSON,COUNTY_PITT,COUNTY_RANDOLPH,COUNTY_RICHMOND,COUNTY_ROBESON,COUNTY_ROCKINGHAM,COUNTY_ROWAN,COUNTY_RUTHERFORD,COUNTY_SAMPSON,COUNTY_SCOTLAND,COUNTY_STANLY,COUNTY_SURRY,COUNTY_UNION,COUNTY_VANCE,COUNTY_WAKE,COUNTY_WAYNE,COUNTY_WILKES,COUNTY_WILSON,OFFENSE_3M REIMPRISON VIOL,OFFENSE_ABANDONMENT,OFFENSE_ARMED ROBBERY,OFFENSE_ASSAULT,OFFENSE_ASSAULT BY STRANGULATION,OFFENSE_ASSAULT INFLICT SERI BODY INJ,OFFENSE_ASSAULT ISI,OFFENSE_ASSAULT ON FEMALE,OFFENSE_ASSAULT ON OFFICER/ST EMPLOYEE,OFFENSE_ASSAULT ON POLICEMAN,OFFENSE_ASSAULT ON PUBLIC OFFICIAL,OFFENSE_AWDW,OFFENSE_AWDW GOV OFFICERS/EMPLOYEES,OFFENSE_AWDWISI,OFFENSE_AWDWWITK,OFFENSE_AWDWWITKISI,OFFENSE_B & E & L,OFFENSE_B & E VEHICLES,OFFENSE_BURGLARY 1ST DEGREE,OFFENSE_BURGLARY 2ND DEGREE,OFFENSE_CARRY CONCEALED WEAPON,OFFENSE_CHEAT - PROPERTY/SERVICES,OFFENSE_CHILD ABUSE,OFFENSE_COMMON LAW FORGERY,OFFENSE_COMMON LAW ROBBERY,OFFENSE_COMMUNICATING THREATS,OFFENSE_CREDIT CARD THEFT,OFFENSE_CRIME AGAINST NATURE,OFFENSE_DAMAGE TO PROPERTY,OFFENSE_DEL/SELL SCHEDULE II,OFFENSE_DISCHG FIREARM-OCC 
PROPERTY,OFFENSE_DISORDERLY CONDUCT,OFFENSE_DRIV LICENSE PERM RVK,OFFENSE_DRIV LICENSE REVOKED,OFFENSE_DRIVING UNDER INFLUENCE (DUI),OFFENSE_DRUG PARA - USE/POSSESS,OFFENSE_DRUNK & DISORDERLY,OFFENSE_DWI DRIVING WHILE IMPAIRED,OFFENSE_DWI LEVEL 1,OFFENSE_DWI LEVEL 1 AGGRAVATED,OFFENSE_DWI LEVEL 2,OFFENSE_DWI LEVEL 3,OFFENSE_DWI LEVEL 4,OFFENSE_DWI LEVEL 5,OFFENSE_EMBEZZLEMENT,OFFENSE_EMPLOY SEC VIOLATION,OFFENSE_ESCAPE PRISON,OFFENSE_FAIL TO REGISTER (SEX OFFENDER,OFFENSE_FAILURE TO STOP FOR ACCIDENT,OFFENSE_FELONY B&E,OFFENSE_FINANCIAL CARD FRAUD/MISD,OFFENSE_FORGERY,OFFENSE_FORGERY AND UTTERING,OFFENSE_HABITUAL FELON,OFFENSE_HABITUAL IMPAIRED DRIVING,OFFENSE_HABITUAL MISDEMEANOR ASSAULT,OFFENSE_HIT AND RUN,OFFENSE_IDENTITY FRAUD/THEFT,OFFENSE_INDECENT LIBERTY W/CHILD,OFFENSE_INVOLUNTARY MANSLAUGHTER,OFFENSE_KIDNAPPING 1ST DEGREE,OFFENSE_KIDNAPPING 2ND DEGREE,OFFENSE_LARCENY,OFFENSE_LARCENY (OVER $200),OFFENSE_LARCENY AFTER B & E,OFFENSE_LARCENY AND RECEIVING,OFFENSE_LARCENY BY SERVANT/EMPLOYEE,OFFENSE_LARCENY FROM PERSON,OFFENSE_LARCENY OF FIREARMS,OFFENSE_LARCENY OF MOTOR VEHICLE,OFFENSE_LARCENY OVER $1000,OFFENSE_MAINT ANY PLACE CONTR SUB,OFFENSE_MANSLAUGHTER,OFFENSE_MISD B&E,OFFENSE_MURDER FIRST DEGREE,OFFENSE_MURDER SECOND DEGREE,OFFENSE_NON-SUPPORT,OFFENSE_OBSTRUCTING JUSTICE,OFFENSE_OBT PROP BY FALSE PR/CHTS/SER,OFFENSE_OBTAIN CONTR SUBST BY FRAUD,OFFENSE_OPERATE VEHICLE W/O LICENSE,OFFENSE_OTHER,OFFENSE_OTHER MISDEMEANANT,OFFENSE_POSSESS SCHEDULE I,OFFENSE_POSSESS SCHEDULE II,OFFENSE_POSSESS SCHEDULE VI,OFFENSE_POSSESS WIT SELL CONTROL SUBST,OFFENSE_POSSESS WITS SCHEDULE I,OFFENSE_POSSESS WITS SCHEDULE II,OFFENSE_POSSESS WITS SCHEDULE VI,OFFENSE_POSSESSING STOLEN GOODS,OFFENSE_POSSESSION OF FIREARM BY FELON,OFFENSE_POST RELEASE REVOCATION,OFFENSE_RAPE FIRST DEGREE,OFFENSE_RAPE SECOND DEGREE,OFFENSE_RECEIVING STOLEN GOODS,OFFENSE_RECEIVING STOLEN VEHICLE,OFFENSE_RECKLESS DRIVING,OFFENSE_RESISTING OFFICER,OFFENSE_ROBBERY W/DANGEROUS WEAPON,OFFENSE_SECOND 
DEGREE TRESPASS,OFFENSE_SELL CONTROL SUBSTANCE,OFFENSE_SELL SCHEDULE I,OFFENSE_SELL SCHEDULE II,OFFENSE_SELL SCHEDULE VI,OFFENSE_SEXUAL OFFENSE 1ST DEGREE,OFFENSE_SEXUAL OFFENSE 2ND DEGREE,OFFENSE_SHOPLIFTING,OFFENSE_SIMPLE ASSAULT/AFFRAY,OFFENSE_SPEED ELUDE ARREST/ATTEMPT/SEC,OFFENSE_SPEEDING,OFFENSE_SPEEDING ELUDE ARREST OR/ATTEM,OFFENSE_TRAFFICKING SCHEDULE I,OFFENSE_TRAFFICKING SCHEDULE II,OFFENSE_TRESPASS,OFFENSE_UNAUTH USE MOTOR CONVEYANCE,OFFENSE_UTTERING FORGEDPAPER/INST/END,OFFENSE_VIOLATE REGULATE CONTROL SUBST,OFFENSE_VIOLATE VEHICLE REGISTRATION,OFFENSE_VIOLATION CONT SUB PENAL INST,OFFENSE_VIOLATION DRUG LAWS,OFFENSE_VIOLATION PROTECTIVE ORDER,OFFENSE_WANTON INJ PER/PROP GT $200,OFFENSE_WILL/WANT INJ REAL PROPERTY,OFFENSE_WORTHLESS CHECK,FELON,MISD.,MAX.TERM:,MIN.TERM:,MINIMUM_SENTENCE_LENGTH,MAXIMUM_SENTENCE_LENGTH,LENGTH_OF_SUPERVISION,PRIOR_RCD._POINTS/CONVICTIONS,SENTENCE_EFFECTIVE(BEGIN)_DATE
0,4,AA,1984-07-11,1983-07-12,0.0,1983-07-13,Y,NORMAL NORM,,MALE,WHITE,22,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,30000.0,0.0,0.0,1983-07-12
1,6,AA,1973-03-28,1973-01-30,0.0,1973-01-30,Y,,,MALE,WHITE,21,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,60.0,90.0,0.0,0.0,1973-01-30
2,6,AB,1975-08-18,1973-04-11,0.0,1973-04-15,Y,NORMAL NORM,,MALE,WHITE,24,27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,30.0,0.0,0.0,0.0,1973-04-11


In [44]:
# Checkpoint: write the current sentence_and_admissions frame to disk as a pickle.
sentence_and_admissions.to_pickle('sentence_and_admissions.pkl')

In [4]:
# Optional: uncomment to resume from the saved checkpoint instead of re-running the cells above.
# sentence_and_admissions = pd.read_pickle('sentence_and_admissions.pkl')

#### Finalizing Start Dates
If `SENTENCE_BEGIN_DATE_(FOR_MAX)` from the sentence computation table is missing, fall back to `SENTENCE_EFFECTIVE(BEGIN)_DATE` from the sentences table; if that is also missing, fall back to `OFFENDER_ADMISSION/INTAKE_DATE` from the court commitment table. If all three are null, drop that record.

In [32]:
# Earliest SENTENCE_EFFECTIVE(BEGIN)_DATE per (offender, commitment prefix),
# returned as a flat frame with the two grouping keys as ordinary columns.
sentences_table_dates = (
    sentences
    .groupby(['OFFENDER_NC_DOC_ID_NUMBER', 'COMMITMENT_PREFIX'])['SENTENCE_EFFECTIVE(BEGIN)_DATE']
    .min()
    .reset_index()
)

In [47]:
# Attach each commitment's earliest sentence effective date, then discard the
# right-hand key columns, which duplicate INMATE_DOC_NUMBER / INMATE_COMMITMENT_PREFIX.
# The merge uses pandas' default inner join, so commitments without a match in
# sentences_table_dates are not carried forward.
sentence_and_admissions = (
    sentence_and_admissions
    .merge(
        sentences_table_dates,
        left_on=['INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX'],
        right_on=['OFFENDER_NC_DOC_ID_NUMBER', 'COMMITMENT_PREFIX'],
    )
    .drop(['OFFENDER_NC_DOC_ID_NUMBER', 'COMMITMENT_PREFIX'], axis=1)
)

In [89]:
# Build SENTENCE_START by taking the first non-null value, in priority order, from:
#   1. SENTENCE_BEGIN_DATE_(FOR_MAX)
#   2. SENTENCE_EFFECTIVE(BEGIN)_DATE
#   3. OFFENDER_ADMISSION/INTAKE_DATE
# Rows where all three are null remain null.
sentence_and_admissions['SENTENCE_START'] = (
    sentence_and_admissions['SENTENCE_BEGIN_DATE_(FOR_MAX)']
    .fillna(value=sentence_and_admissions['SENTENCE_EFFECTIVE(BEGIN)_DATE'])
    .fillna(value=sentence_and_admissions['OFFENDER_ADMISSION/INTAKE_DATE'])
)

In [92]:
# Keep only the rows for which a start date could be determined.
clean_dates = sentence_and_admissions.loc[sentence_and_admissions['SENTENCE_START'].notna()]

In [93]:
# Remove the three source date columns; SENTENCE_START now carries the start date.
# The result is rebound instead of using inplace=True: clean_dates was produced by
# boolean filtering of another frame, and mutating such a slice in place triggers
# pandas' SettingWithCopyWarning.
clean_dates = clean_dates.drop(
    ['SENTENCE_BEGIN_DATE_(FOR_MAX)', 'SENTENCE_EFFECTIVE(BEGIN)_DATE', 'OFFENDER_ADMISSION/INTAKE_DATE'],
    axis=1,
)

In [3]:
# Preview the first three rows of clean_dates.
clean_dates.head(3)

Unnamed: 0,INMATE_DOC_NUMBER,INMATE_COMMITMENT_PREFIX,SENTENCE_END,PAROLE_DAYS,NEW_PERIOD_OF_INCARCERATION_FL,P&P_COMMITMENT_STATUS_FLAG,NEW_PERIOD_OF_SUPERVISION_FLAG,INMATE_GENDER_CODE,INMATE_RACE_CODE,AGE_AT_RELEASE,NUMBER_OF_COUNTS,COUNTY_ALAMANCE,COUNTY_BEAUFORT,COUNTY_BRUNSWICK,COUNTY_BUNCOMBE,COUNTY_BURKE,COUNTY_CABARRUS,COUNTY_CALDWELL,COUNTY_CARTERET,COUNTY_CATAWBA,COUNTY_CLEVELAND,COUNTY_COLUMBUS,COUNTY_CRAVEN,COUNTY_CUMBERLAND,COUNTY_DAVIDSON,COUNTY_DUPLIN,COUNTY_DURHAM,COUNTY_EDGECOMBE,COUNTY_FORSYTH,COUNTY_FRANKLIN,COUNTY_GASTON,COUNTY_GRANVILLE,COUNTY_GUILFORD,COUNTY_HALIFAX,COUNTY_HARNETT,COUNTY_HAYWOOD,COUNTY_HENDERSON,COUNTY_HOKE,COUNTY_IREDELL,COUNTY_JOHNSTON,COUNTY_LEE,COUNTY_LENOIR,COUNTY_LINCOLN,COUNTY_MCDOWELL,COUNTY_MECKLENBURG,COUNTY_MOORE,COUNTY_NASH,COUNTY_NEW HANOVER,COUNTY_ONSLOW,COUNTY_ORANGE,COUNTY_OTHER,COUNTY_PERSON,COUNTY_PITT,COUNTY_RANDOLPH,COUNTY_RICHMOND,COUNTY_ROBESON,COUNTY_ROCKINGHAM,COUNTY_ROWAN,COUNTY_RUTHERFORD,COUNTY_SAMPSON,COUNTY_SCOTLAND,COUNTY_STANLY,COUNTY_SURRY,COUNTY_UNION,COUNTY_VANCE,COUNTY_WAKE,COUNTY_WAYNE,COUNTY_WILKES,COUNTY_WILSON,OFFENSE_3M REIMPRISON VIOL,OFFENSE_ABANDONMENT,OFFENSE_ARMED ROBBERY,OFFENSE_ASSAULT,OFFENSE_ASSAULT BY STRANGULATION,OFFENSE_ASSAULT INFLICT SERI BODY INJ,OFFENSE_ASSAULT ISI,OFFENSE_ASSAULT ON FEMALE,OFFENSE_ASSAULT ON OFFICER/ST EMPLOYEE,OFFENSE_ASSAULT ON POLICEMAN,OFFENSE_ASSAULT ON PUBLIC OFFICIAL,OFFENSE_AWDW,OFFENSE_AWDW GOV OFFICERS/EMPLOYEES,OFFENSE_AWDWISI,OFFENSE_AWDWWITK,OFFENSE_AWDWWITKISI,OFFENSE_B & E & L,OFFENSE_B & E VEHICLES,OFFENSE_BURGLARY 1ST DEGREE,OFFENSE_BURGLARY 2ND DEGREE,OFFENSE_CARRY CONCEALED WEAPON,OFFENSE_CHEAT - PROPERTY/SERVICES,OFFENSE_CHILD ABUSE,OFFENSE_COMMON LAW FORGERY,OFFENSE_COMMON LAW ROBBERY,OFFENSE_COMMUNICATING THREATS,OFFENSE_CREDIT CARD THEFT,OFFENSE_CRIME AGAINST NATURE,OFFENSE_DAMAGE TO PROPERTY,OFFENSE_DEL/SELL SCHEDULE II,OFFENSE_DISCHG FIREARM-OCC PROPERTY,OFFENSE_DISORDERLY CONDUCT,OFFENSE_DRIV LICENSE PERM RVK,OFFENSE_DRIV 
LICENSE REVOKED,OFFENSE_DRIVING UNDER INFLUENCE (DUI),OFFENSE_DRUG PARA - USE/POSSESS,OFFENSE_DRUNK & DISORDERLY,OFFENSE_DWI DRIVING WHILE IMPAIRED,OFFENSE_DWI LEVEL 1,OFFENSE_DWI LEVEL 1 AGGRAVATED,OFFENSE_DWI LEVEL 2,OFFENSE_DWI LEVEL 3,OFFENSE_DWI LEVEL 4,OFFENSE_DWI LEVEL 5,OFFENSE_EMBEZZLEMENT,OFFENSE_EMPLOY SEC VIOLATION,OFFENSE_ESCAPE PRISON,OFFENSE_FAIL TO REGISTER (SEX OFFENDER,OFFENSE_FAILURE TO STOP FOR ACCIDENT,OFFENSE_FELONY B&E,OFFENSE_FINANCIAL CARD FRAUD/MISD,OFFENSE_FORGERY,OFFENSE_FORGERY AND UTTERING,OFFENSE_HABITUAL FELON,OFFENSE_HABITUAL IMPAIRED DRIVING,OFFENSE_HABITUAL MISDEMEANOR ASSAULT,OFFENSE_HIT AND RUN,OFFENSE_IDENTITY FRAUD/THEFT,OFFENSE_INDECENT LIBERTY W/CHILD,OFFENSE_INVOLUNTARY MANSLAUGHTER,OFFENSE_KIDNAPPING 1ST DEGREE,OFFENSE_KIDNAPPING 2ND DEGREE,OFFENSE_LARCENY,OFFENSE_LARCENY (OVER $200),OFFENSE_LARCENY AFTER B & E,OFFENSE_LARCENY AND RECEIVING,OFFENSE_LARCENY BY SERVANT/EMPLOYEE,OFFENSE_LARCENY FROM PERSON,OFFENSE_LARCENY OF FIREARMS,OFFENSE_LARCENY OF MOTOR VEHICLE,OFFENSE_LARCENY OVER $1000,OFFENSE_MAINT ANY PLACE CONTR SUB,OFFENSE_MANSLAUGHTER,OFFENSE_MISD B&E,OFFENSE_MURDER FIRST DEGREE,OFFENSE_MURDER SECOND DEGREE,OFFENSE_NON-SUPPORT,OFFENSE_OBSTRUCTING JUSTICE,OFFENSE_OBT PROP BY FALSE PR/CHTS/SER,OFFENSE_OBTAIN CONTR SUBST BY FRAUD,OFFENSE_OPERATE VEHICLE W/O LICENSE,OFFENSE_OTHER,OFFENSE_OTHER MISDEMEANANT,OFFENSE_POSSESS SCHEDULE I,OFFENSE_POSSESS SCHEDULE II,OFFENSE_POSSESS SCHEDULE VI,OFFENSE_POSSESS WIT SELL CONTROL SUBST,OFFENSE_POSSESS WITS SCHEDULE I,OFFENSE_POSSESS WITS SCHEDULE II,OFFENSE_POSSESS WITS SCHEDULE VI,OFFENSE_POSSESSING STOLEN GOODS,OFFENSE_POSSESSION OF FIREARM BY FELON,OFFENSE_POST RELEASE REVOCATION,OFFENSE_RAPE FIRST DEGREE,OFFENSE_RAPE SECOND DEGREE,OFFENSE_RECEIVING STOLEN GOODS,OFFENSE_RECEIVING STOLEN VEHICLE,OFFENSE_RECKLESS DRIVING,OFFENSE_RESISTING OFFICER,OFFENSE_ROBBERY W/DANGEROUS WEAPON,OFFENSE_SECOND DEGREE TRESPASS,OFFENSE_SELL CONTROL SUBSTANCE,OFFENSE_SELL SCHEDULE 
I,OFFENSE_SELL SCHEDULE II,OFFENSE_SELL SCHEDULE VI,OFFENSE_SEXUAL OFFENSE 1ST DEGREE,OFFENSE_SEXUAL OFFENSE 2ND DEGREE,OFFENSE_SHOPLIFTING,OFFENSE_SIMPLE ASSAULT/AFFRAY,OFFENSE_SPEED ELUDE ARREST/ATTEMPT/SEC,OFFENSE_SPEEDING,OFFENSE_SPEEDING ELUDE ARREST OR/ATTEM,OFFENSE_TRAFFICKING SCHEDULE I,OFFENSE_TRAFFICKING SCHEDULE II,OFFENSE_TRESPASS,OFFENSE_UNAUTH USE MOTOR CONVEYANCE,OFFENSE_UTTERING FORGEDPAPER/INST/END,OFFENSE_VIOLATE REGULATE CONTROL SUBST,OFFENSE_VIOLATE VEHICLE REGISTRATION,OFFENSE_VIOLATION CONT SUB PENAL INST,OFFENSE_VIOLATION DRUG LAWS,OFFENSE_VIOLATION PROTECTIVE ORDER,OFFENSE_WANTON INJ PER/PROP GT $200,OFFENSE_WILL/WANT INJ REAL PROPERTY,OFFENSE_WORTHLESS CHECK,FELON,MISD.,MAX.TERM:,MIN.TERM:,MINIMUM_SENTENCE_LENGTH,MAXIMUM_SENTENCE_LENGTH,LENGTH_OF_SUPERVISION,PRIOR_RCD._POINTS/CONVICTIONS,SENTENCE_START
0,4,AA,1984-07-11,0.0,Y,NORMAL NORM,,MALE,WHITE,22,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,30000.0,0.0,0.0,1983-07-12
1,6,AA,1973-03-28,0.0,Y,,,MALE,WHITE,21,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,60.0,90.0,0.0,0.0,1973-01-30
2,6,AB,1975-08-18,0.0,Y,NORMAL NORM,,MALE,WHITE,24,27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,30.0,0.0,0.0,0.0,1973-04-11


In [94]:
# Checkpoint: write the clean_dates frame to disk as a pickle.
clean_dates.to_pickle('clean_dates.pkl')

In [2]:
# Optional: uncomment to resume from the saved checkpoint instead of re-running the cells above.
# clean_dates = pd.read_pickle('clean_dates.pkl')

### Bringing In Disciplinary Data

Now I will bring in information about disciplinary actions. I will once again aggregate all the infractions that occur less than 0.1% of the time into the category "OTHER", giving us 54 infraction categories overall. 

In [8]:
# Commitment identifiers plus the start/end dates of each sentence; these are the
# only columns needed to match disciplinary records to a commitment.
just_dates = clean_dates.loc[
    :, ['INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX', 'SENTENCE_START', 'SENTENCE_END']
]

In [9]:
# Infractions grouping
# Share of all disciplinary records held by each infraction code; missing codes are
# counted as their own category because dropna=False.
infraction_percentages = discipline["DISCIPLINARY_INFRACTION_CODE"].value_counts(normalize=True, dropna=False)

# Start from the identity mapping (every code maps to itself)...
infraction_groupings = {code: code for code in discipline["DISCIPLINARY_INFRACTION_CODE"].unique()}

# ...then collapse every code that accounts for less than 0.1% of records into "OTHER".
# Iterating the filtered index replaces Series.iteritems(), which was removed in pandas 2.0.
for rare_code in infraction_percentages[infraction_percentages < 0.001].index:
    infraction_groupings[rare_code] = "OTHER"

The minimum supported version is 2.4.6



In [10]:
# Translate each record's raw infraction code into its grouped category via
# infraction_groupings. Iterating the Series directly yields the same values that
# Series.iteritems() produced; iteritems() was removed in pandas 2.0.
discipline["DISCIPLINARY_INFRACTION"] = [
    infraction_groupings[code] for code in discipline["DISCIPLINARY_INFRACTION_CODE"]
]

In [11]:
# Inner-join disciplinary records to commitments on the inmate number alone. The join
# does not use the commitment prefix, so a record is paired with each just_dates row
# that shares its INMATE_DOC_NUMBER.
dates_and_infractions = pd.merge(just_dates, discipline, how='inner', on='INMATE_DOC_NUMBER')

In [12]:
# (rows, columns) of the inmate-level join.
dates_and_infractions.shape

(9960217, 8)

In [13]:
# Keep only the infractions dated within the sentence window, bounds inclusive:
# SENTENCE_START <= DISCIPLINARY_INFRACTION_DATE <= SENTENCE_END.
dates_and_infractions = dates_and_infractions[
    dates_and_infractions['DISCIPLINARY_INFRACTION_DATE'].between(
        dates_and_infractions['SENTENCE_START'],
        dates_and_infractions['SENTENCE_END'],
    )
]

In [14]:
# (rows, columns) of the current dates_and_infractions frame.
dates_and_infractions.shape

(3067979, 8)

In [15]:
# Per-commitment totals: summed disciplinary segregation days and the number of
# infraction records with a non-null code ('count' ignores nulls), exposed as INFRAC_COUNT.
basic_vars = (
    dates_and_infractions
    .groupby(['INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX'])
    .agg({'DISCI._SEGREGATION_TIME_(DAYS)': 'sum', 'DISCIPLINARY_INFRACTION_CODE': 'count'})
    .rename({'DISCIPLINARY_INFRACTION_CODE': 'INFRAC_COUNT'}, axis=1)
)

In [16]:
# Count infractions per (commitment, infraction category) and pivot the categories
# into columns; combinations that never occur get 0.
infractions = (
    dates_and_infractions
    .groupby(['INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX', 'DISCIPLINARY_INFRACTION'])
    .size()
    .unstack(fill_value=0)
)

infractions.columns = ["INFRAC_" + str(category) for category in infractions.columns]

# Convert the counts into fractions of each commitment's total infraction count.
# One row-wise division replaces the per-column Python loop; the counts are reindexed
# to this frame's rows first so that the division cannot introduce extra rows.
infractions = infractions.div(basic_vars['INFRAC_COUNT'].reindex(infractions.index), axis=0)

infractions.shape

(348546, 54)

In [17]:
# Put the totals and the per-category fractions side by side (aligned on the commitment
# index), then turn the index levels back into ordinary columns.
all_infractions_data = pd.concat([basic_vars, infractions], axis=1).reset_index()

In [18]:
# Preview the first rows of the per-commitment infraction features.
all_infractions_data.head()

Unnamed: 0,INMATE_DOC_NUMBER,INMATE_COMMITMENT_PREFIX,COUNT,DISCI._SEGREGATION_TIME_(DAYS),INFRAC_ACTIVE RIOTER,INFRAC_ASSAULT PERSON W/WEAPON,INFRAC_ASSAULT STAFF W/WEAPON,INFRAC_ASSAULT STAFF/THROWING LIQUIDS,INFRAC_ASSLT STAFF W/UNLIKELY INJ,INFRAC_ATTEMPT CLASS A OFFENSE,INFRAC_ATTEMPT CLASS B OFFENSE,INFRAC_ATTEMPT CLASS C OFFENSE,INFRAC_ATTEMPT CLASS D OFFENSE,INFRAC_BARTER/TRADE/LOAN MONEY,INFRAC_CREATE OFFENSIVE CONDITION,INFRAC_DAMAGE STATE/ANOTHERS PROPERTY,INFRAC_DISOBEY ORDER,INFRAC_ESCAPE,INFRAC_FAKE ILLNESS,INFRAC_FALSE ALLEGATIONS ON STAFF,INFRAC_FIGHT W/WEAPON OR REQ.OUT.MED,INFRAC_FIGHTING,INFRAC_FLOOD CELL,INFRAC_GAMBLING,INFRAC_HIGH RISK ACT,INFRAC_ILLEGAL CLOTH/LINEN/SHEETS,INFRAC_INTERFERE W/STAFF,INFRAC_INVOLVEMENT W/GANG OR SRG,INFRAC_LEAVE\QUIT COMM BASED PROGRAM,INFRAC_LOCK TAMPERING,INFRAC_MISUSE MEDICINE,INFRAC_MISUSE SUPPLIES,INFRAC_MISUSE/UNAUTH-USE PHONE/MAIL,INFRAC_NEGLIGENTLY PERFORM DUTIES,INFRAC_NO THREAT CONTRABAND,INFRAC_OFFER/ACCEPT BRIBE STAFF,INFRAC_OTHER,INFRAC_POSS AUDIO/VIDEO/IMAGE DEVICE,INFRAC_POSS MONEY/UNAUTHORIZED FUNDS,INFRAC_POSSESS EXCESS STAMPS,INFRAC_PROFANE LANGUAGE,INFRAC_PROPERTY TAMPERING,INFRAC_PROVOKE ASSAULT,INFRAC_REFUSE SUBMIT/DRUG/BREATH TEST,INFRAC_SELF INJURY,INFRAC_SELL/MISUSE MEDICATION,INFRAC_SET A FIRE,INFRAC_SEXUAL ACT,INFRAC_SUBSTANCE POSSESSION,INFRAC_THEFT OF PROPERTY,INFRAC_THREATEN TO HARM/INJURE STAFF,INFRAC_UNAUTHORIZED FUNDS,INFRAC_UNAUTHORIZED LEAVE,INFRAC_UNAUTHORIZED LOCATION,INFRAC_UNAUTHORIZED TOBACCO USE,INFRAC_UNKEMPT ROOM,INFRAC_VERBAL THREAT,INFRAC_WEAPON POSSESSION
0,10,AA,11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.545455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909
1,10,AB,15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.466667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.066667,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667
2,26,AA,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,31,AA,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,33,AD,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
# A left join keeps every commitment in clean_dates, including those with no
# disciplinary records; the infraction columns are NaN for those rows.
all_tables_in = pd.merge(
    clean_dates,
    all_infractions_data,
    how='left',
    on=['INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX'],
)

In [20]:
# Commitments with no matching disciplinary rows have NaN in the infraction columns;
# fill those with 0. The two join-key columns are excluded so that a missing
# identifier can never be silently rewritten to 0.
infraction_feature_cols = all_infractions_data.columns.drop(
    ['INMATE_DOC_NUMBER', 'INMATE_COMMITMENT_PREFIX']
)
all_tables_in[infraction_feature_cols] = all_tables_in[infraction_feature_cols].fillna(value=0)

In [25]:
# Checkpoint: write the all_tables_in frame to disk as a pickle.
all_tables_in.to_pickle('all_tables_in.pkl')

In [29]:
# Optional: uncomment to resume from the saved checkpoint instead of re-running the cells above.
# all_tables_in = pd.read_pickle('all_tables_in.pkl')

### Creating Final Dummy Variables

In [27]:
# Categorical columns that will be expanded into indicator (dummy) variables.
dummy_var_cols = [
    'NEW_PERIOD_OF_INCARCERATION_FL',
    'P&P_COMMITMENT_STATUS_FLAG',
    'NEW_PERIOD_OF_SUPERVISION_FLAG',
    'INMATE_GENDER_CODE',
    'INMATE_RACE_CODE',
]

In [32]:
# One-hot encode the categorical columns. drop_first=True omits the first level of
# each column, and dummy_na=True adds a separate "<column>_nan" indicator for
# missing values.
with_indicators = pd.get_dummies(
    data=all_tables_in,
    columns=dummy_var_cols,
    dummy_na=True,
    drop_first=True,
)

In [36]:
# Preview the first rows of with_indicators.
with_indicators.head()

Unnamed: 0,INMATE_DOC_NUMBER,INMATE_COMMITMENT_PREFIX,SENTENCE_END,PAROLE_DAYS,AGE_AT_RELEASE,NUMBER_OF_COUNTS,COUNTY_ALAMANCE,COUNTY_BEAUFORT,COUNTY_BRUNSWICK,COUNTY_BUNCOMBE,COUNTY_BURKE,COUNTY_CABARRUS,COUNTY_CALDWELL,COUNTY_CARTERET,COUNTY_CATAWBA,COUNTY_CLEVELAND,COUNTY_COLUMBUS,COUNTY_CRAVEN,COUNTY_CUMBERLAND,COUNTY_DAVIDSON,COUNTY_DUPLIN,COUNTY_DURHAM,COUNTY_EDGECOMBE,COUNTY_FORSYTH,COUNTY_FRANKLIN,COUNTY_GASTON,COUNTY_GRANVILLE,COUNTY_GUILFORD,COUNTY_HALIFAX,COUNTY_HARNETT,COUNTY_HAYWOOD,COUNTY_HENDERSON,COUNTY_HOKE,COUNTY_IREDELL,COUNTY_JOHNSTON,COUNTY_LEE,COUNTY_LENOIR,COUNTY_LINCOLN,COUNTY_MCDOWELL,COUNTY_MECKLENBURG,COUNTY_MOORE,COUNTY_NASH,COUNTY_NEW HANOVER,COUNTY_ONSLOW,COUNTY_ORANGE,COUNTY_OTHER,COUNTY_PERSON,COUNTY_PITT,COUNTY_RANDOLPH,COUNTY_RICHMOND,COUNTY_ROBESON,COUNTY_ROCKINGHAM,COUNTY_ROWAN,COUNTY_RUTHERFORD,COUNTY_SAMPSON,COUNTY_SCOTLAND,COUNTY_STANLY,COUNTY_SURRY,COUNTY_UNION,COUNTY_VANCE,COUNTY_WAKE,COUNTY_WAYNE,COUNTY_WILKES,COUNTY_WILSON,OFFENSE_3M REIMPRISON VIOL,OFFENSE_ABANDONMENT,OFFENSE_ARMED ROBBERY,OFFENSE_ASSAULT,OFFENSE_ASSAULT BY STRANGULATION,OFFENSE_ASSAULT INFLICT SERI BODY INJ,OFFENSE_ASSAULT ISI,OFFENSE_ASSAULT ON FEMALE,OFFENSE_ASSAULT ON OFFICER/ST EMPLOYEE,OFFENSE_ASSAULT ON POLICEMAN,OFFENSE_ASSAULT ON PUBLIC OFFICIAL,OFFENSE_AWDW,OFFENSE_AWDW GOV OFFICERS/EMPLOYEES,OFFENSE_AWDWISI,OFFENSE_AWDWWITK,OFFENSE_AWDWWITKISI,OFFENSE_B & E & L,OFFENSE_B & E VEHICLES,OFFENSE_BURGLARY 1ST DEGREE,OFFENSE_BURGLARY 2ND DEGREE,OFFENSE_CARRY CONCEALED WEAPON,OFFENSE_CHEAT - PROPERTY/SERVICES,OFFENSE_CHILD ABUSE,OFFENSE_COMMON LAW FORGERY,OFFENSE_COMMON LAW ROBBERY,OFFENSE_COMMUNICATING THREATS,OFFENSE_CREDIT CARD THEFT,OFFENSE_CRIME AGAINST NATURE,OFFENSE_DAMAGE TO PROPERTY,OFFENSE_DEL/SELL SCHEDULE II,OFFENSE_DISCHG FIREARM-OCC PROPERTY,OFFENSE_DISORDERLY CONDUCT,OFFENSE_DRIV LICENSE PERM RVK,OFFENSE_DRIV LICENSE REVOKED,OFFENSE_DRIVING UNDER INFLUENCE (DUI),OFFENSE_DRUG PARA - USE/POSSESS,OFFENSE_DRUNK & DISORDERLY,OFFENSE_DWI 
DRIVING WHILE IMPAIRED,OFFENSE_DWI LEVEL 1,OFFENSE_DWI LEVEL 1 AGGRAVATED,OFFENSE_DWI LEVEL 2,OFFENSE_DWI LEVEL 3,OFFENSE_DWI LEVEL 4,OFFENSE_DWI LEVEL 5,OFFENSE_EMBEZZLEMENT,OFFENSE_EMPLOY SEC VIOLATION,OFFENSE_ESCAPE PRISON,OFFENSE_FAIL TO REGISTER (SEX OFFENDER,OFFENSE_FAILURE TO STOP FOR ACCIDENT,OFFENSE_FELONY B&E,OFFENSE_FINANCIAL CARD FRAUD/MISD,OFFENSE_FORGERY,OFFENSE_FORGERY AND UTTERING,OFFENSE_HABITUAL FELON,OFFENSE_HABITUAL IMPAIRED DRIVING,OFFENSE_HABITUAL MISDEMEANOR ASSAULT,OFFENSE_HIT AND RUN,OFFENSE_IDENTITY FRAUD/THEFT,OFFENSE_INDECENT LIBERTY W/CHILD,OFFENSE_INVOLUNTARY MANSLAUGHTER,OFFENSE_KIDNAPPING 1ST DEGREE,OFFENSE_KIDNAPPING 2ND DEGREE,OFFENSE_LARCENY,OFFENSE_LARCENY (OVER $200),OFFENSE_LARCENY AFTER B & E,OFFENSE_LARCENY AND RECEIVING,OFFENSE_LARCENY BY SERVANT/EMPLOYEE,OFFENSE_LARCENY FROM PERSON,OFFENSE_LARCENY OF FIREARMS,OFFENSE_LARCENY OF MOTOR VEHICLE,OFFENSE_LARCENY OVER $1000,OFFENSE_MAINT ANY PLACE CONTR SUB,OFFENSE_MANSLAUGHTER,OFFENSE_MISD B&E,OFFENSE_MURDER FIRST DEGREE,OFFENSE_MURDER SECOND DEGREE,OFFENSE_NON-SUPPORT,OFFENSE_OBSTRUCTING JUSTICE,OFFENSE_OBT PROP BY FALSE PR/CHTS/SER,OFFENSE_OBTAIN CONTR SUBST BY FRAUD,OFFENSE_OPERATE VEHICLE W/O LICENSE,OFFENSE_OTHER,OFFENSE_OTHER MISDEMEANANT,OFFENSE_POSSESS SCHEDULE I,OFFENSE_POSSESS SCHEDULE II,OFFENSE_POSSESS SCHEDULE VI,OFFENSE_POSSESS WIT SELL CONTROL SUBST,OFFENSE_POSSESS WITS SCHEDULE I,OFFENSE_POSSESS WITS SCHEDULE II,OFFENSE_POSSESS WITS SCHEDULE VI,OFFENSE_POSSESSING STOLEN GOODS,OFFENSE_POSSESSION OF FIREARM BY FELON,OFFENSE_POST RELEASE REVOCATION,OFFENSE_RAPE FIRST DEGREE,OFFENSE_RAPE SECOND DEGREE,OFFENSE_RECEIVING STOLEN GOODS,OFFENSE_RECEIVING STOLEN VEHICLE,OFFENSE_RECKLESS DRIVING,OFFENSE_RESISTING OFFICER,OFFENSE_ROBBERY W/DANGEROUS WEAPON,OFFENSE_SECOND DEGREE TRESPASS,OFFENSE_SELL CONTROL SUBSTANCE,OFFENSE_SELL SCHEDULE I,OFFENSE_SELL SCHEDULE II,OFFENSE_SELL SCHEDULE VI,OFFENSE_SEXUAL OFFENSE 1ST DEGREE,OFFENSE_SEXUAL OFFENSE 2ND 
DEGREE,OFFENSE_SHOPLIFTING,OFFENSE_SIMPLE ASSAULT/AFFRAY,OFFENSE_SPEED ELUDE ARREST/ATTEMPT/SEC,OFFENSE_SPEEDING,OFFENSE_SPEEDING ELUDE ARREST OR/ATTEM,OFFENSE_TRAFFICKING SCHEDULE I,OFFENSE_TRAFFICKING SCHEDULE II,OFFENSE_TRESPASS,OFFENSE_UNAUTH USE MOTOR CONVEYANCE,OFFENSE_UTTERING FORGEDPAPER/INST/END,OFFENSE_VIOLATE REGULATE CONTROL SUBST,OFFENSE_VIOLATE VEHICLE REGISTRATION,OFFENSE_VIOLATION CONT SUB PENAL INST,OFFENSE_VIOLATION DRUG LAWS,OFFENSE_VIOLATION PROTECTIVE ORDER,OFFENSE_WANTON INJ PER/PROP GT $200,OFFENSE_WILL/WANT INJ REAL PROPERTY,OFFENSE_WORTHLESS CHECK,FELON,MISD.,MAX.TERM:,MIN.TERM:,MINIMUM_SENTENCE_LENGTH,MAXIMUM_SENTENCE_LENGTH,LENGTH_OF_SUPERVISION,PRIOR_RCD._POINTS/CONVICTIONS,SENTENCE_START,INFRAC_COUNT,DISCI._SEGREGATION_TIME_(DAYS),INFRAC_ACTIVE RIOTER,INFRAC_ASSAULT PERSON W/WEAPON,INFRAC_ASSAULT STAFF W/WEAPON,INFRAC_ASSAULT STAFF/THROWING LIQUIDS,INFRAC_ASSLT STAFF W/UNLIKELY INJ,INFRAC_ATTEMPT CLASS A OFFENSE,INFRAC_ATTEMPT CLASS B OFFENSE,INFRAC_ATTEMPT CLASS C OFFENSE,INFRAC_ATTEMPT CLASS D OFFENSE,INFRAC_BARTER/TRADE/LOAN MONEY,INFRAC_CREATE OFFENSIVE CONDITION,INFRAC_DAMAGE STATE/ANOTHERS PROPERTY,INFRAC_DISOBEY ORDER,INFRAC_ESCAPE,INFRAC_FAKE ILLNESS,INFRAC_FALSE ALLEGATIONS ON STAFF,INFRAC_FIGHT W/WEAPON OR REQ.OUT.MED,INFRAC_FIGHTING,INFRAC_FLOOD CELL,INFRAC_GAMBLING,INFRAC_HIGH RISK ACT,INFRAC_ILLEGAL CLOTH/LINEN/SHEETS,INFRAC_INTERFERE W/STAFF,INFRAC_INVOLVEMENT W/GANG OR SRG,INFRAC_LEAVE\QUIT COMM BASED PROGRAM,INFRAC_LOCK TAMPERING,INFRAC_MISUSE MEDICINE,INFRAC_MISUSE SUPPLIES,INFRAC_MISUSE/UNAUTH-USE PHONE/MAIL,INFRAC_NEGLIGENTLY PERFORM DUTIES,INFRAC_NO THREAT CONTRABAND,INFRAC_OFFER/ACCEPT BRIBE STAFF,INFRAC_OTHER,INFRAC_POSS AUDIO/VIDEO/IMAGE DEVICE,INFRAC_POSS MONEY/UNAUTHORIZED FUNDS,INFRAC_POSSESS EXCESS STAMPS,INFRAC_PROFANE LANGUAGE,INFRAC_PROPERTY TAMPERING,INFRAC_PROVOKE ASSAULT,INFRAC_REFUSE SUBMIT/DRUG/BREATH TEST,INFRAC_SELF INJURY,INFRAC_SELL/MISUSE MEDICATION,INFRAC_SET A FIRE,INFRAC_SEXUAL 
ACT,INFRAC_SUBSTANCE POSSESSION,INFRAC_THEFT OF PROPERTY,INFRAC_THREATEN TO HARM/INJURE STAFF,INFRAC_UNAUTHORIZED FUNDS,INFRAC_UNAUTHORIZED LEAVE,INFRAC_UNAUTHORIZED LOCATION,INFRAC_UNAUTHORIZED TOBACCO USE,INFRAC_UNKEMPT ROOM,INFRAC_VERBAL THREAT,INFRAC_WEAPON POSSESSION,NEW_PERIOD_OF_INCARCERATION_FL_Y,NEW_PERIOD_OF_INCARCERATION_FL_nan,P&P_COMMITMENT_STATUS_FLAG_ABSCONDED ABSC,P&P_COMMITMENT_STATUS_FLAG_CANC COURT/PC TERM CANCL,P&P_COMMITMENT_STATUS_FLAG_CLOSE ABSCOND CASE CLSABS,P&P_COMMITMENT_STATUS_FLAG_CLOSE OTH ST CASE CLOTH,P&P_COMMITMENT_STATUS_FLAG_COUNTY JAIL JAIL,P&P_COMMITMENT_STATUS_FLAG_DEATH DEATH,P&P_COMMITMENT_STATUS_FLAG_DWI DEFERRED DART,P&P_COMMITMENT_STATUS_FLAG_EARLY TERM EARLY,P&P_COMMITMENT_STATUS_FLAG_ELECT TO SERVE SERVE,P&P_COMMITMENT_STATUS_FLAG_ENT CORR INST CORR,P&P_COMMITMENT_STATUS_FLAG_ENT MED INST MEDIC,P&P_COMMITMENT_STATUS_FLAG_ENT MILITARY MILIT,P&P_COMMITMENT_STATUS_FLAG_ENTERED SCHOOL SCHOO,P&P_COMMITMENT_STATUS_FLAG_EXP ABSCONDER EXABS,P&P_COMMITMENT_STATUS_FLAG_IMPACT DEFERRED IMPAC,P&P_COMMITMENT_STATUS_FLAG_MOTION/APPR.RELIEF MAR,P&P_COMMITMENT_STATUS_FLAG_NORMAL NORM,P&P_COMMITMENT_STATUS_FLAG_OTHER TERM OTHTM,P&P_COMMITMENT_STATUS_FLAG_REVOKED REVOK,P&P_COMMITMENT_STATUS_FLAG_SUPV SUSPENDED SUSP,P&P_COMMITMENT_STATUS_FLAG_TOLLED TOLLD,P&P_COMMITMENT_STATUS_FLAG_TRANS O/S O/S,P&P_COMMITMENT_STATUS_FLAG_UNSATISFACTORY TERM UNSAT,P&P_COMMITMENT_STATUS_FLAG_UNSUPERVED UNSUP,P&P_COMMITMENT_STATUS_FLAG_VACATED VACAT,P&P_COMMITMENT_STATUS_FLAG_nan,NEW_PERIOD_OF_SUPERVISION_FLAG_Y,NEW_PERIOD_OF_SUPERVISION_FLAG_nan,INMATE_GENDER_CODE_MALE,INMATE_GENDER_CODE_nan,INMATE_RACE_CODE_BLACK,INMATE_RACE_CODE_INDIAN,INMATE_RACE_CODE_OTHER,INMATE_RACE_CODE_UNKNOWN,INMATE_RACE_CODE_WHITE,INMATE_RACE_CODE_nan
0,4,AA,1984-07-11,0.0,22,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,30000.0,0.0,0.0,1983-07-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0
1,6,AA,1973-03-28,0.0,21,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,60.0,90.0,0.0,0.0,1973-01-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,0
2,6,AB,1975-08-18,0.0,24,27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,30.0,0.0,0.0,0.0,1973-04-11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0
3,8,AA,1990-05-17,0.0,26,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,10000.0,10000.0,0.0,0.0,1990-04-09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0
4,8,AB,1994-01-26,0.0,30,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,10000.0,0.0,0.0,1993-08-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0


### Getting Number of Previous Commitments

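The rolling window must be larger than the maximum number of commitments recorded for any individual, so that each row's window reaches back over all of that person's earlier rows. The rolling count then acts as a running count within each individual.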

In [43]:
# Largest number of rows with a non-null INMATE_COMMITMENT_PREFIX recorded for any
# single INMATE_DOC_NUMBER. The vectorised Series.max() replaces the builtin max(),
# which iterates the Series element by element in Python. int() yields a plain Python
# integer, and still raises ValueError if there are no rows at all.
max_commitments = int(
    with_indicators.groupby('INMATE_DOC_NUMBER')['INMATE_COMMITMENT_PREFIX'].count().max()
)

In [86]:
# Running count of commitment rows within each inmate. The window is wider than any
# inmate's history, so it never drops earlier rows.
# min_periods=0 is given explicitly: Rolling.count() used to default to it, whereas
# newer pandas defaults min_periods to the window size, which would yield NaN for
# every row here because no group is as long as the window.
# NOTE(review): the count includes the current row (an inmate's first row gets 1), and
# it follows the existing row order within each inmate - confirm rows are chronological.
previous_commit = with_indicators.groupby('INMATE_DOC_NUMBER')['INMATE_COMMITMENT_PREFIX']\
                                 .rolling(window=max_commitments + 1, min_periods=0).count()

In [87]:
# previous_commit is indexed by (INMATE_DOC_NUMBER, original row label). Only the inmate
# number is moved out into a column; the original row label stays as the index.
# Discarding that label and renumbering 0..n-1 in group-sorted order would make a later
# label-aligned assignment onto with_indicators hit the wrong rows whenever
# with_indicators is not already ordered the same way.
previous_commit_df = previous_commit.to_frame().reset_index(level=0)\
                       .rename({'INMATE_COMMITMENT_PREFIX': 'NUM_PRIOR_COMMITMENTS'}, axis=1)

In [88]:
# Column assignment from a Series aligns on index labels, not on position.
# NOTE(review): this is only correct if previous_commit_df's index labels identify the
# same rows as with_indicators' index - confirm.
with_indicators['PREVIOUS_COMMITMENTS'] = previous_commit_df['NUM_PRIOR_COMMITMENTS']

In [89]:
# Preview the first rows of with_indicators, which now has a PREVIOUS_COMMITMENTS column.
with_indicators.head()

Unnamed: 0,INMATE_DOC_NUMBER,INMATE_COMMITMENT_PREFIX,SENTENCE_END,PAROLE_DAYS,AGE_AT_RELEASE,NUMBER_OF_COUNTS,COUNTY_ALAMANCE,COUNTY_BEAUFORT,COUNTY_BRUNSWICK,COUNTY_BUNCOMBE,COUNTY_BURKE,COUNTY_CABARRUS,COUNTY_CALDWELL,COUNTY_CARTERET,COUNTY_CATAWBA,COUNTY_CLEVELAND,COUNTY_COLUMBUS,COUNTY_CRAVEN,COUNTY_CUMBERLAND,COUNTY_DAVIDSON,COUNTY_DUPLIN,COUNTY_DURHAM,COUNTY_EDGECOMBE,COUNTY_FORSYTH,COUNTY_FRANKLIN,COUNTY_GASTON,COUNTY_GRANVILLE,COUNTY_GUILFORD,COUNTY_HALIFAX,COUNTY_HARNETT,COUNTY_HAYWOOD,COUNTY_HENDERSON,COUNTY_HOKE,COUNTY_IREDELL,COUNTY_JOHNSTON,COUNTY_LEE,COUNTY_LENOIR,COUNTY_LINCOLN,COUNTY_MCDOWELL,COUNTY_MECKLENBURG,COUNTY_MOORE,COUNTY_NASH,COUNTY_NEW HANOVER,COUNTY_ONSLOW,COUNTY_ORANGE,COUNTY_OTHER,COUNTY_PERSON,COUNTY_PITT,COUNTY_RANDOLPH,COUNTY_RICHMOND,COUNTY_ROBESON,COUNTY_ROCKINGHAM,COUNTY_ROWAN,COUNTY_RUTHERFORD,COUNTY_SAMPSON,COUNTY_SCOTLAND,COUNTY_STANLY,COUNTY_SURRY,COUNTY_UNION,COUNTY_VANCE,COUNTY_WAKE,COUNTY_WAYNE,COUNTY_WILKES,COUNTY_WILSON,OFFENSE_3M REIMPRISON VIOL,OFFENSE_ABANDONMENT,OFFENSE_ARMED ROBBERY,OFFENSE_ASSAULT,OFFENSE_ASSAULT BY STRANGULATION,OFFENSE_ASSAULT INFLICT SERI BODY INJ,OFFENSE_ASSAULT ISI,OFFENSE_ASSAULT ON FEMALE,OFFENSE_ASSAULT ON OFFICER/ST EMPLOYEE,OFFENSE_ASSAULT ON POLICEMAN,OFFENSE_ASSAULT ON PUBLIC OFFICIAL,OFFENSE_AWDW,OFFENSE_AWDW GOV OFFICERS/EMPLOYEES,OFFENSE_AWDWISI,OFFENSE_AWDWWITK,OFFENSE_AWDWWITKISI,OFFENSE_B & E & L,OFFENSE_B & E VEHICLES,OFFENSE_BURGLARY 1ST DEGREE,OFFENSE_BURGLARY 2ND DEGREE,OFFENSE_CARRY CONCEALED WEAPON,OFFENSE_CHEAT - PROPERTY/SERVICES,OFFENSE_CHILD ABUSE,OFFENSE_COMMON LAW FORGERY,OFFENSE_COMMON LAW ROBBERY,OFFENSE_COMMUNICATING THREATS,OFFENSE_CREDIT CARD THEFT,OFFENSE_CRIME AGAINST NATURE,OFFENSE_DAMAGE TO PROPERTY,OFFENSE_DEL/SELL SCHEDULE II,OFFENSE_DISCHG FIREARM-OCC PROPERTY,OFFENSE_DISORDERLY CONDUCT,OFFENSE_DRIV LICENSE PERM RVK,OFFENSE_DRIV LICENSE REVOKED,OFFENSE_DRIVING UNDER INFLUENCE (DUI),OFFENSE_DRUG PARA - USE/POSSESS,OFFENSE_DRUNK & DISORDERLY,OFFENSE_DWI 
DRIVING WHILE IMPAIRED,OFFENSE_DWI LEVEL 1,OFFENSE_DWI LEVEL 1 AGGRAVATED,OFFENSE_DWI LEVEL 2,OFFENSE_DWI LEVEL 3,OFFENSE_DWI LEVEL 4,OFFENSE_DWI LEVEL 5,OFFENSE_EMBEZZLEMENT,OFFENSE_EMPLOY SEC VIOLATION,OFFENSE_ESCAPE PRISON,OFFENSE_FAIL TO REGISTER (SEX OFFENDER,OFFENSE_FAILURE TO STOP FOR ACCIDENT,OFFENSE_FELONY B&E,OFFENSE_FINANCIAL CARD FRAUD/MISD,OFFENSE_FORGERY,OFFENSE_FORGERY AND UTTERING,OFFENSE_HABITUAL FELON,OFFENSE_HABITUAL IMPAIRED DRIVING,OFFENSE_HABITUAL MISDEMEANOR ASSAULT,OFFENSE_HIT AND RUN,OFFENSE_IDENTITY FRAUD/THEFT,OFFENSE_INDECENT LIBERTY W/CHILD,OFFENSE_INVOLUNTARY MANSLAUGHTER,OFFENSE_KIDNAPPING 1ST DEGREE,OFFENSE_KIDNAPPING 2ND DEGREE,OFFENSE_LARCENY,OFFENSE_LARCENY (OVER $200),OFFENSE_LARCENY AFTER B & E,OFFENSE_LARCENY AND RECEIVING,OFFENSE_LARCENY BY SERVANT/EMPLOYEE,OFFENSE_LARCENY FROM PERSON,OFFENSE_LARCENY OF FIREARMS,OFFENSE_LARCENY OF MOTOR VEHICLE,OFFENSE_LARCENY OVER $1000,OFFENSE_MAINT ANY PLACE CONTR SUB,OFFENSE_MANSLAUGHTER,OFFENSE_MISD B&E,OFFENSE_MURDER FIRST DEGREE,OFFENSE_MURDER SECOND DEGREE,OFFENSE_NON-SUPPORT,OFFENSE_OBSTRUCTING JUSTICE,OFFENSE_OBT PROP BY FALSE PR/CHTS/SER,OFFENSE_OBTAIN CONTR SUBST BY FRAUD,OFFENSE_OPERATE VEHICLE W/O LICENSE,OFFENSE_OTHER,OFFENSE_OTHER MISDEMEANANT,OFFENSE_POSSESS SCHEDULE I,OFFENSE_POSSESS SCHEDULE II,OFFENSE_POSSESS SCHEDULE VI,OFFENSE_POSSESS WIT SELL CONTROL SUBST,OFFENSE_POSSESS WITS SCHEDULE I,OFFENSE_POSSESS WITS SCHEDULE II,OFFENSE_POSSESS WITS SCHEDULE VI,OFFENSE_POSSESSING STOLEN GOODS,OFFENSE_POSSESSION OF FIREARM BY FELON,OFFENSE_POST RELEASE REVOCATION,OFFENSE_RAPE FIRST DEGREE,OFFENSE_RAPE SECOND DEGREE,OFFENSE_RECEIVING STOLEN GOODS,OFFENSE_RECEIVING STOLEN VEHICLE,OFFENSE_RECKLESS DRIVING,OFFENSE_RESISTING OFFICER,OFFENSE_ROBBERY W/DANGEROUS WEAPON,OFFENSE_SECOND DEGREE TRESPASS,OFFENSE_SELL CONTROL SUBSTANCE,OFFENSE_SELL SCHEDULE I,OFFENSE_SELL SCHEDULE II,OFFENSE_SELL SCHEDULE VI,OFFENSE_SEXUAL OFFENSE 1ST DEGREE,OFFENSE_SEXUAL OFFENSE 2ND 
DEGREE,OFFENSE_SHOPLIFTING,OFFENSE_SIMPLE ASSAULT/AFFRAY,OFFENSE_SPEED ELUDE ARREST/ATTEMPT/SEC,OFFENSE_SPEEDING,OFFENSE_SPEEDING ELUDE ARREST OR/ATTEM,OFFENSE_TRAFFICKING SCHEDULE I,OFFENSE_TRAFFICKING SCHEDULE II,OFFENSE_TRESPASS,OFFENSE_UNAUTH USE MOTOR CONVEYANCE,OFFENSE_UTTERING FORGEDPAPER/INST/END,OFFENSE_VIOLATE REGULATE CONTROL SUBST,OFFENSE_VIOLATE VEHICLE REGISTRATION,OFFENSE_VIOLATION CONT SUB PENAL INST,OFFENSE_VIOLATION DRUG LAWS,OFFENSE_VIOLATION PROTECTIVE ORDER,OFFENSE_WANTON INJ PER/PROP GT $200,OFFENSE_WILL/WANT INJ REAL PROPERTY,OFFENSE_WORTHLESS CHECK,FELON,MISD.,MAX.TERM:,MIN.TERM:,MINIMUM_SENTENCE_LENGTH,MAXIMUM_SENTENCE_LENGTH,LENGTH_OF_SUPERVISION,PRIOR_RCD._POINTS/CONVICTIONS,SENTENCE_START,INFRAC_COUNT,DISCI._SEGREGATION_TIME_(DAYS),INFRAC_ACTIVE RIOTER,INFRAC_ASSAULT PERSON W/WEAPON,INFRAC_ASSAULT STAFF W/WEAPON,INFRAC_ASSAULT STAFF/THROWING LIQUIDS,INFRAC_ASSLT STAFF W/UNLIKELY INJ,INFRAC_ATTEMPT CLASS A OFFENSE,INFRAC_ATTEMPT CLASS B OFFENSE,INFRAC_ATTEMPT CLASS C OFFENSE,INFRAC_ATTEMPT CLASS D OFFENSE,INFRAC_BARTER/TRADE/LOAN MONEY,INFRAC_CREATE OFFENSIVE CONDITION,INFRAC_DAMAGE STATE/ANOTHERS PROPERTY,INFRAC_DISOBEY ORDER,INFRAC_ESCAPE,INFRAC_FAKE ILLNESS,INFRAC_FALSE ALLEGATIONS ON STAFF,INFRAC_FIGHT W/WEAPON OR REQ.OUT.MED,INFRAC_FIGHTING,INFRAC_FLOOD CELL,INFRAC_GAMBLING,INFRAC_HIGH RISK ACT,INFRAC_ILLEGAL CLOTH/LINEN/SHEETS,INFRAC_INTERFERE W/STAFF,INFRAC_INVOLVEMENT W/GANG OR SRG,INFRAC_LEAVE\QUIT COMM BASED PROGRAM,INFRAC_LOCK TAMPERING,INFRAC_MISUSE MEDICINE,INFRAC_MISUSE SUPPLIES,INFRAC_MISUSE/UNAUTH-USE PHONE/MAIL,INFRAC_NEGLIGENTLY PERFORM DUTIES,INFRAC_NO THREAT CONTRABAND,INFRAC_OFFER/ACCEPT BRIBE STAFF,INFRAC_OTHER,INFRAC_POSS AUDIO/VIDEO/IMAGE DEVICE,INFRAC_POSS MONEY/UNAUTHORIZED FUNDS,INFRAC_POSSESS EXCESS STAMPS,INFRAC_PROFANE LANGUAGE,INFRAC_PROPERTY TAMPERING,INFRAC_PROVOKE ASSAULT,INFRAC_REFUSE SUBMIT/DRUG/BREATH TEST,INFRAC_SELF INJURY,INFRAC_SELL/MISUSE MEDICATION,INFRAC_SET A FIRE,INFRAC_SEXUAL 
ACT,INFRAC_SUBSTANCE POSSESSION,INFRAC_THEFT OF PROPERTY,INFRAC_THREATEN TO HARM/INJURE STAFF,INFRAC_UNAUTHORIZED FUNDS,INFRAC_UNAUTHORIZED LEAVE,INFRAC_UNAUTHORIZED LOCATION,INFRAC_UNAUTHORIZED TOBACCO USE,INFRAC_UNKEMPT ROOM,INFRAC_VERBAL THREAT,INFRAC_WEAPON POSSESSION,NEW_PERIOD_OF_INCARCERATION_FL_Y,NEW_PERIOD_OF_INCARCERATION_FL_nan,P&P_COMMITMENT_STATUS_FLAG_ABSCONDED ABSC,P&P_COMMITMENT_STATUS_FLAG_CANC COURT/PC TERM CANCL,P&P_COMMITMENT_STATUS_FLAG_CLOSE ABSCOND CASE CLSABS,P&P_COMMITMENT_STATUS_FLAG_CLOSE OTH ST CASE CLOTH,P&P_COMMITMENT_STATUS_FLAG_COUNTY JAIL JAIL,P&P_COMMITMENT_STATUS_FLAG_DEATH DEATH,P&P_COMMITMENT_STATUS_FLAG_DWI DEFERRED DART,P&P_COMMITMENT_STATUS_FLAG_EARLY TERM EARLY,P&P_COMMITMENT_STATUS_FLAG_ELECT TO SERVE SERVE,P&P_COMMITMENT_STATUS_FLAG_ENT CORR INST CORR,P&P_COMMITMENT_STATUS_FLAG_ENT MED INST MEDIC,P&P_COMMITMENT_STATUS_FLAG_ENT MILITARY MILIT,P&P_COMMITMENT_STATUS_FLAG_ENTERED SCHOOL SCHOO,P&P_COMMITMENT_STATUS_FLAG_EXP ABSCONDER EXABS,P&P_COMMITMENT_STATUS_FLAG_IMPACT DEFERRED IMPAC,P&P_COMMITMENT_STATUS_FLAG_MOTION/APPR.RELIEF MAR,P&P_COMMITMENT_STATUS_FLAG_NORMAL NORM,P&P_COMMITMENT_STATUS_FLAG_OTHER TERM OTHTM,P&P_COMMITMENT_STATUS_FLAG_REVOKED REVOK,P&P_COMMITMENT_STATUS_FLAG_SUPV SUSPENDED SUSP,P&P_COMMITMENT_STATUS_FLAG_TOLLED TOLLD,P&P_COMMITMENT_STATUS_FLAG_TRANS O/S O/S,P&P_COMMITMENT_STATUS_FLAG_UNSATISFACTORY TERM UNSAT,P&P_COMMITMENT_STATUS_FLAG_UNSUPERVED UNSUP,P&P_COMMITMENT_STATUS_FLAG_VACATED VACAT,P&P_COMMITMENT_STATUS_FLAG_nan,NEW_PERIOD_OF_SUPERVISION_FLAG_Y,NEW_PERIOD_OF_SUPERVISION_FLAG_nan,INMATE_GENDER_CODE_MALE,INMATE_GENDER_CODE_nan,INMATE_RACE_CODE_BLACK,INMATE_RACE_CODE_INDIAN,INMATE_RACE_CODE_OTHER,INMATE_RACE_CODE_UNKNOWN,INMATE_RACE_CODE_WHITE,INMATE_RACE_CODE_nan,PREVIOUS_COMMITMENTS
0,4,AA,1984-07-11,0.0,22,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,30000.0,0.0,0.0,1983-07-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1.0
1,6,AA,1973-03-28,0.0,21,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,60.0,90.0,0.0,0.0,1973-01-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,0,1.0
2,6,AB,1975-08-18,0.0,24,27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,30.0,0.0,0.0,0.0,1973-04-11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,2.0
3,8,AA,1990-05-17,0.0,26,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,10000.0,10000.0,0.0,0.0,1990-04-09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1.0
4,8,AB,1994-01-26,0.0,30,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,10000.0,0.0,0.0,1993-08-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,2.0


In [90]:
# Checkpoint: write the indicator-encoded feature table to disk as a pickle.
with_indicators.to_pickle('with_indicators.pkl')

In [None]:
# Optional: uncomment to reload the saved checkpoint instead of rebuilding with_indicators.
# with_indicators = pd.read_pickle('with_indicators.pkl')

## Set Recidivated Flag

Finally, I will create the target variable, `RECIDIVATED`. Here I define recidivism as another commitment for the same inmate beginning between 3 months (90 days) and 3 years (1095 days) after the end of the previous one; the flag is set on the earlier commitment. The 3-month minimum gap is an attempt to filter out commitments which appear to be served consecutively with small recorded gaps in between.

The commitments are sorted by ID and then by commitment, so we can just loop over the rows and compare each row with the one immediately before it.

In [91]:
# Only the inmate/commitment identifiers and the sentence dates are needed to
# derive the recidivism flag, so work with that narrow slice of the table.
date_columns = [
    'INMATE_DOC_NUMBER',
    'INMATE_COMMITMENT_PREFIX',
    'SENTENCE_START',
    'SENTENCE_END',
]
just_dates = with_indicators[date_columns]

In [96]:
# Recidivism window, in days: a new commitment counts as recidivism when it
# starts between 90 days (~3 months) and 1095 days (3 years) after the end of
# the same inmate's previous commitment. Both bounds are inclusive.
MIN_GAP_DAYS = 90
MAX_GAP_DAYS = 1095
ONE_DAY = np.timedelta64(1, 'D')

# One flag per row of just_dates, in row order; 1 means the inmate was
# re-committed within the window after that commitment ended.
recidivated = [0] * len(just_dates)

# Sentinels for the first row: no real row is expected to match prior_id == 0.
prior_end_date = pd.to_datetime("1800-1-1")
prior_id = 0

# NOTE(review): this relies on rows being ordered by inmate ID and then by
# commitment, so that the previous row is the same inmate's previous
# commitment -- confirm the upstream sort.
#
# enumerate() supplies the positional row number. The flag list is positional,
# so indexing it with the DataFrame's index labels would hit the wrong row (or
# raise IndexError) whenever the index is not a default 0..n-1 range.
for position, (ID, _commit, start_date, end_date) in enumerate(
    just_dates.itertuples(index=False, name=None)
):
    if ID == prior_id:
        # Gap between the previous commitment's end and this one's start, in days.
        gap_days = (start_date - prior_end_date) / ONE_DAY
        if MIN_GAP_DAYS <= gap_days <= MAX_GAP_DAYS:
            # This start date falls between 3 months and 3 years after the
            # prior term ended, so the prior term gets the recidivated flag.
            recidivated[position - 1] = 1

    prior_end_date = end_date
    prior_id = ID

In [98]:
# Take an independent copy: plain assignment would only alias the same frame,
# so adding the RECIDIVATED column to final_data would also modify
# with_indicators and make the earlier cells non-repeatable.
# Note: this holds a second copy of the table in memory.
final_data = with_indicators.copy()

In [99]:
# Attach the target column; `recidivated` holds one 0/1 flag per row, in row order.
final_data['RECIDIVATED'] = recidivated

In [7]:
# Preview the first rows to check that the RECIDIVATED column is present.
final_data.head()

Unnamed: 0,INMATE_DOC_NUMBER,INMATE_COMMITMENT_PREFIX,SENTENCE_END,PAROLE_DAYS,AGE_AT_RELEASE,NUMBER_OF_COUNTS,COUNTY_ALAMANCE,COUNTY_BEAUFORT,COUNTY_BRUNSWICK,COUNTY_BUNCOMBE,COUNTY_BURKE,COUNTY_CABARRUS,COUNTY_CALDWELL,COUNTY_CARTERET,COUNTY_CATAWBA,COUNTY_CLEVELAND,COUNTY_COLUMBUS,COUNTY_CRAVEN,COUNTY_CUMBERLAND,COUNTY_DAVIDSON,COUNTY_DUPLIN,COUNTY_DURHAM,COUNTY_EDGECOMBE,COUNTY_FORSYTH,COUNTY_FRANKLIN,COUNTY_GASTON,COUNTY_GRANVILLE,COUNTY_GUILFORD,COUNTY_HALIFAX,COUNTY_HARNETT,COUNTY_HAYWOOD,COUNTY_HENDERSON,COUNTY_HOKE,COUNTY_IREDELL,COUNTY_JOHNSTON,COUNTY_LEE,COUNTY_LENOIR,COUNTY_LINCOLN,COUNTY_MCDOWELL,COUNTY_MECKLENBURG,COUNTY_MOORE,COUNTY_NASH,COUNTY_NEW HANOVER,COUNTY_ONSLOW,COUNTY_ORANGE,COUNTY_OTHER,COUNTY_PERSON,COUNTY_PITT,COUNTY_RANDOLPH,COUNTY_RICHMOND,COUNTY_ROBESON,COUNTY_ROCKINGHAM,COUNTY_ROWAN,COUNTY_RUTHERFORD,COUNTY_SAMPSON,COUNTY_SCOTLAND,COUNTY_STANLY,COUNTY_SURRY,COUNTY_UNION,COUNTY_VANCE,COUNTY_WAKE,COUNTY_WAYNE,COUNTY_WILKES,COUNTY_WILSON,OFFENSE_3M REIMPRISON VIOL,OFFENSE_ABANDONMENT,OFFENSE_ARMED ROBBERY,OFFENSE_ASSAULT,OFFENSE_ASSAULT BY STRANGULATION,OFFENSE_ASSAULT INFLICT SERI BODY INJ,OFFENSE_ASSAULT ISI,OFFENSE_ASSAULT ON FEMALE,OFFENSE_ASSAULT ON OFFICER/ST EMPLOYEE,OFFENSE_ASSAULT ON POLICEMAN,OFFENSE_ASSAULT ON PUBLIC OFFICIAL,OFFENSE_AWDW,OFFENSE_AWDW GOV OFFICERS/EMPLOYEES,OFFENSE_AWDWISI,OFFENSE_AWDWWITK,OFFENSE_AWDWWITKISI,OFFENSE_B & E & L,OFFENSE_B & E VEHICLES,OFFENSE_BURGLARY 1ST DEGREE,OFFENSE_BURGLARY 2ND DEGREE,OFFENSE_CARRY CONCEALED WEAPON,OFFENSE_CHEAT - PROPERTY/SERVICES,OFFENSE_CHILD ABUSE,OFFENSE_COMMON LAW FORGERY,OFFENSE_COMMON LAW ROBBERY,OFFENSE_COMMUNICATING THREATS,OFFENSE_CREDIT CARD THEFT,OFFENSE_CRIME AGAINST NATURE,OFFENSE_DAMAGE TO PROPERTY,OFFENSE_DEL/SELL SCHEDULE II,OFFENSE_DISCHG FIREARM-OCC PROPERTY,OFFENSE_DISORDERLY CONDUCT,OFFENSE_DRIV LICENSE PERM RVK,OFFENSE_DRIV LICENSE REVOKED,OFFENSE_DRIVING UNDER INFLUENCE (DUI),OFFENSE_DRUG PARA - USE/POSSESS,OFFENSE_DRUNK & DISORDERLY,OFFENSE_DWI 
DRIVING WHILE IMPAIRED,OFFENSE_DWI LEVEL 1,OFFENSE_DWI LEVEL 1 AGGRAVATED,OFFENSE_DWI LEVEL 2,OFFENSE_DWI LEVEL 3,OFFENSE_DWI LEVEL 4,OFFENSE_DWI LEVEL 5,OFFENSE_EMBEZZLEMENT,OFFENSE_EMPLOY SEC VIOLATION,OFFENSE_ESCAPE PRISON,OFFENSE_FAIL TO REGISTER (SEX OFFENDER,OFFENSE_FAILURE TO STOP FOR ACCIDENT,OFFENSE_FELONY B&E,OFFENSE_FINANCIAL CARD FRAUD/MISD,OFFENSE_FORGERY,OFFENSE_FORGERY AND UTTERING,OFFENSE_HABITUAL FELON,OFFENSE_HABITUAL IMPAIRED DRIVING,OFFENSE_HABITUAL MISDEMEANOR ASSAULT,OFFENSE_HIT AND RUN,OFFENSE_IDENTITY FRAUD/THEFT,OFFENSE_INDECENT LIBERTY W/CHILD,OFFENSE_INVOLUNTARY MANSLAUGHTER,OFFENSE_KIDNAPPING 1ST DEGREE,OFFENSE_KIDNAPPING 2ND DEGREE,OFFENSE_LARCENY,OFFENSE_LARCENY (OVER $200),OFFENSE_LARCENY AFTER B & E,OFFENSE_LARCENY AND RECEIVING,OFFENSE_LARCENY BY SERVANT/EMPLOYEE,OFFENSE_LARCENY FROM PERSON,OFFENSE_LARCENY OF FIREARMS,OFFENSE_LARCENY OF MOTOR VEHICLE,OFFENSE_LARCENY OVER $1000,OFFENSE_MAINT ANY PLACE CONTR SUB,OFFENSE_MANSLAUGHTER,OFFENSE_MISD B&E,OFFENSE_MURDER FIRST DEGREE,OFFENSE_MURDER SECOND DEGREE,OFFENSE_NON-SUPPORT,OFFENSE_OBSTRUCTING JUSTICE,OFFENSE_OBT PROP BY FALSE PR/CHTS/SER,OFFENSE_OBTAIN CONTR SUBST BY FRAUD,OFFENSE_OPERATE VEHICLE W/O LICENSE,OFFENSE_OTHER,OFFENSE_OTHER MISDEMEANANT,OFFENSE_POSSESS SCHEDULE I,OFFENSE_POSSESS SCHEDULE II,OFFENSE_POSSESS SCHEDULE VI,OFFENSE_POSSESS WIT SELL CONTROL SUBST,OFFENSE_POSSESS WITS SCHEDULE I,OFFENSE_POSSESS WITS SCHEDULE II,OFFENSE_POSSESS WITS SCHEDULE VI,OFFENSE_POSSESSING STOLEN GOODS,OFFENSE_POSSESSION OF FIREARM BY FELON,OFFENSE_POST RELEASE REVOCATION,OFFENSE_RAPE FIRST DEGREE,OFFENSE_RAPE SECOND DEGREE,OFFENSE_RECEIVING STOLEN GOODS,OFFENSE_RECEIVING STOLEN VEHICLE,OFFENSE_RECKLESS DRIVING,OFFENSE_RESISTING OFFICER,OFFENSE_ROBBERY W/DANGEROUS WEAPON,OFFENSE_SECOND DEGREE TRESPASS,OFFENSE_SELL CONTROL SUBSTANCE,OFFENSE_SELL SCHEDULE I,OFFENSE_SELL SCHEDULE II,OFFENSE_SELL SCHEDULE VI,OFFENSE_SEXUAL OFFENSE 1ST DEGREE,OFFENSE_SEXUAL OFFENSE 2ND 
DEGREE,OFFENSE_SHOPLIFTING,OFFENSE_SIMPLE ASSAULT/AFFRAY,OFFENSE_SPEED ELUDE ARREST/ATTEMPT/SEC,OFFENSE_SPEEDING,OFFENSE_SPEEDING ELUDE ARREST OR/ATTEM,OFFENSE_TRAFFICKING SCHEDULE I,OFFENSE_TRAFFICKING SCHEDULE II,OFFENSE_TRESPASS,OFFENSE_UNAUTH USE MOTOR CONVEYANCE,OFFENSE_UTTERING FORGEDPAPER/INST/END,OFFENSE_VIOLATE REGULATE CONTROL SUBST,OFFENSE_VIOLATE VEHICLE REGISTRATION,OFFENSE_VIOLATION CONT SUB PENAL INST,OFFENSE_VIOLATION DRUG LAWS,OFFENSE_VIOLATION PROTECTIVE ORDER,OFFENSE_WANTON INJ PER/PROP GT $200,OFFENSE_WILL/WANT INJ REAL PROPERTY,OFFENSE_WORTHLESS CHECK,FELON,MISD.,MAX.TERM:,MIN.TERM:,MINIMUM_SENTENCE_LENGTH,MAXIMUM_SENTENCE_LENGTH,LENGTH_OF_SUPERVISION,PRIOR_RCD._POINTS/CONVICTIONS,SENTENCE_START,INFRAC_COUNT,DISCI._SEGREGATION_TIME_(DAYS),INFRAC_ACTIVE RIOTER,INFRAC_ASSAULT PERSON W/WEAPON,INFRAC_ASSAULT STAFF W/WEAPON,INFRAC_ASSAULT STAFF/THROWING LIQUIDS,INFRAC_ASSLT STAFF W/UNLIKELY INJ,INFRAC_ATTEMPT CLASS A OFFENSE,INFRAC_ATTEMPT CLASS B OFFENSE,INFRAC_ATTEMPT CLASS C OFFENSE,INFRAC_ATTEMPT CLASS D OFFENSE,INFRAC_BARTER/TRADE/LOAN MONEY,INFRAC_CREATE OFFENSIVE CONDITION,INFRAC_DAMAGE STATE/ANOTHERS PROPERTY,INFRAC_DISOBEY ORDER,INFRAC_ESCAPE,INFRAC_FAKE ILLNESS,INFRAC_FALSE ALLEGATIONS ON STAFF,INFRAC_FIGHT W/WEAPON OR REQ.OUT.MED,INFRAC_FIGHTING,INFRAC_FLOOD CELL,INFRAC_GAMBLING,INFRAC_HIGH RISK ACT,INFRAC_ILLEGAL CLOTH/LINEN/SHEETS,INFRAC_INTERFERE W/STAFF,INFRAC_INVOLVEMENT W/GANG OR SRG,INFRAC_LEAVE\QUIT COMM BASED PROGRAM,INFRAC_LOCK TAMPERING,INFRAC_MISUSE MEDICINE,INFRAC_MISUSE SUPPLIES,INFRAC_MISUSE/UNAUTH-USE PHONE/MAIL,INFRAC_NEGLIGENTLY PERFORM DUTIES,INFRAC_NO THREAT CONTRABAND,INFRAC_OFFER/ACCEPT BRIBE STAFF,INFRAC_OTHER,INFRAC_POSS AUDIO/VIDEO/IMAGE DEVICE,INFRAC_POSS MONEY/UNAUTHORIZED FUNDS,INFRAC_POSSESS EXCESS STAMPS,INFRAC_PROFANE LANGUAGE,INFRAC_PROPERTY TAMPERING,INFRAC_PROVOKE ASSAULT,INFRAC_REFUSE SUBMIT/DRUG/BREATH TEST,INFRAC_SELF INJURY,INFRAC_SELL/MISUSE MEDICATION,INFRAC_SET A FIRE,INFRAC_SEXUAL 
ACT,INFRAC_SUBSTANCE POSSESSION,INFRAC_THEFT OF PROPERTY,INFRAC_THREATEN TO HARM/INJURE STAFF,INFRAC_UNAUTHORIZED FUNDS,INFRAC_UNAUTHORIZED LEAVE,INFRAC_UNAUTHORIZED LOCATION,INFRAC_UNAUTHORIZED TOBACCO USE,INFRAC_UNKEMPT ROOM,INFRAC_VERBAL THREAT,INFRAC_WEAPON POSSESSION,NEW_PERIOD_OF_INCARCERATION_FL_Y,NEW_PERIOD_OF_INCARCERATION_FL_nan,P&P_COMMITMENT_STATUS_FLAG_ABSCONDED ABSC,P&P_COMMITMENT_STATUS_FLAG_CANC COURT/PC TERM CANCL,P&P_COMMITMENT_STATUS_FLAG_CLOSE ABSCOND CASE CLSABS,P&P_COMMITMENT_STATUS_FLAG_CLOSE OTH ST CASE CLOTH,P&P_COMMITMENT_STATUS_FLAG_COUNTY JAIL JAIL,P&P_COMMITMENT_STATUS_FLAG_DEATH DEATH,P&P_COMMITMENT_STATUS_FLAG_DWI DEFERRED DART,P&P_COMMITMENT_STATUS_FLAG_EARLY TERM EARLY,P&P_COMMITMENT_STATUS_FLAG_ELECT TO SERVE SERVE,P&P_COMMITMENT_STATUS_FLAG_ENT CORR INST CORR,P&P_COMMITMENT_STATUS_FLAG_ENT MED INST MEDIC,P&P_COMMITMENT_STATUS_FLAG_ENT MILITARY MILIT,P&P_COMMITMENT_STATUS_FLAG_ENTERED SCHOOL SCHOO,P&P_COMMITMENT_STATUS_FLAG_EXP ABSCONDER EXABS,P&P_COMMITMENT_STATUS_FLAG_IMPACT DEFERRED IMPAC,P&P_COMMITMENT_STATUS_FLAG_MOTION/APPR.RELIEF MAR,P&P_COMMITMENT_STATUS_FLAG_NORMAL NORM,P&P_COMMITMENT_STATUS_FLAG_OTHER TERM OTHTM,P&P_COMMITMENT_STATUS_FLAG_REVOKED REVOK,P&P_COMMITMENT_STATUS_FLAG_SUPV SUSPENDED SUSP,P&P_COMMITMENT_STATUS_FLAG_TOLLED TOLLD,P&P_COMMITMENT_STATUS_FLAG_TRANS O/S O/S,P&P_COMMITMENT_STATUS_FLAG_UNSATISFACTORY TERM UNSAT,P&P_COMMITMENT_STATUS_FLAG_UNSUPERVED UNSUP,P&P_COMMITMENT_STATUS_FLAG_VACATED VACAT,P&P_COMMITMENT_STATUS_FLAG_nan,NEW_PERIOD_OF_SUPERVISION_FLAG_Y,NEW_PERIOD_OF_SUPERVISION_FLAG_nan,INMATE_GENDER_CODE_MALE,INMATE_GENDER_CODE_nan,INMATE_RACE_CODE_BLACK,INMATE_RACE_CODE_INDIAN,INMATE_RACE_CODE_OTHER,INMATE_RACE_CODE_UNKNOWN,INMATE_RACE_CODE_WHITE,INMATE_RACE_CODE_nan,PREVIOUS_COMMITMENTS,RECIDIVATED
0,4,AA,1984-07-11,0.0,22,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,30000.0,0.0,0.0,1983-07-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1.0,0
1,6,AA,1973-03-28,0.0,21,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,60.0,90.0,0.0,0.0,1973-01-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,0,1.0,0
2,6,AB,1975-08-18,0.0,24,27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,30.0,0.0,0.0,0.0,1973-04-11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,2.0,0
3,8,AA,1990-05-17,0.0,26,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,10000.0,10000.0,0.0,0.0,1990-04-09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,1.0,0
4,8,AB,1994-01-26,0.0,30,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,10000.0,0.0,0.0,1993-08-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,2.0,1


In [104]:
# Export the final dataset (features plus RECIDIVATED target) as a pickle.
final_data.to_pickle('final_recid_data.pkl')

In [6]:
# Optional: uncomment to reload the exported dataset instead of regenerating it.
# final_data = pd.read_pickle('final_recid_data.pkl')