In [5]:
import os
import pandas as pd
# Switch the process working directory to the folder that holds the CSV, then
# load the circuit criminal 2019 file by bare filename (resolved against that
# directory).
# NOTE(review): this absolute, user-specific path only exists on the original
# author's machine, and os.chdir also changes how every later relative path in
# the notebook resolves -- consider a configurable data directory instead.
os.chdir('/Users/amawest/Desktop/')
circuit = pd.read_csv("circuit_criminal_2019.csv")
# Alternative, repository-relative location for the same filename (disabled):
# circuit = pd.read_csv("../Data/circuit_criminal_2019.csv")

In [6]:
# Peek at the free-text charge next to its statute code (first five rows).
circuit.loc[:, ['Charge', 'CodeSection']].head()

Unnamed: 0,Charge,CodeSection
0,VIOL PROBATION ON FEL OFF,19.2-306
1,GRAND LARCENY: AUTO THEFT,18.2-95
2,ENTER HOUSE TO COMMIT A&B ETC.,18.2-91
3,DRUGS: POSSESS SCH I OR II,18.2-250
4,SHOW CAUSE MOTION,18.2-456


In [7]:
# Number of distinct CodeSection values; a missing value counts as one entry.
circuit['CodeSection'].nunique(dropna=False)

787

In [8]:
# Tally how many rows share each (Charge, CodeSection) pair and list the pairs
# from most to least frequent.
# NOTE(review): with pandas' default groupby settings, rows where either key is
# missing are left out of this tally -- confirm that is acceptable.
x = (
    circuit
    .groupby(['Charge', 'CodeSection'])
    .size()
    .reset_index(name="Count")
    .sort_values(by='Count', ascending=False)
)
x


Unnamed: 0,Charge,CodeSection,Count
12751,VIOL PROBATION ON FEL OFF,19.2-306,7064
2143,BOND APPEAL,19.2-124,1531
9704,PROBATION VIOLATION,19.2-306,1439
9083,POSS.OF CONTROLLED SUBSTANCE,18.2-250,1421
6740,GRAND LARCENY,18.2-95,1004
...,...,...,...
4942,DRIVING REVOKED - 7TH OFF,B.46.2-301,1
4943,DRIVING REVOKED / 2ND OFFENSE,B.46.2-301,1
4944,DRIVING REVOKED / 4TH,B.46.2-301,1
4945,DRIVING REVOKED 4TH,B.46.2-301,1


In [9]:
# Reduce the frequency table to a single row per CodeSection and treat the
# surviving Charge text as that code's description.
# Because x was sorted by Count (descending) before this step, the "first" row
# kept for each code is the charge wording with the highest count; ties are
# resolved by whatever order the sort produced.
# There are far too many charge codes to label by hand here, so this is a
# shortcut -- manually curating a description per code would be more accurate.
x = (
    x
    .drop_duplicates(subset=['CodeSection'], keep="first")
    .rename(columns={"Charge": "CodeSection_Description"})
)
x

Unnamed: 0,CodeSection_Description,CodeSection,Count
12751,VIOL PROBATION ON FEL OFF,19.2-306,7064
2143,BOND APPEAL,19.2-124,1531
9083,POSS.OF CONTROLLED SUBSTANCE,18.2-250,1421
6740,GRAND LARCENY,18.2-95,1004
7928,OBTAIN MONEY FALSE PRET >=$200,18.2-178,717
...,...,...,...
4804,DRIVE VIOL CURFEW,46.2-334.01,1
4807,DRIVE W/COMM LIC DISQUALIF,46.2-341.21,1
5093,DRUNK IN PUBLIC,28-147,1
4906,DRIVING IN SAFETY WORK ZONE,46.2-814,1


In [10]:
# Join the one-row-per-code lookup back onto the case-level data so every row
# carries a single "CodeSection_Description" (1:1 with its charge code) instead
# of the many free-text "Charge" variants recorded for the same code.
# NOTE(review): this is pandas' default inner join, so any circuit row whose
# CodeSection is absent from x does not appear in the result -- confirm the
# row count against len(circuit).
result = x.merge(circuit, on="CodeSection")
result

Unnamed: 0,CodeSection_Description,CodeSection,Count,HearingDate,HearingResult,HearingJury,HearingPlea,HearingType,HearingRoom,fips,...,DrivingRestrictions,RestrictionEffectiveDate,RestrictionEndDate,VAAlcoholSafetyAction,RestitutionPaid,RestitutionAmount,Military,TrafficFatality,AppealedDate,person_id
0,VIOL PROBATION ON FEL OFF,19.2-306,7064,2019-12-17,Revoked - Sentence/Probation,,,Revocation,,91,...,,,,,,0.0,,,,9.318000e+13
1,VIOL PROBATION ON FEL OFF,19.2-306,7064,2019-12-12,Revoked - Sentence/Probation,,Guilty,Capias,,99,...,,,,,,0.0,,,,2.703000e+13
2,VIOL PROBATION ON FEL OFF,19.2-306,7064,2019-12-12,Revoked - Sentence/Probation,,Guilty,Capias,,99,...,,,,,,,,,,2.703000e+13
3,VIOL PROBATION ON FEL OFF,19.2-306,7064,2019-12-12,Revoked - Sentence/Probation,,Guilty,Capias,,99,...,,,,,,,,,,2.703000e+13
4,VIOL PROBATION ON FEL OFF,19.2-306,7064,2019-12-12,Revoked - Sentence/Probation,,Guilty,Bond,,99,...,,,,,,0.0,,,,2.750100e+14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78027,DRIVE VIOL CURFEW,46.2-334.01,1,2019-11-26,Nolle Prosequi,,,Plea,2C,107,...,,,,,,,,f,,2.701700e+14
78028,DRIVE W/COMM LIC DISQUALIF,46.2-341.21,1,2019-01-03,Sentenced,,Guilty,Plea,,179,...,,,,,,,,f,,1.031200e+14
78029,DRUNK IN PUBLIC,28-147,1,2019-10-25,Dismissed,,,Defendant Adjudication,10A,13,...,,,,,,,,,,2.301810e+14
78030,DRIVING IN SAFETY WORK ZONE,46.2-814,1,2019-09-11,Sentenced,,,Trial,10C,13,...,,,,,,0.0,,f,,6.619000e+13


In [11]:
# Display label used for everything downstream: "<code>, <description>".
result['code_and_desc'] = result['CodeSection'].str.cat(
    result['CodeSection_Description'], sep=', '
)

In [12]:
# Spot-check the first two rows, including the new code_and_desc column.
result.iloc[:2]

Unnamed: 0,CodeSection_Description,CodeSection,Count,HearingDate,HearingResult,HearingJury,HearingPlea,HearingType,HearingRoom,fips,...,RestrictionEffectiveDate,RestrictionEndDate,VAAlcoholSafetyAction,RestitutionPaid,RestitutionAmount,Military,TrafficFatality,AppealedDate,person_id,code_and_desc
0,VIOL PROBATION ON FEL OFF,19.2-306,7064,2019-12-17,Revoked - Sentence/Probation,,,Revocation,,91,...,,,,,0.0,,,,93180000000000.0,"19.2-306, VIOL PROBATION ON FEL OFF"
1,VIOL PROBATION ON FEL OFF,19.2-306,7064,2019-12-12,Revoked - Sentence/Probation,,Guilty,Capias,,99,...,,,,,0.0,,,,27030000000000.0,"19.2-306, VIOL PROBATION ON FEL OFF"
