In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

In [2]:
# Loading in the dataset

file_path = "./Data/NYPD_Arrests_Data.csv"

crime_df = pd.read_csv(file_path)

crime_df.head()

Unnamed: 0,ARREST_KEY,ARREST_DATE,PD_CD,PD_DESC,KY_CD,OFNS_DESC,LAW_CODE,LAW_CAT_CD,ARREST_BORO,ARREST_PRECINCT,...,X_COORD_CD,Y_COORD_CD,Latitude,Longitude,Lon_Lat,Zip Codes,Community Districts,Borough Boundaries,City Council Districts,Police Precincts
0,159838726,01/01/2017,115.0,RECKLESS ENDANGERMENT 2,355.0,OFFENSES AGAINST THE PERSON,PL 1202000,M,M,14,...,988912.0,212647.0,40.75035,-73.983175,POINT (-73.98317545899994 40.750350440000034),12080.0,11.0,4.0,51.0,8.0
1,159824786,01/01/2017,101.0,ASSAULT 3,344.0,ASSAULT 3 & RELATED OFFENSES,PL 1200001,M,M,18,...,992043.0,217246.0,40.762971,-73.97187,POINT (-73.97186979099996 40.76297132000008),12419.0,11.0,4.0,51.0,10.0
2,159840237,01/01/2017,729.0,"FORGERY,ETC.,UNCLASSIFIED-FELONY",113.0,FORGERY,PL 1702500,F,B,40,...,1006669.0,233631.0,40.807919,-73.919017,POINT (-73.91901728199997 40.80791877300004),10932.0,49.0,5.0,35.0,23.0
3,159828861,01/01/2017,744.0,BAIL JUMPING 3,359.0,OFFENSES AGAINST PUBLIC ADMINISTRATION,PL 2155500,M,K,67,...,1009039.0,176368.0,40.65074,-73.910667,POINT (-73.91066707899995 40.650739599000076),13827.0,61.0,2.0,25.0,40.0
4,159834533,01/01/2017,203.0,"TRESPASS 3, CRIMINAL",352.0,CRIMINAL TRESPASS,PL 1401000,M,K,60,...,986229.0,148311.0,40.573763,-73.992878,POINT (-73.99287775699997 40.573763375000055),18184.0,21.0,2.0,45.0,35.0


In [3]:
crime_df.dtypes

ARREST_KEY                  int64
ARREST_DATE                object
PD_CD                     float64
PD_DESC                    object
KY_CD                     float64
OFNS_DESC                  object
LAW_CODE                   object
LAW_CAT_CD                 object
ARREST_BORO                object
ARREST_PRECINCT             int64
JURISDICTION_CODE           int64
AGE_GROUP                  object
PERP_SEX                   object
PERP_RACE                  object
X_COORD_CD                float64
Y_COORD_CD                float64
Latitude                  float64
Longitude                 float64
Lon_Lat                    object
Zip Codes                 float64
Community Districts       float64
Borough Boundaries        float64
City Council Districts    float64
Police Precincts          float64
dtype: object

In [4]:
# Convert the ARREST_DATE column to a datetime data type

crime_df['ARREST_DATE'] = pd.to_datetime(crime_df['ARREST_DATE'])

print(crime_df.dtypes)

ARREST_KEY                         int64
ARREST_DATE               datetime64[ns]
PD_CD                            float64
PD_DESC                           object
KY_CD                            float64
OFNS_DESC                         object
LAW_CODE                          object
LAW_CAT_CD                        object
ARREST_BORO                       object
ARREST_PRECINCT                    int64
JURISDICTION_CODE                  int64
AGE_GROUP                         object
PERP_SEX                          object
PERP_RACE                         object
X_COORD_CD                       float64
Y_COORD_CD                       float64
Latitude                         float64
Longitude                        float64
Lon_Lat                           object
Zip Codes                        float64
Community Districts              float64
Borough Boundaries               float64
City Council Districts           float64
Police Precincts                 float64
dtype: object


In [5]:
# Listing all column names

crime_df.columns.tolist()

['ARREST_KEY',
 'ARREST_DATE',
 'PD_CD',
 'PD_DESC',
 'KY_CD',
 'OFNS_DESC',
 'LAW_CODE',
 'LAW_CAT_CD',
 'ARREST_BORO',
 'ARREST_PRECINCT',
 'JURISDICTION_CODE',
 'AGE_GROUP',
 'PERP_SEX',
 'PERP_RACE',
 'X_COORD_CD',
 'Y_COORD_CD',
 'Latitude',
 'Longitude',
 'Lon_Lat',
 'Zip Codes',
 'Community Districts',
 'Borough Boundaries',
 'City Council Districts',
 'Police Precincts']

In [6]:
# Removing Arrest_Key, Law_Code, X_COORD_CD, Y_COORD_CD, Zip Codes, Community Districts, Borough Boundaries, City Council Districts columns.

crime_df_cleaned = crime_df.drop(columns=['ARREST_KEY', 'LAW_CODE', 'X_COORD_CD', 'Y_COORD_CD', 'Zip Codes', 'Community Districts', 'Borough Boundaries', 'City Council Districts'], axis=1)

crime_df_cleaned.head(10)

Unnamed: 0,ARREST_DATE,PD_CD,PD_DESC,KY_CD,OFNS_DESC,LAW_CAT_CD,ARREST_BORO,ARREST_PRECINCT,JURISDICTION_CODE,AGE_GROUP,PERP_SEX,PERP_RACE,Latitude,Longitude,Lon_Lat,Police Precincts
0,2017-01-01,115.0,RECKLESS ENDANGERMENT 2,355.0,OFFENSES AGAINST THE PERSON,M,M,14,0,18-24,M,WHITE,40.75035,-73.983175,POINT (-73.98317545899994 40.750350440000034),8.0
1,2017-01-01,101.0,ASSAULT 3,344.0,ASSAULT 3 & RELATED OFFENSES,M,M,18,0,25-44,M,WHITE,40.762971,-73.97187,POINT (-73.97186979099996 40.76297132000008),10.0
2,2017-01-01,729.0,"FORGERY,ETC.,UNCLASSIFIED-FELONY",113.0,FORGERY,F,B,40,0,25-44,M,WHITE HISPANIC,40.807919,-73.919017,POINT (-73.91901728199997 40.80791877300004),23.0
3,2017-01-01,744.0,BAIL JUMPING 3,359.0,OFFENSES AGAINST PUBLIC ADMINISTRATION,M,K,67,0,25-44,M,BLACK,40.65074,-73.910667,POINT (-73.91066707899995 40.650739599000076),40.0
4,2017-01-01,203.0,"TRESPASS 3, CRIMINAL",352.0,CRIMINAL TRESPASS,M,K,60,0,18-24,M,BLACK,40.573763,-73.992878,POINT (-73.99287775699997 40.573763375000055),35.0
5,2017-01-01,259.0,"MISCHIEF,CRIMINAL UNCLASSIFIED 4TH DEG",351.0,CRIMINAL MISCHIEF & RELATED OFFENSES,M,Q,113,0,25-44,M,BLACK,40.675403,-73.792898,POINT (-73.79289777099996 40.67540344900006),71.0
6,2017-01-01,750.0,RESISTING ARREST,359.0,OFFENSES AGAINST PUBLIC ADMINISTRATION,M,B,47,0,25-44,M,BLACK HISPANIC,40.881793,-73.863713,POINT (-73.86371337599996 40.88179268700002),30.0
7,2017-01-01,802.0,ALCOHOLIC BEVERAGE CONTROL LAW,346.0,ALCOHOLIC BEVERAGE CONTROL LAW,M,K,77,0,45-64,M,BLACK,40.678444,-73.947757,POINT (-73.94775722799994 40.678443525000034),51.0
8,2017-01-01,101.0,ASSAULT 3,344.0,ASSAULT 3 & RELATED OFFENSES,M,M,9,2,25-44,F,BLACK,40.724793,-73.976785,POINT (-73.97678528999995 40.72479293300007),5.0
9,2017-01-01,750.0,RESISTING ARREST,359.0,OFFENSES AGAINST PUBLIC ADMINISTRATION,M,B,47,0,18-24,M,BLACK,40.881793,-73.863713,POINT (-73.86371337599996 40.88179268700002),30.0


In [7]:
crime_df_cleaned.head(10)

Unnamed: 0,ARREST_DATE,PD_CD,PD_DESC,KY_CD,OFNS_DESC,LAW_CAT_CD,ARREST_BORO,ARREST_PRECINCT,JURISDICTION_CODE,AGE_GROUP,PERP_SEX,PERP_RACE,Latitude,Longitude,Lon_Lat,Police Precincts
0,2017-01-01,115.0,RECKLESS ENDANGERMENT 2,355.0,OFFENSES AGAINST THE PERSON,M,M,14,0,18-24,M,WHITE,40.75035,-73.983175,POINT (-73.98317545899994 40.750350440000034),8.0
1,2017-01-01,101.0,ASSAULT 3,344.0,ASSAULT 3 & RELATED OFFENSES,M,M,18,0,25-44,M,WHITE,40.762971,-73.97187,POINT (-73.97186979099996 40.76297132000008),10.0
2,2017-01-01,729.0,"FORGERY,ETC.,UNCLASSIFIED-FELONY",113.0,FORGERY,F,B,40,0,25-44,M,WHITE HISPANIC,40.807919,-73.919017,POINT (-73.91901728199997 40.80791877300004),23.0
3,2017-01-01,744.0,BAIL JUMPING 3,359.0,OFFENSES AGAINST PUBLIC ADMINISTRATION,M,K,67,0,25-44,M,BLACK,40.65074,-73.910667,POINT (-73.91066707899995 40.650739599000076),40.0
4,2017-01-01,203.0,"TRESPASS 3, CRIMINAL",352.0,CRIMINAL TRESPASS,M,K,60,0,18-24,M,BLACK,40.573763,-73.992878,POINT (-73.99287775699997 40.573763375000055),35.0
5,2017-01-01,259.0,"MISCHIEF,CRIMINAL UNCLASSIFIED 4TH DEG",351.0,CRIMINAL MISCHIEF & RELATED OFFENSES,M,Q,113,0,25-44,M,BLACK,40.675403,-73.792898,POINT (-73.79289777099996 40.67540344900006),71.0
6,2017-01-01,750.0,RESISTING ARREST,359.0,OFFENSES AGAINST PUBLIC ADMINISTRATION,M,B,47,0,25-44,M,BLACK HISPANIC,40.881793,-73.863713,POINT (-73.86371337599996 40.88179268700002),30.0
7,2017-01-01,802.0,ALCOHOLIC BEVERAGE CONTROL LAW,346.0,ALCOHOLIC BEVERAGE CONTROL LAW,M,K,77,0,45-64,M,BLACK,40.678444,-73.947757,POINT (-73.94775722799994 40.678443525000034),51.0
8,2017-01-01,101.0,ASSAULT 3,344.0,ASSAULT 3 & RELATED OFFENSES,M,M,9,2,25-44,F,BLACK,40.724793,-73.976785,POINT (-73.97678528999995 40.72479293300007),5.0
9,2017-01-01,750.0,RESISTING ARREST,359.0,OFFENSES AGAINST PUBLIC ADMINISTRATION,M,B,47,0,18-24,M,BLACK,40.881793,-73.863713,POINT (-73.86371337599996 40.88179268700002),30.0


In [8]:
# Get all unique values in the "OFNS_DESC" column

unique_values = crime_df_cleaned['OFNS_DESC'].unique()

# Print the unique values

print(unique_values)

['OFFENSES AGAINST THE PERSON' 'ASSAULT 3 & RELATED OFFENSES' 'FORGERY'
 'OFFENSES AGAINST PUBLIC ADMINISTRATION' 'CRIMINAL TRESPASS'
 'CRIMINAL MISCHIEF & RELATED OFFENSES' 'ALCOHOLIC BEVERAGE CONTROL LAW'
 'FELONY ASSAULT' 'ADMINISTRATIVE CODE' 'ROBBERY' 'PETIT LARCENY'
 'OTHER TRAFFIC INFRACTION' 'DANGEROUS WEAPONS' 'MISCELLANEOUS PENAL LAW'
 'GRAND LARCENY' 'DANGEROUS DRUGS' 'FRAUDS' 'BURGLARY'
 'OFF. AGNST PUB ORD SENSBLTY & RGHTS TO PRIV'
 'INTOXICATED & IMPAIRED DRIVING' 'SEX CRIMES'
 'OTHER OFFENSES RELATED TO THEFT' 'VEHICLE AND TRAFFIC LAWS'
 'OFFENSES INVOLVING FRAUD' 'FORCIBLE TOUCHING' 'DISORDERLY CONDUCT'
 'POSSESSION OF STOLEN PROPERTY 5' 'NEW YORK CITY HEALTH CODE'
 'OFF. AGNST PUB ORD SENSBLTY &' 'HOMICIDE-NEGLIGENT-VEHICLE'
 'OTHER STATE LAWS (NON PENAL LAW)' 'INTOXICATED/IMPAIRED DRIVING'
 "BURGLAR'S TOOLS" 'HARRASSMENT 2' 'GRAND LARCENY OF MOTOR VEHICLE'
 'OTHER STATE LAWS' nan 'UNAUTHORIZED USE OF A VEHICLE 3 (UUV)' 'RAPE'
 'F.C.A. P.I.N.O.S.' 'POSSESSION OF STOLEN

In [9]:
# Get all unique values in the "ARREST_BORO" column

unique_values = crime_df_cleaned['ARREST_BORO'].unique()

# Print the unique values

print(unique_values)

['M' 'B' 'K' 'Q' 'S']


In [10]:
# Get all unique values in the "LAW_CAT_CD" column

unique_values = crime_df_cleaned['LAW_CAT_CD'].unique()

# Print the unique values

print(unique_values)

['M' 'F' 'V' 'I' nan]


In [11]:
# Get all unique values in the "AGE_GROUP" column

unique_values = crime_df_cleaned['AGE_GROUP'].unique()

# Print the unique values

print(unique_values)

['18-24' '25-44' '45-64' '<18' '65+']


In [12]:
# Get all unique values in the "PERP_RACE" column

unique_values = crime_df_cleaned['PERP_RACE'].unique()

# Print the unique values

print(unique_values)

['WHITE' 'WHITE HISPANIC' 'BLACK' 'BLACK HISPANIC'
 'ASIAN / PACIFIC ISLANDER' 'UNKNOWN' 'AMERICAN INDIAN/ALASKAN NATIVE']


In [13]:
# Delete rows with certain values in the "LAW_CAT_CD" column

crime_df_cleaned = crime_df_cleaned[crime_df_cleaned['LAW_CAT_CD'] != 'I']

# Print the updated dataframe

crime_df_cleaned.head()

Unnamed: 0,ARREST_DATE,PD_CD,PD_DESC,KY_CD,OFNS_DESC,LAW_CAT_CD,ARREST_BORO,ARREST_PRECINCT,JURISDICTION_CODE,AGE_GROUP,PERP_SEX,PERP_RACE,Latitude,Longitude,Lon_Lat,Police Precincts
0,2017-01-01,115.0,RECKLESS ENDANGERMENT 2,355.0,OFFENSES AGAINST THE PERSON,M,M,14,0,18-24,M,WHITE,40.75035,-73.983175,POINT (-73.98317545899994 40.750350440000034),8.0
1,2017-01-01,101.0,ASSAULT 3,344.0,ASSAULT 3 & RELATED OFFENSES,M,M,18,0,25-44,M,WHITE,40.762971,-73.97187,POINT (-73.97186979099996 40.76297132000008),10.0
2,2017-01-01,729.0,"FORGERY,ETC.,UNCLASSIFIED-FELONY",113.0,FORGERY,F,B,40,0,25-44,M,WHITE HISPANIC,40.807919,-73.919017,POINT (-73.91901728199997 40.80791877300004),23.0
3,2017-01-01,744.0,BAIL JUMPING 3,359.0,OFFENSES AGAINST PUBLIC ADMINISTRATION,M,K,67,0,25-44,M,BLACK,40.65074,-73.910667,POINT (-73.91066707899995 40.650739599000076),40.0
4,2017-01-01,203.0,"TRESPASS 3, CRIMINAL",352.0,CRIMINAL TRESPASS,M,K,60,0,18-24,M,BLACK,40.573763,-73.992878,POINT (-73.99287775699997 40.573763375000055),35.0


In [14]:
# Get all unique values in the "LAW_CAT_CD" column

unique_values = crime_df_cleaned['LAW_CAT_CD'].unique()

# Print the unique values

print(unique_values)

['M' 'F' 'V' nan]


In [15]:
# Count the number of null values in each column

null_counts = crime_df_cleaned.isnull().sum()

# Print the null value counts

print(null_counts)

ARREST_DATE             0
PD_CD                 162
PD_DESC              2031
KY_CD                2031
OFNS_DESC            2031
LAW_CAT_CD           7411
ARREST_BORO             0
ARREST_PRECINCT         0
JURISDICTION_CODE       0
AGE_GROUP               0
PERP_SEX                0
PERP_RACE               0
Latitude                0
Longitude               0
Lon_Lat                 0
Police Precincts      159
dtype: int64


In [16]:
# Drop rows with null values in any column

crime_df_cleaned = crime_df_cleaned.dropna()

# Print the updated dataframe

crime_df_cleaned.head()

Unnamed: 0,ARREST_DATE,PD_CD,PD_DESC,KY_CD,OFNS_DESC,LAW_CAT_CD,ARREST_BORO,ARREST_PRECINCT,JURISDICTION_CODE,AGE_GROUP,PERP_SEX,PERP_RACE,Latitude,Longitude,Lon_Lat,Police Precincts
0,2017-01-01,115.0,RECKLESS ENDANGERMENT 2,355.0,OFFENSES AGAINST THE PERSON,M,M,14,0,18-24,M,WHITE,40.75035,-73.983175,POINT (-73.98317545899994 40.750350440000034),8.0
1,2017-01-01,101.0,ASSAULT 3,344.0,ASSAULT 3 & RELATED OFFENSES,M,M,18,0,25-44,M,WHITE,40.762971,-73.97187,POINT (-73.97186979099996 40.76297132000008),10.0
2,2017-01-01,729.0,"FORGERY,ETC.,UNCLASSIFIED-FELONY",113.0,FORGERY,F,B,40,0,25-44,M,WHITE HISPANIC,40.807919,-73.919017,POINT (-73.91901728199997 40.80791877300004),23.0
3,2017-01-01,744.0,BAIL JUMPING 3,359.0,OFFENSES AGAINST PUBLIC ADMINISTRATION,M,K,67,0,25-44,M,BLACK,40.65074,-73.910667,POINT (-73.91066707899995 40.650739599000076),40.0
4,2017-01-01,203.0,"TRESPASS 3, CRIMINAL",352.0,CRIMINAL TRESPASS,M,K,60,0,18-24,M,BLACK,40.573763,-73.992878,POINT (-73.99287775699997 40.573763375000055),35.0


In [17]:
# Count the number of null values in each column

null_counts = crime_df_cleaned.isnull().sum()

# Print the null value counts

print(null_counts)

ARREST_DATE          0
PD_CD                0
PD_DESC              0
KY_CD                0
OFNS_DESC            0
LAW_CAT_CD           0
ARREST_BORO          0
ARREST_PRECINCT      0
JURISDICTION_CODE    0
AGE_GROUP            0
PERP_SEX             0
PERP_RACE            0
Latitude             0
Longitude            0
Lon_Lat              0
Police Precincts     0
dtype: int64


In [18]:
# Count the occurrences of each unique value in column 'PD_DESC'

counts = crime_df_cleaned['PD_DESC'].value_counts()

# Print the counts

print(counts.head(30))

ASSAULT 3                                     114746
ASSAULT 2,1,UNCLASSIFIED                       56890
TRAFFIC,UNCLASSIFIED MISDEMEAN                 53934
LARCENY,PETIT FROM OPEN AREAS,UNCLASSIFIED     46424
LARCENY,PETIT FROM OPEN AREAS,                 46085
MARIJUANA, POSSESSION 4 & 5                    27534
THEFT OF SERVICES, UNCLASSIFIED                27113
ROBBERY,OPEN AREA UNCLASSIFIED                 24951
INTOXICATED DRIVING,ALCOHOL                    23171
CONTROLLED SUBSTANCE, POSSESSION 7             22318
PUBLIC ADMINISTRATION,UNCLASSI                 21252
MENACING,UNCLASSIFIED                          20935
LARCENY,GRAND FROM OPEN AREAS, UNATTENDED      19953
CONTROLLED SUBSTANCE, POSSESSI                 18602
ROBBERY,UNCLASSIFIED,OPEN AREAS                18489
WEAPONS POSSESSION 1 & 2                       18045
AGGRAVATED HARASSMENT 2                        16843
LARCENY,GRAND FROM OPEN AREAS,UNCLASSIFIED     16415
PUBLIC ADMINISTRATION,UNCLASSIFIED FELONY     

In [19]:
# Count the occurrences of each unique value in column 'OFNS_DESC'

counts = crime_df_cleaned['OFNS_DESC'].value_counts()

# Print the counts

print(counts.head(30))

ASSAULT 3 & RELATED OFFENSES                   150782
DANGEROUS DRUGS                                124014
PETIT LARCENY                                   92648
FELONY ASSAULT                                  74457
VEHICLE AND TRAFFIC LAWS                        63859
MISCELLANEOUS PENAL LAW                         56653
ROBBERY                                         43655
GRAND LARCENY                                   43333
DANGEROUS WEAPONS                               42215
CRIMINAL MISCHIEF & RELATED OF                  27697
OTHER OFFENSES RELATED TO THEFT                 27270
BURGLARY                                        24266
CRIMINAL MISCHIEF & RELATED OFFENSES            20818
INTOXICATED & IMPAIRED DRIVING                  20801
FORGERY                                         20638
OFFENSES AGAINST PUBLIC ADMINI                  17405
SEX CRIMES                                      16651
CRIMINAL TRESPASS                               16260
OFFENSES AGAINST PUBLIC ADMI

In [20]:
# Count the occurrences of each unique value in column 'LAW_CAT_CD'

counts = crime_df_cleaned['LAW_CAT_CD'].value_counts()

# Print the counts

print(counts)

M    624374
F    394386
V     12526
Name: LAW_CAT_CD, dtype: int64


In [21]:
# Count the occurrences of each unique value in each group

counts = crime_df_cleaned.groupby('LAW_CAT_CD')['OFNS_DESC'].value_counts()

# Print the counts

print(counts)

LAW_CAT_CD  OFNS_DESC                             
F           FELONY ASSAULT                            74381
            MISCELLANEOUS PENAL LAW                   54414
            DANGEROUS DRUGS                           45055
            ROBBERY                                   43655
            GRAND LARCENY                             43333
                                                      ...  
V           LOITERING                                    27
            PROSTITUTION & RELATED OFFENSES              13
            NYS LAWS-UNCLASSIFIED VIOLATION               2
            UNLAWFUL POSS. WEAP. ON SCHOOL                1
            UNLAWFUL POSS. WEAP. ON SCHOOL GROUNDS        1
Name: OFNS_DESC, Length: 106, dtype: int64


In [22]:
# Count the occurrences of each unique value in each group

counts = crime_df_cleaned.groupby('LAW_CAT_CD')['PERP_RACE'].value_counts()

# Print the counts

print(counts)

LAW_CAT_CD  PERP_RACE                     
F           BLACK                             205875
            WHITE HISPANIC                     92261
            WHITE                              39522
            BLACK HISPANIC                     33954
            ASIAN / PACIFIC ISLANDER           19922
            UNKNOWN                             2004
            AMERICAN INDIAN/ALASKAN NATIVE       848
M           BLACK                             285280
            WHITE HISPANIC                    162931
            WHITE                              80176
            BLACK HISPANIC                     54410
            ASIAN / PACIFIC ISLANDER           34937
            UNKNOWN                             4678
            AMERICAN INDIAN/ALASKAN NATIVE      1962
V           BLACK                               5957
            WHITE HISPANIC                      3234
            WHITE                               2073
            BLACK HISPANIC                       715
   

In [23]:
# Count the number of unique combinations of values in the 'OFNS_DESC', 'LAW_CAT_CD', and 'PERP_RACE' columns

unique_counts = crime_df_cleaned[['OFNS_DESC', 'LAW_CAT_CD', 'PERP_RACE', 'ARREST_BORO', 'AGE_GROUP', 'PERP_SEX' ]].nunique()

print(unique_counts)

OFNS_DESC      80
LAW_CAT_CD      3
PERP_RACE       7
ARREST_BORO     5
AGE_GROUP       5
PERP_SEX        2
dtype: int64


In [25]:
pd.set_option('display.max_rows', None)

pd.set_option('display.max_columns', None)

crime_df.groupby(['LAW_CAT_CD', 'PERP_RACE', 'ARREST_BORO', 'AGE_GROUP', 'PERP_SEX']).size()

LAW_CAT_CD  PERP_RACE                       ARREST_BORO  AGE_GROUP  PERP_SEX
F           AMERICAN INDIAN/ALASKAN NATIVE  B            18-24      F               2
                                                                    M              14
                                                         25-44      F              10
                                                                    M              52
                                                         45-64      F               1
                                                                    M              23
                                                         65+        M               6
                                                         <18        F               1
                                                                    M               4
                                            K            18-24      F               6
                                                               