# City of New Orleans Electronic Police Report 2024

> Exploratory data analysis and machine learning prediction.

In [2]:
# setting the random seed for reproducibility
import random
random.seed(493)

# for manipulating dataframes
import pandas as pd
import numpy as np

# for statistical testing
from scipy import stats
from scipy.stats import mannwhitneyu

# for visualizations
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="whitegrid")

# to print out all the outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

## GET THE DATA

In [3]:
# Load the 2024 Electronic Police Report CSV into a dataframe
df = pd.read_csv(filepath_or_buffer='../data/Electronic_Police_Report_2024.csv')

In [4]:
def show_missing(df):
    """
    Summarize missing-looking values for every column of a dataframe.

    Three kinds of "missing" are reported per column, each as a count and
    as a percentage of the number of rows:

    * ``num_missing`` / ``missing_percentage``: real nulls (``df.isnull()``).
    * ``num_empty`` / ``empty_percentage``: strings that are empty or made
      up only of whitespace (``''``, ``' '``, several spaces, tabs, ...).
    * ``nan_count`` / ``nan_percentage``: strings that spell "nan" in any
      letter case, ignoring surrounding whitespace (``'nan'``, ``'NaN'``,
      ``'NAN'``, ``' nan '``).

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to inspect. It is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``df`` with the six columns listed above.
        For a frame with zero rows every percentage is 0.0.
    """
    n_rows = len(df)

    def _percentage(counts):
        # 0 / 0 would turn every percentage into NaN for a frame with no rows
        if n_rows == 0:
            return counts.astype('float64')
        return (counts / n_rows) * 100

    blank_hits, nan_hits = [], []
    for _, values in df.items():
        n_blank = n_nan = 0
        # bool / int / unsigned / float / complex / timedelta / datetime
        # columns cannot hold strings, so skip the element-wise scan for them
        if values.dtype.kind not in 'biufcmM':
            for value in values:
                # real nulls and non-text objects are not "blank" strings
                if not isinstance(value, str):
                    continue
                text = value.strip()
                if not text:
                    n_blank += 1
                elif text.lower() == 'nan':
                    n_nan += 1
        blank_hits.append(n_blank)
        nan_hits.append(n_nan)

    null_count = df.isnull().sum()
    empty_count = pd.Series(blank_hits, index=df.columns, dtype='int64')
    nan_count = pd.Series(nan_hits, index=df.columns, dtype='int64')

    return pd.DataFrame({'num_missing': null_count, 'missing_percentage': _percentage(null_count),
                         'num_empty': empty_count, 'empty_percentage': _percentage(empty_count),
                         'nan_count': nan_count, 'nan_percentage': _percentage(nan_count)})

# per-column counts and percentages of nulls, blank strings and 'nan' strings
show_missing(df)

Unnamed: 0,num_missing,missing_percentage,num_empty,empty_percentage,nan_count,nan_percentage
Item_Number,0,0.0,0,0.0,0,0.0
District,0,0.0,0,0.0,0,0.0
Location,0,0.0,0,0.0,0,0.0
Disposition,0,0.0,0,0.0,0,0.0
Signal_Type,0,0.0,0,0.0,0,0.0
Signal_Description,0,0.0,0,0.0,0,0.0
Occurred_Date_Time,0,0.0,0,0.0,0,0.0
Charge_Code,97998,54.613546,0,0.0,0,0.0
Charge_Description,97998,54.613546,0,0.0,0,0.0
Offender_Number,76792,42.795602,0,0.0,0,0.0


In [5]:
# preview the first five rows of the loaded data
df.head()

Unnamed: 0,Item_Number,District,Location,Disposition,Signal_Type,Signal_Description,Occurred_Date_Time,Charge_Code,Charge_Description,Offender_Number,OffenderID,OffenderStatus,Offender_Race,Offender_Gender,Offender_Age,Victim_Number,PersonType,Victim_Race,Victim_Gender,Victim_Age,Victim_Fatal_Status,Hate Crime,Report Type
0,G-07753-24,7,Morrison Rd & Newcastle St,OPEN,35,SIMPLE BATTERY,2024-07-08 17:00:00,14 35,SIMPLE BATTERY,1.0,903303.0,,BLACK,FEMALE,,1.0,VICTIM,BLACK,FEMALE,16.0,Non-fatal,,Incident Report
1,G-07737-24,8,001XX Carondelet St,CLOSED,966,DRUG LAW VIOLATION,2024-07-08 16:40:00,40 966,PROHIBITED ACTS--SCHEDULE I DRUGS,1.0,903297.0,ARRESTED,BLACK,MALE,37.0,1.0,VICTIM,,,,Non-fatal,,Incident Report
2,G-07737-24,8,001XX Carondelet St,CLOSED,966,DRUG LAW VIOLATION,2024-07-08 16:40:00,40 967,PROHIBITED ACTS-- SCHEDULE II DRUGS,1.0,903297.0,ARRESTED,BLACK,MALE,37.0,1.0,VICTIM,,,,Non-fatal,,Incident Report
3,G-07737-24,8,001XX Carondelet St,CLOSED,966,DRUG LAW VIOLATION,2024-07-08 16:40:00,14 108,RESISTING AN OFFICER,1.0,903297.0,ARRESTED,BLACK,MALE,37.0,1.0,VICTIM,,,,Non-fatal,,Incident Report
4,G-07737-24,8,001XX Carondelet St,CLOSED,966,DRUG LAW VIOLATION,2024-07-08 16:40:00,40 966,PROHIBITED ACTS--SCHEDULE I DRUGS,2.0,903298.0,ARRESTED,BLACK,MALE,28.0,1.0,VICTIM,,,,Non-fatal,,Incident Report


In [6]:
def get_values(df, columns):
    """
    Print the value counts (nulls included) of the given columns.

    For each column the column label is printed, followed by a separator
    line, the result of ``value_counts(dropna=False)`` and a blank gap.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to read the columns from.
    columns : str or iterable of column labels
        The columns to report on. A lone string is treated as a single
        column label.

    Returns
    -------
    None
        The counts are printed, not returned.
    """
    # a bare string would otherwise be iterated character by character
    if isinstance(columns, str):
        columns = [columns]

    for column in columns:
        print(column)
        print('=====================================')
        print(df[column].value_counts(dropna=False))
        print('\n')

def show_values(df, param):
    """
    Print value counts for every column or for a chosen subset.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to inspect.
    param : str or iterable of column labels
        ``'all'`` reports on every column of ``df``. Any other lone string
        is taken as a single column label. Lists, tuples, ``Index`` objects
        and arrays of labels are used as given.

    Returns
    -------
    None
        Output is printed by ``get_values``.
    """
    if isinstance(param, str):
        # 'all' is the sentinel for every column; any other lone string is one
        # column label and is wrapped so it is not iterated character by character
        columns = df.columns if param == 'all' else [param]
    else:
        # comparing an Index/array to 'all' with == is element-wise and cannot
        # be used in an `if`, so non-string selections are passed straight through
        columns = param
    get_values(df, columns)

# columns whose value distributions are worth eyeballing
columns_to_inspect = [
    'District',
    'Disposition',
    'Signal_Type',
    'Signal_Description',
    'Charge_Code',
    'Charge_Description',
    'OffenderStatus',
    'PersonType',
    'Victim_Race',
    'Victim_Gender',
    'Victim_Fatal_Status',
    'Hate Crime',
    'Report Type',
]
show_values(df, columns_to_inspect)

District
District
7    32702
8    30580
5    23375
6    22706
3    19694
1    18554
2    16713
4    15115
Name: count, dtype: int64


Disposition
Disposition
OPEN         141972
CLOSED        35789
UNFOUNDED      1678
Name: count, dtype: int64


Signal_Type
Signal_Type
103D      25173
21        16383
103       10341
67S       10011
67         9929
35D        9641
35         7535
67A        7416
56         7222
62C        7096
67F        4842
21P        4724
29         3156
966        2881
63         2592
37         2432
27-67A     2189
67P        2119
103F       2053
21J        1989
62B        1840
62         1806
95G        1755
62R        1690
17F        1670
21M        1373
103M       1219
21R        1201
37D        1155
67C        1098
34S        1015
79         1005
64G         997
66          995
94          914
29U         908
24          900
56D         887
67B         830
93          787
67AR        785
29SA        674
43          581
34C         572
65          568
68        