In [1]:
#import dependencies
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np

# Location of the Chicago crime CSV export, relative to the notebook's working directory
crime_path = "Resources/chicago_crimes_2022.csv"

# Read the crime data into a DataFrame. pd.read_csv already returns a DataFrame,
# so no extra pd.DataFrame(...) wrapper is needed.
crime_df = pd.read_csv(crime_path)
# `crime` is kept as an alias so any code that still refers to the old name keeps working
crime = crime_df

# Display the first rows of the table as a preview
crime_df.head()

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
0,12648362,JF181248,3/16/2022 14:00,045XX S VINCENNES AVE,1130,DECEPTIVE PRACTICE,FRAUD OR CONFIDENCE GAME,RESIDENCE,False,False,...,3.0,38,11,1180381.0,1874867.0,2022,4/6/2023 17:05,41.811898,-87.613884,"(41.811898033, -87.613883795)"
1,13032945,JG212426,10/23/2022 12:00,006XX W IRVING PARK RD,2826,OTHER OFFENSE,HARASSMENT BY ELECTRONIC MEANS,APARTMENT,False,True,...,46.0,6,26,,,2022,4/6/2023 17:05,,,
2,12713141,JF259485,5/29/2022 0:00,014XX S MUSEUM CAMPUS DR,870,THEFT,POCKET-PICKING,SPORTS ARENA / STADIUM,False,False,...,4.0,33,6,,,2022,6/5/2022 16:48,,,
3,13032436,JG211735,4/20/2022 0:01,039XX S CALIFORNIA AVE,1582,OFFENSE INVOLVING CHILDREN,CHILD PORNOGRAPHY,POLICE FACILITY / VEHICLE PARKING LOT,False,False,...,12.0,58,17,,,2022,4/6/2023 17:05,,,
4,13032916,JG212285,12/14/2022 15:35,007XX E 40TH ST,890,THEFT,FROM BUILDING,SCHOOL - PRIVATE BUILDING,False,False,...,4.0,38,6,,,2022,4/6/2023 17:05,,,


In [2]:
# List every column label available in crime_df
crime_df.columns

Index(['ID', 'Case Number', 'Date', 'Block', 'IUCR', 'Primary Type',
       'Description', 'Location Description', 'Arrest', 'Domestic', 'Beat',
       'District', 'Ward', 'Community Area', 'FBI Code', 'X Coordinate',
       'Y Coordinate', 'Year', 'Updated On', 'Latitude', 'Longitude',
       'Location'],
      dtype='object')

In [3]:
# Tally how many records fall under each "Primary Type" of crime
counts_crime = (
    crime_df["Primary Type"]
    .value_counts()
)
counts_crime


THEFT                                54781
BATTERY                              40890
CRIMINAL DAMAGE                      27239
MOTOR VEHICLE THEFT                  21444
ASSAULT                              20787
DECEPTIVE PRACTICE                   16204
OTHER OFFENSE                        14534
ROBBERY                               8966
WEAPONS VIOLATION                     8766
BURGLARY                              7592
NARCOTICS                             4716
CRIMINAL TRESPASS                     4221
OFFENSE INVOLVING CHILDREN            1881
CRIMINAL SEXUAL ASSAULT               1546
SEX OFFENSE                           1212
HOMICIDE                               724
PUBLIC PEACE VIOLATION                 710
STALKING                               442
ARSON                                  422
INTERFERENCE WITH PUBLIC OFFICER       392
PROSTITUTION                           283
LIQUOR LAW VIOLATION                   203
INTIMIDATION                           181
CONCEALED C

In [4]:
# Per-district breakdown: how often each "Primary Type" occurs within every district
district_df = (
    crime_df
    .groupby(["District"])["Primary Type"]
    .value_counts()
)
district_df

District  Primary Type       
1         THEFT                  5003
          BATTERY                1771
          DECEPTIVE PRACTICE     1101
          MOTOR VEHICLE THEFT    1079
          CRIMINAL DAMAGE        1078
                                 ... 
31        BATTERY                   1
          CRIMINAL DAMAGE           1
          CRIMINAL TRESPASS         1
          OTHER OFFENSE             1
          ROBBERY                   1
Name: Primary Type, Length: 593, dtype: int64

In [6]:
# Collapse the detailed "Primary Type" labels into broader reporting categories.
# Each key is a consolidated category; its list holds the raw labels folded into it.
# Labels that are not listed (e.g. HOMICIDE, ARSON, OFFENSE INVOLVING CHILDREN) are left unchanged.
crime_category_members = {
    "THEFT (ALL TYPES)": [
        "THEFT",
        "MOTOR VEHICLE THEFT",
        "ROBBERY",
        "BURGLARY",
        "DECEPTIVE PRACTICE",
    ],
    "OTHER MINOR CRIMES": [
        "PUBLIC PEACE VIOLATION",
        "INTERFERENCE WITH PUBLIC OFFICER",
        "LIQUOR LAW VIOLATION",
        "OBSCENITY",
        "GAMBLING",
        "NON-CRIMINAL",
        "PUBLIC INDECENCY",
        "OTHER OFFENSE",
        "PROSTITUTION",
    ],
    "KIDNAPPING AND HUMAN TRAFFICKING": [
        "KIDNAPPING",
        "HUMAN TRAFFICKING",
    ],
    "DRUG RELATED": [
        "NARCOTICS",
        "OTHER NARCOTIC VIOLATION",
    ],
    "ASSAULT & BATTERY": [
        "ASSAULT",
        "BATTERY",
        "RITUALISM",
        "STALKING",
        "INTIMIDATION",
    ],
    "MINOR PROPERTY CRIME": [
        "CRIMINAL TRESPASS",
        "CRIMINAL DAMAGE",
    ],
    "SEXUAL RELATED CRIME": [
        "CRIM SEXUAL ASSAULT",
        "CRIMINAL SEXUAL ASSAULT",
        "SEX OFFENSE",
    ],
    "WEAPONS RELATED CRIME": [
        "WEAPONS VIOLATION",
        "CONCEALED CARRY LICENSE VIOLATION",
    ],
}

# Invert to the {raw label: category} form that Series.replace expects
primary_type_map = {
    raw_label: category
    for category, raw_labels in crime_category_members.items()
    for raw_label in raw_labels
}

# Apply the mapping to the "Primary Type" column only. DataFrame.replace with a flat dict
# rewrites matching cells in *every* column, so a value in another column (for example a
# "Description") that happens to equal one of these labels would be silently relabelled too.
# assign() returns a new frame, so crime_df itself is left untouched.
crime_cleaned_df = crime_df.assign(
    **{"Primary Type": crime_df["Primary Type"].replace(primary_type_map)}
)

crime_cleaned_df


Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
0,12648362,JF181248,3/16/2022 14:00,045XX S VINCENNES AVE,1130,THEFT (ALL TYPES),FRAUD OR CONFIDENCE GAME,RESIDENCE,False,False,...,3.0,38,11,1180381.0,1874867.0,2022,4/6/2023 17:05,41.811898,-87.613884,"(41.811898033, -87.613883795)"
1,13032945,JG212426,10/23/2022 12:00,006XX W IRVING PARK RD,2826,OTHER MINOR CRIMES,HARASSMENT BY ELECTRONIC MEANS,APARTMENT,False,True,...,46.0,6,26,,,2022,4/6/2023 17:05,,,
2,12713141,JF259485,5/29/2022 0:00,014XX S MUSEUM CAMPUS DR,870,THEFT (ALL TYPES),POCKET-PICKING,SPORTS ARENA / STADIUM,False,False,...,4.0,33,6,,,2022,6/5/2022 16:48,,,
3,13032436,JG211735,4/20/2022 0:01,039XX S CALIFORNIA AVE,1582,OFFENSE INVOLVING CHILDREN,CHILD PORNOGRAPHY,POLICE FACILITY / VEHICLE PARKING LOT,False,False,...,12.0,58,17,,,2022,4/6/2023 17:05,,,
4,13032916,JG212285,12/14/2022 15:35,007XX E 40TH ST,890,THEFT (ALL TYPES),FROM BUILDING,SCHOOL - PRIVATE BUILDING,False,False,...,4.0,38,6,,,2022,4/6/2023 17:05,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238517,12847575,JF420478,9/1/2022 5:00,005XX W SURF ST,2825,OTHER MINOR CRIMES,HARASSMENT BY TELEPHONE,RESIDENCE,False,True,...,44.0,6,26,1172497.0,1919410.0,2022,1/3/2023 15:46,41.934305,-87.641485,"(41.934304581, -87.641484982)"
238518,12847801,JF420319,7/8/2022 0:00,114XX S PRAIRIE AVE,1130,THEFT (ALL TYPES),FRAUD OR CONFIDENCE GAME,STREET,False,False,...,9.0,49,11,1179966.0,1828818.0,2022,1/3/2023 15:46,41.685544,-87.616813,"(41.685543881, -87.616812541)"
238519,12847324,JF420102,9/27/2022 11:00,023XX E 70TH ST,810,THEFT (ALL TYPES),OVER $500,RESIDENCE,False,False,...,5.0,43,6,1193181.0,1859005.0,2022,1/3/2023 15:46,41.768068,-87.567453,"(41.768068052, -87.567452932)"
238520,12847570,JF420427,9/3/2022 10:25,052XX W CARMEN AVE,2021,DRUG RELATED,POSSESS - BARBITURATES,RESIDENCE - YARD (FRONT / BACK),True,False,...,45.0,11,18,1140553.0,1933418.0,2022,1/3/2023 15:46,41.973391,-87.758535,"(41.973391184, -87.758534512)"


In [7]:
# Narrow the cleaned frame to the case identifier, timing, crime type/description
# and location columns; every other column is dropped from crime_cleaned_df.
crime_cleaned_df = crime_cleaned_df[
    [
        "Case Number",
        "Date",
        "Block",
        "Primary Type",
        "Description",
        "District",
        "Ward",
        "Year",
        "Latitude",
        "Longitude",
        "Location",
    ]
]
crime_cleaned_df.head()

Unnamed: 0,Case Number,Date,Block,Primary Type,Description,District,Ward,Year,Latitude,Longitude,Location
0,JF181248,3/16/2022 14:00,045XX S VINCENNES AVE,THEFT (ALL TYPES),FRAUD OR CONFIDENCE GAME,2,3.0,2022,41.811898,-87.613884,"(41.811898033, -87.613883795)"
1,JG212426,10/23/2022 12:00,006XX W IRVING PARK RD,OTHER MINOR CRIMES,HARASSMENT BY ELECTRONIC MEANS,19,46.0,2022,,,
2,JF259485,5/29/2022 0:00,014XX S MUSEUM CAMPUS DR,THEFT (ALL TYPES),POCKET-PICKING,1,4.0,2022,,,
3,JG211735,4/20/2022 0:01,039XX S CALIFORNIA AVE,OFFENSE INVOLVING CHILDREN,CHILD PORNOGRAPHY,9,12.0,2022,,,
4,JG212285,12/14/2022 15:35,007XX E 40TH ST,THEFT (ALL TYPES),FROM BUILDING,2,4.0,2022,,,


In [8]:
# Recount records per "Primary Type" after consolidation.
# Note: this rebinds counts_crime, which previously held the counts for the raw labels.
counts_crime = (
    crime_cleaned_df["Primary Type"]
    .value_counts()
)
counts_crime

THEFT (ALL TYPES)                   108987
ASSAULT & BATTERY                    62300
MINOR PROPERTY CRIME                 31460
OTHER MINOR CRIMES                   16190
WEAPONS RELATED CRIME                 8943
DRUG RELATED                          4723
SEXUAL RELATED CRIME                  2758
OFFENSE INVOLVING CHILDREN            1881
HOMICIDE                               724
ARSON                                  422
KIDNAPPING AND HUMAN TRAFFICKING       134
Name: Primary Type, dtype: int64

In [42]:
# Count the non-null "Primary Type" entries in each district and rank districts from
# most to fewest. Note: this rebinds district_df to a one-column DataFrame of totals.
district_df = (
    crime_cleaned_df
    .groupby(["District"])[["Primary Type"]]
    .count()
    .sort_values(by="Primary Type", ascending=False)
)
district_df

Unnamed: 0_level_0,Primary Type
District,Unnamed: 1_level_1
8,14786
6,14678
12,14321
4,13916
11,13436
1,13030
18,12413
19,12211
25,11939
3,11933


In [9]:
# Per-district breakdown of the consolidated categories: occurrences of each
# "Primary Type" within every district
grouped_district_df = (
    crime_cleaned_df
    .groupby(["District"])["Primary Type"]
    .value_counts()
)
grouped_district_df

District  Primary Type                    
1         THEFT (ALL TYPES)                   7754
          ASSAULT & BATTERY                   2674
          MINOR PROPERTY CRIME                1449
          OTHER MINOR CRIMES                   627
          WEAPONS RELATED CRIME                217
                                              ... 
25        KIDNAPPING AND HUMAN TRAFFICKING       7
31        THEFT (ALL TYPES)                     10
          ASSAULT & BATTERY                      2
          MINOR PROPERTY CRIME                   2
          OTHER MINOR CRIMES                     1
Name: Primary Type, Length: 245, dtype: int64