In [39]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
# Load the crime export into the notebook-wide DataFrame `df`.
# The path has no directory part, so it is resolved against the working directory.
csv_path = "crime_map_data.csv"
df = pd.read_csv(csv_path)

In [40]:
# Capture the header and the frame's dimensions before the rename below.
column_names = df.columns
num_rows = df.shape[0]
num_columns = df.shape[1]

# Give the occurrence-date column a short label; the original label is
# spelled with two spaces between 'DATE' and 'OF'.
df.rename(columns={'DATE  OF OCCURRENCE': 'DATE'}, inplace=True)

In [33]:
# Report the dimensions and header captured by the earlier cell, one line each.
summary_lines = (
    ("Row Count:", num_rows),
    ("Column Count:", num_columns),
    ("Column Names:", column_names),
)
for label, captured in summary_lines:
    print(label, captured)

Row Count: 255799
Column Count: 17
Column Names: Index(['CASE#', 'DATE  OF OCCURRENCE', 'BLOCK', ' IUCR',
       ' PRIMARY DESCRIPTION', ' SECONDARY DESCRIPTION',
       ' LOCATION DESCRIPTION', 'ARREST', 'DOMESTIC', 'BEAT', 'WARD', 'FBI CD',
       'X COORDINATE', 'Y COORDINATE', 'LATITUDE', 'LONGITUDE', 'LOCATION'],
      dtype='object')


In [42]:
# The crime-type column label carries a leading space in this dataset.
primary_descriptions = df[' PRIMARY DESCRIPTION']

# How many distinct crime types there are, and which ones
# (unique() keeps them in order of first appearance).
num_unique_values = primary_descriptions.nunique()
unique_values = primary_descriptions.unique()

print("Number of unique values in 'PRIMARY DESCRIPTION' column:", num_unique_values)

print("\nUnique values in ' PRIMARY DESCRIPTION' column:")
for crime_type in unique_values:
    print(crime_type)

Number of unique values in 'PRIMARY DESCRIPTION' column: 31

Unique values in ' PRIMARY DESCRIPTION' column:
THEFT
DECEPTIVE PRACTICE
CRIMINAL DAMAGE
OTHER OFFENSE
BATTERY
MOTOR VEHICLE THEFT
NARCOTICS
BURGLARY
PUBLIC PEACE VIOLATION
SEX OFFENSE
ASSAULT
CRIMINAL SEXUAL ASSAULT
CRIMINAL TRESPASS
WEAPONS VIOLATION
OFFENSE INVOLVING CHILDREN
ROBBERY
HOMICIDE
ARSON
OBSCENITY
INTIMIDATION
KIDNAPPING
STALKING
LIQUOR LAW VIOLATION
OTHER NARCOTIC VIOLATION
INTERFERENCE WITH PUBLIC OFFICER
HUMAN TRAFFICKING
PROSTITUTION
CONCEALED CARRY LICENSE VIOLATION
PUBLIC INDECENCY
GAMBLING
NON-CRIMINAL


In [43]:
# Show the current header of the frame.
current_columns = df.columns
print(current_columns)

Index(['CASE#', 'DATE', 'BLOCK', ' IUCR', ' PRIMARY DESCRIPTION',
       ' SECONDARY DESCRIPTION', ' LOCATION DESCRIPTION', 'ARREST', 'DOMESTIC',
       'BEAT', 'WARD', 'FBI CD', 'X COORDINATE', 'Y COORDINATE', 'LATITUDE',
       'LONGITUDE', 'LOCATION'],
      dtype='object')


In [44]:
# Convert the 'DATE' column to datetimes using an explicit format:
# month/day/year followed by a 12-hour clock time with an AM/PM marker.
occurrence_format = '%m/%d/%Y %I:%M:%S %p'
df['DATE'] = pd.to_datetime(df['DATE'], format=occurrence_format)

In [45]:
# Earliest and latest occurrence timestamps in the data set.
occurrence_times = df['DATE']
first_date, last_date = occurrence_times.min(), occurrence_times.max()
print("Start:", first_date, "End:", last_date)

Start: 2022-07-28 05:15:00 End: 2023-07-27 23:59:00


In [46]:
# Order the records chronologically (a new frame; `df` itself is not reordered)
# and preview the ten earliest.
sorted_df = df.sort_values('DATE')
earliest_ten = sorted_df.iloc[:10]
print(earliest_ten)

           CASE#                DATE                     BLOCK  IUCR  \
66831   JF334407 2022-07-28 05:15:00  026XX N NARRAGANSETT AVE  0810   
110518  JF334413 2022-07-28 05:28:00        071XX S EUCLID AVE  0560   
55727   JF334409 2022-07-28 05:30:00          079XX S GREEN ST  0820   
36008   JF334412 2022-07-28 05:30:00      058XX N GLENWOOD AVE  2820   
49813   JF335715 2022-07-28 05:30:00       003XX E RANDOLPH ST  0810   
12907   JF335647 2022-07-28 05:35:00      056XX N VIRGINIA AVE  1360   
245     JF335698 2022-07-28 05:47:00        036XX S ARCHER AVE  1320   
47554   JF334894 2022-07-28 05:48:00      060XX S OAK PARK AVE  2826   
55090   JF334373 2022-07-28 05:49:00      049XX W MONTROSE AVE  0820   
107868  JF334374 2022-07-28 05:50:00           028XX W 22ND PL  0460   

        PRIMARY DESCRIPTION           SECONDARY DESCRIPTION  \
66831                 THEFT                       OVER $500   
110518              ASSAULT                          SIMPLE   
55727             

In [None]:
# Stacked bar chart of crime counts per calendar month, split by crime type.
#
# Grouping on the raw 'DATE' values would create one group (and therefore one
# bar) per distinct timestamp, which is far too many to draw or read. The
# timestamps are bucketed into months first so the chart has one bar per month.
# This relies on 'DATE' having been parsed to datetimes by the earlier cell.
occurrence_month = df['DATE'].dt.to_period('M')  # keeps the Series name 'DATE'

# Long table: one row per (month, crime type) with its number of occurrences.
crime_counts = (
    df.groupby([occurrence_month, ' PRIMARY DESCRIPTION'])
    .size()
    .reset_index(name='Occurrences')
)

# Wide table: months as the index, crime types as columns. A crime type that
# did not occur in a month has no row in `crime_counts`, so it shows up as NaN
# after the pivot; treat it as zero and keep the counts as integers.
pivot_table = (
    crime_counts
    .pivot(index='DATE', columns=' PRIMARY DESCRIPTION', values='Occurrences')
    .fillna(0)
    .astype(int)
)

# Draw on an explicit Axes. A continuous colormap is used because there are
# more crime types than colours in matplotlib's default cycle, which would
# otherwise repeat colours within a single stacked bar.
fig, ax = plt.subplots(figsize=(12, 6))
pivot_table.plot(kind='bar', stacked=True, ax=ax, colormap='nipy_spectral')

# Title and axis labels
ax.set_title('CHICAGO CRIME JULY 2022 - JULY 2023')
ax.set_xlabel('JULY 2022 - JULY 2023')
ax.set_ylabel('Number of Occurrences')

# Place the long legend beside the plot so it does not cover the bars.
ax.legend(
    title='PRIMARY DESCRIPTION',
    bbox_to_anchor=(1.02, 1),
    loc='upper left',
    fontsize='small',
)
fig.tight_layout()

# Show the plot
plt.show()
