In [1]:
import pandas as pd   
import numpy as np  
import matplotlib.pyplot as plt 
import seaborn as sns
from scipy.stats import levene
from scipy.stats import f_oneway
from scipy.stats import shapiro
from scipy.stats import ttest_ind
import numpy as np
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster

# Read CSV, Nashville Crime and Unemployment

In [2]:
nash_crime_df=pd.read_csv('../data/Nashville/Metro_Nashville_Police.csv',dtype={"ZIP Code":str})

i use the :str to change the Zip code column from float to string, in other to clean it up, thereby remove the .0 that comes with it

In [3]:
nash_crime_df.head(2)

Unnamed: 0,Primary Key,Incident Number,Report Type,Report Type Description,Incident Status Code,Incident Status Description,Investigation Status,Incident Occurred,Incident Reported,Incident Location,...,Weapon Description,Victim Number,Domestic Related,Victim Type,Victim Description,Victim Gender,Victim Race,Victim Ethnicity,Victim County Resident,Mapped Location
0,20190496213_11,20190496213,D,DISPATCHED,U,UNFOUNDED,Closed,07/01/2019 05:31:00 PM,07/01/2019 06:17:00 PM,2410 2410,...,NONE,1,True,I,INDIVIDUAL (18 AND OVER),M,W,Non-Hispanic,RESIDENT,POINT (-86.696 36.218)
1,20180290646_11,20180290646,D,DISPATCHED,A,CLEARED BY ARREST,Closed,03/31/2018 12:21:00 PM,03/31/2018 07:41:00 PM,5824 5824,...,NONE,1,False,B,BUSINESS,,,,,POINT (-86.713 36.042)


In [4]:
nash_crime_df['ZIP Code']

0         37214
1         37013
2         37207
3         37205
4         37076
          ...  
865170      NaN
865171      NaN
865172      NaN
865173      NaN
865174    37210
Name: ZIP Code, Length: 865175, dtype: object

In [5]:
nash_unemployment_df=pd.read_csv('../data/Nashville/Unemployment_Nashville-Davidson.csv')                   

In [6]:
nash_unemployment_df.head(2)

Unnamed: 0,Series ID,Year,Period,Label,Value
0,LAUCA474000000000004,2021,M01,2021 Jan,54892
1,LAUCA474000000000004,2021,M02,2021 Feb,52123


In [7]:
nash_unemployment_df[['Year', 'Month']] = nash_unemployment_df['Label'].str.split(' ', expand=True)
nash_unemployment_df.head()

Unnamed: 0,Series ID,Year,Period,Label,Value,Month
0,LAUCA474000000000004,2021,M01,2021 Jan,54892,Jan
1,LAUCA474000000000004,2021,M02,2021 Feb,52123,Feb
2,LAUCA474000000000004,2021,M03,2021 Mar,50828,Mar
3,LAUCA474000000000004,2021,M04,2021 Apr,48648,Apr
4,LAUCA474000000000004,2021,M05,2021 May,45838,May


it is interesting to note that Nashville unemployment data is in numbers and not rate or percentage and contains from 2021 - 2023

In [8]:
nash_unemployment_df.rename(columns={'Value': 'Unemployed Population'}, inplace=True)
nash_unemployment_df.tail(2)

Unnamed: 0,Series ID,Year,Period,Label,Unemployed Population,Month
34,LAUCA474000000000004,2023,M11,2023 Nov,33534,Nov
35,LAUCA474000000000004,2023,M12,2023 Dec,29693,Dec


# Analysis Average unemployment number per month

In [9]:
avg_unemployment_month=round(nash_unemployment_df.groupby('Month')['Unemployed Population'].mean(), 2)
avg_unemployment_month

Month
Apr    34904.67
Aug    36713.67
Dec    29191.00
Feb    39792.33
Jan    40784.67
Jul    39103.00
Jun    43610.00
Mar    37635.00
May    36722.67
Nov    32051.33
Oct    34361.67
Sep    34573.33
Name: Unemployed Population, dtype: float64

In [10]:
month_with_max_unemployment = avg_unemployment_month.idxmax()
month_with_max_unemployment

'Jun'

In [11]:
month_with_min_unemployment = avg_unemployment_month.idxmin()
month_with_min_unemployment

'Dec'

June is the month with the highest unemployment population while December is the lowest

In [12]:
avg_unemployment_year=round(nash_unemployment_df.groupby('Year')['Unemployed Population'].mean(), 2)
avg_unemployment_year

Year
2021    44323.50
2022    32793.50
2023    32743.83
Name: Unemployed Population, dtype: float64

from the analysis, there is an average of more people who are unemployment in 2021 than 2023 and 2023. Remember, the economy was partially shut down in 2021, during the COVID-19 recovery years.

In [13]:
nash_unemployment_df_by_year = nash_unemployment_df.groupby('Year')['Unemployed Population'].sum().reset_index()
nash_unemployment_df_by_year

Unnamed: 0,Year,Unemployed Population
0,2021,531882
1,2022,393522
2,2023,392926


getting the sum of the unemployment number

In [14]:
nash_unemployment_df_by_year['Year'] = nash_unemployment_df_by_year['Year'].astype('int32')
nash_unemployment_df_by_year.dtypes

Year                     int32
Unemployed Population    int64
dtype: object

In [15]:
nash_crime_df.columns

Index(['Primary Key', 'Incident Number', 'Report Type',
       'Report Type Description', 'Incident Status Code',
       'Incident Status Description', 'Investigation Status',
       'Incident Occurred', 'Incident Reported', 'Incident Location',
       'Latitude', 'Longitude', 'ZIP Code', 'RPA', 'Zone', 'Location Code',
       'Location Description', 'Offense Number', 'Offense NIBRS',
       'Offense Description', 'Weapon Primary', 'Weapon Description',
       'Victim Number', 'Domestic Related', 'Victim Type',
       'Victim Description', 'Victim Gender', 'Victim Race',
       'Victim Ethnicity', 'Victim County Resident', 'Mapped Location'],
      dtype='object')

# Cleaning Zip Code to remove any that does not belong to Nashville

In [16]:
nash_crime_df['ZIP Code'].unique()

array(['37214', '37013', '37207', '37205', '37076', '37201', '37208', nan,
       '37219', '37211', '37138', '37216', '37203', '37228', '37209',
       '37210', '37220', '37072', '37218', '37206', '37115', '37212',
       '37221', '37217', '37189', '37213', '37204', '37215', '37240',
       '37080', '37027', '37167', '37049', '37025', '37135', '37015',
       '37122', '37086', '37143', '37064', '37683', '37232', '6706',
       '37130', '37066', '3701', '37129', '37087', '37075', '38109',
       '30139', '38562', '37330', '37067', '53701', '27707', '38401',
       '91107', '27216', '89502', '37029', '90069', '37082', '0', '39202',
       '37185', '37043', '30318', '32801', '19153', '43440', '37110',
       '15213', '37028', '29501', '23236', '17202', '37179', '37090',
       '37078', '37274', '60123'], dtype=object)

In [17]:
nash_crime_df.dropna(subset=['ZIP Code'], inplace=True)
nash_crime_df['ZIP Code'].unique()

array(['37214', '37013', '37207', '37205', '37076', '37201', '37208',
       '37219', '37211', '37138', '37216', '37203', '37228', '37209',
       '37210', '37220', '37072', '37218', '37206', '37115', '37212',
       '37221', '37217', '37189', '37213', '37204', '37215', '37240',
       '37080', '37027', '37167', '37049', '37025', '37135', '37015',
       '37122', '37086', '37143', '37064', '37683', '37232', '6706',
       '37130', '37066', '3701', '37129', '37087', '37075', '38109',
       '30139', '38562', '37330', '37067', '53701', '27707', '38401',
       '91107', '27216', '89502', '37029', '90069', '37082', '0', '39202',
       '37185', '37043', '30318', '32801', '19153', '43440', '37110',
       '15213', '37028', '29501', '23236', '17202', '37179', '37090',
       '37078', '37274', '60123'], dtype=object)

In [18]:
# nash_crime_df['ZIP Code'] = nash_crime_df['ZIP Code'].astype(str).str.rstrip('.')
# nash_crime_df['ZIP Code'].unique()

In [19]:
# Assuming 'ZIP Code' is a column in nash_crime_df and you want to remove the last '0' from each ZIP code
nash_crime_df['ZIP Code'] = nash_crime_df['ZIP Code'].astype(str).apply(lambda x: x[:-1] if x.endswith('0') else x)
nash_crime_df['ZIP Code'].unique()

array(['37214', '37013', '37207', '37205', '37076', '37201', '37208',
       '37219', '37211', '37138', '37216', '37203', '37228', '37209',
       '3721', '3722', '37072', '37218', '37206', '37115', '37212',
       '37221', '37217', '37189', '37213', '37204', '37215', '3724',
       '3708', '37027', '37167', '37049', '37025', '37135', '37015',
       '37122', '37086', '37143', '37064', '37683', '37232', '6706',
       '3713', '37066', '3701', '37129', '37087', '37075', '38109',
       '30139', '38562', '3733', '37067', '53701', '27707', '38401',
       '91107', '27216', '89502', '37029', '90069', '37082', '', '39202',
       '37185', '37043', '30318', '32801', '19153', '4344', '3711',
       '15213', '37028', '29501', '23236', '17202', '37179', '3709',
       '37078', '37274', '60123'], dtype=object)

I am applying a function to each element that checks if the ZIP code ends with '0', and then remove it. I tried striping the dots and the zeros came up, which makes it difficult. 

In [20]:
# nash_crime_df = nash_crime_df.dropna(subset=['ZIP Code'])
# nash_crime_df.head(1)

In [21]:
# nash_crime_df['ZIP Code'].unique()

In [22]:
zip_codes_to_remove = ['38109', '3722','27707', '38401','37029', '37683', '37067', '38562', '37025', '37049', '37274', '3724', '3733', '3708', '3713', '3709', '37078','17202','3711','37082','37028','90069','39202','3701','15213','91107','60123','89502','89502', '30139','53701','37043', '19153','27216', '4344','6706','30318', '32801','29501', '23236']
nash_crime_df_filtered = nash_crime_df[~nash_crime_df['ZIP Code'].isin(zip_codes_to_remove)]
nash_crime_df_filtered['ZIP Code'].unique()

array(['37214', '37013', '37207', '37205', '37076', '37201', '37208',
       '37219', '37211', '37138', '37216', '37203', '37228', '37209',
       '3721', '37072', '37218', '37206', '37115', '37212', '37221',
       '37217', '37189', '37213', '37204', '37215', '37027', '37167',
       '37135', '37015', '37122', '37086', '37143', '37064', '37232',
       '37066', '37129', '37087', '37075', '', '37185', '37179'],
      dtype=object)

In [23]:
nash_crime_df_filtered.dropna(subset=['ZIP Code'])

Unnamed: 0,Primary Key,Incident Number,Report Type,Report Type Description,Incident Status Code,Incident Status Description,Investigation Status,Incident Occurred,Incident Reported,Incident Location,...,Weapon Description,Victim Number,Domestic Related,Victim Type,Victim Description,Victim Gender,Victim Race,Victim Ethnicity,Victim County Resident,Mapped Location
0,20190496213_11,20190496213,D,DISPATCHED,U,UNFOUNDED,Closed,07/01/2019 05:31:00 PM,07/01/2019 06:17:00 PM,2410 2410,...,NONE,1,True,I,INDIVIDUAL (18 AND OVER),M,W,Non-Hispanic,RESIDENT,POINT (-86.696 36.218)
1,20180290646_11,20180290646,D,DISPATCHED,A,CLEARED BY ARREST,Closed,03/31/2018 12:21:00 PM,03/31/2018 07:41:00 PM,5824 5824,...,NONE,1,False,B,BUSINESS,,,,,POINT (-86.713 36.042)
2,20200243837_11,20200243837,D,DISPATCHED,U,UNFOUNDED,Closed,04/06/2020 11:15:00 AM,04/06/2020 01:11:00 PM,3144 3144,...,NONE,1,False,G,GOVERNMENT,,,,,POINT (-86.759 36.232)
3,20170471747_11,20170471747,D,DISPATCHED,U,UNFOUNDED,Closed,05/29/2017 02:33:00 AM,05/29/2017 04:27:00 AM,1714 1714,...,NONE,1,False,I,INDIVIDUAL (18 AND OVER),M,W,Non-Hispanic,RESIDENT,POINT (-86.907 36.086)
4,20171099391_21,20171099391,D,DISPATCHED,A,CLEARED BY ARREST,Closed,12/16/2017 06:30:00 PM,12/16/2017 10:44:00 PM,1101 1101,...,PERSONAL (HANDS),1,True,I,INDIVIDUAL (18 AND OVER),F,B,Non-Hispanic,RESIDENT,POINT (-86.586 36.178)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
865161,20230778865_11,20230778865,T,,U,UNFOUNDED,Closed,12/27/2023 04:00:00 PM,12/30/2023 12:06:00 PM,1708 1708,...,NONE,1,False,I,INDIVIDUAL (18 AND OVER),F,W,Non-Hispanic,RESIDENT,POINT (-86.799 36.118)
865163,20230774687_12,20230774687,S,SUSPECT,U,UNFOUNDED,Closed,12/28/2023 10:37:00 AM,12/28/2023 10:37:00 AM,1201 1201,...,NONE,2,False,I,INDIVIDUAL (18 AND OVER),U,U,Unknown,NON RESIDENT,POINT (-86.733 36.158)
865167,20230453680_11,20230453680,D,DISPATCHED,A,CLEARED BY ARREST,Closed,08/03/2023 01:47:00 AM,08/03/2023 09:15:00 AM,426 426,...,HANDGUN - SEMIAUTOMATIC,1,False,I,INDIVIDUAL (18 AND OVER),M,W,Hispanic,NON RESIDENT,POINT (-86.646 36.16)
865169,20230777152_11,20230777152,D,DISPATCHED,U,UNFOUNDED,Closed,12/29/2023 02:52:00 PM,12/29/2023 02:52:00 PM,301 301,...,NONE,1,False,I,INDIVIDUAL (18 AND OVER),M,B,Non-Hispanic,RESIDENT,POINT (-86.815 36.147)


In [24]:
nash_crime_df_filtered['ZIP Code'].unique()

array(['37214', '37013', '37207', '37205', '37076', '37201', '37208',
       '37219', '37211', '37138', '37216', '37203', '37228', '37209',
       '3721', '37072', '37218', '37206', '37115', '37212', '37221',
       '37217', '37189', '37213', '37204', '37215', '37027', '37167',
       '37135', '37015', '37122', '37086', '37143', '37064', '37232',
       '37066', '37129', '37087', '37075', '', '37185', '37179'],
      dtype=object)

In [25]:
nash_crime_df_filtered.loc[:, 'ZIP Code'] = nash_crime_df_filtered['ZIP Code'].replace('3721', '37210')
nash_crime_df_filtered['ZIP Code'].unique()

array(['37214', '37013', '37207', '37205', '37076', '37201', '37208',
       '37219', '37211', '37138', '37216', '37203', '37228', '37209',
       '37210', '37072', '37218', '37206', '37115', '37212', '37221',
       '37217', '37189', '37213', '37204', '37215', '37027', '37167',
       '37135', '37015', '37122', '37086', '37143', '37064', '37232',
       '37066', '37129', '37087', '37075', '', '37185', '37179'],
      dtype=object)

In [26]:
nash_crime_df_filtered['ZIP Code'].value_counts()

ZIP Code
37207    46979
37013    43267
37211    42507
37115    37916
37203    27536
37210    26131
37208    25850
37206    23151
37217    22838
37209    22506
37214    20437
37076    20269
37201    12684
37218    11942
37216     9908
37221     9794
37212     6525
37138     6150
37204     5825
37205     5135
37219     5109
37215     4169
37072     3177
37228     2840
37213     2806
37189     2611
37027     2141
37135      238
37015      163
37122       62
37086       26
37143       17
37232        6
37075        5
37167        4
37087        4
37064        3
37066        2
37129        2
             1
37185        1
37179        1
Name: count, dtype: int64

In [27]:
nash_crime_df_filtered['Incident Occurred']

0         07/01/2019 05:31:00 PM
1         03/31/2018 12:21:00 PM
2         04/06/2020 11:15:00 AM
3         05/29/2017 02:33:00 AM
4         12/16/2017 06:30:00 PM
                   ...          
865161    12/27/2023 04:00:00 PM
865163    12/28/2023 10:37:00 AM
865167    08/03/2023 01:47:00 AM
865169    12/29/2023 02:52:00 PM
865174    12/28/2023 10:37:00 AM
Name: Incident Occurred, Length: 450738, dtype: object

In [28]:
#convert to datetime
nash_crime_df_filtered.loc[:,'Incident Occurred'] = pd.to_datetime(nash_crime_df_filtered['Incident Occurred'])
nash_crime_df_filtered['Incident Occurred']

  nash_crime_df_filtered.loc[:,'Incident Occurred'] = pd.to_datetime(nash_crime_df_filtered['Incident Occurred'])


0         2019-07-01 17:31:00
1         2018-03-31 12:21:00
2         2020-04-06 11:15:00
3         2017-05-29 02:33:00
4         2017-12-16 18:30:00
                 ...         
865161    2023-12-27 16:00:00
865163    2023-12-28 10:37:00
865167    2023-08-03 01:47:00
865169    2023-12-29 14:52:00
865174    2023-12-28 10:37:00
Name: Incident Occurred, Length: 450738, dtype: object

In [29]:
# Convert 'Incident Occurred' to datetime and creating a new column year
nash_crime_df_filtered['Incident Occurred'] = pd.to_datetime(nash_crime_df_filtered['Incident Occurred'], errors='coerce')
nash_crime_df_filtered.loc[:, 'Year'] = nash_crime_df_filtered['Incident Occurred'].dt.year
nash_crime_df_filtered.head(1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nash_crime_df_filtered['Incident Occurred'] = pd.to_datetime(nash_crime_df_filtered['Incident Occurred'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nash_crime_df_filtered.loc[:, 'Year'] = nash_crime_df_filtered['Incident Occurred'].dt.year


Unnamed: 0,Primary Key,Incident Number,Report Type,Report Type Description,Incident Status Code,Incident Status Description,Investigation Status,Incident Occurred,Incident Reported,Incident Location,...,Victim Number,Domestic Related,Victim Type,Victim Description,Victim Gender,Victim Race,Victim Ethnicity,Victim County Resident,Mapped Location,Year
0,20190496213_11,20190496213,D,DISPATCHED,U,UNFOUNDED,Closed,2019-07-01 17:31:00,07/01/2019 06:17:00 PM,2410 2410,...,1,True,I,INDIVIDUAL (18 AND OVER),M,W,Non-Hispanic,RESIDENT,POINT (-86.696 36.218),2019


In [30]:
#extracting and creating a month column
nash_crime_df_filtered.loc[:,'Month'] = nash_crime_df_filtered['Incident Occurred'].dt.month
nash_crime_df_filtered.head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nash_crime_df_filtered.loc[:,'Month'] = nash_crime_df_filtered['Incident Occurred'].dt.month


Unnamed: 0,Primary Key,Incident Number,Report Type,Report Type Description,Incident Status Code,Incident Status Description,Investigation Status,Incident Occurred,Incident Reported,Incident Location,...,Domestic Related,Victim Type,Victim Description,Victim Gender,Victim Race,Victim Ethnicity,Victim County Resident,Mapped Location,Year,Month
0,20190496213_11,20190496213,D,DISPATCHED,U,UNFOUNDED,Closed,2019-07-01 17:31:00,07/01/2019 06:17:00 PM,2410 2410,...,True,I,INDIVIDUAL (18 AND OVER),M,W,Non-Hispanic,RESIDENT,POINT (-86.696 36.218),2019,7


In [31]:
# nash_crime_df_filtered['Time'] = nash_crime_df_filtered['Incident Occurred'].dt.time
# nash_crime_df_filtered.head(1)

nash_crime_df_filtered.loc[:, 'Time'] = nash_crime_df_filtered['Incident Occurred'].dt.time
nash_crime_df_filtered.head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nash_crime_df_filtered.loc[:, 'Time'] = nash_crime_df_filtered['Incident Occurred'].dt.time


Unnamed: 0,Primary Key,Incident Number,Report Type,Report Type Description,Incident Status Code,Incident Status Description,Investigation Status,Incident Occurred,Incident Reported,Incident Location,...,Victim Type,Victim Description,Victim Gender,Victim Race,Victim Ethnicity,Victim County Resident,Mapped Location,Year,Month,Time
0,20190496213_11,20190496213,D,DISPATCHED,U,UNFOUNDED,Closed,2019-07-01 17:31:00,07/01/2019 06:17:00 PM,2410 2410,...,I,INDIVIDUAL (18 AND OVER),M,W,Non-Hispanic,RESIDENT,POINT (-86.696 36.218),2019,7,17:31:00


You would notice i use the .loc which does not give me any warning issues, which i used after i googled it up

In [32]:
crime_count_by_year=nash_crime_df_filtered.groupby('Year').size().reset_index(name='Crime Count')
crime_count_by_year

Unnamed: 0,Year,Crime Count
0,2017,71019
1,2018,69170
2,2019,61968
3,2020,58112
4,2021,58873
5,2022,60605
6,2023,64029
7,2024,6962


There are more crimes in 2017 that any other year, remember, we are still in 2024. 

In [33]:
year_with_max_crime_count = crime_count_by_year.loc[crime_count_by_year['Crime Count'].idxmax(), 'Year']
year_with_max_crime_count

2017

In [34]:
crime_count_by_year = nash_crime_df_filtered.groupby('Month').size().reset_index(name='Crime Count')
avg_crime_count_by_year = crime_count_by_year['Crime Count'].mean().round(2)
avg_crime_count_by_year

37561.5

# drilling down to 2023

In [35]:

twenty_twenty_three = nash_crime_df_filtered.loc[nash_crime_df_filtered['Year'].isin([2023])]
twenty_twenty_three.head()

Unnamed: 0,Primary Key,Incident Number,Report Type,Report Type Description,Incident Status Code,Incident Status Description,Investigation Status,Incident Occurred,Incident Reported,Incident Location,...,Victim Type,Victim Description,Victim Gender,Victim Race,Victim Ethnicity,Victim County Resident,Mapped Location,Year,Month,Time
144,20230321366_11,20230321366,D,DISPATCHED,U,UNFOUNDED,Closed,2023-06-01 18:30:00,06/01/2023 07:40:00 PM,409 409,...,I,INDIVIDUAL (18 AND OVER),M,W,Non-Hispanic,RESIDENT,,2023,6,18:30:00
966,20230770706_11,20230770706,D,DISPATCHED,U,UNFOUNDED,Closed,2023-12-25 15:20:00,12/28/2023 03:29:00 PM,41 41,...,B,BUSINESS,,,,,POINT (-86.835 36.091),2023,12,15:20:00
1120,20230761089_21,20230761089,S,SUSPECT,A,CLEARED BY ARREST,Closed,2023-12-20 17:00:00,12/20/2023 06:39:00 PM,405 405,...,S,SOCIETY,,,,,POINT (-86.799 36.158),2023,12,17:00:00
1435,20230774490_11,20230774490,D,DISPATCHED,U,UNFOUNDED,Closed,2023-12-15 09:55:00,12/28/2023 09:55:00 AM,121 121,...,B,BUSINESS,,,,,POINT (-86.72 36.251),2023,12,09:55:00
1449,20230774845_11,20230774845,D,DISPATCHED,U,UNFOUNDED,Closed,2023-12-28 13:15:00,12/28/2023 02:59:00 PM,5824 5824,...,I,INDIVIDUAL (18 AND OVER),M,B,Non-Hispanic,RESIDENT,POINT (-86.713 36.039),2023,12,13:15:00


In [36]:
twenty_twenty_three_df = twenty_twenty_three.reset_index(drop=True)
twenty_twenty_three_df.head() #64029 rows × 35 columns

Unnamed: 0,Primary Key,Incident Number,Report Type,Report Type Description,Incident Status Code,Incident Status Description,Investigation Status,Incident Occurred,Incident Reported,Incident Location,...,Victim Type,Victim Description,Victim Gender,Victim Race,Victim Ethnicity,Victim County Resident,Mapped Location,Year,Month,Time
0,20230321366_11,20230321366,D,DISPATCHED,U,UNFOUNDED,Closed,2023-06-01 18:30:00,06/01/2023 07:40:00 PM,409 409,...,I,INDIVIDUAL (18 AND OVER),M,W,Non-Hispanic,RESIDENT,,2023,6,18:30:00
1,20230770706_11,20230770706,D,DISPATCHED,U,UNFOUNDED,Closed,2023-12-25 15:20:00,12/28/2023 03:29:00 PM,41 41,...,B,BUSINESS,,,,,POINT (-86.835 36.091),2023,12,15:20:00
2,20230761089_21,20230761089,S,SUSPECT,A,CLEARED BY ARREST,Closed,2023-12-20 17:00:00,12/20/2023 06:39:00 PM,405 405,...,S,SOCIETY,,,,,POINT (-86.799 36.158),2023,12,17:00:00
3,20230774490_11,20230774490,D,DISPATCHED,U,UNFOUNDED,Closed,2023-12-15 09:55:00,12/28/2023 09:55:00 AM,121 121,...,B,BUSINESS,,,,,POINT (-86.72 36.251),2023,12,09:55:00
4,20230774845_11,20230774845,D,DISPATCHED,U,UNFOUNDED,Closed,2023-12-28 13:15:00,12/28/2023 02:59:00 PM,5824 5824,...,I,INDIVIDUAL (18 AND OVER),M,B,Non-Hispanic,RESIDENT,POINT (-86.713 36.039),2023,12,13:15:00


converted 2023 sorted crime to df

In [37]:
crime_count_by_month_in_23=twenty_twenty_three_df.groupby('Month').size().reset_index(name='Crime Count')
crime_count_by_month_in_23

Unnamed: 0,Month,Crime Count
0,1,5189
1,2,4840
2,3,5412
3,4,5356
4,5,5691
5,6,5599
6,7,5648
7,8,5635
8,9,5503
9,10,5425


In [38]:
month_with_max_crime_count_in_23 = crime_count_by_month_in_23.loc[crime_count_by_month_in_23['Crime Count'].idxmax(), 'Month']
month_with_max_crime_count_in_23

5

In [39]:
crime_count_by_month = twenty_twenty_three_df.groupby('Month').size().reset_index(name='Crime Count')
avg_crime_count_by_month = crime_count_by_month_in_23['Crime Count'].mean().round(2)
avg_crime_count_by_month

5335.75

In [40]:
twenty_twenty_three_df.columns

Index(['Primary Key', 'Incident Number', 'Report Type',
       'Report Type Description', 'Incident Status Code',
       'Incident Status Description', 'Investigation Status',
       'Incident Occurred', 'Incident Reported', 'Incident Location',
       'Latitude', 'Longitude', 'ZIP Code', 'RPA', 'Zone', 'Location Code',
       'Location Description', 'Offense Number', 'Offense NIBRS',
       'Offense Description', 'Weapon Primary', 'Weapon Description',
       'Victim Number', 'Domestic Related', 'Victim Type',
       'Victim Description', 'Victim Gender', 'Victim Race',
       'Victim Ethnicity', 'Victim County Resident', 'Mapped Location', 'Year',
       'Month', 'Time'],
      dtype='object')

In [41]:
twenty_twenty_three_df['Weapon Description'].unique()

array(['HANDGUN', 'NONE', 'PERSONAL (HANDS)', 'CLUB',
       'LETHAL/CUTTING INSTRUMENT', 'Unarmed', 'OTHER FIREARM', nan,
       'OTHER', 'HANDGUN - SEMIAUTOMATIC', 'SHOTGUN', 'REVOLVER',
       'MOTOR VEHICLE', 'RIFLE', 'DRUGS', 'FIRE/INCENDIARY DEVICES',
       'ASPHYXIATION', 'EXPLOSIVES', 'POISON'], dtype=object)

In [42]:
# twenty_twenty_three_df = twenty_twenty_three_df[~twenty_twenty_three_df['ZIP Code'].isin(['23236', '30318','39202','27216','38562','60123','91107','29501', '17202','0','90069','27707', '30139'])]
# twenty_twenty_three_df['ZIP Code'].unique()

In [43]:
twenty_twenty_three_df['Offense Description'].unique()

array(['POLICE INQUIRY', 'WEAPON OFFENSE, CRIMINAL ATTEMPT',
       'SIMPLE ASSLT', 'BURGLARY - AGGRAVATED', 'MARIJUANA - POSSESS',
       'RECOVERY, STOLEN PROPERTY', 'FOUND PROPERTY',
       'POSSESSION OF A CONTROLLED SUBSTANCE',
       'INTENTIONAL AGGRAVATED ASSAULT', 'BURGLARY- MOTOR VEHICLE',
       'KIDNAPPING, CRIMINAL ATTEMPT',
       'DRUG FREE SCH.ZONE-CONT.SUB.-SCH. IV', 'DEATH NATURAL', 'ASSAULT',
       'TRANSPORT', 'DRUG PARAPHERNALIA- UNLAWFUL USE', 'FAMILY OFFENSE',
       'MARIJUANA - SELL', 'LOST PROPERTY',
       'DEATH UNNATURAL...(ACCIDENTAL)', 'CIVIL CASE',
       'POSS. W/INT-CONT.SUB. - FENTANYL .5 GR OR MORE', 'OVERDOSE',
       'CRIMINAL TRESPASS', 'ASSAULT- FEAR OF BODILY INJURY',
       'Assault, Aggravated - Deadly Weapon - Int/Kn',
       'DAMAGE PROP - PRIVATE', 'DRUGS, COCAINE, POSSESS',
       'CONTEMPT - VIOLATION OF PROTECTION ORDER',
       'CRIMINAL CONTEMPT-VIOLATION OF ORDER OF PROTECTION',
       'CRIMINAL IMPERSONATION', 'VANDALISM- $1,000 OR 

In [44]:
twenty_twenty_three_df['Offense Description'].value_counts().head(20)

Offense Description
POLICE INQUIRY                                             22698
FOUND PROPERTY                                              4592
LOST PROPERTY                                               4573
SIMPLE ASSLT                                                4118
RECOVERY, STOLEN PROPERTY                                   3779
TRANSPORT                                                   2624
ASSAULT- FEAR OF BODILY INJURY                              1742
DRUG PARAPHERNALIA- UNLAWFUL USE                            1616
ASSAULT- OFFENSIVE OR PROVOCATIVE CONTACT                   1491
INTENTIONAL AGGRAVATED ASSAULT                              1384
WEAPON OFFENSE, CRIMINAL ATTEMPT                            1373
DAMAGE PROP - PRIVATE                                        942
VEHICLE THEFT                                                831
MARIJUANA - POSSESS                                          625
DEATH NATURAL                                                543
SHOPL

I am going for the top three crime count, after that i will drug paraphernalia and vehicle theft

# Analysis by Offense Description, 'Lost Property' and 'Location Description'

In [45]:
Lost_Property =twenty_twenty_three_df.loc[twenty_twenty_three_df['Offense Description'].isin(['LOST PROPERTY'])]
Lost_Property.head(2)

Unnamed: 0,Primary Key,Incident Number,Report Type,Report Type Description,Incident Status Code,Incident Status Description,Investigation Status,Incident Occurred,Incident Reported,Incident Location,...,Victim Type,Victim Description,Victim Gender,Victim Race,Victim Ethnicity,Victim County Resident,Mapped Location,Year,Month,Time
46,20230777187_11,20230777187,T,,U,UNFOUNDED,Closed,2023-10-03 12:00:00,12/29/2023 03:20:00 PM,2345 2345,...,I,INDIVIDUAL (18 AND OVER),F,B,Non-Hispanic,RESIDENT,POINT (-86.648 36.067),2023,10,12:00:00
85,20230326820_11,20230326820,D,DISPATCHED,U,UNFOUNDED,Closed,2023-06-04 15:58:00,06/04/2023 03:58:00 PM,GALLATIN PKE & E TRINITY LN,...,I,INDIVIDUAL (18 AND OVER),M,W,Non-Hispanic,RESIDENT,POINT (-86.738 36.205),2023,6,15:58:00


The concept of lost property crime in Nashville, Tennessee involves situations where individuals come into possession of property that they know or should know is lost, mislaid, or misdelivered. Under Tennessee law, the offense of theft includes various categories such as petty theft (misdemeanor theft) for items under 500 and grand theft (felony theft) for larger amounts over 500. Additionally, receiving stolen property is considered a crime when an individual accepts property that they know to be stolen with the intent to sell or use it as their own. see https://nashvillelawoffices.com/criminal-law/theft/

I am going for the top three crime count

In [47]:
# Lost_Property_copy = Lost_Property.copy()
# Lost_Property_copy['ZIP Code'] = Lost_Property_copy['ZIP Code'].astype(str).str.rstrip('.')

# Lost_Property.loc[:, 'ZIP Code'] = Lost_Property_copy['ZIP Code']

In [48]:
Lost_Property['ZIP Code'].unique()

array(['37013', '37216', '37217', '37211', '37201', '37203', '37219',
       '37209', '37208', '37115', '37205', '37214', '37210', '37189',
       '37206', '37076', '37221', '37207', '37228', '37215', '37218',
       '37204', '37027', '37212', '37072', '37138', '37213', '37135',
       '37015', '37179', '37086', '37122'], dtype=object)

i update the original DataFrame using .loc[row_indexer, col_indexer] = value

In [49]:
zip_Lost_Property_count=Lost_Property['ZIP Code'].value_counts()
zip_Lost_Property_count.head(15)

ZIP Code
37013    476
37203    438
37211    385
37201    381
37115    292
37210    262
37217    256
37214    246
37207    233
37209    199
37076    160
37206    133
37219    126
37208    126
37221    111
Name: count, dtype: int64

"The median household income of 47,455 is slightly less than average compared to the rest of the country." 37013, Antioch area has more crime count than any other Zip code in Nashville, than 37221, which has the lowest count Bellevue area that with "The median household income of $68,473 is high compared to the rest of the country. It is also slightly higher than average compared to nearby ZIP codes. So 37221 is likely to be one of the nicer parts of town with a more affluent demographic."

In [50]:
location_count=Lost_Property['Location Description'].value_counts()
location_count.head(19)

Location Description
RESIDENCE, HOME                       1308
HIGHWAY, ROAD, ALLEY                   802
PARKING LOT, GARAGE                    443
BAR, NIGHT CLUB                        306
School-Elementary/Secondary            239
Auto Dealership New/Used               175
APARTMENT                              133
HOTEL, MOTEL, ETC.                     129
COMMERCIAL, OFFICE BUILDING            123
GOVERNMENT, PUBLIC BUILDING            112
RESTAURANT                             100
AIR, BUS, TRAIN TERMINAL                92
SPECIALTY STORE                         78
CONVENIENCE STORE                       65
DEPARTMENT, DISCOUNT STORE              55
GROCERY, SUPERMARKET                    52
OTHER, UNKNOWN                          51
SERVICE, GAS STATION                    41
Arena/Stadium/Fairgrounds/Coliseum      41
Name: count, dtype: int64

In [51]:
weapon_Lost_Property=Lost_Property['Weapon Description'].value_counts()
weapon_Lost_Property

Weapon Description
NONE                       3698
Unarmed                     217
PERSONAL (HANDS)            121
HANDGUN                       7
OTHER                         5
HANDGUN - SEMIAUTOMATIC       1
Name: count, dtype: int64

it is surprising to see handgun-semiautomatic as a weapon used to commit this type of crime

In [52]:
time_Lost_Property=Lost_Property['Time'].value_counts()
time_Lost_Property

Time
12:00:00    477
08:00:00    472
00:00:00    283
09:00:00    180
10:00:00    157
           ... 
11:22:00      1
16:41:00      1
08:40:00      1
23:55:00      1
19:37:00      1
Name: count, Length: 710, dtype: int64

There are more crimes happening at 12 pm than 19:37:00 that has the lowest number of crime count. Also, more reported crime of lost properties happen at RESIDENCE, HOME with 13336 than public areas like Arena/Stadium/Fairgrounds/Coliseum andSERVICE, GAS STATION that has 41 counts

# Analysis by offense description', 'Simple assault' and 'Location Description


In [53]:
Simple_assault =twenty_twenty_three_df.loc[twenty_twenty_three_df['Offense Description'].isin(['SIMPLE ASSLT'])]
Simple_assault.head(3)

Unnamed: 0,Primary Key,Incident Number,Report Type,Report Type Description,Incident Status Code,Incident Status Description,Investigation Status,Incident Occurred,Incident Reported,Incident Location,...,Victim Type,Victim Description,Victim Gender,Victim Race,Victim Ethnicity,Victim County Resident,Mapped Location,Year,Month,Time
5,20230735954_11,20230735954,D,DISPATCHED,A,CLEARED BY ARREST,Closed,2023-12-08 22:57:00,12/09/2023 12:07:00 AM,838 838,...,I,INDIVIDUAL (18 AND OVER),F,U,Non-Hispanic,RESIDENT,POINT (-86.761 36.15),2023,12,22:57:00
44,20230778134_11,20230778134,D,DISPATCHED,A,CLEARED BY ARREST,Closed,2023-12-29 23:20:00,12/30/2023 12:31:00 AM,306 306,...,I,INDIVIDUAL (18 AND OVER),M,W,Hispanic,NON RESIDENT,POINT (-86.776 36.162),2023,12,23:20:00
62,20230767246_11,20230767246,D,DISPATCHED,R,REFUSED TO COOPERATE,Closed,2023-12-23 14:25:00,12/23/2023 05:11:00 PM,706 706,...,I,INDIVIDUAL (18 AND OVER),F,B,Non-Hispanic,RESIDENT,POINT (-86.755 36.218),2023,12,14:25:00


In Nashville, Tennessee, simple assault is considered a Class A misdemeanor under the state's criminal laws. Simple assault is defined as intentionally, knowingly, or recklessly causing bodily injury to another person or causing a person to reasonably fear imminent bodily injury. Additionally, physical contact that a reasonable person would find extremely provocative or offensive can also constitute simple assault https://www.tncriminaldefensefirm.com/practice-areas/assault/simple-assault/

In [54]:
victim_Simple_assault_count=Simple_assault['Victim Description'].value_counts()
victim_Simple_assault_count.head(15)

Victim Description
INDIVIDUAL (18 AND OVER)    4093
POLICE OFFICER                21
UNKNOWN                        3
OTHER                          1
Name: count, dtype: int64

it appears that police officers were the second highest number of those who were assaulted in Nashville

In [55]:
zip_Simple_assault_count=Simple_assault['ZIP Code'].value_counts()
zip_Simple_assault_count.head(15)

ZIP Code
37013    461
37211    417
37207    405
37115    331
37217    279
37208    241
37203    230
37206    227
37210    221
37076    217
37214    157
37209    157
37201    130
37218     97
37221     89
Name: count, dtype: int64

Antioch, TN (View All Cities) The median household income of $47,455 is slightly less than average compared to the rest of the country. It is also slightly less than average compared to nearby ZIP codes. While money isn't everything, residents in ZIP code 37013 earn less than in other parts of town. The people living in ZIP code 37013 are primarily white. The number of people in their late 20s to early 40s is extremely large while the number of seniors is small. There are also a small number of families and a large number of single parents. https://www.unitedstateszipcodes.org/37013/

In [56]:
Assault_location_count=Simple_assault['Location Description'].value_counts()
Assault_location_count.head(19)

Location Description
RESIDENCE, HOME                       1826
APARTMENT                             1112
PARKING LOT, GARAGE                    322
HIGHWAY, ROAD, ALLEY                   303
HOTEL, MOTEL, ETC.                     136
BAR, NIGHT CLUB                         87
RESTAURANT                              47
School-Elementary/Secondary             31
CONVENIENCE STORE                       26
AIR, BUS, TRAIN TERMINAL                25
SERVICE, GAS STATION                    25
DEPARTMENT, DISCOUNT STORE              18
SPECIALTY STORE                         17
Arena/Stadium/Fairgrounds/Coliseum      14
COMMERCIAL, OFFICE BUILDING             14
PARK                                    13
HOSPITAL                                12
GROCERY, SUPERMARKET                    10
Shelter-Mission/Homeless                 9
Name: count, dtype: int64

In [57]:
weapon_Simple_assault=Simple_assault['Weapon Description'].value_counts()
weapon_Simple_assault

Weapon Description
PERSONAL (HANDS)    3774
OTHER                280
NONE                  50
Unarmed                6
CLUB                   2
MOTOR VEHICLE          1
Name: count, dtype: int64

For this analysis, personal hands were the highest kind of weapon used and motor vehicle as a weapon has one count, and club has one count

In [58]:
time_Simple_assault=Simple_assault['Time'].value_counts()
time_Simple_assault

Time
12:00:00    90
22:00:00    87
21:00:00    81
18:00:00    76
23:00:00    73
            ..
05:05:00     1
11:38:00     1
01:35:00     1
13:52:00     1
14:41:00     1
Name: count, Length: 965, dtype: int64

12 pm top the count again, same with Lost_Property

# Analysis by Offense Description, 'VEHICLE THEFT ' and 'Location Description'

In [59]:
vehicle_theft =twenty_twenty_three_df.loc[twenty_twenty_three_df['Offense Description'].isin(['VEHICLE THEFT'])]
vehicle_theft.head(3)

Unnamed: 0,Primary Key,Incident Number,Report Type,Report Type Description,Incident Status Code,Incident Status Description,Investigation Status,Incident Occurred,Incident Reported,Incident Location,...,Victim Type,Victim Description,Victim Gender,Victim Race,Victim Ethnicity,Victim County Resident,Mapped Location,Year,Month,Time
123,20230315610_11,20230315610,D,DISPATCHED,A,CLEARED BY ARREST,Closed,2023-05-30 12:27:00,05/30/2023 01:09:00 PM,2910 2910,...,I,INDIVIDUAL (18 AND OVER),M,B,Non-Hispanic,RESIDENT,POINT (-86.82 36.155),2023,5,12:27:00
214,20230323512_11,20230323512,D,DISPATCHED,A,CLEARED BY ARREST,Closed,2023-06-02 14:00:00,06/02/2023 08:32:00 PM,LEAFLAND & DAYTON,...,I,INDIVIDUAL (18 AND OVER),M,W,Non-Hispanic,NON RESIDENT,POINT (-86.745 36.127),2023,6,14:00:00
403,20240001062_11,20240001062,D,DISPATCHED,U,UNFOUNDED,Closed,2023-12-31 22:00:00,01/01/2024 05:47:00 PM,908 908,...,I,INDIVIDUAL (18 AND OVER),M,W,Non-Hispanic,RESIDENT,POINT (-86.641 36.054),2023,12,22:00:00


In [60]:
victim_vehicle_theft_count=vehicle_theft['Victim Description'].value_counts()
victim_vehicle_theft_count.head(15)

Victim Description
INDIVIDUAL (18 AND OVER)    761
BUSINESS                     64
GOVERNMENT                    6
Name: count, dtype: int64

it is interesting here as Businesses are the second highest to report vehicle theft in Nashville

In [61]:
zip_vehicle_theft_count=vehicle_theft['ZIP Code'].value_counts()
zip_vehicle_theft_count.head(15)

ZIP Code
37013    118
37211    100
37207     96
37214     56
37217     51
37115     50
37203     46
37206     36
37210     35
37076     34
37209     32
37208     31
37218     20
37201     17
37204     16
Name: count, dtype: int64

37013 zip topped the list again as it did in simple assault

In [62]:
location_vehicle_theft_count=vehicle_theft['Location Description'].value_counts()
location_vehicle_theft_count.head(19)

Location Description
PARKING LOT, GARAGE            472
RESIDENCE, HOME                153
HIGHWAY, ROAD, ALLEY           105
APARTMENT                       26
HOTEL, MOTEL, ETC.              13
Auto Dealership New/Used        13
SERVICE, GAS STATION            11
CONVENIENCE STORE                7
OTHER, UNKNOWN                   4
GROCERY, SUPERMARKET             3
SPECIALTY STORE                  3
HOSPITAL                         3
CHURCH, SYNAGOGUE, TEMPLE        3
BAR, NIGHT CLUB                  2
RESTAURANT                       2
Shelter-Mission/Homeless         2
COMMERCIAL, OFFICE BUILDING      2
Carport                          1
RENTAL STORAGE FACILITY          1
Name: count, dtype: int64

parking lot came up as highest count followed by residence and home in vehicle theft while rental storage the least. This provides insight as per where owners of parking lot/garage should invest more money on signs to remind people to lock their cars before leaving or invest security cameras in these places. It is also surprising that there is vehicle theft in CHURCH, SYNAGOGUE, TEMPLE, places of worship. 

In [63]:
time_vehicle_theft=vehicle_theft['Time'].value_counts()
time_vehicle_theft

Time
23:00:00    34
21:00:00    30
12:00:00    30
22:00:00    29
19:00:00    27
            ..
20:36:00     1
04:55:00     1
17:20:00     1
03:14:00     1
11:57:00     1
Name: count, Length: 256, dtype: int64

this makes a little sense that vehicle theft occurs mostly at night, where people are sleeping. 

In [64]:
weapon_vehicle_theft=vehicle_theft['Weapon Description'].value_counts()
weapon_vehicle_theft

Weapon Description
NONE                         579
Unarmed                      171
PERSONAL (HANDS)              37
CLUB                           9
OTHER                          7
HANDGUN - SEMIAUTOMATIC        3
HANDGUN                        2
LETHAL/CUTTING INSTRUMENT      2
Name: count, dtype: int64

this counts shows that most of the time, there were no weapon involved, maybe the owner comes out and finds that their vehicle is missing, which is followed my when the they are unarmed

# Analysis, DRUG PARAPHERNALIA- UNLAWFUL USE

In [65]:
drug_parphernalia =twenty_twenty_three_df.loc[twenty_twenty_three_df['Offense Description'].isin(['DRUG PARAPHERNALIA- UNLAWFUL USE'])]
drug_parphernalia.head(3)

Unnamed: 0,Primary Key,Incident Number,Report Type,Report Type Description,Incident Status Code,Incident Status Description,Investigation Status,Incident Occurred,Incident Reported,Incident Location,...,Victim Type,Victim Description,Victim Gender,Victim Race,Victim Ethnicity,Victim County Resident,Mapped Location,Year,Month,Time
34,20230777129_22,20230777129,D,DISPATCHED,A,CLEARED BY ARREST,Closed,2023-12-29 13:00:00,12/29/2023 06:57:00 PM,1808 1808,...,S,SOCIETY,,,,,POINT (-86.809 36.18),2023,12,13:00:00
60,20230777980_11,20230777980,S,SUSPECT,A,CLEARED BY ARREST,Closed,2023-12-29 23:57:00,12/29/2023 11:57:00 PM,614 614,...,S,SOCIETY,,,,,POINT (-86.776 36.153),2023,12,23:57:00
121,20230326601_11,20230326601,D,DISPATCHED,A,CLEARED BY ARREST,Closed,2023-06-04 12:25:00,06/04/2023 01:34:00 PM,1504 1504,...,S,SOCIETY,,,,,POINT (-86.779 36.207),2023,6,12:25:00


The unlawful use of drug paraphernalia in Nashville, Tennessee involves various activities related to controlled substances. Tennessee law prohibits the use or possession with intent to use drug paraphernalia for activities like planting, cultivating, manufacturing, or introducing controlled substances into the human body, unless authorized by specific regulationsViolations of these regulations are classified as Class A misdemeanors or Class E felonies, depending on the nature of the offense and the individual's age. Furthermore, possessing drug paraphernalia in Tennessee can lead to legal consequences such as a Class A misdemeanor charge, which carries penalties of up to 11 months and 29 days in jail along with fines. The possession of marijuana paraphernalia specifically is a common criminal charge in Tennessee, with penalties including jail time and fines based on the circumstances and prior offenses. see https://nashvilleattorneynow.com/possession-of-drug-paraphernalia-tn/

In [66]:
victim_drug_parphernalia_count=drug_parphernalia['Victim Description'].value_counts()
victim_drug_parphernalia_count.head()

Victim Description
SOCIETY    1616
Name: count, dtype: int64

no much information

In [67]:
zip_drug_parphernalia_count=drug_parphernalia['ZIP Code'].value_counts()
zip_drug_parphernalia_count.head(15)

ZIP Code
37207    220
37203    170
37211    143
37210    136
37201    125
37217    114
37208    108
37013     98
37115     95
37214     84
37076     52
37206     45
37209     44
37216     33
37218     31
Name: count, dtype: int64

Unfortunately, i have to do my analysis, "The people living in ZIP code 37207 are primarily black or African American. The number of people in their late 20s to early 40s is extremely large while the number of middle aged adults is large. There are also an extremely large number of single parents and an extremely small number of families. The percentage of children under 18 living in the 37207 ZIP code is large compared to other areas of the country." https://www.unitedstateszipcodes.org/37207/

In [68]:
location_drug_parphernalia_count=drug_parphernalia['Location Description'].value_counts()
location_drug_parphernalia_count.head(19)

Location Description
HIGHWAY, ROAD, ALLEY           671
PARKING LOT, GARAGE            463
RESIDENCE, HOME                110
APARTMENT                       50
SERVICE, GAS STATION            44
HOTEL, MOTEL, ETC.              41
JAIL, PRISON                    30
PARK                            29
CONVENIENCE STORE               24
DEPARTMENT, DISCOUNT STORE      24
AIR, BUS, TRAIN TERMINAL        14
School-Elementary/Secondary     14
Shelter-Mission/Homeless        13
RESTAURANT                       9
COMMERCIAL, OFFICE BUILDING      9
GOVERNMENT, PUBLIC BUILDING      8
GROCERY, SUPERMARKET             7
SPECIALTY STORE                  7
CONSTRUCTION SITE                5
Name: count, dtype: int64

highway has the highest count for drug paraphernalia and construction site has one of the least count

In [69]:
time_drug_parphernalia=drug_parphernalia['Time'].value_counts()
time_drug_parphernalia

Time
16:00:00    30
12:00:00    29
18:00:00    25
13:00:00    25
15:00:00    24
            ..
05:11:00     1
09:06:00     1
09:42:00     1
03:47:00     1
08:58:00     1
Name: count, Length: 748, dtype: int64

This seems reasonable that people do drugs or are caught with the possession of drugs in the day time

# Back to the whole data that includes the whole year

In [70]:
crime_count_by_year = nash_crime_df_filtered.groupby('Month').size().reset_index(name='Crime Count')
avg_crime_count_by_year = crime_count_by_year['Crime Count'].mean().round(2)
avg_crime_count_by_year

37561.5

In [71]:
nash_crime_df_filtered['Month'].dtypes

dtype('int32')

In [72]:
import calendar

nash_crime_df_filtered['Month'] = nash_crime_df_filtered['Month'].apply(lambda x: calendar.month_name[x])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nash_crime_df_filtered['Month'] = nash_crime_df_filtered['Month'].apply(lambda x: calendar.month_name[x])


Convert month numbers to month names. i am using the  apply() method to apply a lambda function to each element in the 'Month' column. 

In [73]:
crime_count_by_month=nash_crime_df_filtered.groupby('Month').size().reset_index(name='Crime Count')
crime_count_by_month

Unnamed: 0,Month,Crime Count
0,April,37356
1,August,38854
2,December,34203
3,February,35915
4,January,41796
5,July,38818
6,June,38040
7,March,37075
8,May,39957
9,November,34260


In [74]:
month_with_max_crime_count = crime_count_by_month.loc[crime_count_by_month['Crime Count'].idxmax(), 'Month']
month_with_max_crime_count

'January'

In [75]:
crime_count_by_month = nash_crime_df_filtered.groupby('Month').size().reset_index(name='Crime Count')
avg_crime_count_by_month = crime_count_by_month['Crime Count'].mean().round(2)

avg_crime_count_by_month

37561.5

In [76]:
crime_count_by_time=nash_crime_df_filtered.groupby('Time').size().reset_index(name='Crime Count')
crime_count_by_time

Unnamed: 0,Time,Crime Count
0,00:00:00,7853
1,00:01:00,1292
2,00:02:00,182
3,00:03:00,175
4,00:04:00,165
...,...,...
1435,23:55:00,252
1436,23:56:00,149
1437,23:57:00,135
1438,23:58:00,144


In [83]:
twenty_twenty_three_df.to_csv('new_Nashville_crime.csv', index=False)

In [84]:
twenty_twenty_three_df

Unnamed: 0,Primary Key,Incident Number,Report Type,Report Type Description,Incident Status Code,Incident Status Description,Investigation Status,Incident Occurred,Incident Reported,Incident Location,...,Victim Type,Victim Description,Victim Gender,Victim Race,Victim Ethnicity,Victim County Resident,Mapped Location,Year,Month,Time
0,20230321366_11,20230321366,D,DISPATCHED,U,UNFOUNDED,Closed,2023-06-01 18:30:00,06/01/2023 07:40:00 PM,409 409,...,I,INDIVIDUAL (18 AND OVER),M,W,Non-Hispanic,RESIDENT,,2023,6,18:30:00
1,20230770706_11,20230770706,D,DISPATCHED,U,UNFOUNDED,Closed,2023-12-25 15:20:00,12/28/2023 03:29:00 PM,41 41,...,B,BUSINESS,,,,,POINT (-86.835 36.091),2023,12,15:20:00
2,20230761089_21,20230761089,S,SUSPECT,A,CLEARED BY ARREST,Closed,2023-12-20 17:00:00,12/20/2023 06:39:00 PM,405 405,...,S,SOCIETY,,,,,POINT (-86.799 36.158),2023,12,17:00:00
3,20230774490_11,20230774490,D,DISPATCHED,U,UNFOUNDED,Closed,2023-12-15 09:55:00,12/28/2023 09:55:00 AM,121 121,...,B,BUSINESS,,,,,POINT (-86.72 36.251),2023,12,09:55:00
4,20230774845_11,20230774845,D,DISPATCHED,U,UNFOUNDED,Closed,2023-12-28 13:15:00,12/28/2023 02:59:00 PM,5824 5824,...,I,INDIVIDUAL (18 AND OVER),M,B,Non-Hispanic,RESIDENT,POINT (-86.713 36.039),2023,12,13:15:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64024,20230778865_11,20230778865,T,,U,UNFOUNDED,Closed,2023-12-27 16:00:00,12/30/2023 12:06:00 PM,1708 1708,...,I,INDIVIDUAL (18 AND OVER),F,W,Non-Hispanic,RESIDENT,POINT (-86.799 36.118),2023,12,16:00:00
64025,20230774687_12,20230774687,S,SUSPECT,U,UNFOUNDED,Closed,2023-12-28 10:37:00,12/28/2023 10:37:00 AM,1201 1201,...,I,INDIVIDUAL (18 AND OVER),U,U,Unknown,NON RESIDENT,POINT (-86.733 36.158),2023,12,10:37:00
64026,20230453680_11,20230453680,D,DISPATCHED,A,CLEARED BY ARREST,Closed,2023-08-03 01:47:00,08/03/2023 09:15:00 AM,426 426,...,I,INDIVIDUAL (18 AND OVER),M,W,Hispanic,NON RESIDENT,POINT (-86.646 36.16),2023,8,01:47:00
64027,20230777152_11,20230777152,D,DISPATCHED,U,UNFOUNDED,Closed,2023-12-29 14:52:00,12/29/2023 02:52:00 PM,301 301,...,I,INDIVIDUAL (18 AND OVER),M,B,Non-Hispanic,RESIDENT,POINT (-86.815 36.147),2023,12,14:52:00
