# Crimes in Italy 2019 - 2023

### Website: [Istat](http://dati.istat.it/Index.aspx)
### Data source: [Link](http://dati.istat.it/Index.aspx?QueryId=25097&lang=en#)
### Wikipedia: [Comuni d'Italia per popolazione](https://it.wikipedia.org/wiki/Comuni_d%27Italia_per_popolazione)

In [1]:
# Display the notebook's cover image, fetched from a shortened URL.
# Both names come from the public IPython.display module rather than the
# internal IPython.core.display path.
from IPython.display import HTML, Image

Image(url="https://shorturl.at/HHicV")

*Photo credits: ThoughtCo. https://www.thoughtco.com/types-of-crimes-3026270*

In [1]:
# Importing libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# Suppress FutureWarning messages so they do not clutter the cell outputs.
warnings.simplefilter(action="ignore",category=FutureWarning)
import datetime as dt
# Turn off pandas' chained-assignment warning for the whole session.
# NOTE(review): this warning often points at a real bug (writing to a copy);
# prefer explicit .copy() / .loc and keep the warning enabled if possible.
pd.options.mode.chained_assignment = None

In [2]:
# Importing Data
# Read the crime dataset from the CSV file sitting next to the notebook.

filepath = "DCCV_DELITTIPS_27112024221041352.csv"
df = pd.read_csv(filepath)

### 1. Exploring Data

In [3]:
# Preview the first rows of the raw dataset.
df.head()

Unnamed: 0,ITTER107,Territory,TIPO_DATO35,Data type,REATI_PS,Type of crime,RIF_TIME,Reference period of crime,TIME,Select time,Value,Flag Codes,Flags
0,ITC16,Cuneo,CRIMET,crimes reported by the police forces to the ju...,CULPINJU,culpable injuries,YRDUR,during the reference year,2019,2019,82.5,,
1,ITC16,Cuneo,CRIMET,crimes reported by the police forces to the ju...,CULPINJU,culpable injuries,YRDUR,during the reference year,2020,2020,60.4,,
2,ITC16,Cuneo,CRIMET,crimes reported by the police forces to the ju...,CULPINJU,culpable injuries,YRDUR,during the reference year,2021,2021,70.1,,
3,ITC16,Cuneo,CRIMET,crimes reported by the police forces to the ju...,CULPINJU,culpable injuries,YRDUR,during the reference year,2022,2022,75.3,,
4,ITC16,Cuneo,CRIMET,crimes reported by the police forces to the ju...,CULPINJU,culpable injuries,YRDUR,during the reference year,2023,2023,77.9,,


In [4]:
# Report the dataset dimensions as a readable sentence.
# (A bare `df.shape` before this line was never displayed, because a cell only
# shows its last expression.)
f"{df.shape[0]} Rows by {df.shape[1]} Columns"

'25145 Rows by 13 Columns'

In [5]:
# Column dtypes and non-null counts; useful for spotting columns with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25145 entries, 0 to 25144
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   ITTER107                   25145 non-null  object 
 1   Territory                  25145 non-null  object 
 2   TIPO_DATO35                25145 non-null  object 
 3   Data type                  25145 non-null  object 
 4   REATI_PS                   25145 non-null  object 
 5   Type of crime              25145 non-null  object 
 6   RIF_TIME                   25145 non-null  object 
 7   Reference period of crime  25145 non-null  object 
 8   TIME                       25145 non-null  int64  
 9   Select time                25145 non-null  int64  
 10  Value                      25108 non-null  float64
 11  Flag Codes                 37 non-null     float64
 12  Flags                      37 non-null     object 
dtypes: float64(2), int64(2), object(9)
memory usag

### 2. Cleaning up the Data!

In [6]:
# Look at a few rows again before deciding which columns to keep.
df.head(3)

Unnamed: 0,ITTER107,Territory,TIPO_DATO35,Data type,REATI_PS,Type of crime,RIF_TIME,Reference period of crime,TIME,Select time,Value,Flag Codes,Flags
0,ITC16,Cuneo,CRIMET,crimes reported by the police forces to the ju...,CULPINJU,culpable injuries,YRDUR,during the reference year,2019,2019,82.5,,
1,ITC16,Cuneo,CRIMET,crimes reported by the police forces to the ju...,CULPINJU,culpable injuries,YRDUR,during the reference year,2020,2020,60.4,,
2,ITC16,Cuneo,CRIMET,crimes reported by the police forces to the ju...,CULPINJU,culpable injuries,YRDUR,during the reference year,2021,2021,70.1,,


In [7]:
# Discard the coded / descriptive columns that are not needed for the analysis,
# leaving Territory, Type of crime, TIME and Value.
# The result is reassigned instead of using inplace=True, and the label
# "TIPO_DATO35", which was listed twice, now appears once.
df = df.drop(
    columns=[
        "ITTER107",
        "TIPO_DATO35",
        "Data type",
        "REATI_PS",
        "RIF_TIME",
        "Reference period of crime",
        "Select time",
        "Flag Codes",
        "Flags",
    ]
)

In [8]:
# Give the TIME column a friendlier name; reassign rather than mutate in place.
df = df.rename(columns={"TIME": "Year"})

In [9]:
# Check the frame after dropping columns and renaming TIME to Year.
df.head(20)

Unnamed: 0,Territory,Type of crime,Year,Value
0,Cuneo,culpable injuries,2019,82.5
1,Cuneo,culpable injuries,2020,60.4
2,Cuneo,culpable injuries,2021,70.1
3,Cuneo,culpable injuries,2022,75.3
4,Cuneo,culpable injuries,2023,77.9
5,Cuneo,kidnappings,2019,0.2
6,Cuneo,kidnappings,2020,1.7
7,Cuneo,kidnappings,2021,0.2
8,Cuneo,kidnappings,2022,0.2
9,Cuneo,kidnappings,2023,1.0


In [10]:
# Rename the measurement column; reassign rather than mutate in place.
# NOTE(review): the displayed values are decimals (e.g. 82.5, 0.2), which
# suggests rates rather than raw counts — confirm that "Total" is an accurate label.
df = df.rename(columns={"Value": "Total"})

In [11]:
# Display the cleaned frame (pandas abbreviates long frames to the first and last rows).
df

Unnamed: 0,Territory,Type of crime,Year,Total
0,Cuneo,culpable injuries,2019,82.5
1,Cuneo,culpable injuries,2020,60.4
2,Cuneo,culpable injuries,2021,70.1
3,Cuneo,culpable injuries,2022,75.3
4,Cuneo,culpable injuries,2023,77.9
...,...,...,...,...
25140,Oristano,burglary,2019,86.7
25141,Oristano,burglary,2020,82.8
25142,Oristano,burglary,2021,52.2
25143,Oristano,burglary,2022,68.5


In [12]:
# List the distinct territory names present in the data.
df["Territory"].unique()

array(['Cuneo', 'Asti', 'Alessandria', "Valle d'Aosta / Vallée d'Aoste",
       'Imperia', 'Savona', 'Genova', 'Ragusa', 'Trento', 'Rieti',
       'Napoli', 'Trieste', 'Lodi', 'Cosenza', 'Fermo', 'Foggia',
       'Treviso', 'Venezia', 'Catania', 'Matera', 'Perugia', 'Pistoia',
       'Lucca', 'Forlì-Cesena', "L'Aquila", 'Modena', 'Pavia', 'Cagliari',
       'Isernia', 'Bologna', 'Firenze', 'Monza e della Brianza',
       'Crotone', 'Bari', 'Siracusa', 'Frosinone', 'Pescara', 'Roma',
       'Prato', 'Terni', 'Agrigento', 'Ascoli Piceno', 'Milano',
       'Macerata', 'Como', 'Ancona', 'Vicenza', 'Caserta', 'Viterbo',
       'Verbano-Cusio-Ossola', 'Teramo', 'Biella', 'Latina', 'Vercelli',
       'Padova', 'Gorizia', 'Ferrara', 'Udine', 'Bolzano / Bozen',
       'Rimini', 'Pisa', 'Catanzaro', 'Enna', 'Siena', 'Nuoro', 'Taranto',
       'Salerno', 'Vibo Valentia', 'Belluno', "Reggio nell'Emilia",
       'Brindisi', 'Barletta-Andria-Trani', 'Benevento', 'Piacenza',
       'Massa-Carrara', '

## Web Scraping to extract Italian regions and cities

In [13]:
# Wikipedia page "Comuni d'Italia per popolazione"; its table is read below
# to obtain the region of each city.
url="https://it.wikipedia.org/wiki/Comuni_d%27Italia_per_popolazione"

In [14]:
# Read the municipalities table from the Wikipedia page.
# `match` restricts the result to tables containing the "Comune" header, so the
# selection no longer depends only on the table being the first one on the page.
italian_cities = pd.read_html(url, match="Comune")[0]
italian_cities

Unnamed: 0,N°,Comune,Regione,Provincia / Città metropolitana,Abitanti
0,1,Roma,Lazio,Roma,2 754 719
1,2,Milano,Lombardia,Milano,1 371 850
2,3,Napoli,Campania,Napoli,911 697
3,4,Torino,Piemonte,Torino,846 926
4,5,Palermo,Sicilia,Palermo,628 894
...,...,...,...,...,...
131,132,Civitavecchia,Lazio,Roma,51 750
132,133,Teramo,Abruzzo,Teramo,51 526
133,134,Rho,Lombardia,Milano,50 847
134,135,Acireale,Sicilia,Catania,50 590


In [15]:
# Translate the Italian column headers used in the merge to English;
# reassign rather than mutate in place.
italian_cities = italian_cities.rename(columns={"Comune": "City", "Regione": "Region"})
italian_cities

Unnamed: 0,N°,City,Region,Provincia / Città metropolitana,Abitanti
0,1,Roma,Lazio,Roma,2 754 719
1,2,Milano,Lombardia,Milano,1 371 850
2,3,Napoli,Campania,Napoli,911 697
3,4,Torino,Piemonte,Torino,846 926
4,5,Palermo,Sicilia,Palermo,628 894
...,...,...,...,...,...
131,132,Civitavecchia,Lazio,Roma,51 750
132,133,Teramo,Abruzzo,Teramo,51 526
133,134,Rho,Lombardia,Milano,50 847
134,135,Acireale,Sicilia,Catania,50 590


In [16]:
# Merging Tables
#
# Attach a Region to every crime row by matching the crime table's Territory
# against the City names of the Wikipedia table.
# - how="inner" keeps only territories whose name matches a city exactly; in the
#   recorded outputs the row count goes from 25145 to 17155, so unmatched
#   territories are dropped silently.
# - validate="many_to_one" makes pandas raise if a city name appears more than
#   once in the lookup table, which would otherwise duplicate crime rows.

merge_df = pd.merge(
    left=df,
    right=italian_cities[["Region", "City"]],
    left_on="Territory",
    right_on="City",
    how="inner",
    validate="many_to_one",
)
merge_df

Unnamed: 0,Territory,Type of crime,Year,Total,Region,City
0,Cuneo,culpable injuries,2019,82.5,Piemonte,Cuneo
1,Cuneo,culpable injuries,2020,60.4,Piemonte,Cuneo
2,Cuneo,culpable injuries,2021,70.1,Piemonte,Cuneo
3,Cuneo,culpable injuries,2022,75.3,Piemonte,Cuneo
4,Cuneo,culpable injuries,2023,77.9,Piemonte,Cuneo
...,...,...,...,...,...,...
17150,Cremona,shoplifting,2019,94.9,Lombardia,Cremona
17151,Cremona,shoplifting,2020,66.1,Lombardia,Cremona
17152,Cremona,shoplifting,2021,84.1,Lombardia,Cremona
17153,Cremona,shoplifting,2022,111.4,Lombardia,Cremona


In [17]:
# Count missing values per column after the merge.
merge_df.isna().sum()

Territory         0
Type of crime     0
Year              0
Total            37
Region            0
City              0
dtype: int64

In [18]:
# Inspect the rows whose Total is missing.
merge_df[merge_df["Total"].isna()]

Unnamed: 0,Territory,Type of crime,Year,Total,Region,City
1655,Napoli,manslaughter,2019,,Campania,Napoli
1687,Napoli,homicides for theft or robbery,2021,,Campania,Napoli
1796,Napoli,mass murder,2020,,Campania,Napoli
1799,Napoli,mass murder,2023,,Campania,Napoli
1873,Napoli,corruption of a minor,2022,,Campania,Napoli
6345,Roma,mass murder,2019,,Lazio,Roma
6348,Roma,mass murder,2022,,Lazio,Roma
6390,Roma,smuggling,2019,,Lazio,Roma
6391,Roma,smuggling,2020,,Lazio,Roma
6392,Roma,smuggling,2021,,Lazio,Roma


In [19]:
# Replace missing Total values with 0.
# NOTE(review): the raw file has 37 missing values and 37 rows with a non-null
# Flags entry — confirm what the flag means before treating "missing" as zero.
merge_df["Total"]= merge_df["Total"].fillna(0)

In [20]:
# Display the merged frame again after filling the missing totals.
merge_df

Unnamed: 0,Territory,Type of crime,Year,Total,Region,City
0,Cuneo,culpable injuries,2019,82.5,Piemonte,Cuneo
1,Cuneo,culpable injuries,2020,60.4,Piemonte,Cuneo
2,Cuneo,culpable injuries,2021,70.1,Piemonte,Cuneo
3,Cuneo,culpable injuries,2022,75.3,Piemonte,Cuneo
4,Cuneo,culpable injuries,2023,77.9,Piemonte,Cuneo
...,...,...,...,...,...,...
17150,Cremona,shoplifting,2019,94.9,Lombardia,Cremona
17151,Cremona,shoplifting,2020,66.1,Lombardia,Cremona
17152,Cremona,shoplifting,2021,84.1,Lombardia,Cremona
17153,Cremona,shoplifting,2022,111.4,Lombardia,Cremona


In [21]:
# Confirm the dtypes and that no column has nulls left.
merge_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17155 entries, 0 to 17154
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Territory      17155 non-null  object 
 1   Type of crime  17155 non-null  object 
 2   Year           17155 non-null  int64  
 3   Total          17155 non-null  float64
 4   Region         17155 non-null  object 
 5   City           17155 non-null  object 
dtypes: float64(1), int64(1), object(4)
memory usage: 804.3+ KB


In [22]:
# Keep the analysis columns in reading order (Territory is redundant with City
# after the merge). .copy() makes the result an independent frame, so later
# edits to italian_crimes never act on a slice of merge_df.
italian_crimes = merge_df[["Year", "Region", "City", "Type of crime", "Total"]].copy()
italian_crimes

Unnamed: 0,Year,Region,City,Type of crime,Total
0,2019,Piemonte,Cuneo,culpable injuries,82.5
1,2020,Piemonte,Cuneo,culpable injuries,60.4
2,2021,Piemonte,Cuneo,culpable injuries,70.1
3,2022,Piemonte,Cuneo,culpable injuries,75.3
4,2023,Piemonte,Cuneo,culpable injuries,77.9
...,...,...,...,...,...
17150,2019,Lombardia,Cremona,shoplifting,94.9
17151,2020,Lombardia,Cremona,shoplifting,66.1
17152,2021,Lombardia,Cremona,shoplifting,84.1
17153,2022,Lombardia,Cremona,shoplifting,111.4


In [23]:
# Count the non-null Total entries for each city, then print the whole table
# (to_string renders every row instead of an abbreviated view).
grouped_data = italian_crimes.groupby("City", as_index=False)["Total"].count()
print(grouped_data.to_string())

             City  Total
0       Agrigento    235
1     Alessandria    235
2          Ancona    235
3          Arezzo    235
4            Asti    235
5        Avellino    235
6            Bari    235
7       Benevento    235
8         Bergamo    235
9         Brescia    235
10       Brindisi    235
11       Cagliari    235
12  Caltanissetta    235
13        Caserta    235
14        Catania    235
15      Catanzaro    235
16           Como    235
17        Cosenza    235
18        Cremona    235
19        Crotone    235
20          Cuneo    235
21        Ferrara    235
22        Firenze    235
23         Foggia    235
24         Genova    235
25       Grosseto    235
26       L'Aquila    235
27      La Spezia    235
28         Latina    235
29          Lecce    235
30        Livorno    235
31          Lucca    235
32         Matera    235
33        Messina    235
34         Milano    235
35         Modena    235
36         Napoli    235
37         Novara    235
38         Padova    235


In [24]:
# Check for Duplicates
# Show every row that exactly repeats an earlier row; an empty result means none.

italian_crimes.loc[italian_crimes.duplicated()]

Unnamed: 0,Year,Region,City,Type of crime,Total


In [None]:
# Data looks clean and ready for analysis

In [59]:
# Exporting Data
# Write the cleaned table to a CSV file, without the index column.
# NOTE(review): the file name mentions only 2023 while the notebook title says the
# data covers 2019 - 2023 — consider a name that reflects the full period.

italian_crimes.to_csv("Italian Crimes 2023.csv",index=False)