# The Battle of the Neighbourhoods - Rome

In this notebook we collect the neighbourhoods of Rome from Wikipedia web pages. <br>
After cleaning, the neighbourhood data is enriched with geographical coordinates.

## Importing libraries

In [1]:
!pip install beautifulsoup4
!pip install lxml
from io import StringIO # wrap HTML text as a file-like object for pandas.read_html

import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner

from bs4 import BeautifulSoup # Library for scraping webpage
from IPython.display import display_html # Library for displaying HTML

#!pip install geopy
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from geopy.extra.rate_limiter import RateLimiter # ratelimiter for stopping if it takes to long to get the geocodes

# Library for saving en reading data from the project
from project_lib import Project

print('Importing ready!')

Importing ready!


## Retrieve neighbourhoods of Rome from Wikipedia webpage

### Get the Quarters of Rome

In [2]:
# Download the English Wikipedia page that lists the quarters of Rome.
# The timeout keeps the cell from hanging forever, and raise_for_status() stops
# the notebook on an HTTP error instead of silently parsing an error page.
response = requests.get('https://en.wikipedia.org/wiki/Quarters_of_Rome', timeout=30)
response.raise_for_status()
source = response.text
# Parse the downloaded HTML
soup = BeautifulSoup(source,'lxml')
# Check title of webpage
print(soup.title)
# Collect the text of every list item inside the 'div-col' divs as
# ['Rome', <quarter label>] pairs. find_all is the current BeautifulSoup name
# for the deprecated findAll alias.
# NOTE: this name shadows the builtin `list`; it is kept because the next cell
# reads it to build the dataframe.
list = [
    ['Rome', sub_item.text]
    for item in soup.find_all('div', {'class': 'div-col'})
    for sub_item in item.find_all('li')
]
list

<title>Quarters of Rome - Wikipedia</title>


[['Rome', 'Q. I Flaminio'],
 ['Rome', 'Q. II Parioli'],
 ['Rome', 'Q. III Pinciano'],
 ['Rome', 'Q. IV Salario'],
 ['Rome', 'Q. V Nomentano'],
 ['Rome', 'Q. VI Tiburtino'],
 ['Rome', 'Q. VII Prenestino-Labicano'],
 ['Rome', 'Q. VIII Tuscolano'],
 ['Rome', 'Q. IX Appio-Latino'],
 ['Rome', 'Q. X Ostiense'],
 ['Rome', 'Q. XI Portuense'],
 ['Rome', 'Q. XII Gianicolense'],
 ['Rome', 'Q. XIII Aurelio'],
 ['Rome', 'Q. XIV Trionfale'],
 ['Rome', 'Q. XV Della Vittoria'],
 ['Rome', 'Q. XVI Monte Sacro'],
 ['Rome', 'Q. XVII Trieste'],
 ['Rome', 'Q. XVIII Tor di Quinto'],
 ['Rome', 'Q. XIX Prenestino-Centocelle'],
 ['Rome', 'Q. XX Ardeatino'],
 ['Rome', 'Q. XXI Pietralata'],
 ['Rome', 'Q. XXII Collatino'],
 ['Rome', 'Q. XXIII Alessandrino'],
 ['Rome', 'Q. XXIV Don Bosco'],
 ['Rome', 'Q. XXV Appio Claudio'],
 ['Rome', 'Q. XXVI Appio-Pignatelli'],
 ['Rome', 'Q. XXVII Primavalle'],
 ['Rome', 'Q. XXVIII Monte Sacro Alto'],
 ['Rome', 'Q. XXIX Ponte Mammolo'],
 ['Rome', 'Q. XXX San Basilio'],
 ['Rome', 

In [3]:
# Build a dataframe with one row per quarter of Rome from the scraped
# [city, quarter label] pairs
quarter_columns = ['City', 'Quarter']
df_quarters = pd.DataFrame(data=list, columns=quarter_columns)
df_quarters

Unnamed: 0,City,Quarter
0,Rome,Q. I Flaminio
1,Rome,Q. II Parioli
2,Rome,Q. III Pinciano
3,Rome,Q. IV Salario
4,Rome,Q. V Nomentano
5,Rome,Q. VI Tiburtino
6,Rome,Q. VII Prenestino-Labicano
7,Rome,Q. VIII Tuscolano
8,Rome,Q. IX Appio-Latino
9,Rome,Q. X Ostiense


In [4]:
# Check the number of rows and columns of the quarters dataframe
df_quarters.shape

(35, 2)

### Cleaning and preparing the quarters of Rome

In [5]:
# Break each 'Quarter' label (e.g. 'Q. I Flaminio') on whitespace into at most
# three pieces: the type prefix, the Roman numeral and the neighbourhood name
quarter_parts = df_quarters['Quarter'].str.split(n=2, expand=True)
df_quarters['Type'] = quarter_parts[0]
df_quarters['Number'] = quarter_parts[1]
df_quarters['Neighbourhood'] = quarter_parts[2]
df_quarters

Unnamed: 0,City,Quarter,Type,Number,Neighbourhood
0,Rome,Q. I Flaminio,Q.,I,Flaminio
1,Rome,Q. II Parioli,Q.,II,Parioli
2,Rome,Q. III Pinciano,Q.,III,Pinciano
3,Rome,Q. IV Salario,Q.,IV,Salario
4,Rome,Q. V Nomentano,Q.,V,Nomentano
5,Rome,Q. VI Tiburtino,Q.,VI,Tiburtino
6,Rome,Q. VII Prenestino-Labicano,Q.,VII,Prenestino-Labicano
7,Rome,Q. VIII Tuscolano,Q.,VIII,Tuscolano
8,Rome,Q. IX Appio-Latino,Q.,IX,Appio-Latino
9,Rome,Q. X Ostiense,Q.,X,Ostiense


In [6]:
# Remove the combined 'Quarter' column now that it has been split into separate columns.
# The column is dropped by name and a missing column is ignored, so re-running the
# cell is harmless; a positional drop would remove a different column on each run.
df_quarters = df_quarters.drop(columns=['Quarter'], errors='ignore')
# Remove the point in the column Type ('Q.' -> 'Q').
# regex=False makes '.' a literal character on every pandas version; interpreted
# as a regular expression it would match any character.
df_quarters['Type'] = df_quarters['Type'].str.replace('.', '', regex=False)
df_quarters

Unnamed: 0,City,Type,Number,Neighbourhood
0,Rome,Q,I,Flaminio
1,Rome,Q,II,Parioli
2,Rome,Q,III,Pinciano
3,Rome,Q,IV,Salario
4,Rome,Q,V,Nomentano
5,Rome,Q,VI,Tiburtino
6,Rome,Q,VII,Prenestino-Labicano
7,Rome,Q,VIII,Tuscolano
8,Rome,Q,IX,Appio-Latino
9,Rome,Q,X,Ostiense


In [7]:
# Check the number of rows and columns after cleaning the quarters dataframe
df_quarters.shape

(35, 4)

### Get the Rioni of Rome

In [8]:
# Download the Italian Wikipedia page that lists the rioni of Rome.
# The timeout keeps the cell from hanging forever, and raise_for_status() stops
# the notebook on an HTTP error instead of silently parsing an error page.
response = requests.get('https://it.wikipedia.org/wiki/Rioni_di_Roma', timeout=30)
response.raise_for_status()
source = response.text
# Parse the downloaded HTML
soup = BeautifulSoup(source,'lxml')
# Check title of webpage
print(soup.title)
# Get the sortable wikitable(s) from the page as one HTML string.
# find_all is the current BeautifulSoup name for the deprecated findAll alias.
html_table = str(soup.find_all('table',{'class':'wikitable sortable'}))
# Display table
display_html(html_table,raw=True)

<title>Rioni di Roma - Wikipedia</title>


Stemma,Numero,Nome,Popolazione[4],Superficie,Densità,Circoscrizione,Mappa
,R. I,Monti,13 028,"1,6508 km²","7891,93 ab./km²",Municipio I,
,R. II,Trevi,2 327,"0,5503 km²","4228,6 ab./km²",Municipio I,
,R. III,Colonna,2 111,"0,2689 km²","7850,5 ab./km²",Municipio I,
,R. IV,Campo Marzio,5 860,"0,8817 km²","6646,25 ab./km²",Municipio I,
,R. V,Ponte,3 596,"0,3189 km²","11276,26 ab./km²",Municipio I,
,R. VI,Parione,2 572,"0,1938 km²","13271,41 ab./km²",Municipio I,
,R. VII,Regola,3 238,"0,3189 km²","10153,65 ab./km²",Municipio I,
,R. VIII,Sant'Eustachio,1 962,"0,1688 km²","11623,22 ab./km²",Municipio I,
,R. IX,Pigna,10 737,"0,2063 km²","52045,56 ab./km²",Municipio I,
,R. X,Campitelli,552,"0,5990 km²","921,54 ab./km²",Municipio I,


In [9]:
# Parse the HTML string into a list of dataframes (one per table found).
# The markup is wrapped in StringIO because passing literal HTML text to
# pandas.read_html is deprecated; a file-like object is accepted on all versions.
# The result gets its own name so the builtin name `list` is not rebound here.
rioni_tables = pd.read_html(StringIO(html_table))
# Create a dataframe from the first parsed table
df_rioni = rioni_tables[0]
df_rioni


Unnamed: 0,Stemma,Numero,Nome,Popolazione[4],Superficie,Densità,Circoscrizione,Mappa
0,,R. I,Monti,13 028,"1,6508 km²","7891,93 ab./km²",Municipio I,
1,,R. II,Trevi,2 327,"0,5503 km²","4228,6 ab./km²",Municipio I,
2,,R. III,Colonna,2 111,"0,2689 km²","7850,5 ab./km²",Municipio I,
3,,R. IV,Campo Marzio,5 860,"0,8817 km²","6646,25 ab./km²",Municipio I,
4,,R. V,Ponte,3 596,"0,3189 km²","11276,26 ab./km²",Municipio I,
5,,R. VI,Parione,2 572,"0,1938 km²","13271,41 ab./km²",Municipio I,
6,,R. VII,Regola,3 238,"0,3189 km²","10153,65 ab./km²",Municipio I,
7,,R. VIII,Sant'Eustachio,1 962,"0,1688 km²","11623,22 ab./km²",Municipio I,
8,,R. IX,Pigna,10 737,"0,2063 km²","52045,56 ab./km²",Municipio I,
9,,R. X,Campitelli,552,"0,5990 km²","921,54 ab./km²",Municipio I,


### Cleaning and preparing the rioni of Rome

In [10]:
# Keep only the columns we need. They are selected by name, so the cell does not
# depend on the position of the columns in the Wikipedia table and gives the same
# result when it is run again. Rows that lack a number or a name are dropped.
# The final copy() gives an independent frame for the column assignments that follow.
df_rioni = df_rioni[['Numero', 'Nome']].dropna(axis=0).copy()
df_rioni

Unnamed: 0,Numero,Nome
0,R. I,Monti
1,R. II,Trevi
2,R. III,Colonna
3,R. IV,Campo Marzio
4,R. V,Ponte
5,R. VI,Parione
6,R. VII,Regola
7,R. VIII,Sant'Eustachio
8,R. IX,Pigna
9,R. X,Campitelli


In [11]:
# Break each 'Numero' label (e.g. 'R. I') at the first run of whitespace into
# the type prefix and the Roman numeral
numero_parts = df_rioni['Numero'].str.split(n=1, expand=True)
df_rioni['Type'] = numero_parts[0]
df_rioni['Number'] = numero_parts[1]
df_rioni

Unnamed: 0,Numero,Nome,Type,Number
0,R. I,Monti,R.,I
1,R. II,Trevi,R.,II
2,R. III,Colonna,R.,III
3,R. IV,Campo Marzio,R.,IV
4,R. V,Ponte,R.,V
5,R. VI,Parione,R.,VI
6,R. VII,Regola,R.,VII
7,R. VIII,Sant'Eustachio,R.,VIII
8,R. IX,Pigna,R.,IX
9,R. X,Campitelli,R.,X


In [12]:
# Rename columns (Italian to English)
df_rioni = df_rioni.rename(columns={'Nome': 'Neighbourhood'})
# Remove the point in the column Type ('R.' -> 'R').
# regex=False makes '.' a literal character on every pandas version; interpreted
# as a regular expression it would match any character.
df_rioni['Type'] = df_rioni['Type'].str.replace('.', '', regex=False)
df_rioni

Unnamed: 0,Numero,Neighbourhood,Type,Number
0,R. I,Monti,R,I
1,R. II,Trevi,R,II
2,R. III,Colonna,R,III
3,R. IV,Campo Marzio,R,IV
4,R. V,Ponte,R,V
5,R. VI,Parione,R,VI
6,R. VII,Regola,R,VII
7,R. VIII,Sant'Eustachio,R,VIII
8,R. IX,Pigna,R,IX
9,R. X,Campitelli,R,X


In [13]:
# Every rione belongs to the same city
df_rioni['City'] = 'Rome'
# Keep only the four columns shared with the quarters dataframe, in the same order
rioni_column_order = ['City', 'Type', 'Number', 'Neighbourhood']
df_rioni = df_rioni.loc[:, rioni_column_order]
df_rioni

Unnamed: 0,City,Type,Number,Neighbourhood
0,Rome,R,I,Monti
1,Rome,R,II,Trevi
2,Rome,R,III,Colonna
3,Rome,R,IV,Campo Marzio
4,Rome,R,V,Ponte
5,Rome,R,VI,Parione
6,Rome,R,VII,Regola
7,Rome,R,VIII,Sant'Eustachio
8,Rome,R,IX,Pigna
9,Rome,R,X,Campitelli


In [14]:
# Stack the rioni on top of the quarters in one dataframe with a fresh index
neighbourhood_frames = [df_rioni, df_quarters]
df1 = pd.concat(neighbourhood_frames, ignore_index=True)
df1

Unnamed: 0,City,Type,Number,Neighbourhood
0,Rome,R,I,Monti
1,Rome,R,II,Trevi
2,Rome,R,III,Colonna
3,Rome,R,IV,Campo Marzio
4,Rome,R,V,Ponte
5,Rome,R,VI,Parione
6,Rome,R,VII,Regola
7,Rome,R,VIII,Sant'Eustachio
8,Rome,R,IX,Pigna
9,Rome,R,X,Campitelli


In [15]:
# Check the number of rows and columns of the combined dataframe
df1.shape

(57, 4)

## Collecting the geographical coordinates for the neighbourhoods of Rome

In [16]:
# Build the text that is sent to the geocoder: '<neighbourhood>, <city>'
# NOTE(review): bare names can be ambiguous for the geocoder. In the saved output
# 'Colonna, Rome' and 'Ponte, Rome' resolve to places outside the city
# (Colonna and Vicovaro) - consider a more specific query and verify the results.
neighbourhood_prefix = df1["Neighbourhood"] + ', '
df1["Address"] = neighbourhood_prefix + df1["City"]
df1

Unnamed: 0,City,Type,Number,Neighbourhood,Address
0,Rome,R,I,Monti,"Monti, Rome"
1,Rome,R,II,Trevi,"Trevi, Rome"
2,Rome,R,III,Colonna,"Colonna, Rome"
3,Rome,R,IV,Campo Marzio,"Campo Marzio, Rome"
4,Rome,R,V,Ponte,"Ponte, Rome"
5,Rome,R,VI,Parione,"Parione, Rome"
6,Rome,R,VII,Regola,"Regola, Rome"
7,Rome,R,VIII,Sant'Eustachio,"Sant'Eustachio, Rome"
8,Rome,R,IX,Pigna,"Pigna, Rome"
9,Rome,R,X,Campitelli,"Campitelli, Rome"


In [17]:
# Geocode one address first, to check that the geolocator works
address = 'Lido di Castel Fusano, Rome'

geolocator = Nominatim(user_agent="neighbourhoud_explorer")

location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on location.latitude
if location is None:
    raise ValueError('No geocoding result for {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
# Name the address that was actually looked up in the message
print('The geographical coordinates of {} are {}, {}.'.format(address, latitude, longitude))

The geograpical coordinate of Rome are 41.7181852, 12.3506004.


In [18]:
# 1 - wrap the geocoder call in a rate limiter with a minimum delay of one
#     second between geocoding calls
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

In [19]:
# 2 - work on a copy of df1 and store the geocoding result of every address
#     in a new 'location' column
df2 = df1.copy()
geocoded_locations = df2['Address'].apply(geocode)
df2['location'] = geocoded_locations

In [20]:
# 3 - turn every geocoding result into a plain tuple taken from its `point`
#     attribute; rows without a result get None
def _point_or_none(loc):
    """Return loc.point as a tuple, or None when loc is falsy."""
    if not loc:
        return None
    return tuple(loc.point)

df2['point'] = df2['location'].apply(_point_or_none)

In [21]:
# Count and show the neighbourhoods for which no location was found
missing_location = df2["location"].isna()
print(df2[missing_location].count())
df2[missing_location]

City             1
Type             1
Number           1
Neighbourhood    1
Address          1
location         0
point            0
dtype: int64


Unnamed: 0,City,Type,Number,Neighbourhood,Address,location,point
49,Rome,Q,XXVIII,Monte Sacro Alto,"Monte Sacro Alto, Rome",,


In [22]:
# Remove the rows without geographical coordinates and renumber the index.
# dropna is restricted to the coordinate columns so that a missing value in an
# unrelated column does not discard a row that was geocoded successfully.
df2 = df2.dropna(subset=['location', 'point']).reset_index(drop=True)
df2

Unnamed: 0,City,Type,Number,Neighbourhood,Address,location,point
0,Rome,R,I,Monti,"Monti, Rome","(Monti, Municipio Roma I, Roma, Roma Capitale,...","(41.8960807, 12.4902875, 0.0)"
1,Rome,R,II,Trevi,"Trevi, Rome","(Fontana di Trevi, Piazza di Trevi, Rione II T...","(41.9009778, 12.483284973816568, 0.0)"
2,Rome,R,III,Colonna,"Colonna, Rome","(Colonna, Roma Capitale, Lazio, Italia, (41.83...","(41.833718, 12.753184, 0.0)"
3,Rome,R,IV,Campo Marzio,"Campo Marzio, Rome","(Campo Marzio, Via di Monte D'Oro, Rione IV Ca...","(41.9046467, 12.477055153077067, 0.0)"
4,Rome,R,V,Ponte,"Ponte, Rome","(Ponte, Vicovaro-Prato delle Forme, Vicovaro, ...","(42.0405159, 12.8535379, 0.0)"
5,Rome,R,VI,Parione,"Parione, Rome","(Rione VI Parione, Municipio Roma I, Roma, Rom...","(41.897357650000004, 12.471102470129265, 0.0)"
6,Rome,R,VII,Regola,"Regola, Rome","(Rione VII Regola, Municipio Roma I, Roma, Rom...","(41.894375, 12.471030186272873, 0.0)"
7,Rome,R,VIII,Sant'Eustachio,"Sant'Eustachio, Rome","(Sant'Eustachio, 82, Piazza di Sant'Eustachio,...","(41.898244, 12.4753209, 0.0)"
8,Rome,R,IX,Pigna,"Pigna, Rome","(Rione IX Pigna, Municipio Roma I, Roma, Roma ...","(41.89711585, 12.479195768321528, 0.0)"
9,Rome,R,X,Campitelli,"Campitelli, Rome","(Rione X Campitelli, Municipio Roma I, Roma, R...","(41.89008455, 12.487415857658215, 0.0)"


In [23]:
# Check the number of rows and columns after removing rows without coordinates
df2.shape

(56, 7)

In [24]:
# 4 - expand the tuples in 'point' into three separate columns, keeping the
#     row index of df2 so the values line up
point_values = df2['point'].tolist()
coordinate_frame = pd.DataFrame(point_values, index=df2.index)
df2[['latitude', 'longitude', 'altitude']] = coordinate_frame
df2

Unnamed: 0,City,Type,Number,Neighbourhood,Address,location,point,latitude,longitude,altitude
0,Rome,R,I,Monti,"Monti, Rome","(Monti, Municipio Roma I, Roma, Roma Capitale,...","(41.8960807, 12.4902875, 0.0)",41.896081,12.490288,0.0
1,Rome,R,II,Trevi,"Trevi, Rome","(Fontana di Trevi, Piazza di Trevi, Rione II T...","(41.9009778, 12.483284973816568, 0.0)",41.900978,12.483285,0.0
2,Rome,R,III,Colonna,"Colonna, Rome","(Colonna, Roma Capitale, Lazio, Italia, (41.83...","(41.833718, 12.753184, 0.0)",41.833718,12.753184,0.0
3,Rome,R,IV,Campo Marzio,"Campo Marzio, Rome","(Campo Marzio, Via di Monte D'Oro, Rione IV Ca...","(41.9046467, 12.477055153077067, 0.0)",41.904647,12.477055,0.0
4,Rome,R,V,Ponte,"Ponte, Rome","(Ponte, Vicovaro-Prato delle Forme, Vicovaro, ...","(42.0405159, 12.8535379, 0.0)",42.040516,12.853538,0.0
5,Rome,R,VI,Parione,"Parione, Rome","(Rione VI Parione, Municipio Roma I, Roma, Rom...","(41.897357650000004, 12.471102470129265, 0.0)",41.897358,12.471102,0.0
6,Rome,R,VII,Regola,"Regola, Rome","(Rione VII Regola, Municipio Roma I, Roma, Rom...","(41.894375, 12.471030186272873, 0.0)",41.894375,12.47103,0.0
7,Rome,R,VIII,Sant'Eustachio,"Sant'Eustachio, Rome","(Sant'Eustachio, 82, Piazza di Sant'Eustachio,...","(41.898244, 12.4753209, 0.0)",41.898244,12.475321,0.0
8,Rome,R,IX,Pigna,"Pigna, Rome","(Rione IX Pigna, Municipio Roma I, Roma, Roma ...","(41.89711585, 12.479195768321528, 0.0)",41.897116,12.479196,0.0
9,Rome,R,X,Campitelli,"Campitelli, Rome","(Rione X Campitelli, Municipio Roma I, Roma, R...","(41.89008455, 12.487415857658215, 0.0)",41.890085,12.487416,0.0


In [25]:
# The code was removed by Watson Studio for sharing.

In [26]:
# Create an access to this project
# NOTE(review): `token` is not defined in any visible cell (the preceding cell was
# removed for sharing) and is passed for both arguments after None - confirm this
# is intended.
project = Project.access(None,token,token)

# Save the collected Neighbourhoods and geographical data in project data bucket
# (the dataframe is serialized to CSV text without the index column)
project.save_data(file_name="geo_rome.csv", data=df2.to_csv(index=False))

2021-02-12 18:28:22,573 - __PROJECT_LIB__ - ERROR - failed to initialize ibmos2spark integration
Traceback (most recent call last):
  File "/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/project_lib/storage/bcos.py", line 138, in _initialize_bcos2spark
    import ibmos2spark
ModuleNotFoundError: No module named 'ibmos2spark'


{'file_name': 'geo_rome.csv',
 'message': 'File saved to project storage.',
 'bucket_name': 'applieddatasciencecapstone-donotdelete-pr-qfypp7va5rbswh',
 'asset_id': 'b74594ae-0172-4bb2-a4aa-f0edd23aa375'}