In [1]:
import requests
import os
import geopandas as gpd
import pandas as pd

In [2]:
# Import of the neighborhood polygons.
# The location can be overridden with the QUARTIERI_GEOJSON environment variable so the
# notebook is not tied to a single machine; the original path remains the default.
quartieri_path = os.environ.get("QUARTIERI_GEOJSON", "C:/Users/edoar/combined_quartieri.geojson")
gdf_combined = gpd.read_file(quartieri_path)

# Dataset Milano

In [6]:
# List of geojson file URLs from the website
geojson_urls = [
    # Dog Parks
    "https://dati.comune.milano.it/dataset/7efe1ac1-7a5f-4e33-b7ab-c24438bc9fb1/resource/99011d87-d640-43a3-9f70-20e1825d7441/download/ds52_aree_fruizione_cani.geojson",
    # Pharmacies
    "https://dati.comune.milano.it/dataset/7e18f0d3-b7f1-49b7-969d-da2c04131dd6/resource/8cc1abb8-fe10-4f50-b7b4-e673250f34c6/download/ds501_farmacie-nel-comune-di-m_dr8f-cz2h_final.geojson",
    # Playgrounds
    "https://dati.comune.milano.it/dataset/47444653-1f40-4690-8fb3-66deae6647d4/resource/5e5b89a1-20bb-4c0c-9182-a55f16af4dfb/download/ds724_aree_gioco.geojson",
    # Sport Venues
    "https://dati.comune.milano.it/dataset/c613f251-6f66-4320-8cac-6ee08d8fd2ef/resource/6811f693-ee63-41ca-9ff7-ea0464f6d600/download/ds34_impianti_sportivi_final.geojson",
    # Public Schools
    "https://dati.comune.milano.it/dataset/97c5000b-3569-4f77-a289-35f5a667867a/resource/30c25888-e857-4b8c-bb49-cabaffe9fdc8/download/ds1305_elenco_scuole_statali_as2020_21_final_.geojson",
    # Private Schools
    "https://dati.comune.milano.it/dataset/d7159f7f-6d93-4c08-93dc-36eedc47d16c/resource/b051dfed-89db-446b-9342-636ffd6c21a1/download/ds1582_elenco_scuole_paritarie_2020-21.geojson",
    # Universities
    "https://dati.comune.milano.it/dataset/8c3f8110-aecd-48c0-b707-52785eb5bf46/resource/0dbaf2ad-c935-4413-b33e-2664e78b66d4/download/ds94_universita__final.geojson",
    # Coworking spaces
    "https://dati.comune.milano.it/dataset/34b330d2-e34a-4d98-a327-447342001ee3/resource/e8cd794c-a194-4df8-a880-175e02426e05/download/coworking_joined_f.geojson",
    # Libraries
    "https://dati.comune.milano.it/dataset/ee08abe0-aba1-44ab-b6ad-21fcd8a45100/resource/7c399aa3-1778-446f-88d1-79c21581edc8/download/ds1306-biblioteche.geojson",
    # Suburban railway
    "https://dati.comune.milano.it/dataset/802a1d1f-4203-44c9-b9f5-dd8aa1f6bc3c/resource/7a07abfb-6a0d-458d-9a91-e6eb5b1ba703/download/sistema_ferroviario_urbano_layer_0_stazioni__final.geojson",
    # Metro stops
    "https://dati.comune.milano.it/dataset/b7344a8f-0ef5-424b-a902-f7f06e32dd67/resource/dd6a770a-b321-44f0-b58c-9725d84409bb/download/tpl_metrofermate.geojson",
    # Bus stops
    "https://dati.comune.milano.it/dataset/ac494f5d-acd3-4fd3-8cfc-ed24f5c3d923/resource/7d21bd77-3ad1-4235-9a40-8a8cdfeb65a0/download/tpl_fermate.geojson",
]

# The directory to save the downloaded geojson files
output_directory = "geojson_files"

# Seconds to wait for the server before giving up on a request. Without a timeout,
# requests.get can block indefinitely and hang the whole notebook on a stalled connection.
download_timeout_s = 60

# Create the directory if it doesn't exist
os.makedirs(output_directory, exist_ok=True)

# Loop through each url and download the file
for url in geojson_urls:
    try:
        print(f"Downloading {url}...")
        response = requests.get(url, timeout=download_timeout_s)
        response.raise_for_status()  # raise on HTTP error status codes

        # Extract the file name from the URL (its last path segment)
        file_name = url.split("/")[-1]
        output_path = os.path.join(output_directory, file_name)

        # Save the GeoJSON file; written only after the request succeeded
        with open(output_path, "wb") as file:
            file.write(response.content)

        print(f"Saved {file_name} to {output_directory}")

    except requests.exceptions.RequestException as e:
        # Timeouts are RequestException subclasses, so they are reported here as well.
        # A failed download is reported and the loop moves on to the next URL.
        print(f"Failed to download {url}: {e}")


Downloading https://dati.comune.milano.it/dataset/7efe1ac1-7a5f-4e33-b7ab-c24438bc9fb1/resource/99011d87-d640-43a3-9f70-20e1825d7441/download/ds52_aree_fruizione_cani.geojson...
Saved ds52_aree_fruizione_cani.geojson to geojson_files
Downloading https://dati.comune.milano.it/dataset/7e18f0d3-b7f1-49b7-969d-da2c04131dd6/resource/8cc1abb8-fe10-4f50-b7b4-e673250f34c6/download/ds501_farmacie-nel-comune-di-m_dr8f-cz2h_final.geojson...
Saved ds501_farmacie-nel-comune-di-m_dr8f-cz2h_final.geojson to geojson_files
Downloading https://dati.comune.milano.it/dataset/47444653-1f40-4690-8fb3-66deae6647d4/resource/5e5b89a1-20bb-4c0c-9182-a55f16af4dfb/download/ds724_aree_gioco.geojson...
Saved ds724_aree_gioco.geojson to geojson_files
Downloading https://dati.comune.milano.it/dataset/c613f251-6f66-4320-8cac-6ee08d8fd2ef/resource/6811f693-ee63-41ca-9ff7-ea0464f6d600/download/ds34_impianti_sportivi_final.geojson...
Saved ds34_impianti_sportivi_final.geojson to geojson_files
Downloading https://dati.com

## Dog Parks

In [5]:
# Read the dog-park areas (multipolygons) from the downloaded file
dog_parks = gpd.read_file("geojson_files/ds52_aree_fruizione_cani.geojson")

# Reproject the parks onto the neighborhoods' CRS whenever the two layers disagree
if gdf_combined.crs != dog_parks.crs:
    dog_parks = dog_parks.to_crs(gdf_combined.crs)

# Left spatial join with predicate "within" (a park matches a neighborhood polygon only
# when it lies inside it), then keep only the parks that matched a neighborhood
PolyDogParks = (
    gpd.sjoin(dog_parks, gdf_combined, how="left", predicate="within")
    .loc[lambda joined: joined["Neighborhood"].notna()]
)

PolyDogParks

Unnamed: 0,id_area,municipio,area,località,obj_id,codice,descrizione_codice,data_ini,area_mq,perim_m,geometry,index_right,Neighborhood
0,1_043,1,043,via Ronzoni,132009,S327554,Area fruizione cani,2017-02-27,573.687432,165.347310,"MULTIPOLYGON (((9.17668 45.45472, 9.17651 45.4...",73.0,Ticinese
1,1_308,1,308,giardino Renata Tebaldi,25318,S327554,Area fruizione cani,2006-03-15,353.238550,80.318583,"MULTIPOLYGON (((9.16711 45.46935, 9.16712 45.4...",38.0,Magenta - San Vittore
2,1_213,1,213,giardino Malgeri Alfredo,176223,S327554,Area fruizione cani,2024-05-18,182.406688,54.439780,"MULTIPOLYGON (((9.1904 45.48045, 9.19052 45.48...",9.0,Brera
3,1_092,1,092,giardino Roberto Bazlen,114785,S327554,Area fruizione cani,2013-05-06,70.918852,41.837688,"MULTIPOLYGON (((9.19838 45.45439, 9.19843 45.4...",80.0,Vigentina
4,1_239,1,239,viale Beatrice d'Este,52831,S327554,Area fruizione cani,2011-04-15,914.706264,189.052021,"MULTIPOLYGON (((9.1887 45.452, 9.18872 45.4520...",80.0,Vigentina
...,...,...,...,...,...,...,...,...,...,...,...,...,...
418,9_036,9,036,giardino Wanda Osiris,25440,S327554,Area fruizione cani,2003-06-18,2016.530150,211.347870,"MULTIPOLYGON (((9.19559 45.50327, 9.19555 45.5...",37.0,Maciachini - Maggiolina
419,9_556,9,556,vie Catone - Maffucci,175468,S327554,Area fruizione cani,2024-01-08,467.273674,87.661082,"MULTIPOLYGON (((9.17171 45.49748, 9.17203 45.4...",8.0,Bovisa
420,9_159,9,159,"vie Bovisasca - Modignani - Assietta "" Pioppet...",122423,S327554,Area fruizione cani,2015-06-13,1158.441243,147.915796,"MULTIPOLYGON (((9.15614 45.51811, 9.15639 45.5...",7.0,Bovisasca
421,9_095,9,095,piazzale Lugano,25501,S327554,Area fruizione cani,2003-06-18,741.116750,108.191502,"MULTIPOLYGON (((9.16533 45.49802, 9.16533 45.4...",23.0,Farini


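### Although one dog park appears twice (one row per matched neighborhood), both rows have been kept in the dataframe: since the join uses `predicate="within"`, the park lies inside two different neighborhood polygons (which apparently overlap at that location), so technically both neighborhoods have that dog park in their geometries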

In [8]:
# Select every row whose geometry occurs more than once (keep=False flags all occurrences)
duplicates = PolyDogParks.loc[
    lambda parks: parks.duplicated(subset=["geometry"], keep=False)
]

duplicates

Unnamed: 0,id_area,municipio,area,località,obj_id,codice,descrizione_codice,data_ini,area_mq,perim_m,geometry,index_right,Neighborhood
165,5_227,5,227,via Manduria,25305,S327554,Area fruizione cani,2005-11-22,516.8207,115.037699,"MULTIPOLYGON (((9.18075 45.40212, 9.18069 45.4...",0.0,Parco delle Abbazie
165,5_227,5,227,via Manduria,25305,S327554,Area fruizione cani,2005-11-22,516.8207,115.037699,"MULTIPOLYGON (((9.18075 45.40212, 9.18069 45.4...",64.0,Ronchetto delle Rane


In [10]:
# Restrict the frame to the variables used downstream
dog_park_columns = [
    "località",
    "area_mq",
    "perim_m",
    "obj_id",
    "municipio",
    "geometry",
    "Neighborhood",
]
PolyDogParks = PolyDogParks[dog_park_columns]

### There are no missing values

In [16]:
# Collect every row that has a missing value in at least one column
nan_rows = PolyDogParks.loc[PolyDogParks.isna().any(axis="columns")]

nan_rows

Unnamed: 0,località,area_mq,perim_m,obj_id,municipio,geometry,Neighborhood


### Overall, the cleaned dataframe is of good quality

In [19]:
# Write the cleaned dog-park layer to disk as GeoJSON
PolyDogParks.to_file(filename="PolyDogParks.geojson", driver="GeoJSON")

## Pharmacies

In [21]:
# Read the pharmacies layer from the downloaded file
pharmacies = gpd.read_file("geojson_files/ds501_farmacie-nel-comune-di-m_dr8f-cz2h_final.geojson")

# Bring the pharmacies onto the neighborhoods' CRS when the two differ
if gdf_combined.crs != pharmacies.crs:
    pharmacies = pharmacies.to_crs(gdf_combined.crs)

# Left spatial join ("within"), then discard pharmacies that matched no neighborhood
PolyPharmacy = (
    gpd.sjoin(pharmacies, gdf_combined, how="left", predicate="within")
    .loc[lambda joined: joined["Neighborhood"].notna()]
)

# Show the joined result
PolyPharmacy

Unnamed: 0,CODICE_FARMACIA,DESCRIZIONE_FARMACIA,PARTITA_IVA,INDIRIZZO,COMUNE,FRAZIONE,CAP,PROVINCIA,CODICE_ISTAT,MUNICIPIO,ID_NIL,NIL,LONGITUDINE,LATITUDINE,Location,geometry,index_right,Neighborhood
0,MI1854,TOLSTOJ,09287610969,"VIA LEONE TOLSTOI, 17",MILANO,,20146,MI,15146,6,49,GIAMBELLINO,9.150814,45.451086,"(45.4510863058, 9.1508142972)",POINT (9.15081 45.45109),27.0,Giambellino
1,MI1018,A.F.M. N. 83,13195220150,"VIALE MONZA, 226",MILANO,,20128,MI,15146,2,16,GORLA - PRECOTTO,9.225081,45.513064,"(45.5130644867, 9.2250814797)",POINT (9.22508 45.51306),79.0,Viale Monza
2,MI1820,NOVARA SRL,08267990961,"VIA NOVARA,90",MILANO,,20153,MI,15146,7,60,STADIO - IPPODROMI,9.119702,45.472478,"(45.47247759, 9.11970245)",POINT (9.1197 45.47248),66.0,San Siro
3,MI0013,PONTESEVESO,01533800155,"VIA GIOVANNI SCHIAPARELLI, 4",MILANO,,20125,MI,15146,2,10,STAZIONE CENTRALE - PONTE SEVESO,9.205393,45.489574,"(45.4895744126, 9.2053929759)",POINT (9.20539 45.48957),14.0,Centrale
4,MI1979,SALGARI SRL,12094200966,"VIA ALBERTO CARONCINI, 6",MILANO,,20137,MI,15146,4,28,UMBRIA - MOLISE - CALVAIRATE,9.219724,45.449769,"(45.4497685868, 9.2197240187)",POINT (9.21972 45.44977),78.0,Umbria - Molise
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
418,MI2016,TRIPHARMA SAS DI MANELLI MARIA LUISA & C.,12968820964,"VIA GUGLIELMO SILVA, 39",MILANO,,20149,MI,15146,8,58,DE ANGELI - MONTE ROSA,9.147192,45.478255,"(45.47825482, 9.1471918901)",POINT (9.14719 45.47825),19.0,De Angeli - Monte Rosa
419,MI1830,OLMI SNC,08716030963,"VIA DELLE BETULLE, 10",MILANO,,20152,MI,15146,7,55,BAGGIO - Q.RE DEGLI OLMI - Q.RE VALSESIA,9.083453,45.453203,"(45.4532030878, 9.083452613)",POINT (9.08345 45.4532),3.0,Baggio
420,MI1666,FERRARINI SNC,04026870966,"PIAZZA CINQUE GIORNATE, 6",MILANO,,20129,MI,15146,1,4,GUASTALLA,9.207182,45.461711,"(45.4617107033, 9.2071820093)",POINT (9.20718 45.46171),31.0,Guastalla
421,MI0684,A.F.M. N. 28,13195220150,"VIA TEODOSIO, 74",MILANO,,20131,MI,15146,3,20,LORETO - CASORETTO - NOLO,9.230088,45.488214,"(45.4882144069, 9.2300875714)",POINT (9.23009 45.48821),36.0,Loreto


### There are no duplicates

In [24]:
# Select every row whose geometry occurs more than once (keep=False flags all occurrences)
duplicates = PolyPharmacy.loc[
    lambda shops: shops.duplicated(subset=["geometry"], keep=False)
]

# Show the repeated rows, if any
duplicates

Unnamed: 0,CODICE_FARMACIA,DESCRIZIONE_FARMACIA,PARTITA_IVA,INDIRIZZO,COMUNE,FRAZIONE,CAP,PROVINCIA,CODICE_ISTAT,MUNICIPIO,ID_NIL,NIL,LONGITUDINE,LATITUDINE,Location,geometry,index_right,Neighborhood


In [26]:
# Restrict the frame to the variables used downstream
pharmacy_columns = [
    "CODICE_FARMACIA",
    "DESCRIZIONE_FARMACIA",
    "MUNICIPIO",
    "INDIRIZZO",
    "geometry",
    "Neighborhood",
]
PolyPharmacy = PolyPharmacy[pharmacy_columns]


### There are no missing values

In [29]:
# Collect every row that has a missing value in at least one column
nan_rows = PolyPharmacy.loc[PolyPharmacy.isna().any(axis="columns")]

nan_rows

Unnamed: 0,CODICE_FARMACIA,DESCRIZIONE_FARMACIA,MUNICIPIO,INDIRIZZO,geometry,Neighborhood


In [31]:
# Write the cleaned pharmacies layer to disk as GeoJSON
PolyPharmacy.to_file(filename="PolyPharmacy.geojson", driver="GeoJSON")

## Playgrounds

In [34]:
# Read the playground areas from the downloaded file
playgrounds = gpd.read_file("geojson_files/ds724_aree_gioco.geojson")

# Bring the playgrounds onto the neighborhoods' CRS when the two differ
if gdf_combined.crs != playgrounds.crs:
    playgrounds = playgrounds.to_crs(gdf_combined.crs)

# Left spatial join ("within"), then discard playgrounds that matched no neighborhood
PolyPlaygrounds = (
    gpd.sjoin(playgrounds, gdf_combined, how="left", predicate="within")
    .loc[lambda joined: joined["Neighborhood"].notna()]
)

# Show the joined result
PolyPlaygrounds

Unnamed: 0,id_area,municipio,area,località,obj_id,codice,descrizione_codice,data_ini,area_mq,perim_m,geometry,index_right,Neighborhood
0,1_170,1,170,corso di Porta Vigentina n. 15,45165,S327552,Area Gioco,2007-10-09,543.961185,125.762038,"MULTIPOLYGON (((9.19641 45.45388, 9.19641 45.4...",80.0,Vigentina
1,1_232,1,232,rotonda della Besana,28387,S327552,Area Gioco,2003-08-29,95.929250,35.964448,"MULTIPOLYGON (((9.20546 45.45946, 9.20546 45.4...",31.0,Guastalla
2,1_172,1,172,via Crivelli n. 21,139581,S327552,Area Gioco,2018-09-03,223.818797,59.464232,"MULTIPOLYGON (((9.19548 45.45371, 9.19548 45.4...",80.0,Vigentina
3,1_172,1,172,via Crivelli n. 21,45169,S327552,Area Gioco,2008-05-21,644.966050,110.463586,"MULTIPOLYGON (((9.19538 45.45333, 9.19541 45.4...",80.0,Vigentina
4,1_092,1,092,giardino Roberto Bazlen,28223,S327552,Area Gioco,2007-05-01,797.496000,107.612642,"MULTIPOLYGON (((9.19704 45.45427, 9.19695 45.4...",80.0,Vigentina
...,...,...,...,...,...,...,...,...,...,...,...,...,...
978,9_359,9,359,via Palletta n. 6,45243,S327552,Area Gioco,NaT,3.125667,6.275213,"MULTIPOLYGON (((9.1918 45.51028, 9.1918 45.510...",43.0,Niguarda - Cà Granda
979,9_159,9,159,"vie Bovisasca - Modignani - Assietta "" Pioppet...",44849,S327552,Area Gioco,2003-06-18,514.141738,90.853619,"MULTIPOLYGON (((9.1563 45.51734, 9.15657 45.51...",7.0,Bovisasca
980,9_159,9,159,"vie Bovisasca - Modignani - Assietta "" Pioppet...",28436,S327552,Area Gioco,2003-06-18,150.691583,55.893729,"MULTIPOLYGON (((9.15261 45.52002, 9.15259 45.5...",7.0,Bovisasca
981,9_095,9,095,piazzale Lugano,28381,S327552,Area Gioco,2003-06-18,432.007550,79.638202,"MULTIPOLYGON (((9.16535 45.49788, 9.16535 45.4...",23.0,Farini


In [35]:
# Select every row whose geometry occurs more than once (keep=False flags all occurrences)
duplicates = PolyPlaygrounds.loc[
    lambda areas: areas.duplicated(subset=["geometry"], keep=False)
]

# Show the repeated rows, if any
duplicates

Unnamed: 0,id_area,municipio,area,località,obj_id,codice,descrizione_codice,data_ini,area_mq,perim_m,geometry,index_right,Neighborhood
355,5_227,5,227,via Manduria,43897,S327552,Area Gioco,2008-07-11,406.40085,79.29013,"MULTIPOLYGON (((9.18094 45.40278, 9.18118 45.4...",0.0,Parco delle Abbazie
355,5_227,5,227,via Manduria,43897,S327552,Area Gioco,2008-07-11,406.40085,79.29013,"MULTIPOLYGON (((9.18094 45.40278, 9.18118 45.4...",64.0,Ronchetto delle Rane


In [36]:
# Restrict the frame to the variables used downstream
playground_columns = [
    "località",
    "area_mq",
    "perim_m",
    "obj_id",
    "municipio",
    "geometry",
    "Neighborhood",
]
PolyPlaygrounds = PolyPlaygrounds[playground_columns]


### There are no missing values

In [41]:
# Collect every row that has a missing value in at least one column
nan_rows = PolyPlaygrounds.loc[PolyPlaygrounds.isna().any(axis="columns")]

nan_rows

Unnamed: 0,località,area_mq,perim_m,obj_id,municipio,geometry,Neighborhood


In [43]:
# Write the cleaned playgrounds layer to disk as GeoJSON
PolyPlaygrounds.to_file(filename="PolyPlaygrounds.geojson", driver="GeoJSON")

## Sport Venues

In [45]:
# Read the sport venues layer from the downloaded file
sport_venues = gpd.read_file("geojson_files/ds34_impianti_sportivi_final.geojson")

# Bring the venues onto the neighborhoods' CRS when the two differ
if gdf_combined.crs != sport_venues.crs:
    sport_venues = sport_venues.to_crs(gdf_combined.crs)

# Left spatial join ("within"), then discard venues that matched no neighborhood
PolySportVenues = (
    gpd.sjoin(sport_venues, gdf_combined, how="left", predicate="within")
    .loc[lambda joined: joined["Neighborhood"].notna()]
)

# Show the joined result
PolySportVenues

Unnamed: 0,FAX,Nome,Indirizzo,Zona,info,URL,TELEFONO1,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
0,,NON ATTRIBUITO,VIA USSI STEFANO 4,9,CENTRO SPORTIVO,,,13,GRECO - SEGNANO,"(45.5071015469858, 9.20835578748649)",9.208356,45.507102,POINT (9.20836 45.5071),30.0,Greco
1,,S.S. BAGGIO 2,VIA OLIVIERI ALESSIO 11,7,CALCIO A 11,,024599310,56,FORZE ARMATE,"(45.4655714935197, 9.11156409171595)",9.111564,45.465571,POINT (9.11156 45.46557),84.0,Forze Armate
2,0239210739,LIDO DI MILANO,PLE LOTTO LORENZO 15,8,MINI GOLF,www.milanosport.it,0239266100,66,QT 8,"(45.4805639255761, 9.14194021229098)",9.141940,45.480564,POINT (9.14194 45.48056),57.0,Qt 8
3,,COOP. AGRISPORT SEZIONE CALCIO,VIA DEL RICORDO 58,2,CENTRO SPORTIVO,,0227207742,17,ADRIANO,"(45.5093199390653, 9.23794027433961)",9.237940,45.509320,POINT (9.23794 45.50932),1.0,Adriano
4,,SOCIETA GINNASTICA MILANO 2000,VIA MARTIGNONI ADELE 1 A,2,CENTRO SPORTIVO,,02603288,12,MACIACHINI - MAGGIOLINA,"(45.492566239195, 9.19613835069693)",9.196138,45.492566,POINT (9.19614 45.49257),37.0,Maciachini - Maggiolina
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1036,,A.S.D. PICCOLA SCUOLA DI CIRCO,VIA CENISIO 2,8,ATTIVITA CIRCENSE,,,70,GHISOLFA,"(45.4876030737289, 9.1732839281514)",9.173284,45.487603,POINT (9.17328 45.4876),26.0,Ghisolfa
1037,,A.S.D. RUGBY MILANO,VIA LICATA 41,3,CALCIO A 11,,,18,CIMIANO - ROTTOLE - Q.RE FELTRE,"(45.4986815761724, 9.25111177885147)",9.251112,45.498682,POINT (9.25111 45.49868),50.0,Parco Lambro - Cimiano
1038,,A.S.D. RUGBY MILANO,VIA LICATA 41,3,RUGBY,,,18,CIMIANO - ROTTOLE - Q.RE FELTRE,"(45.4986815797783, 9.25111177630766)",9.251112,45.498682,POINT (9.25111 45.49868),50.0,Parco Lambro - Cimiano
1039,0248713719,AREA EX PALAZZETTO DELLO SPORT,VIA DEI PICCOLOMINI SNC,7,PARCHEGGIO,www.sansiro.net,0248713713,60,STADIO - IPPODROMI,"(45.4781768596385, 9.12081301575318)",9.120813,45.478177,POINT (9.12081 45.47818),66.0,San Siro


In [46]:
# Select rows that repeat the same ('info', 'geometry') pair, i.e. the same activity at
# the same point (keep=False flags every occurrence)
duplicates1 = PolySportVenues.loc[
    lambda venues: venues.duplicated(subset=["info", "geometry"], keep=False)
]

# Show the repeated rows ordered by longitude so identical points sit next to each other
duplicates1.sort_values(by="LONG_X_4326")

Unnamed: 0,FAX,Nome,Indirizzo,Zona,info,URL,TELEFONO1,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
816,,PISTA DI ALLENAMENTO DI TRENNO,VIA IPPODROMO 134,7,SPORT EQUESTRE,,,60,STADIO - IPPODROMI,"(45.4845996617231, 9.11348119612238)",9.113481,45.4846,POINT (9.11348 45.4846),66.0,San Siro
815,,PISTA ALLENAMENTO DELLA MAURA,VIA IPPODROMO 134,7,SPORT EQUESTRE,,,60,STADIO - IPPODROMI,"(45.4845996617231, 9.11348119612238)",9.113481,45.4846,POINT (9.11348 45.4846),66.0,San Siro
1030,,FIN FEDERAZIONE ITALIANA NUOTO LAMPUGNANO,VIA LAMPUGNANO 76,8,NUOTO,,023088390,65,Q.RE GALLARATESE - Q.RE SAN LEONARDO - LAMPUGNANO,"(45.4906906592273, 9.11513731383607)",9.115137,45.490691,POINT (9.11514 45.49069),25.0,Gallaratese
493,,FIN FEDERAZIONE ITALIANA NUOTO LAMPUGNANO,VIA LAMPUGNANO 76,8,NUOTO,,023088390,65,Q.RE GALLARATESE - Q.RE SAN LEONARDO - LAMPUGNANO,"(45.4906906592273, 9.11513731383607)",9.115137,45.490691,POINT (9.11514 45.49069),25.0,Gallaratese
241,,U.S. TRIESTINA 1946,VIA FLEMING ALESSANDRO 13,7,CALCIO A 11,,0240090479,56,FORZE ARMATE,"(45.4684543338854, 9.12000353479145)",9.120004,45.468454,POINT (9.12 45.46845),84.0,Forze Armate
932,,U.S. TRIESTINA 1946,VIA FLEMING ALESSANDRO 13,7,CALCIO A 11,,0240090479,56,FORZE ARMATE,"(45.4684543338854, 9.12000353479145)",9.120004,45.468454,POINT (9.12 45.46845),84.0,Forze Armate
21,,EMILIO COLOMBO,VIA DEL CARDELLINO 15,6,CENTRO SPORTIVO,,02410976,53,LORENTEGGIO,"(45.4503203790094, 9.12235445586481)",9.122354,45.45032,POINT (9.12235 45.45032),35.0,Lorenteggio
478,,EMILIO COLOMBO,VIA DEL CARDELLINO 15,6,CENTRO SPORTIVO,,,53,LORENTEGGIO,"(45.4503203790094, 9.12235445586481)",9.122354,45.45032,POINT (9.12235 45.45032),35.0,Lorenteggio
475,,EMILIO COLOMBO,VIA DEL CARDELLINO 15,6,CENTRO SPORTIVO,,,53,LORENTEGGIO,"(45.4503203790094, 9.12235445586481)",9.122354,45.45032,POINT (9.12235 45.45032),35.0,Lorenteggio
13,248376073.0,MILANO SPORT SPA,VIA DEL CARDELLINO 3,6,NUOTO,www.milanosport.it,02417948,53,LORENTEGGIO,"(45.448274461699, 9.12372636565381)",9.123726,45.448274,POINT (9.12373 45.44827),35.0,Lorenteggio


In [50]:
# Keep a single row per ('info', 'geometry') pair; the first occurrence is retained
PolySportVenues = PolySportVenues.drop_duplicates(
    subset=["info", "geometry"],
    keep="first",
)
PolySportVenues

Unnamed: 0,FAX,Nome,Indirizzo,Zona,info,URL,TELEFONO1,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
0,,NON ATTRIBUITO,VIA USSI STEFANO 4,9,CENTRO SPORTIVO,,,13,GRECO - SEGNANO,"(45.5071015469858, 9.20835578748649)",9.208356,45.507102,POINT (9.20836 45.5071),30.0,Greco
1,,S.S. BAGGIO 2,VIA OLIVIERI ALESSIO 11,7,CALCIO A 11,,024599310,56,FORZE ARMATE,"(45.4655714935197, 9.11156409171595)",9.111564,45.465571,POINT (9.11156 45.46557),84.0,Forze Armate
2,0239210739,LIDO DI MILANO,PLE LOTTO LORENZO 15,8,MINI GOLF,www.milanosport.it,0239266100,66,QT 8,"(45.4805639255761, 9.14194021229098)",9.141940,45.480564,POINT (9.14194 45.48056),57.0,Qt 8
3,,COOP. AGRISPORT SEZIONE CALCIO,VIA DEL RICORDO 58,2,CENTRO SPORTIVO,,0227207742,17,ADRIANO,"(45.5093199390653, 9.23794027433961)",9.237940,45.509320,POINT (9.23794 45.50932),1.0,Adriano
4,,SOCIETA GINNASTICA MILANO 2000,VIA MARTIGNONI ADELE 1 A,2,CENTRO SPORTIVO,,02603288,12,MACIACHINI - MAGGIOLINA,"(45.492566239195, 9.19613835069693)",9.196138,45.492566,POINT (9.19614 45.49257),37.0,Maciachini - Maggiolina
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1035,0262345191,PALESTRA MASCAGNI,VIA MASCAGNI PIETRO 6,1,PALESTRA,www.milanosport.it,0262345,4,GUASTALLA,"(45.4658020924376, 9.20136672218329)",9.201367,45.465802,POINT (9.20137 45.4658),31.0,Guastalla
1036,,A.S.D. PICCOLA SCUOLA DI CIRCO,VIA CENISIO 2,8,ATTIVITA CIRCENSE,,,70,GHISOLFA,"(45.4876030737289, 9.1732839281514)",9.173284,45.487603,POINT (9.17328 45.4876),26.0,Ghisolfa
1038,,A.S.D. RUGBY MILANO,VIA LICATA 41,3,RUGBY,,,18,CIMIANO - ROTTOLE - Q.RE FELTRE,"(45.4986815797783, 9.25111177630766)",9.251112,45.498682,POINT (9.25111 45.49868),50.0,Parco Lambro - Cimiano
1039,0248713719,AREA EX PALAZZETTO DELLO SPORT,VIA DEI PICCOLOMINI SNC,7,PARCHEGGIO,www.sansiro.net,0248713713,60,STADIO - IPPODROMI,"(45.4781768596385, 9.12081301575318)",9.120813,45.478177,POINT (9.12081 45.47818),66.0,San Siro


In [52]:
# Select rows that share their geometry with at least one other row
# (keep=False flags every occurrence)
duplicates2 = PolySportVenues.loc[
    lambda venues: venues.duplicated(subset=["geometry"], keep=False)
]

# Show them ordered by longitude so rows at the same point sit next to each other
duplicates2.sort_values("LONG_X_4326")

Unnamed: 0,FAX,Nome,Indirizzo,Zona,info,URL,TELEFONO1,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
112,,SANTA MARCELLINA,VIA MOSCA ANTONIO 185,7,CENTRO SPORTIVO,,,54,MUGGIANO,"(45.4510450947062, 9.07018691135817)",9.070187,45.451045,POINT (9.07019 45.45105),41.0,Muggiano
482,,SANTA MARCELLINA,VIA MOSCA ANTONIO 185,7,TENNIS,,,54,MUGGIANO,"(45.4510450947062, 9.07018691135817)",9.070187,45.451045,POINT (9.07019 45.45105),41.0,Muggiano
480,,SANTA MARCELLINA,VIA MOSCA ANTONIO 185,7,CALCIO A 11,,,54,MUGGIANO,"(45.4510450947062, 9.07018691135817)",9.070187,45.451045,POINT (9.07019 45.45105),41.0,Muggiano
481,,SANTA MARCELLINA,VIA MOSCA ANTONIO 185,7,PALLACANESTRO,,,54,MUGGIANO,"(45.4510450947062, 9.07018691135817)",9.070187,45.451045,POINT (9.07019 45.45105),41.0,Muggiano
966,,G.S. MUGGIANO,VIA MUGGIANO 14,7,CALCIO A 7,ww.grupposportivomuggiano.it,,54,MUGGIANO,"(45.4481443742503, 9.07037129658034)",9.070371,45.448144,POINT (9.07037 45.44814),41.0,Muggiano
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
438,0270200295,MARIO SAINI,VIA CORELLI ARCANGELO 136,3,NUOTO,www.milanosport.it,027561280,24,PARCO FORLANINI - CAVRIANO,"(45.4699676940051, 9.26340502898979)",9.263405,45.469968,POINT (9.26341 45.46997),49.0,Parco Forlanini - Ortica
444,0270200295,MARIO SAINI,VIA CORELLI ARCANGELO 136,3,RUGBY,www.milanosport.it,027561280,24,PARCO FORLANINI - CAVRIANO,"(45.4699676940051, 9.26340502898979)",9.263405,45.469968,POINT (9.26341 45.46997),49.0,Parco Forlanini - Ortica
435,0270200295,MARIO SAINI,VIA CORELLI ARCANGELO 136,3,CALCIO A 5,www.milanosport.it,027561280,24,PARCO FORLANINI - CAVRIANO,"(45.4699676940051, 9.26340502898979)",9.263405,45.469968,POINT (9.26341 45.46997),49.0,Parco Forlanini - Ortica
436,0270200295,MARIO SAINI,VIA CORELLI ARCANGELO 136,3,GINNASTICA,www.milanosport.it,027561280,24,PARCO FORLANINI - CAVRIANO,"(45.4699676940051, 9.26340502898979)",9.263405,45.469968,POINT (9.26341 45.46997),49.0,Parco Forlanini - Ortica


In [54]:
# Distinct values of the 'info' column, in order of first appearance
pd.unique(PolySportVenues["info"])

array(['CENTRO SPORTIVO', 'CALCIO A 11', 'MINI GOLF', 'ATLETICA',
       'PISCINA', 'NUOTO', 'PALESTRA', 'PALLACANESTRO', 'TENNIS',
       'GINNASTICA', 'CALCIO A 5', 'BOCCIODROMO', 'CENTRO BALNEARE',
       'BODY BUILDING', 'BASEBALL', 'FITNESS MACCHINE', 'CALCIO A 7',
       'GINNASTICA ATTREZZISTICA', 'TENNIS TAVOLO', 'PALLAVOLO', 'BOXE',
       'SCHERMA', 'PALLAMANO', 'ATTIVITA SUBACQUEA', 'BEACH VOLLEY',
       'HOCKEY IN LINE', 'PATTINAGGIO A ROTELLE',
       'PATTINAGGIO SU GHIACCIO', 'ROLLER BLADE', 'RUGBY', 'SCI DI FONDO',
       'SOFTBALL', "TIRO CON L'ARCO", 'TUFFI', 'FOOTBALL AMERICANO',
       'SQUASH', 'CANOTTAGGIO', 'BRIDGE', 'BOCCE', 'PALLANUOTO', 'HOCKEY',
       'SPORT EQUESTRE', 'DANZA', 'GOLF', 'TIRO A SEGNO', 'CALCIO',
       'BADMINTON', 'BALLO', 'BMX', 'CICLISMO', 'FITNESS',
       'BILIARDO-BOCCETTE', 'WINSURF', 'ATTIVITA CIRCENSE', 'PARCHEGGIO'],
      dtype=object)

In [56]:
# For each distinct geometry, flag whether any of its rows is labelled "CENTRO SPORTIVO"
info_by_geometry = PolySportVenues.groupby("geometry")["info"]
has_centro_sportivo = info_by_geometry.apply(
    lambda infos: "CENTRO SPORTIVO" in infos.values
)

# True  -> the geometry has at least one row with info = "CENTRO SPORTIVO"
# False -> the geometry has no row with info = "CENTRO SPORTIVO"

# Geometries without any "CENTRO SPORTIVO" row
missing_centro_sportivo = has_centro_sportivo.loc[~has_centro_sportivo]
print(missing_centro_sportivo)

geometry
POINT (9.12373 45.44827)    False
POINT (9.12219 45.44701)    False
POINT (9.15029 45.45089)    False
POINT (9.16548 45.45686)    False
POINT (9.16021 45.46101)    False
                            ...  
POINT (9.19409 45.43687)    False
POINT (9.20918 45.44905)    False
POINT (9.21353 45.44354)    False
POINT (9.18353 45.43266)    False
POINT (9.17316 45.42512)    False
Name: info, Length: 124, dtype: bool


In [58]:
# For each distinct geometry, flag the case where its only row is "CENTRO SPORTIVO"
only_one_centro_sportivo = PolySportVenues.groupby("geometry")["info"].apply(
    lambda group_info: group_info.size == 1 and group_info.iat[0] == "CENTRO SPORTIVO"
)

# True  -> the geometry has exactly one row and that row is "CENTRO SPORTIVO"
# False -> anything else

# Index (geometries) of the groups flagged True
geoms_only_one_centro = only_one_centro_sportivo.loc[only_one_centro_sportivo].index
print(geoms_only_one_centro)

Index([ POINT (9.13050541386769 45.455718774036),
       POINT (9.11136807777138 45.4582460489244),
       POINT (9.20835578748649 45.5071015469858),
       POINT (9.22909319309963 45.4476095042998),
       POINT (9.19055665156447 45.4473595824136),
        POINT (9.16774499725052 45.445597424879),
       POINT (9.18790696740882 45.4319819313502)],
      dtype='geometry', name='geometry')


In [60]:
# Per-row count of how many rows share that row's geometry
group_sizes = PolySportVenues.groupby("geometry")["info"].transform("size")

# A row is kept when
#    - its geometry appears only once (single-entry group), OR
#    - its geometry is shared AND the row is not the "CENTRO SPORTIVO" entry
is_single_entry = group_sizes.eq(1)
is_specific_entry = group_sizes.gt(1) & PolySportVenues["info"].ne("CENTRO SPORTIVO")
condition = is_single_entry | is_specific_entry

# Apply the mask to the GeoDataFrame and show the result ordered by longitude
PolySportVenues = PolySportVenues.loc[condition]
PolySportVenues.sort_values(by="LONG_X_4326")

Unnamed: 0,FAX,Nome,Indirizzo,Zona,info,URL,TELEFONO1,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
482,,SANTA MARCELLINA,VIA MOSCA ANTONIO 185,7,TENNIS,,,54,MUGGIANO,"(45.4510450947062, 9.07018691135817)",9.070187,45.451045,POINT (9.07019 45.45105),41.0,Muggiano
480,,SANTA MARCELLINA,VIA MOSCA ANTONIO 185,7,CALCIO A 11,,,54,MUGGIANO,"(45.4510450947062, 9.07018691135817)",9.070187,45.451045,POINT (9.07019 45.45105),41.0,Muggiano
481,,SANTA MARCELLINA,VIA MOSCA ANTONIO 185,7,PALLACANESTRO,,,54,MUGGIANO,"(45.4510450947062, 9.07018691135817)",9.070187,45.451045,POINT (9.07019 45.45105),41.0,Muggiano
966,,G.S. MUGGIANO,VIA MUGGIANO 14,7,CALCIO A 7,ww.grupposportivomuggiano.it,,54,MUGGIANO,"(45.4481443742503, 9.07037129658034)",9.070371,45.448144,POINT (9.07037 45.44814),41.0,Muggiano
967,,G.S. MUGGIANO,VIA MUGGIANO 14,7,CALCIO A 11,ww.grupposportivomuggiano.it,,54,MUGGIANO,"(45.4481443742503, 9.07037129658034)",9.070371,45.448144,POINT (9.07037 45.44814),41.0,Muggiano
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
435,0270200295,MARIO SAINI,VIA CORELLI ARCANGELO 136,3,CALCIO A 5,www.milanosport.it,027561280,24,PARCO FORLANINI - CAVRIANO,"(45.4699676940051, 9.26340502898979)",9.263405,45.469968,POINT (9.26341 45.46997),49.0,Parco Forlanini - Ortica
434,0270200295,MARIO SAINI,VIA CORELLI ARCANGELO 136,3,CALCIO A 11,www.milanosport.it,027561280,24,PARCO FORLANINI - CAVRIANO,"(45.4699676940051, 9.26340502898979)",9.263405,45.469968,POINT (9.26341 45.46997),49.0,Parco Forlanini - Ortica
433,0270200295,MARIO SAINI,VIA CORELLI ARCANGELO 136,3,BEACH VOLLEY,www.milanosport.it,027561280,24,PARCO FORLANINI - CAVRIANO,"(45.4699676940051, 9.26340502898979)",9.263405,45.469968,POINT (9.26341 45.46997),49.0,Parco Forlanini - Ortica
441,0270200295,MARIO SAINI,VIA CORELLI ARCANGELO 136,3,PATTINAGGIO A ROTELLE,www.milanosport.it,027561280,24,PARCO FORLANINI - CAVRIANO,"(45.4699676940051, 9.26340502898979)",9.263405,45.469968,POINT (9.26341 45.46997),49.0,Parco Forlanini - Ortica


In [67]:
# Merge fine-grained activity labels into broader categories.
# The frame at this point is the result of a boolean filter, so assigning a column on it
# in place (PolySportVenues["info"] = ...) is the pattern that triggers pandas'
# SettingWithCopyWarning. `.assign` builds a new frame instead and avoids that ambiguity.
info_merge_map = {
    "CALCIO A 5": "CALCIO",
    "CALCIO A 7": "CALCIO",
    "CALCIO A 11": "CALCIO",
    "NUOTO": "PISCINA",
    "FITNESS MACCHINE": "FITNESS",
}
PolySportVenues = PolySportVenues.assign(
    info=PolySportVenues["info"].replace(info_merge_map)
)

# After the merge several rows at the same point can carry the same label
# (CALCIO, PISCINA, FITNESS); keep one row per ('info', 'geometry') pair
PolySportVenues = PolySportVenues.drop_duplicates(subset=['info','geometry'])

# Remove rows where info == "PARCHEGGIO"
PolySportVenues = PolySportVenues[PolySportVenues["info"] != "PARCHEGGIO"].copy()

PolySportVenues

Unnamed: 0,Nome,Indirizzo,info,geometry,Neighborhood
0,NON ATTRIBUITO,VIA USSI STEFANO 4,CENTRO SPORTIVO,POINT (9.20836 45.5071),Greco
1,S.S. BAGGIO 2,VIA OLIVIERI ALESSIO 11,CALCIO,POINT (9.11156 45.46557),Forze Armate
2,LIDO DI MILANO,PLE LOTTO LORENZO 15,MINI GOLF,POINT (9.14194 45.48056),Qt 8
3,PRO PATRIA MILANO S.R.L.,VLE SARCA 205,ATLETICA,POINT (9.21025 45.5188),Bicocca
4,NON ATTRIBUITO,VIA DEGLI ANEMONI 8,CENTRO SPORTIVO,POINT (9.13051 45.45572),Lorenteggio
...,...,...,...,...,...
683,TURNO SETTE,VIA VALLA LORENZO SNC,CENTRO SPORTIVO,POINT (9.18791 45.43198),Stadera
684,PALESTRA MASCAGNI,VIA MASCAGNI PIETRO 6,PALESTRA,POINT (9.20137 45.4658),Guastalla
685,A.S.D. PICCOLA SCUOLA DI CIRCO,VIA CENISIO 2,ATTIVITA CIRCENSE,POINT (9.17328 45.4876),Ghisolfa
686,A.S.D. RUGBY MILANO,VIA LICATA 41,RUGBY,POINT (9.25111 45.49868),Parco Lambro - Cimiano


In [69]:
# Restrict the frame to the variables used downstream and renumber the rows from 0
sport_venue_columns = ["Nome", "Indirizzo", "info", "geometry", "Neighborhood"]
PolySportVenues = PolySportVenues[sport_venue_columns]
PolySportVenues = PolySportVenues.reset_index(drop=True)


### There are no missing values

In [72]:
# Collect every row that has a missing value in at least one column
nan_rows = PolySportVenues.loc[PolySportVenues.isna().any(axis="columns")]

nan_rows

Unnamed: 0,Nome,Indirizzo,info,geometry,Neighborhood


In [74]:
# Write the cleaned sport venues layer to disk as GeoJSON
PolySportVenues.to_file(filename="PolySportVenues.geojson", driver="GeoJSON")

## Public Schools

In [77]:
# Read the public (state) schools layer from the downloaded file
publicschools = gpd.read_file("geojson_files/ds1305_elenco_scuole_statali_as2020_21_final_.geojson")

# Bring the schools onto the neighborhoods' CRS when the two differ
if gdf_combined.crs != publicschools.crs:
    publicschools = publicschools.to_crs(gdf_combined.crs)

# Left spatial join ("within"), then discard schools that matched no neighborhood
PolyPublicSchools = (
    gpd.sjoin(publicschools, gdf_combined, how="left", predicate="within")
    .loc[lambda joined: joined["Neighborhood"].notna()]
)

# Show the joined result
PolyPublicSchools

Unnamed: 0,ANNOSCOLASTICO,AREAGEOGRAFICA,CODICEISTITUTORIFERIMENTO,DENOMINAZIONEISTITUTORIFERIMENTO,CODICESCUOLA,DENOMINAZIONESCUOLA,INDIRIZZOSCUOLA,DESCRIZIONECARATTERISTICASCUOLA,DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA,INDICAZIONESEDEDIRETTIVO,...,CAP,MUNICIPIO,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
0,202021,NORD OVEST,MIPC20000G,TITO LIVIO,MIPC20000G,TITO LIVIO,VIA CIRCO 4,NORMALE,LICEO CLASSICO,SI,...,20123,1,1,DUOMO,"(45.46163700000005, 9.180742500000065)",9.180743,45.461637,POINT (9.18074 45.46164),21.0,Duomo
1,202021,NORD OVEST,MIIC8BB008,IC A. DIAZ/MILANO,MIEE8BB02B,E.DE MARCHI,VIA SANT'ORSOLA 15,NORMALE,SCUOLA PRIMARIA,NO,...,20123,1,1,DUOMO,"(45.46259100000003, 9.180513000000076)",9.180513,45.462591,POINT (9.18051 45.46259),21.0,Duomo
2,202021,NORD OVEST,MIIC8BB008,IC A. DIAZ/MILANO,MIEE8BB01A,A.DIAZ - MILANO,VIA CROCEFISSO 15,NORMALE,SCUOLA PRIMARIA,NO,...,20122,1,1,DUOMO,"(45.45694800000007, 9.185886000000039)",9.185886,45.456948,POINT (9.18589 45.45695),21.0,Duomo
3,202021,NORD OVEST,MIIC8D3009,I.C. A. SCARPA MILANO,MIEE8D302C,FONDAZIONE IRCCS-IST. NAZ. DEI TUMORI,VIA VENEZIAN 1,C/O IST. OSPEDALIERO,SCUOLA PRIMARIA,NO,...,20121,1,1,DUOMO,"(45.46728000000007, 9.197793000000047)",9.197793,45.467280,POINT (9.19779 45.46728),21.0,Duomo
4,202021,NORD OVEST,MIIC8BD00X,MILANO SPIGA ROSSARICASTIGLIONI,MIMM8BD011,ICS MILANO SPIGA (PARINI SMS),VIA S.SPIRITO 21,NORMALE,SCUOLA PRIMO GRADO,NO,...,20121,1,1,DUOMO,"(45.470574000000056, 9.195606000000055)",9.195606,45.470574,POINT (9.19561 45.47057),21.0,Duomo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
467,202021,NORD OVEST,MIIC8CF006,IC C. CANTU'/MILANO,MIAA8CF013,VIA DORA BALTEA,VIA DORA BALTEA 24,NORMALE,SCUOLA INFANZIA,NO,...,20161,9,83,BRUZZANO,"(45.52858800000007, 9.167278500000066)",9.167279,45.528588,POINT (9.16728 45.52859),10.0,Bruzzano
468,202021,NORD OVEST,MIIC8CF006,IC C. CANTU'/MILANO,MIEE8CF018,C.CANTU' - MILANO,VIA DEI BRASCHI 12,NORMALE,SCUOLA PRIMARIA,NO,...,20161,9,83,BRUZZANO,"(45.52641900000003, 9.176679000000036)",9.176679,45.526419,POINT (9.17668 45.52642),10.0,Bruzzano
469,202021,NORD OVEST,MIIC8CF006,IC C. CANTU'/MILANO,MIIC8CF006,IC C. CANTU'/MILANO,VIA DEI BRASCHI 12,NORMALE,ISTITUTO COMPRENSIVO,SI,...,20161,9,83,BRUZZANO,"(45.52641900000003, 9.176679000000036)",9.176679,45.526419,POINT (9.17668 45.52642),10.0,Bruzzano
470,202021,NORD OVEST,MIIC8CF006,IC C. CANTU'/MILANO,MIMM8CF017,SABA,VIA DEL VOLGA 3,NORMALE,SCUOLA PRIMO GRADO,NO,...,20161,9,83,BRUZZANO,"(45.52551900000003, 9.172372500000051)",9.172373,45.525519,POINT (9.17237 45.52552),10.0,Bruzzano


In [79]:
# Public schools whose point coincides with another school's point;
# keep=False flags every member of a coincident group, not only the repeats
duplicates = PolyPublicSchools.loc[
    PolyPublicSchools.duplicated(subset="geometry", keep=False)
]

# Display the coincident rows
duplicates

Unnamed: 0,ANNOSCOLASTICO,AREAGEOGRAFICA,CODICEISTITUTORIFERIMENTO,DENOMINAZIONEISTITUTORIFERIMENTO,CODICESCUOLA,DENOMINAZIONESCUOLA,INDIRIZZOSCUOLA,DESCRIZIONECARATTERISTICASCUOLA,DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA,INDICAZIONESEDEDIRETTIVO,...,CAP,MUNICIPIO,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
4,202021,NORD OVEST,MIIC8BD00X,MILANO SPIGA ROSSARICASTIGLIONI,MIMM8BD011,ICS MILANO SPIGA (PARINI SMS),VIA S.SPIRITO 21,NORMALE,SCUOLA PRIMO GRADO,NO,...,20121,1,1,DUOMO,"(45.470574000000056, 9.195606000000055)",9.195606,45.470574,POINT (9.19561 45.47057),21.0,Duomo
5,202021,NORD OVEST,MIIC8BD00X,MILANO SPIGA ROSSARICASTIGLIONI,MIIC8BD00X,MILANO SPIGA ROSSARICASTIGLIONI,VIA S. SPIRITO 21,NORMALE,ISTITUTO COMPRENSIVO,SI,...,20121,1,1,DUOMO,"(45.470574000000056, 9.195606000000055)",9.195606,45.470574,POINT (9.19561 45.47057),21.0,Duomo
9,202021,NORD OVEST,MIIC814009,IC CAVALIERI/MILANO,MIEE81402C,AZ. OSP. FATEBENEFRATELLI E OFTALMICO,CORSO DI PORTA NUOVA 23,C/O IST. OSPEDALIERO,SCUOLA PRIMARIA,NO,...,20121,1,2,BRERA,"(45.478575000000035, 9.19216350000005)",9.192163,45.478575,POINT (9.19216 45.47858),9.0,Brera
14,202021,NORD OVEST,MIIC814009,IC CAVALIERI/MILANO,MIMM81402B,OSPEDALE FATEBENEFRATELLI - MI,CORSO PORTA NUOVA 23,C/O IST. OSPEDALIERO,SCUOLA PRIMO GRADO,NO,...,20121,1,2,BRERA,"(45.478575000000035, 9.19216350000005)",9.192163,45.478575,POINT (9.19216 45.47858),9.0,Brera
18,202021,NORD OVEST,MIPC110009,E.SETTI CARRARO DALLA CHIESA C/O EDUC.,MIPC110009,E.SETTI CARRARO DALLA CHIESA C/O EDUC.,VIA DELLA PASSIONE 12,ANN. A EDUCANDATO,LICEO CLASSICO,NO,...,20122,1,4,GUASTALLA,"(45.46498500000007, 9.202140000000043)",9.202140,45.464985,POINT (9.20214 45.46498),31.0,Guastalla
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
463,202021,NORD OVEST,MIIS038002,GIUSEPPE LUIGI LAGRANGE,MITF03801E,GIUSEPPE LUIGI LAGRANGE,VIA LITTA MODIGNANI65,NORMALE,ISTITUTO TECNICO INDUSTRIALE,NO,...,20161,9,81,BOVISASCA,"(45.52202700000004, 9.15642900000006)",9.156429,45.522027,POINT (9.15643 45.52203),7.0,Bovisasca
464,202021,NORD OVEST,MIIS03600A,VILFREDO FEDERICO PARETO,MITD03601L,VILFREDO FEDERICO PARETO,VIA LITTA MODIGNANI 55,NORMALE,ISTITUTO TECNICO COMMERCIALE,NO,...,20161,9,81,BOVISASCA,"(45.52285500000005, 9.160749000000067)",9.160749,45.522855,POINT (9.16075 45.52286),7.0,Bovisasca
465,202021,NORD OVEST,MIIS03600A,VILFREDO FEDERICO PARETO,MIRA03601A,I.P.A.A. (SEZ.AS.) V. F. PARETO,VIA LITTA MODIGNANI 55,NORMALE,IST PROF PER L'AGRICOLTURA E L'AMBIENTE,NO,...,20161,9,81,BOVISASCA,"(45.52285500000005, 9.160749000000067)",9.160749,45.522855,POINT (9.16075 45.52286),7.0,Bovisasca
468,202021,NORD OVEST,MIIC8CF006,IC C. CANTU'/MILANO,MIEE8CF018,C.CANTU' - MILANO,VIA DEI BRASCHI 12,NORMALE,SCUOLA PRIMARIA,NO,...,20161,9,83,BRUZZANO,"(45.52641900000003, 9.176679000000036)",9.176679,45.526419,POINT (9.17668 45.52642),10.0,Bruzzano


In [81]:
# Example of one reference institute that spans several school grades:
# list every row belonging to 'MILANO SPIGA ROSSARICASTIGLIONI'
PolyPublicSchools.loc[
    PolyPublicSchools['DENOMINAZIONEISTITUTORIFERIMENTO'].eq('MILANO SPIGA ROSSARICASTIGLIONI')
]

Unnamed: 0,ANNOSCOLASTICO,AREAGEOGRAFICA,CODICEISTITUTORIFERIMENTO,DENOMINAZIONEISTITUTORIFERIMENTO,CODICESCUOLA,DENOMINAZIONESCUOLA,INDIRIZZOSCUOLA,DESCRIZIONECARATTERISTICASCUOLA,DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA,INDICAZIONESEDEDIRETTIVO,...,CAP,MUNICIPIO,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
4,202021,NORD OVEST,MIIC8BD00X,MILANO SPIGA ROSSARICASTIGLIONI,MIMM8BD011,ICS MILANO SPIGA (PARINI SMS),VIA S.SPIRITO 21,NORMALE,SCUOLA PRIMO GRADO,NO,...,20121,1,1,DUOMO,"(45.470574000000056, 9.195606000000055)",9.195606,45.470574,POINT (9.19561 45.47057),21.0,Duomo
5,202021,NORD OVEST,MIIC8BD00X,MILANO SPIGA ROSSARICASTIGLIONI,MIIC8BD00X,MILANO SPIGA ROSSARICASTIGLIONI,VIA S. SPIRITO 21,NORMALE,ISTITUTO COMPRENSIVO,SI,...,20121,1,1,DUOMO,"(45.470574000000056, 9.195606000000055)",9.195606,45.470574,POINT (9.19561 45.47057),21.0,Duomo
6,202021,NORD OVEST,MIIC8BD00X,MILANO SPIGA ROSSARICASTIGLIONI,MIEE8BD012,ICS MILANO SPIGA SPIGA,VIA DELLA SPIGA 29,NORMALE,SCUOLA PRIMARIA,NO,...,20121,1,1,DUOMO,"(45.471123000000034, 9.195651000000055)",9.195651,45.471123,POINT (9.19565 45.47112),21.0,Duomo
12,202021,NORD OVEST,MIIC8BD00X,MILANO SPIGA ROSSARICASTIGLIONI,MIEE8BD023,IC MILANO SPIGA A. DA GIUSSANO,BASTIONI DI PORTA NUOVA 4,NORMALE,SCUOLA PRIMARIA,NO,...,20121,1,2,BRERA,"(45.48002400000007, 9.19118700000007)",9.191187,45.480024,POINT (9.19119 45.48002),9.0,Brera


In [83]:
# Narrow the public-school table to name, address, school characteristic,
# grade type, municipality, point geometry and neighbourhood
PolyPublicSchools = PolyPublicSchools[[
    "DENOMINAZIONESCUOLA",
    "INDIRIZZOSCUOLA",
    "DESCRIZIONECARATTERISTICASCUOLA",
    "DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA",
    "MUNICIPIO",
    "geometry",
    "Neighborhood",
]]


### There are no missing values

In [86]:
# Public schools with at least one missing value in any column
# (an empty table means nothing is missing).
nan_rows = PolyPublicSchools.loc[PolyPublicSchools.isna().any(axis="columns")]

nan_rows

Unnamed: 0,DENOMINAZIONESCUOLA,INDIRIZZOSCUOLA,DESCRIZIONECARATTERISTICASCUOLA,DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA,MUNICIPIO,geometry,Neighborhood


## Private Schools

In [89]:
# Read the private (paritarie) school layer from the local geojson_files/ folder
privateschools = gpd.read_file("geojson_files/ds1582_elenco_scuole_paritarie_2020-21.geojson")

# Reproject onto the CRS of the neighbourhood polygons when the two layers
# disagree, so the join below compares coordinates in one reference system
if gdf_combined.crs != privateschools.crs:
    privateschools = privateschools.to_crs(gdf_combined.crs)

# Left-join every school to the polygon that contains it, then discard the
# rows whose "Neighborhood" is still missing after the join
PolyPrivateSchools = (
    gpd.sjoin(privateschools, gdf_combined, how="left", predicate="within")
    .dropna(subset=["Neighborhood"])
)

# Display the joined table
PolyPrivateSchools

Unnamed: 0,ANNOSCOLASTICO,CODICESCUOLA,AREAGEOGRAFICA,DENOMINAZIONESCUOLA,INDIRIZZOSCUOLA,CODICECOMUNESCUOLA,DESCRIZIONECOMUNE,DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA,INDIRIZZOEMAILSCUOLA,INDIRIZZOPECSCUOLA,...,CAP,MUNICIPIO,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
0,2020-21,MI1AEN500Q,NORD OVEST,SCUOLA DELL'INFANZIA DE CURTIS,VIA DE CURTIS 7,F205,MILANO,SCUOLA INFANZIA NON STATALE,ED.UE11@comune.milano.it,Non Disponibile,...,20128,2,17,ADRIANO,"(45.51610500000004, 9.241659000000027)",9.241659,45.516105,POINT (9.24166 45.5161),1.0,Adriano
1,2020-21,MI1A03600L,NORD OVEST,SCUOLA DELL'INFANZIA COMUNALE VIA CESARI42,VIA CESARI42,F205,MILANO,SCUOLA INFANZIA NON STATALE,Non Disponibile,ED.INFANZIAUE74@COMUNE.MILANO.IT,...,20162,9,14,NIGUARDA - CA' GRANDA - PRATO CENTENARO - Q.RE...,"(45.519363000000055, 9.185922000000062)",9.185922,45.519363,POINT (9.18592 45.51936),43.0,Niguarda - Cà Granda
3,2020-21,MIPS16500L,NORD OVEST,LICEO SCIENTIFICO S. AMBROGIO,V.COPERNICO 9,F205,MILANO,SCUOLA SEC. SECONDO GRADO NON STATALE,Non Disponibile,Non Disponibile,...,20125,2,10,STAZIONE CENTRALE - PONTE SEVESO,"(45.48806100000007, 9.201118500000064)",9.201119,45.488061,POINT (9.20112 45.48806),14.0,Centrale
4,2020-21,MI1M06000L,NORD OVEST,SCUOLA SECONDARIA DI 1Â° GR. ISTITUTO GONZAGA,V.VITRUVIO 41,F205,MILANO,SCUOLA SEC. PRIMO GRADO NON STATALE,Non Disponibile,ISTITUTOGONZAGA@PEC.GONZAGA-MILANO.IT,...,20124,2,10,STAZIONE CENTRALE - PONTE SEVESO,"(45.482589000000075, 9.206046000000072)",9.206046,45.482589,POINT (9.20605 45.48259),14.0,Centrale
5,2020-21,MI1A09300R,NORD OVEST,SCUOLA DELL'INFANZIA COMUNALE VIA MILESI2,VIA MILESI2,F205,MILANO,SCUOLA INFANZIA NON STATALE,Non Disponibile,Non Disponibile,...,20152,7,56,FORZE ARMATE,"(45.45990000000006, 9.103392000000042)",9.103392,45.459900,POINT (9.10339 45.4599),84.0,Forze Armate
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
456,2020-21,MI1A020003,NORD OVEST,SCUOLA DELL'INFANZIA COMUNALE VIA BEZZECCA16,VIA BEZZECCA16,F205,MILANO,SCUOLA INFANZIA NON STATALE,ED.UE22@comune.milano.it,Non Disponibile,...,20135,4,26,XXII MARZO,"(45.46074600000003, 9.212449500000048)",9.212449,45.460746,POINT (9.21245 45.46075),83.0,XXII Marzo
457,2020-21,MIPLVI500L,NORD OVEST,LICEO LINGUISTICO SCUOLA EUROPA,VIALE MAJNO 39,F205,MILANO,SCUOLA SEC. SECONDO GRADO NON STATALE,info@scuolaeuropa.it,Non Disponibile,...,20122,1,4,GUASTALLA,"(45.47240100000005, 9.205083000000059)",9.205083,45.472401,POINT (9.20508 45.4724),31.0,Guastalla
458,2020-21,MI1A240002,NORD OVEST,"SCUOLA DELL'INFANZIA ""BUON PASTORE""",VIA SAN VITTORE 29,F205,MILANO,SCUOLA INFANZIA NON STATALE,ist.buonpastore@tin.it,Non Disponibile,...,20123,1,7,MAGENTA - S. VITTORE,"(45.46344600000003, 9.169632000000036)",9.169632,45.463446,POINT (9.16963 45.46345),38.0,Magenta - San Vittore
459,2020-21,MI1A088009,NORD OVEST,SCUOLA DELL'INFANZIA COMUNALE VIA MASSAUA15,VIA MASSAUA15,F205,MILANO,SCUOLA INFANZIA NON STATALE,Non Disponibile,Non Disponibile,...,20146,7,52,BANDE NERE,"(45.460503000000074, 9.147406500000045)",9.147407,45.460503,POINT (9.14741 45.4605),4.0,Bande Nere


In [91]:
# Private schools whose point coincides with another school's point;
# keep=False flags every member of a coincident group, not only the repeats
duplicates = PolyPrivateSchools.loc[
    PolyPrivateSchools.duplicated(subset="geometry", keep=False)
]

# Order by longitude so rows sharing a location are listed together
duplicates.sort_values('LONG_X_4326')

Unnamed: 0,ANNOSCOLASTICO,CODICESCUOLA,AREAGEOGRAFICA,DENOMINAZIONESCUOLA,INDIRIZZOSCUOLA,CODICECOMUNESCUOLA,DESCRIZIONECOMUNE,DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA,INDIRIZZOEMAILSCUOLA,INDIRIZZOPECSCUOLA,...,CAP,MUNICIPIO,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
216,2020-21,MI1M104005,NORD OVEST,SCUOLA SECONDARIA DI 1Â° GR. MADRE BUCCHI,VIA PALMI25,F205,MILANO,SCUOLA SEC. PRIMO GRADO NON STATALE,Non Disponibile,SEGRETERIA@PEC.ISTITUTOMADREBUCCHI.IT,...,20152,7,56,FORZE ARMATE,"(45.45851400000004, 9.098316000000068)",9.098316,45.458514,POINT (9.09832 45.45851),84.0,Forze Armate
301,2020-21,MI1A231007,NORD OVEST,SCUOLA DELL'INFANZIA PARROCCHIALE MADRE BUCCHI,VIA PALMI25,F205,MILANO,SCUOLA INFANZIA NON STATALE,Non Disponibile,CERTIFICATA@PEC.SCUOLAMONTESSORI.COM,...,20152,7,56,FORZE ARMATE,"(45.45851400000004, 9.098316000000068)",9.098316,45.458514,POINT (9.09832 45.45851),84.0,Forze Armate
326,2020-21,MI1E02600X,NORD OVEST,"SCUOLA ELEM. PAR. ""MADRE BUCCHI""",VIA PALMI 25,F205,MILANO,SCUOLA PRIMARIA NON STATALE,Non Disponibile,SEGRETERIA@PEC.ISTITUTOMADREBUCCHI.IT,...,20152,7,56,FORZE ARMATE,"(45.45851400000004, 9.098316000000068)",9.098316,45.458514,POINT (9.09832 45.45851),84.0,Forze Armate
413,2020-21,MI1A17800G,NORD OVEST,"SCUOLA DELL'INFANZIA ""CASA DEI BAMBINI SCUOLA ...",VIA AROSIO3,F205,MILANO,SCUOLA INFANZIA NON STATALE,Non Disponibile,casadeibambinimontessori@lamiapec.it,...,20148,7,60,STADIO - IPPODROMI,"(45.47332800000004, 9.124870500000043)",9.124871,45.473328,POINT (9.12487 45.47333),66.0,San Siro
325,2020-21,MI1E01300T,NORD OVEST,"SCUOLA ELEM.PAR.""CASA DEI BAMBINI SCUOLA MONTE...",VIA AROSIO N.3,F205,MILANO,SCUOLA PRIMARIA NON STATALE,Non Disponibile,casadeibambinimontessori@lamiapec.it,...,20148,7,60,STADIO - IPPODROMI,"(45.47332800000004, 9.124870500000043)",9.124871,45.473328,POINT (9.12487 45.47333),66.0,San Siro
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
437,2020-21,MIPS295009,NORD OVEST,LICEO SCIENTIFICO FONDAZIONE SACRO CUORE,VIA ROMBON 78,F205,MILANO,SCUOLA SEC. SECONDO GRADO NON STATALE,Non Disponibile,FONDAZIONESACROCUORE@EDUPEC.IT,...,20134,3,23,LAMBRATE - ORTICA,"(45.48618000000005, 9.252153000000021)",9.252153,45.486180,POINT (9.25215 45.48618),33.0,Lambrate
72,2020-21,MIPC06500L,NORD OVEST,LICEO CLASSICO FONDAZIONE SACRO CUORE,VIA ROMBON 78,F205,MILANO,SCUOLA SEC. SECONDO GRADO NON STATALE,Non Disponibile,FONDAZIONESACROCUORE@EDUPEC.IT,...,20134,3,23,LAMBRATE - ORTICA,"(45.48618000000005, 9.252153000000021)",9.252153,45.486180,POINT (9.25215 45.48618),33.0,Lambrate
205,2020-21,MI1M07300P,NORD OVEST,SCUOLA SECONDARIA DI 1Â° GR. FONDAZIONE SACRO ...,VIA ROMBON78,F205,MILANO,SCUOLA SEC. PRIMO GRADO NON STATALE,Non Disponibile,FONDAZIONESACROCUORE@EDUPEC.IT,...,20134,3,23,LAMBRATE - ORTICA,"(45.48618000000005, 9.252153000000021)",9.252153,45.486180,POINT (9.25215 45.48618),33.0,Lambrate
257,2020-21,MIPC25500Q,NORD OVEST,LICEO CLASSICO SAN RAFFAELE,VIA OLGETTINA 46,F205,MILANO,SCUOLA SEC. SECONDO GRADO NON STATALE,direzione@liceosanraffaele.it,Non Disponibile,...,20132,3,18,CIMIANO - ROTTOLE - Q.RE FELTRE,"(45.50873400000006, 9.266647500000033)",9.266647,45.508734,POINT (9.26665 45.50873),50.0,Parco Lambro - Cimiano


In [93]:
# Several school tracks can be registered at one address: show the grade type
# and name of every row located at 'V.COPERNICO 9'
(
    PolyPrivateSchools
    .loc[PolyPrivateSchools['INDIRIZZOSCUOLA'].eq('V.COPERNICO 9')]
    [['DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA', 'DENOMINAZIONESCUOLA']]
)

Unnamed: 0,DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA,DENOMINAZIONESCUOLA
3,SCUOLA SEC. SECONDO GRADO NON STATALE,LICEO SCIENTIFICO S. AMBROGIO
451,SCUOLA SEC. SECONDO GRADO NON STATALE,LICEO CLASSICO S. AMBROGIO


In [95]:
# Narrow the private-school table to name, address, grade type, municipality,
# point geometry and neighbourhood, and append a constant
# DESCRIZIONECARATTERISTICASCUOLA column set to "PARITARIA" for every row
PolyPrivateSchools = PolyPrivateSchools[[
    "DENOMINAZIONESCUOLA",
    "INDIRIZZOSCUOLA",
    "DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA",
    "MUNICIPIO",
    "geometry",
    "Neighborhood",
]].assign(DESCRIZIONECARATTERISTICASCUOLA="PARITARIA")

### There are no missing values

In [98]:
# Private schools with at least one missing value in any column
# (an empty table means nothing is missing).
nan_rows = PolyPrivateSchools.loc[PolyPrivateSchools.isna().any(axis="columns")]

nan_rows

Unnamed: 0,DENOMINAZIONESCUOLA,INDIRIZZOSCUOLA,DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA,MUNICIPIO,geometry,Neighborhood,DESCRIZIONECARATTERISTICASCUOLA


## Concat Public and Private Schools

In [101]:
# Common column layout applied to both school tables before stacking them
school_columns = [
    "DENOMINAZIONESCUOLA",
    "INDIRIZZOSCUOLA",
    "DESCRIZIONECARATTERISTICASCUOLA",
    "DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA",
    "MUNICIPIO",
    "geometry",
    "Neighborhood",
]
PolyPrivateSchools = PolyPrivateSchools[school_columns]
PolyPublicSchools = PolyPublicSchools[school_columns]

# Stack private schools first, then public schools, under a fresh 0..n-1 index
PolySchools = gpd.GeoDataFrame(
    pd.concat(
        [PolyPrivateSchools, PolyPublicSchools],
        ignore_index=True,
    )
)

# Preview the first rows and report the size of the combined table
print(PolySchools.head())
print(f"Total rows: {len(PolySchools)}")

# Write the combined school layer to disk as GeoJSON
PolySchools.to_file(filename="PolySchools.geojson", driver="GeoJSON")

                             DENOMINAZIONESCUOLA  INDIRIZZOSCUOLA  \
0                 SCUOLA DELL'INFANZIA DE CURTIS  VIA DE CURTIS 7   
1     SCUOLA DELL'INFANZIA COMUNALE VIA CESARI42     VIA CESARI42   
2                  LICEO SCIENTIFICO S. AMBROGIO    V.COPERNICO 9   
3  SCUOLA SECONDARIA DI 1Â° GR. ISTITUTO GONZAGA    V.VITRUVIO 41   
4      SCUOLA DELL'INFANZIA COMUNALE VIA MILESI2      VIA MILESI2   

  DESCRIZIONECARATTERISTICASCUOLA DESCRIZIONETIPOLOGIAGRADOISTRUZIONESCUOLA  \
0                       PARITARIA               SCUOLA INFANZIA NON STATALE   
1                       PARITARIA               SCUOLA INFANZIA NON STATALE   
2                       PARITARIA     SCUOLA SEC. SECONDO GRADO NON STATALE   
3                       PARITARIA       SCUOLA SEC. PRIMO GRADO NON STATALE   
4                       PARITARIA               SCUOLA INFANZIA NON STATALE   

  MUNICIPIO                  geometry          Neighborhood  
0         2   POINT (9.24166 45.5161)           

## Universities

In [104]:
# Read the university layer from the local geojson_files/ folder
universities = gpd.read_file("geojson_files/ds94_universita__final.geojson")

# Reproject onto the CRS of the neighbourhood polygons when the two layers
# disagree, so the join below compares coordinates in one reference system
if gdf_combined.crs != universities.crs:
    universities = universities.to_crs(gdf_combined.crs)

# Left-join every record to the polygon that contains it, then discard the
# rows whose "Neighborhood" is still missing after the join
PolyUniversity = (
    gpd.sjoin(universities, gdf_combined, how="left", predicate="within")
    .dropna(subset=["Neighborhood"])
)

# Display the joined table
PolyUniversity

Unnamed: 0,COD_VIA,CIVICO,BARRATO,X,Y,DENOMINAZ,CATEGORIA,TIPOLOGIA,SOTTOTIPO,PROPRIETA,...,T_SEDE,MUNICIPIO,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
0,1523,3,,1514766.63459,5039559.3158,UNIVERSITA' DEGLI STUDI DI MILANO,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,EX ISTITUTO,9,14,NIGUARDA - CA' GRANDA - PRATO CENTENARO - Q.RE...,"(45.5092296526866, 9.18868084450537)",9.188681,45.509230,POINT (9.18868 45.50923),43.0,Niguarda - Cà Granda
1,4012,1,,1515116.2967,5033570.58859,UNIVERSITA' DEGLI STUDI DI MILANO,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,EX ISTITUTO,1,5,PORTA VIGENTINA - PORTA LODOVICA,"(45.4553179955712, 9.19297324638358)",9.192973,45.455318,POINT (9.19297 45.45532),80.0,Vigentina
2,2177,42,,1517013.18358,5036277.91279,UNIVERSITA' DEGLI STUDI DI MILANO,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,SEDE DIDATTICA,3,21,BUENOS AIRES - PORTA VENEZIA - PORTA MONFORTE,"(45.4796430022321, 9.21732651188301)",9.217327,45.479643,POINT (9.21733 45.47964),11.0,Buenos Aires - Venezia
3,2177,42,,1517013.18358,5036277.91279,UNIVERSITA' DEGLI STUDI DI MILANO,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,EX ISTITUTO,3,21,BUENOS AIRES - PORTA VENEZIA - PORTA MONFORTE,"(45.4796430022321, 9.21732651188301)",9.217327,45.479643,POINT (9.21733 45.47964),11.0,Buenos Aires - Venezia
4,530,1,,1514606.84139,5034224.99129,UNIVERSITA' DEGLI STUDI DI MILANO,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,SEDE DIDATTICA,1,1,DUOMO,"(45.4612190660429, 9.18647705815211)",9.186477,45.461219,POINT (9.18648 45.46122),21.0,Duomo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
706,122,28,,1514697.15561,5035450.18718,ACCADEMIA DI BELLE ARTI DI BRERA,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,SEDE DIDATTICA,1,2,BRERA,"(45.4722451291283, 9.18766875739009)",9.187669,45.472245,POINT (9.18767 45.47225),9.0,Brera
707,122,28,,1514697.15561,5035450.18718,ACCADEMIA DI BELLE ARTI DI BRERA,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,SEDE DIDATTICA,1,2,BRERA,"(45.4722451291283, 9.18766875739009)",9.187669,45.472245,POINT (9.18767 45.47225),9.0,Brera
708,122,28,,1514697.15561,5035450.18718,ACCADEMIA DI BELLE ARTI DI BRERA,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,SEDE DIDATTICA,1,2,BRERA,"(45.4722451291283, 9.18766875739009)",9.187669,45.472245,POINT (9.18767 45.47225),9.0,Brera
709,122,28,,1514697.15561,5035450.18718,ACCADEMIA DI BELLE ARTI DI BRERA,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,SEDE DIDATTICA,1,2,BRERA,"(45.4722451291283, 9.18766875739009)",9.187669,45.472245,POINT (9.18767 45.47225),9.0,Brera


In [106]:
# University records whose point coincides with another record's point;
# keep=False flags every member of a coincident group, not only the repeats
duplicates = PolyUniversity.loc[
    PolyUniversity.duplicated(subset="geometry", keep=False)
]

# Order by the X coordinate column so rows sharing a location are listed together
duplicates.sort_values('X')

Unnamed: 0,COD_VIA,CIVICO,BARRATO,X,Y,DENOMINAZ,CATEGORIA,TIPOLOGIA,SOTTOTIPO,PROPRIETA,...,T_SEDE,MUNICIPIO,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
262,6575,66,,1509973.54799,5035564.35701,UNIVERSITA' DEGLI STUDI DI MILANO,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,SEDE DIDATTICA,7,60,STADIO - IPPODROMI,"(45.4733558900766, 9.12724128805707)",9.127241,45.473356,POINT (9.12724 45.47336),66.0,San Siro
261,6575,66,,1509973.54799,5035564.35701,UNIVERSITA' DEGLI STUDI DI MILANO,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,SEDE DIDATTICA,7,60,STADIO - IPPODROMI,"(45.4733558900766, 9.12724128805707)",9.127241,45.473356,POINT (9.12724 45.47336),66.0,San Siro
31,6575,66,,1509973.54799,5035564.35701,UNIVERSITA' DEGLI STUDI DI MILANO,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,SEDE DIDATTICA,7,60,STADIO - IPPODROMI,"(45.4733558900766, 9.12724128805707)",9.127241,45.473356,POINT (9.12724 45.47336),66.0,San Siro
690,5291,19,,1511866.328,5032420.898,SAE INSTITUTE,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PRIVATA,...,ISTITUTO,6,45,MONCUCCO - SAN CRISTOFORO,"(45.4450321504593, 9.15138079547624)",9.151381,45.445032,POINT (9.15138 45.44503),65.0,San Cristoforo
691,5291,19,,1511866.328,5032420.898,SAE INSTITUTE,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PRIVATA,...,SEDE CORSO,6,45,MONCUCCO - SAN CRISTOFORO,"(45.4450321504593, 9.15138079547624)",9.151381,45.445032,POINT (9.15138 45.44503),65.0,San Cristoforo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
617,2530,58,,1520932.72524,5039319.11175,VITA-SALUTE SAN RAFFAELE,ISTRUZIONE,,,PRIVATA,...,SEDE DIDATTICA,3,18,CIMIANO - ROTTOLE - Q.RE FELTRE,"(45.5069101894498, 9.26760542541493)",9.267605,45.506910,POINT (9.26761 45.50691),50.0,Parco Lambro - Cimiano
616,2530,58,,1520932.72524,5039319.11175,VITA-SALUTE SAN RAFFAELE,ISTRUZIONE,,,PRIVATA,...,FACOLTA',3,18,CIMIANO - ROTTOLE - Q.RE FELTRE,"(45.5069101894498, 9.26760542541493)",9.267605,45.506910,POINT (9.26761 45.50691),50.0,Parco Lambro - Cimiano
615,2530,58,,1520932.72524,5039319.11175,VITA-SALUTE SAN RAFFAELE,ISTRUZIONE,,,PRIVATA,...,FACOLTA',3,18,CIMIANO - ROTTOLE - Q.RE FELTRE,"(45.5069101894498, 9.26760542541493)",9.267605,45.506910,POINT (9.26761 45.50691),50.0,Parco Lambro - Cimiano
622,2530,58,,1520932.72524,5039319.11175,VITA-SALUTE SAN RAFFAELE,ISTRUZIONE,,,PRIVATA,...,SEDE DIDATTICA,3,18,CIMIANO - ROTTOLE - Q.RE FELTRE,"(45.5069101894498, 9.26760542541493)",9.267605,45.506910,POINT (9.26761 45.50691),50.0,Parco Lambro - Cimiano


In [108]:
# A single site is repeated once per degree course (T_LAUREA), even within the
# same faculty: show university, faculty and degree for X == '1509973.54799'
(
    PolyUniversity
    .loc[PolyUniversity['X'].eq('1509973.54799')]
    [['DENOMINAZ', 'FACOLTA', 'T_LAUREA']]
)

Unnamed: 0,DENOMINAZ,FACOLTA,T_LAUREA
31,UNIVERSITA' DEGLI STUDI DI MILANO,MEDICINA,EDUCAZIONE PROFESSIONALE
261,UNIVERSITA' DEGLI STUDI DI MILANO,MEDICINA,MEDICINA E CHIRURGIA
262,UNIVERSITA' DEGLI STUDI DI MILANO,MEDICINA,INFERMERIESTICA


In [110]:
# Keep only the first row for each (university, faculty, location) combination,
# so different faculties at the same point survive while repeats are removed
PolyUniversity = PolyUniversity.loc[
    ~PolyUniversity.duplicated(subset=['DENOMINAZ', 'FACOLTA', 'geometry'], keep="first")
]
PolyUniversity

Unnamed: 0,COD_VIA,CIVICO,BARRATO,X,Y,DENOMINAZ,CATEGORIA,TIPOLOGIA,SOTTOTIPO,PROPRIETA,...,T_SEDE,MUNICIPIO,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
0,1523,3,,1514766.63459,5039559.3158,UNIVERSITA' DEGLI STUDI DI MILANO,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,EX ISTITUTO,9,14,NIGUARDA - CA' GRANDA - PRATO CENTENARO - Q.RE...,"(45.5092296526866, 9.18868084450537)",9.188681,45.509230,POINT (9.18868 45.50923),43.0,Niguarda - Cà Granda
1,4012,1,,1515116.2967,5033570.58859,UNIVERSITA' DEGLI STUDI DI MILANO,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,EX ISTITUTO,1,5,PORTA VIGENTINA - PORTA LODOVICA,"(45.4553179955712, 9.19297324638358)",9.192973,45.455318,POINT (9.19297 45.45532),80.0,Vigentina
2,2177,42,,1517013.18358,5036277.91279,UNIVERSITA' DEGLI STUDI DI MILANO,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,SEDE DIDATTICA,3,21,BUENOS AIRES - PORTA VENEZIA - PORTA MONFORTE,"(45.4796430022321, 9.21732651188301)",9.217327,45.479643,POINT (9.21733 45.47964),11.0,Buenos Aires - Venezia
4,530,1,,1514606.84139,5034224.99129,UNIVERSITA' DEGLI STUDI DI MILANO,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,SEDE DIDATTICA,1,1,DUOMO,"(45.4612190660429, 9.18647705815211)",9.186477,45.461219,POINT (9.18648 45.46122),21.0,Duomo
5,530,1,,1514609.14441,5034224.81499,UNIVERSITA' DEGLI STUDI DI MILANO,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,SEDE DIDATTICA,1,1,DUOMO,"(45.4612174312071, 9.18650650998961)",9.186507,45.461217,POINT (9.18651 45.46122),21.0,Duomo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
680,3072,4,,1516395.58579,5034265.4612,IED ISTITUTO EUROPEO DESIGN,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PRIVATA,...,SEDE DIDATTICA,4,26,XXII MARZO,"(45.4615437598319, 9.20935752663003)",9.209358,45.461544,POINT (9.20936 45.46154),83.0,XXII Marzo
687,3072,4,,1516395.58579,5034265.4612,IED ISTITUTO EUROPEO DESIGN,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PRIVATA,...,SEDE DIDATTICA,4,26,XXII MARZO,"(45.4615437598319, 9.20935752663003)",9.209358,45.461544,POINT (9.20936 45.46154),83.0,XXII Marzo
690,5291,19,,1511866.328,5032420.898,SAE INSTITUTE,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PRIVATA,...,ISTITUTO,6,45,MONCUCCO - SAN CRISTOFORO,"(45.4450321504593, 9.15138079547624)",9.151381,45.445032,POINT (9.15138 45.44503),65.0,San Cristoforo
693,122,28,,1514697.15561,5035450.18718,ACCADEMIA DI BELLE ARTI DI BRERA,ISTRUZIONE,UNIVERSITA',UNIVERSITA',PUBBLICA,...,ACCADEMIA,1,2,BRERA,"(45.4722451291283, 9.18766875739009)",9.187669,45.472245,POINT (9.18767 45.47225),9.0,Brera


In [114]:
# Narrow the university table to name, municipality, address, faculty,
# ownership, point geometry and neighbourhood, then renumber the rows from 0
PolyUniversity = (
    PolyUniversity[[
        "DENOMINAZ", "MUNICIPIO", "INDIRIZZO", "FACOLTA", "PROPRIETA",
        "geometry", "Neighborhood",
    ]]
    .reset_index(drop=True)
)

### There are no missing values

In [117]:
# University records with at least one missing value in any column
# (an empty table means nothing is missing).
nan_rows = PolyUniversity.loc[PolyUniversity.isna().any(axis="columns")]

nan_rows

Unnamed: 0,DENOMINAZ,MUNICIPIO,INDIRIZZO,FACOLTA,PROPRIETA,geometry,Neighborhood


In [119]:
# Write the cleaned university layer to disk as GeoJSON
PolyUniversity.to_file(filename="PolyUniversity.geojson", driver="GeoJSON")

## Coworking spaces

In [122]:
# Read the coworking-space layer from the local geojson_files/ folder
coworking = gpd.read_file("geojson_files/coworking_joined_f.geojson")

# Reproject onto the CRS of the neighbourhood polygons when the two layers
# disagree, so the join below compares coordinates in one reference system
if gdf_combined.crs != coworking.crs:
    coworking = coworking.to_crs(gdf_combined.crs)

# Left-join every space to the polygon that contains it, then discard the
# rows whose "Neighborhood" is still missing after the join
PolyCoworking = (
    gpd.sjoin(coworking, gdf_combined, how="left", predicate="within")
    .dropna(subset=["Neighborhood"])
)

# Display the joined table
PolyCoworking

Unnamed: 0,SPAZIO,TIPOLOGIA,Sede,E_MAIL,TELEFONO_1,TELEFONO_2,WEB,Aperto da,Orario di apertura,Numero postazioni,...,CAP,MUNICIPIO,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
0,A&B STAZIONE CENTRALE,SPAZIO COWORKING,"Via Sammartini, 33",sammartini@abserv.it,02 83991190,,www.abserv.it,marzo 2015,"Tutti i giorni, h24",10 (100 mq),...,20125,2,10,STAZIONE CENTRALE - PONTE SEVESO,"(45.490085099999995, 9.206211999999999)",9.206212,45.490085,POINT (9.20621 45.49009),14.0,Centrale
1,A&B PIAZZA ABBIATEGRASSO,SPAZIO COWORKING,"Via Valla, 16",valla@abserv.it,02 847421,,www.abserv.it,gennaio 2018,"Tutti i giorni, h24",10 (200 mq),...,20141,5,42,STADERA - CHIESA ROSSA - Q.RE TORRETTA - CONCA...,"(45.4343426, 9.1852725)",9.185272,45.434343,POINT (9.18527 45.43434),70.0,Stadera
2,AVANZI COWORKING,SPAZIO COWORKING,"Via Ampere, 61/A",coworking@avanzi.org,02 305160,,https://avanzi.org/coworking/,marzo 2011,Da lunedì a venerdì 09.00 - 18.00,120 (1000 mq),...,20131,3,22,CITTA' STUDI,"(45.4839086, 9.2262693)",9.226269,45.483909,POINT (9.22627 45.48391),16.0,Città Studi
3,OHWORKING MILANO ISOLA,SPAZIO COWORKING,"Via Medardo Rosso, 18",info@ohworking.com,02 97377020,,ohworking.com,marzo 2020,Da lunedì a venerdì 09.00 -18.00. Sabato e dom...,20 (250 mq),...,20159,9,11,ISOLA,"(45.4897117, 9.1851459)",9.185146,45.489712,POINT (9.18515 45.48971),32.0,Isola
4,CAMPO TEATRALE,SPAZIO COWORKING,"Via Casoretto, 41a",coworking@campoteatrale.it,02 26113133,,www.campoteatrale.it/coworking,ottobre 2001,Da lunedì a venerdì 9.00- 19.00,16 (622 mq),...,20131,3,20,LORETO - CASORETTO - NOLO,"(45.489377499999996, 9.2314013)",9.231401,45.489378,POINT (9.2314 45.48938),36.0,Loreto
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,M36,SPAZIO COWORKING,"Via Meda, 36",info@meda36.it,3930059260,,www.creativitybox.it,novembre 2099,"Dal lunedì al venerdì, 9.00- 19.00",18 (160mq),...,20141,5,43,TIBALDI,"(45.4416028, 9.1782342)",9.178234,45.441603,POINT (9.17823 45.4416),72.0,Tibaldi
106,M45,SPAZIO COWORKING,"Via Meda, 45",info@meda45.it,02 36524276,,www.coworkmilano.it,maggio 2021,"Dal lunedì al venerdì, 9.00- 19.00",35 (450mq),...,20141,5,43,TIBALDI,"(45.4400386, 9.179542399999999)",9.179542,45.440039,POINT (9.17954 45.44004),72.0,Tibaldi
107,UPSIDE COWORKING,SPAZIO COWORKING,"Via Marco Fabio Quintiliano, 24",upsidecoworking@gmail.com,02 66667710,,www.upsidecoworking.com,agosto 2021,"Tutti i giorni, h24",10 (148mq),...,20138,4,30,TALIEDO - MORSENCHIO - Q.RE FORLANINI,"(45.450351999999995, 9.2450297)",9.245030,45.450352,POINT (9.24503 45.45035),40.0,Mecenate
108,OFFICEFORYOU NOTO 10,SPAZIO COWORKING,"Via Noto, 10",info@officeforyou.it,02 49421229,,https://www.officeforyou.it/,gennaio 2021,"Tutti i giorni, h24",52 (420mq),...,20141,5,38,VIGENTINO - Q.RE FATIMA,"(45.4304638, 9.1995392)",9.199539,45.430464,POINT (9.19954 45.43046),61.0,Ripamonti


In [124]:
# Coworking spaces whose point coincides with another space's point;
# keep=False flags every member of a coincident group, not only the repeats
duplicates = PolyCoworking.loc[
    PolyCoworking.duplicated(subset="geometry", keep=False)
]

# These are distinct spaces sharing the same coordinates, not repeated records
duplicates

Unnamed: 0,SPAZIO,TIPOLOGIA,Sede,E_MAIL,TELEFONO_1,TELEFONO_2,WEB,Aperto da,Orario di apertura,Numero postazioni,...,CAP,MUNICIPIO,ID_NIL,NIL,Location,LONG_X_4326,LAT_Y_4326,geometry,index_right,Neighborhood
29,POLIHUB S.c.a.r.l.,SPAZIO COWORKING,"Via C. Durando, 39",polihub@legalmail.it,02 9177 3000,,www.polihub.it,aprile 2013,"Tutti i giorni, h24",10 ( 390 mq),...,20158,9,77,BOVISA,"(45.5066147, 9.1635609)",9.163561,45.506615,POINT (9.16356 45.50661),8.0,Bovisa
36,TECHINNOVA S.P.A.,SPAZIO COWORKING,"Via G. Durando, 38/a",segreteria@techinnova.eu,02 36706284,,https://techinnova.eu/,marzo 2015,"Dal lunedì al venerdì, 9.00-18.00",24 (400 mq),...,20158,9,77,BOVISA,"(45.5066147, 9.1635609)",9.163561,45.506615,POINT (9.16356 45.50661),8.0,Bovisa
50,BEPRIME24,SPAZIO COWORKING,"Via Santa Maria Valle, 3",info@doriko.it,3485194511,3494901549.0,www.beprime24.it,luglio 2020,"Fulltime: tutti i giorni, h 24. NetworkingPro:...",50 (350 mq),...,20123,1,1,DUOMO,"(45.460157699999996, 9.1842233)",9.184223,45.460158,POINT (9.18422 45.46016),21.0,Duomo
81,REGUS MILANO CARROBBIO,SPAZIO COWORKING,"Via S.Maria Valle, 3",info.italia@regus.com,,,https://www.regus.com/it-it/italy/milan/via-sa...,luglio 2008,"Da lunedì a venerdì, 9.00 -18.00. Tutti i gior...",240 (1447 mq),...,20123,1,1,DUOMO,"(45.460157699999996, 9.1842233)",9.184223,45.460158,POINT (9.18422 45.46016),21.0,Duomo


In [126]:
# Columns retained for the coworking layer; everything else
# (contacts, NIL codes, raw coordinates, join index) is dropped.
coworking_columns = [
    "SPAZIO",
    "MUNICIPIO",
    "Sede",
    "Orario di apertura",
    "Numero postazioni",
    "geometry",
    "Neighborhood",
]
PolyCoworking = PolyCoworking[coworking_columns]

### Four entries have no opening-hours information, which slightly lowers the quality of the data

In [129]:
# Select the coworking rows that have a missing value in at least one column.
# In the recorded output the only gaps are in "Orario di apertura".
nan_rows = PolyCoworking.loc[lambda frame: frame.isna().any(axis="columns")]

nan_rows

Unnamed: 0,SPAZIO,MUNICIPIO,Sede,Orario di apertura,Numero postazioni,geometry,Neighborhood
7,CONLAB - SPAZIO DI COWORKING,1,"Via S. Vittore, 43",,16 (260mq),POINT (9.16812 45.46392),Magenta - San Vittore
8,CO-SOCIAL,5,"Via U. Dini, 7",,10 (250mq),POINT (9.17987 45.42924),Stadera
27,MARE CULTURALE URBANO,7,"Via Gabetti, 15",,15 (150mq),POINT (9.12263 45.46818),Forze Armate
47,COWO BICOCCA 16,9,Viale Sarca n. 336/F Edificio 16,,20 (250 mq),POINT (9.21911 45.52308),Bicocca


In [131]:
# Write the cleaned coworking layer to a GeoJSON file in the working directory
PolyCoworking.to_file(filename="PolyCoworking.geojson", driver="GeoJSON")

## Libraries

In [134]:
# Read the libraries layer from the local geojson folder
libraries = gpd.read_file("geojson_files/ds1306-biblioteche.geojson")

# Reproject the libraries only when their CRS differs from the neighborhood polygons
if gdf_combined.crs != libraries.crs:
    libraries = libraries.to_crs(gdf_combined.crs)

# Attach to each library the polygon it falls within. The left join leaves
# "Neighborhood" as NaN for unmatched libraries, and those rows are then discarded.
PolyLibraries = gpd.sjoin(
    libraries, gdf_combined, how="left", predicate="within"
).loc[lambda joined: joined["Neighborhood"].notna()]

# Display the joined GeoDataFrame
PolyLibraries

Unnamed: 0,Biblioteche - Sede,Indirizzo,CAP,telefono,Orari,E-mail,Mezzi pubblici,Note,MUNICIPIO,ID_NIL,NIL,Location,long,lat,geometry,index_right,Neighborhood
0,Accursio,Piazzale Accursio 5,20151,02 88464300,"lunedi: chiuso, martedi e giovedi: 9.30-14.30,...",c.biblioaccursio@comune.milano.it,"bus 48, 57, 69, 90, 91 / tram 14, 1",Ritiro prenotazioni su appuntamento,8,71,VILLAPIZZONE - CAGNOLA - BOLDINASCO,"(45.491398, 9.14615)",9.14615,45.491398,POINT (9.14615 45.4914),81.0,Villapizzone
1,Affori,Viale Affori 21,20161,0288462522 - 0288462526,"lunedi: chiuso, martedi e giovedi:14.00-19.00,...",c.biblioaffori@comune.milano.it,n.d.,Ritiro prenotazioni su appuntamento,9,80,AFFORI,"(45.516629, 9.167394)",9.167394,45.516629,POINT (9.16739 45.51663),2.0,Affori
2,Baggio (sede provv. P.zza Stovani 3),sede provv. P.zza Stovani 3,20153,02 88465804,"lunedi: chiuso, martedi e giovedi:9.00-13.30, ...",c.bibliobaggio@comune.milano.it,n.d.,per ritirare le prenotazioni o accedere agli s...,7,55,BAGGIO - Q.RE DEGLI OLMI - Q.RE VALSESIA,"(45.463178, 9.091209)",9.091209,45.463178,POINT (9.09121 45.46318),3.0,Baggio
3,Bibliobus,Via Bezzecca 18,20135,3357762410,Dal lunedi al sabato dalle 9.00 alle 12.30,c.bibliobus@comune.milano.it,n.d.,sara' possibile ritirare i libri prenotati pri...,4,26,XXII MARZO,"(45.460661, 9.213189)",9.213189,45.460661,POINT (9.21319 45.46066),83.0,XXII Marzo
4,Biblioteca Crescenzago (sede provv. P.le Gover...,sede provv. P.le Governo Provvisorio 9,20123,02 88465808,"lunedi: chiuso, martedi:9.30-14.30, mercoledi ...",c.bibliocrescenzago@comune.milano.it,"bus 44, 51, 53, 56, 75 / M2 (Cimiano)",Ritiro prenotazioni su appuntamento,2,19,PADOVA - TURRO - CRESCENZAGO,"(45.49825, 9.225293)",9.225293,45.49825,POINT (9.22529 45.49825),45.0,Padova
5,Calvairate,Via Ciceri Visconti 1,20137,02 88465801,n.d.,c.bibliocalvairate@comune.milano.it,n.d.,Ritiro prenotazioni su appuntamento,4,28,UMBRIA - MOLISE - CALVAIRATE,"(45.455762, 9.22065)",9.22065,45.455762,POINT (9.22065 45.45576),78.0,Umbria - Molise
6,Cassina Anna,Via Sant'Arnaldo 17,20161,02 884.65800,"lunedi: chiuso, martedi e giovedi:9.30-14.30, ...",c.bibliocassinaanna@comune.milano.it,n.d.,Possibilita' di restituire i documenti presso ...,9,83,BRUZZANO,"(45.524983, 9.177166)",9.177166,45.524983,POINT (9.17717 45.52498),10.0,Bruzzano
7,Chiesa Rossa,Via San Domenico Savio 3,20142,02 88465991,"lunedi:chiusa, dal martedi al sabato:10.30-19.00",c.bibliochiesarossa@comune.milano.it,n.d.,restituire i libri e i multimediali in un cont...,5,42,STADERA - CHIESA ROSSA - Q.RE TORRETTA - CONCA...,"(45.43043, 9.174036)",9.174036,45.43043,POINT (9.17404 45.43043),70.0,Stadera
8,Dergano Bovisa,Via Baldinucci 76,20158,02 88465807,"lunedi: chiuso, martedi e giovedi: 9.30-14.30,...",c.bibliosantambrogio@comune.milano.it,n.d.,Puoi ritirarli solo su appuntamento concordand...,9,77,BOVISA,"(45.503666, 9.168445)",9.168445,45.503666,POINT (9.16844 45.50367),8.0,Bovisa
9,Fra Cristoforo,Via Fra Cristoforo 6,20142,02 88465806,"lunedi:14.30-19.00, dal martedi al venerdi:9.0...",c.bibliofracristoforo@comune.milano.it,n.d.,"per la restituzione, Ã¨ necessario recarsi in ...",5,42,STADERA - CHIESA ROSSA - Q.RE TORRETTA - CONCA...,"(45.436191, 9.171878)",9.171878,45.436191,POINT (9.17188 45.43619),70.0,Stadera


In [136]:
# Boolean mask of libraries whose geometry is shared with at least one other row
# (keep=False marks every member of such a group)
shares_geometry = PolyLibraries.duplicated(subset=["geometry"], keep=False)
duplicates = PolyLibraries[shares_geometry]

# Display the matches (the recorded run returned no rows)
duplicates

Unnamed: 0,Biblioteche - Sede,Indirizzo,CAP,telefono,Orari,E-mail,Mezzi pubblici,Note,MUNICIPIO,ID_NIL,NIL,Location,long,lat,geometry,index_right,Neighborhood


In [138]:
# Reduce the libraries layer to name, municipality, address, geometry and neighborhood
library_columns = [
    "Biblioteche - Sede",
    "MUNICIPIO",
    "Indirizzo",
    "geometry",
    "Neighborhood",
]
PolyLibraries = PolyLibraries[library_columns]

### There are no missing values

In [141]:
# Libraries with a missing value in any of the retained columns
has_missing_value = PolyLibraries.isna().any(axis="columns")
nan_rows = PolyLibraries[has_missing_value]

nan_rows

Unnamed: 0,Biblioteche - Sede,MUNICIPIO,Indirizzo,geometry,Neighborhood


In [143]:
# Write the cleaned libraries layer to a GeoJSON file in the working directory
PolyLibraries.to_file(filename="PolyLibraries.geojson", driver="GeoJSON")

## Suburban railway

In [146]:
# Read the suburban railway stations layer
trains = gpd.read_file("geojson_files/sistema_ferroviario_urbano_layer_0_stazioni__final.geojson")

# Bring the stations into the CRS of the neighborhood polygons if they differ
if gdf_combined.crs != trains.crs:
    trains = trains.to_crs(gdf_combined.crs)

# Left spatial join (station within polygon), then drop the stations
# that received no "Neighborhood" from the join
PolyTrains = (
    gpd.sjoin(trains, gdf_combined, how="left", predicate="within")
    .dropna(subset=["Neighborhood"])
)

# Display the joined GeoDataFrame
PolyTrains

Unnamed: 0,Stazione,Ubicazione,Linee,Note,MUNICIPIO,ID_NIL,NIL,LONG_X_4326,LAT_Y_4326,Location,geometry,index_right,Neighborhood
0,Milano Lancetti,Sotterranea,S1 S2 S5 S6 S13,Dati indicativi non ufficiali.,9,78,FARINI,9.177123,45.493741,"(45.4937410910517, 9.17712322085963)",POINT (9.17712 45.49374),23.0,Farini
1,Milano Centrale,Superficie,Interregionali,Dati indicativi non ufficiali.,2,10,STAZIONE CENTRALE - PONTE SEVESO,9.204197,45.486161,"(45.486161235226, 9.20419675790644)",POINT (9.2042 45.48616),14.0,Centrale
2,Milano Porta Garibaldi Sup.,Superficie,S11,Dati indicativi non ufficiali.,9,9,PORTA GARIBALDI - PORTA NUOVA,9.186751,45.485593,"(45.485593393828, 9.1867506782341)",POINT (9.18675 45.48559),24.0,GARIBALDI REPUBBLICA
3,Milano Lambrate,Superficie,S9,Dati indicativi non ufficiali.,3,22,CITTA' STUDI,9.236898,45.484872,"(45.4848723299094, 9.23689818170729)",POINT (9.2369 45.48487),16.0,Città Studi
4,Milano Porta Garibaldi Sott.,Sotterranea,S1 S2 S5 S6 S13,Dati indicativi non ufficiali.,9,9,PORTA GARIBALDI - PORTA NUOVA,9.186162,45.484334,"(45.4843344249528, 9.18616229197573)",POINT (9.18616 45.48433),24.0,GARIBALDI REPUBBLICA
5,Milano Repubblica,Sotterranea,S1 S2 S5 S6 S13,Dati indicativi non ufficiali.,2,10,STAZIONE CENTRALE - PONTE SEVESO,9.198682,45.480436,"(45.4804361647545, 9.19868242499003)",POINT (9.19868 45.48044),14.0,Centrale
6,Milano Porta Venezia,Sotterranea,S1 S2 S5 S6 S13,Dati indicativi non ufficiali.,3,21,BUENOS AIRES - PORTA VENEZIA - PORTA MONFORTE,9.208824,45.47579,"(45.4757903172868, 9.20882392812955)",POINT (9.20882 45.47579),11.0,Buenos Aires - Venezia
7,Milano Cadorna,Superficie,S3 S4,Dati indicativi non ufficiali.,1,7,MAGENTA - S. VITTORE,9.175635,45.468178,"(45.4681776130004, 9.17563533371968)",POINT (9.17564 45.46818),38.0,Magenta - San Vittore
8,Milano Porta Vittoria,Sotterranea,S1 S2 S5 S6 S13,Dati indicativi non ufficiali.,4,28,UMBRIA - MOLISE - CALVAIRATE,9.22361,45.45962,"(45.4596197232533, 9.22360963993889)",POINT (9.22361 45.45962),78.0,Umbria - Molise
9,Milano Porta Genova,Superficie,Interregionali,Dati indicativi non ufficiali.,6,50,PORTA GENOVA,9.169245,45.45317,"(45.4531702945483, 9.16924529212752)",POINT (9.16925 45.45317),74.0,Tortona


In [148]:
# Stations whose geometry coincides with another station's
# (keep=False returns every member of a coinciding group)
duplicates = PolyTrains.loc[
    lambda frame: frame.duplicated(subset=["geometry"], keep=False)
]

# Display the matches (the recorded run returned no rows)
duplicates

Unnamed: 0,Stazione,Ubicazione,Linee,Note,MUNICIPIO,ID_NIL,NIL,LONG_X_4326,LAT_Y_4326,Location,geometry,index_right,Neighborhood


In [150]:
# Reduce the stations to the Nome / Linee / geometry / Neighborhood schema
# and tag every row with its transport mode.
PolyTrains = (
    PolyTrains[["Stazione", "Linee", "geometry", "Neighborhood"]]
    .rename(columns={"Stazione": "Nome"})
    .assign(Mezzo="Treno")
)

### There are no missing values

In [153]:
# Stations that are not fully populated, i.e. with at least one missing value
nan_rows = PolyTrains[~PolyTrains.notna().all(axis=1)]

nan_rows

Unnamed: 0,Nome,Linee,geometry,Neighborhood,Mezzo


## Metro stops

In [156]:
# Read the metro stops layer
metros = gpd.read_file("geojson_files/tpl_metrofermate.geojson")

# Align the stops with the CRS of the neighborhood polygons when they differ
if gdf_combined.crs != metros.crs:
    metros = metros.to_crs(gdf_combined.crs)

# Left spatial join: each stop receives the polygon it lies within, if any
PolyMetros = gpd.sjoin(metros, gdf_combined, how="left", predicate="within")

# Keep only the stops that were actually assigned a neighborhood
PolyMetros = PolyMetros.loc[PolyMetros["Neighborhood"].notna()]

# Display the joined GeoDataFrame
PolyMetros

Unnamed: 0,id_amat,nome,linee,geometry,index_right,Neighborhood
0,869,BOLIVAR,4,POINT (9.15315 45.45532),74.0,Tortona
1,870,CALIFORNIA,4,POINT (9.16006 45.45755),74.0,Tortona
2,871,SEGNERI,4,POINT (9.13057 45.44654),27.0,Giambellino
3,872,TOLSTOJ,4,POINT (9.14802 45.4536),27.0,Giambellino
4,873,CONI ZUGNA,4,POINT (9.16491 45.45909),74.0,Tortona
...,...,...,...,...,...,...
123,993,BANDE NERE,1,POINT (9.13648 45.4615),4.0,Bande Nere
126,996,AMENDOLA,1,POINT (9.15097 45.47368),75.0,Tre Torri
127,997,AFFORI FN,3,POINT (9.16825 45.52168),2.0,Affori
128,998,AFFORI CENTRO,3,POINT (9.17398 45.51355),2.0,Affori


In [158]:
# Mask of metro stops sharing an identical geometry with another stop
# (keep=False marks every member of such a group)
shares_geometry = PolyMetros.duplicated(subset=["geometry"], keep=False)
duplicates = PolyMetros[shares_geometry]

# Display the matches (the recorded run returned no rows)
duplicates

Unnamed: 0,id_amat,nome,linee,geometry,index_right,Neighborhood


In [160]:
# Reduce the metro stops to the Nome / Linee / geometry / Neighborhood schema
# and tag every row with its transport mode.
PolyMetros = (
    PolyMetros[["nome", "linee", "geometry", "Neighborhood"]]
    .rename(columns={"nome": "Nome", "linee": "Linee"})
    .assign(Mezzo="Metro")
)

### There are no missing values

In [163]:
# Metro stops with a missing value in any column
nan_rows = PolyMetros.loc[lambda frame: frame.isna().any(axis="columns")]

nan_rows

Unnamed: 0,Nome,Linee,geometry,Neighborhood,Mezzo


## Bus stops

In [166]:
# Read the surface transport stops layer
busses = gpd.read_file("geojson_files/tpl_fermate.geojson")

# Reproject the stops only if their CRS differs from the neighborhood polygons
if gdf_combined.crs != busses.crs:
    busses = busses.to_crs(gdf_combined.crs)

# Left spatial join (stop within polygon). A stop matched by more than one
# polygon appears once per match; stops with no match are filtered out below.
PolyBusses = gpd.sjoin(
    busses, gdf_combined, how="left", predicate="within"
).loc[lambda joined: ~joined["Neighborhood"].isna()]

# Display the joined GeoDataFrame
PolyBusses

Unnamed: 0,id_amat,ubicazione,linee,geometry,index_right,Neighborhood
2,10018,"C.so Sempione, 83 prima di Via E. Filiberto",48,POINT (9.15805 45.48628),56.0,Portello
4,10022,Via Mosca prima V.Val Cavallina,63,POINT (9.07122 45.45214),41.0,Muggiano
5,10023,Via Mosca prima di Via Spinazzola,63,POINT (9.07367 45.45359),41.0,Muggiano
6,10024,"Via Mosca, 42 dopo Via Spinazzola",63,POINT (9.07352 45.45362),41.0,Muggiano
8,10026,Via Bergognone dopo via Foppa,68,POINT (9.161 45.45767),74.0,Tortona
...,...,...,...,...,...,...
4681,20159,S.Cristoforo M4,154,POINT (9.12916 45.44497),27.0,Giambellino
4682,20160,V.le Papiniano,154,POINT (9.16914 45.45823),38.0,Magenta - San Vittore
4683,20161,S. Sofia,154,POINT (9.1931 45.45739),80.0,Vigentina
4684,20162,F. Sforza,154,POINT (9.1953 45.45932),31.0,Guastalla


In [167]:
# Stops whose geometry appears on more than one row (keep=False returns all of them)
duplicates = PolyBusses.loc[
    lambda frame: frame.duplicated(subset=["geometry"], keep=False)
]

# Display them. In the recorded output this is a single stop matched to two
# neighborhoods; we keep the stop that is on the border between two
# neighborhoods for both of them.
duplicates

Unnamed: 0,id_amat,ubicazione,linee,geometry,index_right,Neighborhood
1098,11396,"Via Manduria, 102",79,POINT (9.17952 45.40155),0.0,Parco delle Abbazie
1098,11396,"Via Manduria, 102",79,POINT (9.17952 45.40155),64.0,Ronchetto delle Rane


In [169]:
# Reduce the bus stops to the Nome / Linee / geometry / Neighborhood schema
# (the stop location text becomes the name) and tag every row with its transport mode.
PolyBusses = (
    PolyBusses[["ubicazione", "linee", "geometry", "Neighborhood"]]
    .rename(columns={"ubicazione": "Nome", "linee": "Linee"})
    .assign(Mezzo="Bus")
)

### There are no missing values

In [173]:
# Bus stops that are not fully populated, i.e. with at least one missing value
nan_rows = PolyBusses[~PolyBusses.notna().all(axis=1)]

nan_rows

Unnamed: 0,Nome,Linee,geometry,Neighborhood,Mezzo


## Concatenate train, metro and bus stops

In [176]:
# Select the same columns, in the same order, from each transport layer
transport_columns = ["Nome", "Linee", "Mezzo", "geometry", "Neighborhood"]
gdf1, gdf2, gdf3 = (
    layer[transport_columns] for layer in (PolyTrains, PolyMetros, PolyBusses)
)

# Stack trains, metros and buses into one GeoDataFrame with a fresh 0..n-1 index
stacked_layers = pd.concat([gdf1, gdf2, gdf3], ignore_index=True)
PolyTransport = gpd.GeoDataFrame(stacked_layers)

# Print a preview and the total number of stops
print(PolyTransport.head())
print(f"Total rows: {len(PolyTransport)}")

# Write the combined transport layer to a GeoJSON file in the working directory
PolyTransport.to_file("PolyTransport.geojson", driver="GeoJSON")

                           Nome            Linee  Mezzo  \
0               Milano Lancetti  S1 S2 S5 S6 S13  Treno   
1               Milano Centrale   Interregionali  Treno   
2   Milano Porta Garibaldi Sup.              S11  Treno   
3               Milano Lambrate               S9  Treno   
4  Milano Porta Garibaldi Sott.  S1 S2 S5 S6 S13  Treno   

                   geometry          Neighborhood  
0  POINT (9.17712 45.49374)                Farini  
1   POINT (9.2042 45.48616)              Centrale  
2  POINT (9.18675 45.48559)  GARIBALDI REPUBBLICA  
3   POINT (9.2369 45.48487)           Città Studi  
4  POINT (9.18616 45.48433)  GARIBALDI REPUBBLICA  
Total rows: 3189


## Supermarkets

In [179]:
# Load the GeoDataFrame
# NOTE(review): absolute, user-specific path. The other layers in this notebook
# are read from the relative "geojson_files/" folder; consider moving this file
# there so the notebook runs on other machines.
supermarkets = gpd.read_file("C:/Users/edoar/Downloads/supermarkets.geojson")

# Ensure both GeoDataFrames have the same CRS
if gdf_combined.crs != supermarkets.crs:
    supermarkets = supermarkets.to_crs(gdf_combined.crs)

# Unlike the point-only layers, this one mixes POINT and POLYGON geometries.
# With predicate="within" a polygon is matched only when it lies entirely
# inside a single neighborhood, so a footprint crossing a boundary would get
# no match and be silently dropped by the NaN filter below. Join on one
# representative point per feature instead: it is guaranteed to lie inside the
# feature, and for a POINT it is the point itself.
footprints = supermarkets.geometry
anchor_points = supermarkets.set_geometry(footprints.representative_point().values)

# Perform the spatial join and drop features that fall in no neighborhood
PolySupermarkets = gpd.sjoin(anchor_points, gdf_combined, how="left", predicate="within")
PolySupermarkets = PolySupermarkets[~PolySupermarkets["Neighborhood"].isna()]

# Restore the original geometries so the saved layer is unchanged in shape.
# Label-based lookup keeps the alignment even when a feature was matched by
# more than one polygon and therefore appears on several rows.
PolySupermarkets = PolySupermarkets.set_geometry(
    footprints.loc[PolySupermarkets.index].values
)

# Inspect the resulting GeoDataFrame
PolySupermarkets

Unnamed: 0,id,@id,access,addr:city,addr:country,addr:floor,addr:hamlet,addr:housename,addr:housenumber,addr:postcode,...,toilets,toilets:access,toilets:wheelchair,type,website,wheelchair,zero_waste,geometry,index_right,Neighborhood
2,way/23911120,way/23911120,,,,,,,,,...,,,,,,yes,,"POLYGON ((9.15524 45.44501, 9.15522 45.44481, ...",65.0,San Cristoforo
3,way/24396508,way/24396508,,,,,,,,,...,,,,,https://www.esselunga.it/default.aspx?idPage=4...,,,"POLYGON ((9.24271 45.45815, 9.24274 45.45817, ...",40.0,Mecenate
4,way/26009942,way/26009942,,,,,,,,,...,,,,,https://www.esselunga.it/cms/negozi/ricerca-ne...,,,"POLYGON ((9.24774 45.47779, 9.24768 45.47794, ...",33.0,Lambrate
6,way/26971361,way/26971361,,,,,,,,,...,,,,,,,,"POLYGON ((9.12748 45.46851, 9.12728 45.46821, ...",4.0,Bande Nere
7,way/27134228,way/27134228,,,,,,,,,...,,,,,,,,"POLYGON ((9.13725 45.47133, 9.13721 45.47086, ...",69.0,Selinunte
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
733,node/11773518633,node/11773518633,,,,,,,,,...,,,,,,,,POINT (9.17162 45.46887),38.0,Magenta - San Vittore
734,node/11773889816,node/11773889816,,,,,,,,,...,,,,,,,,POINT (9.19694 45.50837),43.0,Niguarda - Cà Granda
738,node/11926789093,node/11926789093,,,,,,,,,...,,,,,,,,POINT (9.20447 45.45109),55.0,Porta Romana
739,node/11932616778,node/11932616778,,,,,,,,,...,,,,,,,,POINT (9.1736 45.48113),67.0,Sarpi


In [181]:
# Mask of supermarkets sharing an identical geometry with another row
# (keep=False marks every member of such a group)
shares_geometry = PolySupermarkets.duplicated(subset=["geometry"], keep=False)
duplicates = PolySupermarkets[shares_geometry]

# Display the matches (the recorded run returned no rows)
duplicates

Unnamed: 0,id,@id,access,addr:city,addr:country,addr:floor,addr:hamlet,addr:housename,addr:housenumber,addr:postcode,...,toilets,toilets:access,toilets:wheelchair,type,website,wheelchair,zero_waste,geometry,index_right,Neighborhood


In [183]:
# Keep only name, geometry and neighborhood; the remaining tag columns are dropped
supermarket_columns = ["name", "geometry", "Neighborhood"]
PolySupermarkets = PolySupermarkets[supermarket_columns]

### Four supermarkets have no name listed

In [186]:
# Supermarkets with a missing value in any retained column
# (in the recorded output the missing field is always "name")
has_missing_value = PolySupermarkets.isna().any(axis="columns")
nan_rows = PolySupermarkets[has_missing_value]

nan_rows

Unnamed: 0,name,geometry,Neighborhood
151,,"POLYGON ((9.10075 45.50395, 9.10101 45.50384, ...",Gallaratese
409,,POINT (9.22037 45.46562),XXII Marzo
527,,POINT (9.18773 45.47306),Brera
716,,POINT (9.22689 45.47083),Città Studi


In [188]:
# Write the cleaned supermarkets layer to a GeoJSON file in the working directory
PolySupermarkets.to_file(filename="PolySupermarkets.geojson", driver="GeoJSON")