In [229]:
# import necessary packages
import numpy as np
import pandas as pd
import geopandas as gpd
import geoplot
import geoplot.crs as gcrs
import folium
import contextily as cx

# define functions
def trim_all_columns(df):
    """
    Trim whitespace from ends of each value across all series in dataframe
    """
    trim_strings = lambda x: x.strip() if isinstance(x, str) else x
    return df.map(trim_strings)

# import data sets
    # import geojson of base map of statistical areas
base_map_data = gpd.read_file("./Data/aree-statistiche.geojson")

    # import population data
population = pd.read_csv("./Data/popolazione-per-quartiere-zona-sesso-eta-cittadinanza-provincia-di-nascita-serie.csv", sep = ';', on_bad_lines='warn')
# clean the imported population data
# drop the byte-order mark first: str.strip() does not treat '\ufeff' as
# whitespace, so stripping before removing it could leave padding behind
population.columns = population.columns.str.replace('\ufeff', '').str.strip()
# trim_all_columns returns a NEW frame, so the result has to be assigned back
# (calling it without assignment leaves the cell values untrimmed)
population = trim_all_columns(population)
# rename the year columns with a more descriptive suffix
years = [str(year) for year in range(1987, 2023 + 1)]
population = population.rename(columns={year: f"{year}_Population" for year in years})

    # import airbnb data
airbnb = pd.read_csv("./Data/bologna-rilevazione-airbnb.csv", on_bad_lines='warn')

    # import wifi data
wifi = pd.read_csv("./Data/bolognawifi-elenco-hot-spot.csv", on_bad_lines='warn')

# scratchpad to view data


['1987_Population', '1988_Population', '1989_Population', '1990_Population', '1991_Population', '1992_Population', '1993_Population', '1994_Population', '1995_Population', '1996_Population', '1997_Population', '1998_Population', '1999_Population', '2000_Population', '2001_Population', '2002_Population', '2003_Population', '2004_Population', '2005_Population', '2006_Population', '2007_Population', '2008_Population', '2009_Population', '2010_Population', '2011_Population', '2012_Population', '2013_Population', '2014_Population', '2015_Population', '2016_Population', '2017_Population', '2018_Population', '2019_Population', '2020_Population', '2021_Population', '2022_Population', '2023_Population']


In [239]:
# clean and combine the datasets

    # population
# aggregate population at the neighborhood level (one row per 'Quartiere',
# one summed column per year)
population_columns = [f"{year}_Population" for year in years]
pop_agg = population.groupby('Quartiere')[population_columns].sum()
# join the base map data with the population data; every statistical area
# row receives the totals of the neighborhood named in its 'quartiere' column
base_map_w_pop = base_map_data.set_index('quartiere').join(pop_agg)

    # AIRBNB
# extract the latitude and longitude of each airbnb; 'Coordinate' is split on
# the first comma and both halves are converted from text to numbers so the
# 'lat'/'long' columns are numeric rather than strings
airbnb_coords = airbnb['Coordinate'].str.split(',', n=1, expand=True)
airbnb['lat'] = pd.to_numeric(airbnb_coords[0].str.strip())
airbnb['long'] = pd.to_numeric(airbnb_coords[1].str.strip())
# turn the airbnb data into a geo data frame
airbnb_geo = gpd.GeoDataFrame(
    airbnb, geometry=gpd.points_from_xy(airbnb['long'], airbnb['lat']), crs="EPSG:4326"
)

    # WIFI
# turn the wifi data into a geo data frame
wifi_geo = gpd.GeoDataFrame(
    wifi, geometry=gpd.points_from_xy(wifi.x, wifi.y), crs="EPSG:4326"
)
# intersect the hotspot points with the statistical areas so that each
# hotspot row carries the attributes (incl. 'cod_quar') of the area it is in
wifi_in_areas = gpd.overlay(base_map_w_pop, wifi_geo, how='intersection', keep_geom_type=False)
# count the hotspots per neighborhood; size() counts rows, so a hotspot with
# a missing 'Hostname' is still counted (count() on that column would skip it)
wifi_agg = wifi_in_areas.groupby('cod_quar').size().rename('Hotspot_Count')

# show the hotspot count per neighborhood
wifi_agg

cod_quar
11    16
12    37
13    98
14    35
15    96
16    21
Name: Hotspot_Count, dtype: int64

In [243]:
# map the data
    # create a base map at the neighborhood level
base = base_map_w_pop.dissolve('quartiere')

    # plot airbnbs
# TODO: overlay the airbnb listings (airbnb_geo) on the map

    # plot wifi per 1000 inhabitants
# add the hotspot counts to the neighborhood base map
base_w_wifi = base.set_index('cod_quar').join(wifi_agg)
# the join above is a left join: a neighborhood missing from wifi_agg gets
# NaN, which really means "no hotspots", so record it as 0
base_w_wifi['Hotspot_Count'] = base_w_wifi['Hotspot_Count'].fillna(0).astype(int)
# compute the wifi connection points (hotspots) per 1000 inhabitants,
# using the 2023 population figures
base_w_wifi['Hotspots_per_1000'] = base_w_wifi['Hotspot_Count'] / (base_w_wifi['2023_Population'] / 1000)
# create an interactive choropleth of the hotspots per 1000 people at the neighborhood level
wifi_per_cap_chlor = base_w_wifi.explore(
    column='Hotspots_per_1000',
    cmap='RdBu',
    tooltip=['cod_quar', '2023_Population', 'Hotspot_Count', 'Hotspots_per_1000'],
    tiles='CartoDB positron',
    legend=True,
)
# display the choropleth with the hotspots overlaid
wifi_geo.explore(m=wifi_per_cap_chlor)