In [1]:
from pathlib import Path

import folium
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString, Polygon

ERROR 1: PROJ: proj_create_from_database: Open of /opt/conda/share/proj failed


In [2]:
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so it also hides any deprecation or
# chained-assignment warnings that later cells might raise.
warnings.filterwarnings("ignore")

# Data Retrieval

In [3]:
# Interest categories the user can switch on or off (all enabled here).
# The insertion order of the keys is the order in which categories are processed later.
user_preferences = dict(
    coffee_shops=True,
    bakery=True,
    vintage=True,
    library=True,
    tourism=True,
    nightlife=True,
    sports=True,
    theatre=True,
    restaurants=True,
)

In [4]:
def build_overpass_query(user_preferences, bbox=None):
    """Build an Overpass QL query selecting nodes for every enabled interest.

    Args:
        user_preferences: Mapping of interest name to a truthy/falsy flag.
            Only truthy entries contribute statements. Interest names that
            are not in the lookup table below are ignored silently.
        bbox: Optional ``(south, west, north, east)`` sequence in degrees.
            When omitted, the box ``(55.55, 12.45, 55.72, 12.70)`` is used.

    Returns:
        The query as a string: a ``[out:json]`` header, a union of
        ``node[...]`` statements (one per tag filter), a recurse-down step
        and a final ``out;``.
    """
    # Tag filters per interest; list order is the order the statements are emitted in.
    interest_filters = {
        'coffee_shops': ['"amenity"="cafe"'],
        'bakery': ['"shop"="bakery"'],
        'restaurants': ['"amenity"="restaurant"'],
        'vintage': ['"shop"="second_hand"'],
        'library': ['"amenity"="library"'],
        'tourism': ['"tourism"="museum"', '"tourism"="attraction"'],
        'nightlife': ['"amenity"="pub"', '"amenity"="nightclub"', '"amenity"="bar"'],
        'sports': ['"leisure"="sports_centre"'],
        'theatre': ['"amenity"="theatre"'],
    }

    if bbox is None:
        # Kept as a literal so the default query text stays exactly as before
        # (formatting floats would turn 12.70 into 12.7).
        bbox_clause = "(55.55, 12.45, 55.72, 12.70)"
    else:
        south, west, north, east = bbox
        bbox_clause = f"({south}, {west}, {north}, {east})"

    statements = [
        f"node[{tag_filter}]{bbox_clause};"
        for interest, selected in user_preferences.items()
        if selected
        for tag_filter in interest_filters.get(interest, ())
    ]

    return "[out:json];\n(" + "".join(statements) + ");\n(._;>;);\nout;"

In [5]:
def get_data_from_overpass(query, timeout=180):
    """Run an Overpass QL query against overpass-api.de and return the parsed JSON.

    Args:
        query: Overpass QL query text, sent as the ``data`` request parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.get(
        "https://overpass-api.de/api/interpreter",
        params={"data": query},
        timeout=timeout,
    )
    # Fail on the HTTP status instead of letting an error page reach the JSON decoder.
    response.raise_for_status()
    return response.json()

def create_geopandas_dataframe(data):
    """Turn an Overpass JSON result into a point GeoDataFrame in EPSG:4326.

    Every element that has both ``lat`` and ``lon`` becomes one row whose
    geometry is ``Point(lon, lat)``; each tag becomes a column. Elements
    without coordinates are skipped. Elements without a ``tags`` entry are
    kept with geometry only.

    Args:
        data: Decoded Overpass response; must contain an ``'elements'`` list.

    Returns:
        A GeoDataFrame with a ``geometry`` column plus one column per tag key
        seen in the input. An input with no usable elements yields an empty
        GeoDataFrame that has only the geometry column.
    """
    features = []
    all_tags = {}  # dict used as an ordered set of every tag key seen
    for element in data['elements']:
        if 'lat' not in element or 'lon' not in element:
            continue
        # Not every element is guaranteed to carry tags.
        tags = element.get('tags', {})
        feature = {'geometry': Point(element['lon'], element['lat'])}
        feature.update(tags)
        all_tags.update(dict.fromkeys(tags))
        features.append(feature)

    if not features:
        # With no rows there is no 'geometry' column for the constructor to pick up.
        return gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")

    gdf = gpd.GeoDataFrame(features, geometry='geometry', crs="EPSG:4326")

    # Re-assign every tag column from the raw dicts so that a tag missing on a
    # row is stored as None (the dict.get default) rather than the
    # constructor's fill value.
    for tag_key in all_tags:
        gdf[tag_key] = [feature.get(tag_key) for feature in features]
    return gdf

In [7]:
# Fetch each selected interest separately and store it as data/<interest>.geojson
output_dir = Path("data")
output_dir.mkdir(parents=True, exist_ok=True)  # a fresh checkout may not have data/ yet

for preference, selected in user_preferences.items():
    if not selected:
        # A deselected interest would produce a query with an empty union; skip the request.
        continue
    overpass_query = build_overpass_query({preference: selected})
    data = get_data_from_overpass(overpass_query)
    gdf = create_geopandas_dataframe(data)
    # Save GeoDataFrame to GeoJSON file
    gdf.to_file(str(output_dir / f"{preference}.geojson"), driver='GeoJSON')

### Amenity

In [8]:
# Read back the per-interest GeoJSON files for the interests that are queried via the "amenity" tag.
coffee_shops_gdf = gpd.read_file("data/coffee_shops.geojson")
library_gdf = gpd.read_file("data/library.geojson")
restaurants_gdf = gpd.read_file("data/restaurants.geojson")
nightlife_gdf = gpd.read_file("data/nightlife.geojson")
theatre_gdf = gpd.read_file("data/theatre.geojson")

### Shops

In [9]:
# Read back the per-interest GeoJSON files for the interests that are queried via the "shop" tag.
vintage_gdf = gpd.read_file("data/vintage.geojson")
bakery_gdf = gpd.read_file("data/bakery.geojson")

### Tourism

In [10]:
# Read back the GeoJSON file for the interest that is queried via the "tourism" tag (museums and attractions).
tourism_gdf = gpd.read_file("data/tourism.geojson")

### Leisure

In [11]:
# Read back the GeoJSON file for the interest that is queried via the "leisure" tag (sports centres).
sports_gdf = gpd.read_file("data/sports.geojson")

# Data Cleaning

In [12]:
# Reduce the sports frame to the column layout used by the other categories.
# Built in one chain so no columns are assigned onto a slice of sports_gdf
# and nothing is renamed in place.
sports_gdf_n = (
    sports_gdf[['leisure', 'name', 'website', 'phone', 'opening_hours', 'wheelchair']]
    .assign(**{
        # Food-related columns are filled with None to match the shared layout.
        'cuisine': None,
        'diet:vegan': None,
        'diet:vegetarian': None,
        'geometry': sports_gdf['geometry'],
    })
    .rename(columns={'leisure': 'interest'})
)
#sports_gdf_n

In [13]:
# Keep only the shared schema columns for the coffee shop data.
coffee_shop_columns = [
    'amenity', 'name', 'website', 'phone', 'opening_hours', 'wheelchair',
    'cuisine', 'diet:vegan', 'diet:vegetarian', 'geometry',
]
coffee_shops_gdf_n = coffee_shops_gdf[coffee_shop_columns]
#coffee_shops_gdf_n.head()

In [14]:
# Keep only the shared schema columns for the restaurant data.
restaurant_columns = [
    'amenity', 'name', 'website', 'phone', 'opening_hours', 'wheelchair',
    'cuisine', 'diet:vegan', 'diet:vegetarian', 'geometry',
]
restaurants_gdf_n = restaurants_gdf[restaurant_columns]
#restaurants_gdf_n

In [15]:
# Keep only the shared schema columns for the nightlife data.
nightlife_columns = [
    'amenity', 'name', 'website', 'phone', 'opening_hours', 'wheelchair',
    'cuisine', 'diet:vegan', 'diet:vegetarian', 'geometry',
]
nightlife_gdf_n = nightlife_gdf[nightlife_columns]
#nightlife_gdf_n

In [16]:
# Reduce the theatre frame to the column layout used by the other categories.
# Built in one chain so no columns are assigned onto a slice of theatre_gdf.
theatre_gdf_n = (
    theatre_gdf[['amenity', 'name', 'website', 'phone', 'opening_hours', 'wheelchair']]
    .assign(**{
        # Food-related columns are filled with None to match the shared layout.
        'cuisine': None,
        'diet:vegan': None,
        'diet:vegetarian': None,
        'geometry': theatre_gdf['geometry'],
    })
)
#theatre_gdf_n

In [17]:
# Reduce the library frame to the column layout used by the other categories.
# Built in one chain so no columns are assigned onto a slice of library_gdf.
library_gdf_n = (
    library_gdf[['amenity', 'name', 'website', 'phone', 'opening_hours', 'wheelchair']]
    .assign(**{
        # Food-related columns are filled with None to match the shared layout.
        'cuisine': None,
        'diet:vegan': None,
        'diet:vegetarian': None,
        'geometry': library_gdf['geometry'],
    })
)
#library_gdf_n

In [18]:
# Frames that all came from the OSM "amenity" tag
dfs = [theatre_gdf_n, coffee_shops_gdf_n, library_gdf_n, nightlife_gdf_n, restaurants_gdf_n]

# Stack them row-wise with a fresh index, then expose the amenity value under the shared "interest" name
amenity_df = (
    pd.concat(dfs, axis=0, ignore_index=True)
    .rename(columns={'amenity': 'interest'})
)

In [19]:
#amenity_df

In [20]:
# Bring the tourism frame to the shared column layout:
# placeholder food columns (all None), the common column subset, and "tourism" exposed as "interest".
new_columns = ['cuisine', 'diet:vegan', 'diet:vegetarian']
columns_to_keep = ['tourism','name', 'website','phone','opening_hours','wheelchair', 'cuisine', 'diet:vegan','diet:vegetarian','geometry']

tourism_with_placeholders = tourism_gdf.assign(**dict.fromkeys(new_columns, None))
tourism_gdf = tourism_with_placeholders[columns_to_keep].rename(columns={"tourism": "interest"})

#tourism_gdf.head()

In [21]:
# Cleaning the shop categories: bakery_gdf and vintage_gdf
# Column layout shared by both shop frames ("shop" is renamed to "interest" after they are merged).
columns_to_keep_shop = ['shop','name', 'website','phone','opening_hours','wheelchair', 'cuisine', 'diet:vegan','diet:vegetarian','geometry']

# Keep only the shared columns; this assumes every listed column is present in the bakery data.
bakery_gdf = bakery_gdf[columns_to_keep_shop]
#bakery_gdf.head()

In [22]:
# Give the vintage frame the diet placeholder columns (all None), then cut it down to the shared shop layout
new_columns = ['diet:vegan', 'diet:vegetarian']
vintage_with_placeholders = vintage_gdf.assign(**dict.fromkeys(new_columns, None))

vintage_gdf = vintage_with_placeholders[columns_to_keep_shop]
#vintage_gdf.head()

In [23]:
# Merging shop categories
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way to stack frames.
shops_gdf = (
    pd.concat([bakery_gdf, vintage_gdf], ignore_index=True)
    .rename(columns={"shop": "interest"})
)

In [24]:
# Merging all the data
# One frame per tag family: amenities, sports centres (leisure), shops and tourism.
dfs1 = [amenity_df, sports_gdf_n, shops_gdf, tourism_gdf]  # List of DataFrames to concatenate

# Stack all categories row-wise (not only the amenities) and renumber the index from 0.
cleaned = pd.concat(dfs1, axis=0, ignore_index=True)

In [25]:
# Wrap the concatenated frame as a GeoDataFrame, naming the geometry column explicitly, and save it as GeoJSON.
# NOTE(review): no crs is passed here — presumably it carries over from the geometry of the source frames; confirm.
cleaned_gdf = gpd.GeoDataFrame(cleaned, geometry='geometry')
cleaned_gdf.to_file("data/cleaned_data.geojson", driver='GeoJSON')

In [27]:
# Check that the merged frame exposes only the shared schema columns.
cleaned_gdf.columns

Index(['interest', 'name', 'website', 'phone', 'opening_hours', 'wheelchair',
       'cuisine', 'diet:vegan', 'diet:vegetarian', 'geometry'],
      dtype='object')

In [28]:
# List the distinct category labels that ended up in the "interest" column.
cleaned_gdf['interest'].unique()

array(['theatre', 'cafe', 'library', 'pub', 'bar', 'nightclub',
       'restaurant', 'sports_centre', 'bakery', 'second_hand', 'museum',
       'attraction'], dtype=object)

In [29]:
# Row and column count of the merged dataset.
cleaned_gdf.shape

(2762, 10)