From https://github.com/blackmad/neighborhoods/blob/master/paris.geojson we export the geojson code for Paris districts.

Note that the initial raw content would only provide one document when uploaded as a collection in MongoDB Compass. Hence, we followed the second procedure established in troubleshooting section of lectures (https://github.com/Ironhack-data-bcn-january-2023/lectures/blob/main/troubleshooting/importing-into-db/importing-geojsons-into-mongodb.md), and manually removed the first curly brackets pair of the code, in order to maintain only the list with all polygons instead of a multipolygonal dictionary. After that, we successfully uploaded the JSON file to Paris collection in MongoDB.

# With geojson not including features (imported from Mongo DB)

In [115]:
# Libraries
from pymongo import MongoClient
import os
import requests
import json
from dotenv import load_dotenv
from bson.json_util import dumps
import pandas as pd
import folium
from folium import Choropleth, Circle, Marker, Icon, Map, TileLayer
from bs4 import BeautifulSoup
import shapely
from shapely import Polygon
from shapely.geometry import shape, Point

from paris_geoqueries import *

In [48]:
load_dotenv()

True

In [49]:
# Foursquare key to get API queries.
foursquare_key = os.getenv('fsq_key')

In [2]:
# Paris collection
client = MongoClient('localhost:27017')
db = client.get_database('ironhack')
paris = db.get_collection('paris')
paris = list(paris.find())

In [113]:
# Arrondissements scraping:
url = 'https://en.wikipedia.org/wiki/Arrondissements_of_Paris'
html = requests.get(url)
soup = BeautifulSoup(html.content, "html.parser")
table = soup.find_all("table", attrs = {"class":"wikitable"})
arrondissements_info = pd.read_html(table[0].prettify())[0]
arrondissements_info['Area (km  2  )'] = arrondissements_info['Area (km  2  )'].apply(lambda x: x.split(' km')[0])
arrondissements_info

Unnamed: 0,Coat of arms,"Arrondissement (R for Right Bank , L for Left Bank )",Name,Area (km 2 ),Population (2017 estimate),Density (2017) (inhabitants per km 2 ),Peak of population,Mayor (2020–2026)
0,,1st (I er ) R Administratively part of Pa...,Louvre,5.59 km 2 (2.16 sq mi),100196,17924,before 1861,Ariel Weil ( PS )
1,,2nd (II e ) R Administratively part of Pa...,Bourse,5.59 km 2 (2.16 sq mi),100196,17924,before 1861,Ariel Weil ( PS )
2,,3rd (III e ) R Administratively part of P...,Temple,5.59 km 2 (2.16 sq mi),100196,17924,before 1861,Ariel Weil ( PS )
3,,4th (IV e ) R Administratively part of Pa...,Hôtel-de-Ville,5.59 km 2 (2.16 sq mi),100196,17924,before 1861,Ariel Weil ( PS )
4,,5th (V e ) L,Panthéon,2.541 km 2 (0.981 sq mi),59631,23477,1911,Florence Berthout ( DVD )
5,,6th (VI e ) L,Luxembourg,2.154 km 2 (0.832 sq mi),41976,19524,1911,Jean-Pierre Lecoq ( LR )
6,,7th (VII e ) L,Palais-Bourbon,4.088 km 2 (1.578 sq mi),52193,12761,1926,Rachida Dati ( LR )
7,,8th (VIII e ) R,Élysée,3.881 km 2 (1.498 sq mi),37368,9631,1891,Jeanne d'Hauteserre ( LR )
8,,9th (IX e ) R,Opéra,2.179 km 2 (0.841 sq mi),60071,27556,1901,Delphine Bürkli ( DVD )
9,,10th (X e ) R,Entrepôt,2.892 km 2 (1.117 sq mi),90836,31431,1881,Alexandra Cordebard ( PS )


In [4]:
paris

[{'_id': ObjectId('63e7a74af7a1278c06ef1757'),
  'type': 'Feature',
  'properties': {'name': 'Bourse',
   'cartodb_id': 2,
   'created_at': '2013-02-26T07:07:16.384Z',
   'updated_at': '2013-02-26T18:36:18.682Z'},
  'geometry': {'type': 'MultiPolygon',
   'coordinates': [[[[2.339999, 48.87196],
      [2.34789, 48.870689],
      [2.35433, 48.869308],
      [2.350979, 48.863411],
      [2.330292, 48.868294],
      [2.328211, 48.86972],
      [2.328072, 48.869923],
      [2.339999, 48.87196]]]]}},
 {'_id': ObjectId('63e7a74af7a1278c06ef1758'),
  'type': 'Feature',
  'properties': {'name': 'Temple',
   'cartodb_id': 3,
   'created_at': '2013-02-26T07:07:16.384Z',
   'updated_at': '2013-02-26T18:36:24.060Z'},
  'geometry': {'type': 'MultiPolygon',
   'coordinates': [[[[2.36236, 48.867905],
      [2.364764, 48.866436],
      [2.366694, 48.86319],
      [2.368454, 48.85582],
      [2.364335, 48.856441],
      [2.361631, 48.857262],
      [2.358626, 48.858757],
      [2.356825, 48.860111],
   

In [197]:
district_list = [paris[i]['properties']['name'] for i in range(len(paris))]
print(district_list)

['Bourse', 'Temple', 'Panthéon', 'Luxembourg', 'Palais-Bourbon', 'Élysée', 'Opéra', 'Enclos-St-Laurent', 'Popincourt', 'Gobelins', 'Observatoire', 'Vaugirard', 'Passy', 'Batignolles-Monceau', 'Butte-Montmartre', 'Buttes-Chaumont', 'Louvre', 'Hôtel-de-Ville', 'Reuilly', 'Ménilmontant']


In [14]:
# Feature geojson is the one used to plot
with open('feature.geojson') as geo_file:
    geo_feature = json.load(geo_file)

# ON THE MAP, AFTER MAKING SOME DATAFRAME WITH VARIABLES AND FREQUENCIES AND DEFINING THE PONDERATION, PLOT COLORED DISTRICTS WITH SOME SCALE!

In [202]:
paris_map = Map(location = [48.86, 2.35], zoom_start = 11)
folium.Choropleth(
    geo_data=geo_feature,
    key_on="feature.properties.name",
).add_to(paris_map)
paris_map

# FOURSQUARE API REQUESTS!!! 
WILL PROVIDE SOME COORDINATES THAT WILL HAVE TO BE PASSED TO DISTRICT CHECKER!

In [173]:
def foursquare_query (query, place, limit=10):
    url = f"https://api.foursquare.com/v3/places/search?query={query}&near={place}&limit={limit}"

    headers = {
        "accept": "application/json",
        "Authorization": foursquare_key
    }

    # full response
    response = requests.get(url, headers=headers).json()['results']
    # name of the establishment
    response[0]['name']
    # coordinates
    response[0]['geocodes']['main']['latitude']
    response[0]['geocodes']['main']['longitude']

    request_points = []
    for i in range(len(response)):
        request_points.append(Point(response[i]['geocodes']['main']['longitude'], response[i]['geocodes']['main']['latitude']))

    d = {'Coordinates': request_points, 'Name': response[i]['name'], 'Type': response[i]['categories'][0]['name']}
    df = pd.DataFrame(data=d)
    return df

In [174]:
foursquare_query('Starbucks', 'Paris', 2)

Unnamed: 0,Coordinates,Name,Type
0,POINT (2.354761 48.856877),Starbucks,Coffee Shop
1,POINT (2.348125 48.858843),Starbucks,Coffee Shop


In [150]:
dogs = foursquare_query('Dog hairdresser', 'Paris', 5)


In [222]:
party = foursquare_query('Party clubs', 'Paris', 50)

In [223]:
party

Unnamed: 0,Coordinates,Name,Type
0,POINT (2.344213 48.864026),Federation Francoph des Clubs Pyramide Ffcp,Organization
1,POINT (2.353804 48.845937),Federation Francoph des Clubs Pyramide Ffcp,Organization
2,POINT (2.375769 48.853512),Federation Francoph des Clubs Pyramide Ffcp,Organization
3,POINT (2.28846 48.876257),Federation Francoph des Clubs Pyramide Ffcp,Organization
4,POINT (2.388956 48.896342),Federation Francoph des Clubs Pyramide Ffcp,Organization
5,POINT (2.347822 48.851513),Federation Francoph des Clubs Pyramide Ffcp,Organization
6,POINT (2.303828 48.902613),Federation Francoph des Clubs Pyramide Ffcp,Organization
7,POINT (2.361356 48.853002),Federation Francoph des Clubs Pyramide Ffcp,Organization
8,POINT (2.346487 48.871101),Federation Francoph des Clubs Pyramide Ffcp,Organization
9,POINT (2.368574 48.867735),Federation Francoph des Clubs Pyramide Ffcp,Organization


# IMPORTANT PIECE OF CODE: CHECK IF A POINT BELONGS TO A DISTRICT!!!

In [213]:
def spot_finder (df): # accepts as much df as variables of interest for the new office location.
    '''
    Function that counts instances per district in Paris.
    Takes the dataframe obtained in the 4 square geoquery
    Returns a dictionary with the count of establishments per district.
    '''

    # We append the list of districts as keys in a dict, and set a default value of 0 for each key.
    district_list = [paris[i]['properties']['name'] for i in range(len(paris))]
    dict_count = {}
    for i in district_list:
        dict_count[i] = 0

    with open('feature.geojson') as geo_file:
        geo_feature = json.load(geo_file)

    # Iteration through each pair of coordinates to see what Paris district they match, being districts defined in geo_feature file.
    for establishment in df['Coordinates']:
        for feature in geo_feature['features']:
            polygon = shape(feature['geometry'])
            if polygon.contains(establishment):
                # Addition to the correspondent district in case the coordinate matches.
                if feature["properties"]["name"] in dict_count:
                    dict_count[feature["properties"]["name"]] += 1

    count_df = pd.DataFrame.from_dict(dict_count, orient="index").reset_index(drop=False)
    count_df.rename(columns={'index': 'District', 0: 'Count'}, inplace=True, errors='raise')

    return count_df

In [224]:
party_district_distribution = spot_finder (party)
party_district_distribution

Unnamed: 0,District,Count
0,Bourse,1
1,Temple,0
2,Panthéon,2
3,Luxembourg,0
4,Palais-Bourbon,0
5,Élysée,2
6,Opéra,2
7,Enclos-St-Laurent,0
8,Popincourt,2
9,Gobelins,4


In [226]:
# Feature geojson is the one used to plot
def district_distribution (count_df):
    '''
    Function that plots the establishments distribution in Paris' districts.
    Takes the count dataframe obtained in the function above, with the count of establishments per district
    Returns the map plot of this distribution.
    '''
    paris_map = Map(location = [48.86, 2.35], zoom_start = 11)
    folium.Choropleth(
        geo_data=geo_feature,
        data=count_df,
        columns=count_df.columns,
        key_on="feature.properties.name",
    ).add_to(paris_map)
    
    return paris_map

In [227]:
district_distribution(party_district_distribution)

# Distance criteria
Airports, dog hairdresser and basketball stadiums

In [247]:
def distance_criteria (query, limit=10):
    



    test_list = [distance_query(query, district, limit) for district in district_list]
    return test_list


In [248]:
x = distance_criteria('Airport', 3)

In [333]:
def distance_criteria (query):
    '''
    Function defined to run into distance criteria function, running specific queries at 4 square to get distances from
    each district to the place queried.
    It takes the query as argument
    Returns a dataframe with the distances ordered and the attribution of points according to the ponderation system.
    '''

    # created a dict that stores the center point of each district, to take it as reference for distances
    district_centre_dict = {}
    for feature in geo_feature['features']:
        polygon = shape(feature['geometry'])
        district_centre_dict[feature['properties']['name']] = (polygon.centroid.y,polygon.centroid.x)

    # dict to store distances from district center to closest queried place
    distance_from_centre = {}

    # url for the API query
    for district, centre_point in district_centre_dict.items():
        url = f"https://api.foursquare.com/v3/places/search?query={query}&ll={centre_point[0]}%2C{centre_point[1]}&sort=DISTANCE&limit=1"

        headers = {
            "accept": "application/json",
            "Authorization": foursquare_key
        }
        # full response
        dist_resp = requests.get(url, headers=headers).json()['results'][0]['distance']

        # we get the distance of first response since we sort by distance and the criteria here is to get closest queried items.
        distance_from_centre[district] = dist_resp

    distance_from_centre = pd.DataFrame.from_dict(distance_from_centre, orient='index').reset_index(drop=False)
    distance_from_centre.rename(columns = {'index': 'District', 0: 'Distance'}, inplace = True, errors = 'raise')
    distance_from_centre.sort_values(by = ['Distance'], ascending = False, inplace = True)
    distance_from_centre.reset_index(inplace = True, drop = True)
    distance_from_centre.reset_index(inplace = True, drop = False)
    distance_from_centre.rename(columns = {'index': 'Points'}, inplace = True, errors = 'raise')

    return distance_from_centre

In [334]:
dist_from_center_airport = distance_criteria('Airport')

In [335]:
dist_from_center_airport

Unnamed: 0,Points,District,Distance
0,0,Reuilly,2421
1,1,Butte-Montmartre,1398
2,2,Buttes-Chaumont,1291
3,3,Panthéon,1110
4,4,Luxembourg,1062
5,5,Popincourt,1056
6,6,Vaugirard,851
7,7,Opéra,842
8,8,Batignolles-Monceau,833
9,9,Palais-Bourbon,714
