# PHARMA FRANCHISE - new locations in Paris
### A new pharmacy chain - finding the best locations for new stores

In [None]:
# First, install and import all required libraries
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
import requests
from pandas.io.json import json_normalize

# Lift pandas' display limits so frames are rendered with every column and every row.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

 ### Defining a large area - the region, the city


#### Search best region (Départements) in France by population density

##### We found the data at the National Institute of Statistics and Economic Studies of France
##### \(<a href='https://www.insee.fr/fr/statistiques/2119468?sommaire=2119504#departements'>L’Institut national de la statistique et des études économiques</a>\)
    
##### We downloaded the <a href='https://www.insee.fr/fr/statistiques/fichier/2387611/ensemble.xls'>France Department Population</a> and <a href='https://www.insee.fr/fr/statistiques/fichier/2387611/dep75.xls'>Paris (Ile-de-France)</a>

![](project_images/France_assembly_vote.svg "France regions") 
@ By Gtaf (fichier d'origine Naturals) - Own work d'après Naturals, CC BY-SA 4.0, https://commons.wikimedia.org/w/index.php?curid=34089638

In [None]:
# Download the INSEE population workbook (ensemble.xls) straight from insee.fr
france_dep_pop = pd.read_excel(
    io='https://www.insee.fr/fr/statistiques/fichier/2387611/ensemble.xls'
)
# Look at the first 15 rows to explore the raw layout of the sheet
france_dep_pop.head(n=15)

In [None]:
# Drop the unneeded leading rows: the row at position 6 of the raw sheet is used
# as the header, and only the rows after it are kept as data.
columns = france_dep_pop.iloc[6]
france_pop = france_dep_pop.iloc[7:, :].reset_index(drop=True)
france_pop.columns = columns

In [None]:
# Show the rows ordered from the largest to the smallest total population
france_pop.sort_values(by=['Population totale'], ascending=[False])

In [None]:
# Keep the row(s) whose total population equals the column maximum,
# restricted to the name and total-population columns.
total_population = france_pop['Population totale']
department_max_pop = france_pop.loc[
    total_population == total_population.max(),
    ['Nom de la région', 'Population totale'],
]

In [None]:
# Display only the name column of the most populated entry
department_max_pop.loc[:, 'Nom de la région']

##### On Wikipedia we found Ile-de-France population density

![](project_images/1124px-Île-de-France_region_locator_map2.svg.png "Ile-de_France region")@By Superbenjamin - Own work, CC BY-SA 4.0, https://commons.wikimedia.org/w/index.php?curid=45218020

In [None]:
# pandas' HTML table reader, bound to the bare name read_html
from pandas.io.html import read_html

# Wikipedia article on Île-de-France (population density section);
# only tables carrying the "wikitable" CSS class are extracted.
page = 'https://en.wikipedia.org/wiki/%C3%8Ele-de-France#Population_density'
wikitables = read_html(io=page, attrs={"class": "wikitable"})

# Report how many tables were extracted
table_count = len(wikitables)
print("Extracted {num} wikitables".format(num=table_count))

##### Import Ile-de-France data

In [None]:
# Wrap the first extracted wikitable in a DataFrame and preview its first rows
first_wikitable = wikitables[0]
Ile_pop = pd.DataFrame(data=first_wikitable)
Ile_pop.head(n=5)

In [None]:
# Department with the largest 2011 population (the narrative identifies it as Paris)
population_2011 = Ile_pop['Population (2011)[37]']
max_pop_city = Ile_pop.loc[population_2011 == population_2011.max(), 'Department']
max_pop_city

#### We request the Paris coordinates

In [98]:
# Free-text address sent to the Nominatim geocoder
address = 'Paris, Île-de-France, France métropolitaine, France'

geolocator = Nominatim(user_agent="paris_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; stop here with a clear
# message instead of failing on the attribute access below.
if location is None:
    raise ValueError('Nominatim returned no result for address: {!r}'.format(address))

# Coordinates reused by later cells (e.g. to centre the Foursquare query)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Paris are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Paris are 48.8566101, 2.3514992.


#### And the Paris arrondissements and quarters (neighborhoods)

![](project_images/Population_density_map_of_Paris_in_2012.svg.png "Population density map of_Paris")@ By Paris 16 - Own work, CC BY-SA 4.0, https://commons.wikimedia.org/w/index.php?curid=38412578

In [145]:
# Wikipedia article listing the quarters of Paris; extract its "wikitable" tables.
# Note: this rebinds `page` and `wikitables`, which held the Île-de-France data before.
page = 'https://en.wikipedia.org/wiki/Quarters_of_Paris'
wikitables = read_html(io=page, attrs={"class": "wikitable"})
table_count = len(wikitables)
print("Extracted {num} wikitables".format(num=table_count))

Extracted 1 wikitables


In [144]:
# Wrap the first extracted table (the quarters of Paris) in a DataFrame and preview it
quarters_table = wikitables[0]
p_quarters = pd.DataFrame(data=quarters_table)
p_quarters.head(n=5)

Unnamed: 0,Arrondissement(Districts),Quartiers(Quarters),Quartiers(Quarters).1,Population in1999[3],Area(hectares)[3],Map
0,"1st arrondissement(Called ""du Louvre"")",1st,Saint-Germain-l'Auxerrois,1672,86.9,
1,"1st arrondissement(Called ""du Louvre"")",2nd,Les Halles,8984,41.2,
2,"1st arrondissement(Called ""du Louvre"")",3rd,Palais-Royal,3195,27.4,
3,"1st arrondissement(Called ""du Louvre"")",4th,Place-Vendôme,3044,26.9,
4,"2nd arrondissement(Called ""de la Bourse"")",5th,Gaillon,1345,18.8,


#### And request pharmacies location in Paris area

In [91]:
import yaml

# Foursquare credentials and API version are read from a local YAML file rather
# than being written into the notebook.
with open("./project_data/config.yaml", "r") as f:
    cfg = yaml.safe_load(f)

# Query parameters for the Foursquare call, centred on the latitude/longitude
# obtained from the geocoding cell.
params = dict(
    client_id=cfg['client_id'],
    client_secret=cfg['client_secret'],
    v=cfg['version'],
    ll='{}, {}'.format(latitude, longitude),
    intent='match',
    radius=100000,  # NOTE(review): presumably metres — confirm against the Foursquare docs
    categoryId='4bf58dd8d48988d10f951735'  # NOTE(review): presumably the pharmacy category id — confirm
)

import json, requests
url = 'https://api.foursquare.com/v2/venues/explore'
# A timeout keeps the cell from hanging forever on an unresponsive server;
# raise_for_status() surfaces HTTP errors (bad credentials, quota, ...) here
# instead of as a KeyError when the JSON payload is unpacked later.
resp = requests.get(url=url, params=params, timeout=30)
resp.raise_for_status()

In [102]:
# Decode the JSON body and pull the items of the first result group
data = resp.json()
first_group = data['response']['groups'][0]
f_pharma_loc = first_group['items']

In [None]:
# Display the raw list of items extracted from the Foursquare response
f_pharma_loc

In [None]:
# Collect the 'venue' entry of every returned item. Items without a 'venue'
# key are skipped, exactly as the original key-by-key scan did.
pharma_venues = [item['venue'] for item in f_pharma_loc if 'venue' in item]

print(pharma_venues)

#### After understanding the investor's intentions and the importance of optimal building locations, the steps are:

   - Defining a large area - the city, the country
   - Establishing the layers according to the investor criteria
       - areas with heavy pedestrian traffic like large intersections, malls, markets etc. Foursquare request  
       - hospitals, existing clinics. Foursquare request
       - pharmacies in the area. Foursquare request
       - operating hours. Foursquare request
   - Data Exploration, Feature engineering for optimal definition of features (criteria)
   - Classification of layers  
   - Clustering according to criteria
       - of traffic
       - of distance
           - proximity - hospitals, markets
           - away - existing pharmacies
   - View with Folium Maps
   - Presentation of the conclusions to the investor


#### The investor has chosen Paris, France

In [None]:
# Now, let's get Paris coordinates with Foursquare API
