# PHARMA FRANCISE  - new locations in Paris
###  A new pharmacy chain - opportunity solutions for building location

In [162]:
# First, install and import all required libraries
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
import geocoder # to get coordinates

import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
import requests
from pandas.io.json import json_normalize

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

 ### Defining a large area - the region, the city


#### Search best region (Départements) in France by population density

##### We found the The National Institute of Statistics and Economic Studies of France
##### \(<a href='https://www.insee.fr/fr/statistiques/2119468?sommaire=2119504#departements'>L’Institut national de la statistique et des études économiques</a>\)
    
##### We downloaded the <a href='https://www.insee.fr/fr/statistiques/fichier/2387611/ensemble.xls'>France Department Population</a> and <a href='https://www.insee.fr/fr/statistiques/fichier/2387611/dep75.xls'>Paris (Ile-de-France)</a>

![](project_images/France_assembly_vote.svg "France regions") 
@ By Gtaf (fichier d'origine Naturals) - Own work d'après Naturals, CC BY-SA 4.0, https://commons.wikimedia.org/w/index.php?curid=34089638

In [None]:
# Import and display population data
france_dep_pop = pd.read_excel('https://www.insee.fr/fr/statistiques/fichier/2387611/ensemble.xls')
# Explore data
france_dep_pop.head(15)

In [None]:
# Remove unusefull rows
columns = france_dep_pop.iloc[6]
france_pop = france_dep_pop.iloc[7:,:]
france_pop.columns = columns
france_pop = france_pop.reset_index(drop=True)

In [None]:
france_pop.sort_values(by='Population totale', ascending=False)

In [None]:
department_max_pop = france_pop[france_pop['Population totale'] == france_pop['Population totale'].max()][['Nom de la région','Population totale']]

In [None]:
department_max_pop['Nom de la région']

##### On Wikipedia we found Ile-de-France population density

![](project_images/1124px-Île-de-France_region_locator_map2.svg.png "Ile-de_France region")@By Superbenjamin - Own work, CC BY-SA 4.0, https://commons.wikimedia.org/w/index.php?curid=45218020

In [None]:
# We need to import read_html
from pandas.io.html import read_html

# Read the page
page = 'https://en.wikipedia.org/wiki/%C3%8Ele-de-France#Population_density'
wikitables = read_html(page,  attrs={"class":"wikitable"})

print ("Extracted {num} wikitables".format(num=len(wikitables)))

##### Import Ile-de-France data

In [None]:
Ile_pop = pd.DataFrame(wikitables[0])
Ile_pop.head()

In [None]:
# Finding the max population city: Paris
max_pop_city = Ile_pop[Ile_pop['Population (2011)[37]'] == Ile_pop['Population (2011)[37]'].max()]['Department']
max_pop_city

#### We request the Paris coordinates

In [98]:
address = 'Paris, Île-de-France, France métropolitaine, France'

geolocator = Nominatim(user_agent="paris_explorer")
location = geolocator.geocode(address)
paris_latitude = location.latitude
paris_longitude = location.longitude
print('The geograpical coordinate of Paris are {}, {}.'.format(paris_latitude, paris_longitude))

The geograpical coordinate of Paris are 48.8566101, 2.3514992.


In [193]:
paris_latitude = location.latitude
paris_longitude = location.longitude

#### And the Paris Arondisments and Quarters (neighborhoods)

![](project_images/Population_density_map_of_Paris_in_2012.svg.png "Population density map of_Paris")@ By Paris 16 - Own work, CC BY-SA 4.0, https://commons.wikimedia.org/w/index.php?curid=38412578

In [156]:
page = 'https://en.wikipedia.org/wiki/Quarters_of_Paris'
wikitables = read_html(page,  attrs={"class":"wikitable"})
print ("Extracted {num} wikitables".format(num=len(wikitables)))

Extracted 1 wikitables


In [182]:
p_quarters = pd.DataFrame(wikitables[0])
p_quarters.head()

Unnamed: 0,Arrondissement(Districts),Quartiers(Quarters),Quartiers(Quarters).1,Population in1999[3],Area(hectares)[3],Map
0,"1st arrondissement(Called ""du Louvre"")",1st,Saint-Germain-l'Auxerrois,1672,86.9,
1,"1st arrondissement(Called ""du Louvre"")",2nd,Les Halles,8984,41.2,
2,"1st arrondissement(Called ""du Louvre"")",3rd,Palais-Royal,3195,27.4,
3,"1st arrondissement(Called ""du Louvre"")",4th,Place-Vendôme,3044,26.9,
4,"2nd arrondissement(Called ""de la Bourse"")",5th,Gaillon,1345,18.8,


In [184]:
quarters = p_quarters['Quartiers(Quarters).1']
p_quarters = pd.DataFrame(quarters)

In [185]:
columns = ['Quarters']
p_quarters.columns = columns

In [165]:
def get_latlng(quarter):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Paris, France'.format(quarter))
        lat_lng_coords = g.latlng
    return lat_lng_coords
coords = [ get_latlng(quarter) for quarter in list(p_quarters) ]

In [187]:
quarters_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [188]:
# Now, merge the coordinates into the original dataframe
p_quarters['Latitude'] = quarters_coords['Latitude']
p_quarters['Longitude'] = quarters_coords['Longitude']

In [192]:
print(p_quarters.shape)
p_quarters.head()

(80, 3)


Unnamed: 0,Quarters,Latitude,Longitude
0,Saint-Germain-l'Auxerrois,48.85971,2.34024
1,Les Halles,48.86319,2.34201
2,Palais-Royal,48.8635,2.33876
3,Place-Vendôme,48.86778,2.33011
4,Gaillon,48.86902,2.33445


In [191]:
# Save the Quarters as .csv
p_quarters.to_csv("./project_data/p_quarters.csv", index=False)

#### Let's create a map of Paris quarters using Folium

In [206]:
map_quarters = folium.Map(location=[paris_latitude, paris_longitude], zoom_start=12, tiles="OpenStreetMap")

# add markers to map
for lat, lng, neighborhood in zip(p_quarters['Latitude'], p_quarters['Longitude'], p_quarters['Quarters']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#FF0000', #3186cc

        fill_opacity=0.4).add_to(map_quarters)  
    
map_quarters

In [207]:
# And save the map as HTML file
map_quarters.save('./project_maps/map_quarters.html')

In [208]:
import yaml

with open("./project_data/config.yaml", "r") as f:
    cfg = yaml.safe_load(f)

In [219]:
CLIENT_ID=cfg['client_id']
CLIENT_SECRET=cfg['client_secret']
VERSION=cfg['version']
radius = 2000
LIMIT = 150

venues = []

for latitude, longitude, neighborhood in zip(p_quarters['Latitude'], p_quarters['Longitude'], p_quarters['Quarters']):
    
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        latitude, 
        longitude,
        radius, 
        LIMIT)

    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            neighborhood,
            latitude, 
            longitude, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [220]:
venues_df = pd.DataFrame(venues)

venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(7803, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Saint-Germain-l'Auxerrois,48.85971,2.34024,Cour Carrée du Louvre,48.86036,2.338543,Pedestrian Plaza
1,Saint-Germain-l'Auxerrois,48.85971,2.34024,Place du Louvre,48.859841,2.340822,Plaza
2,Saint-Germain-l'Auxerrois,48.85971,2.34024,La Vénus de Milo (Vénus de Milo),48.859943,2.337234,Exhibit
3,Saint-Germain-l'Auxerrois,48.85971,2.34024,Pont des Arts,48.858565,2.337635,Bridge
4,Saint-Germain-l'Auxerrois,48.85971,2.34024,Musée du Louvre,48.860847,2.33644,Art Museum


In [221]:
# Save the venues as .csv
venues_df.to_csv("./project_data/venues_df.csv", index=False)

In [None]:
venues_df.groupby(["Neighborhood"]).count()

In [242]:

pharma_no = len(list((x for x in venues_df['VenueCategory'].tolist() if x == 'Pharmacy')))
pharma_no

11

#### 11 are not enough. Now let's search(request) for pharmacies in every neighborhood

In [286]:
CLIENT_ID=cfg['client_id']
CLIENT_SECRET=cfg['client_secret']
VERSION=cfg['version']
radius = 2000
LIMIT = 150
categoryId = '4bf58dd8d48988d10f951735' # Pharmacies Id

venues = []

for latitude, longitude, neighborhood in zip(p_quarters['Latitude'], p_quarters['Longitude'], p_quarters['Quarters']):
    
    url = "https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&categoryId={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        latitude, 
        longitude,
        radius, 
        LIMIT,
        categoryId
)

    results = requests.get(url).json()['response']['venues']
    
    for venue in results:
        venues.append((
            neighborhood,
            latitude, 
            longitude, 
            venue['name'], 
            venue['location']['lat'], 
            venue['location']['lng']))

In [289]:
all_neighb_pharma_df = pd.DataFrame(venues)

all_neighb_pharma_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude']

print(all_neighb_pharma_df.shape)
all_neighb_pharma_df.head()

(3666, 6)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude
0,Saint-Germain-l'Auxerrois,48.85971,2.34024,Pharmacie du Pont Neuf,48.860037,2.345189
1,Saint-Germain-l'Auxerrois,48.85971,2.34024,Pharmacie de la Place de la République,48.86646,2.364269
2,Saint-Germain-l'Auxerrois,48.85971,2.34024,Plus Pharmacie Bader,48.852454,2.343392
3,Saint-Germain-l'Auxerrois,48.85971,2.34024,City Pharma,48.852754,2.333343
4,Saint-Germain-l'Auxerrois,48.85971,2.34024,Pharmacie des Galeries,48.873959,2.331094


In [291]:
all_neighb_pharma_df['VenueCategory'] = 'Pharmacy'


#### 3666 are enough, aren't it? NO, it's a joke. Because it contains many many duplicates. Let's remove them!

In [313]:
print(all_neighb_pharma_df.shape)
all_neighb_pharma_df.drop_duplicates(subset='VenueName', keep='first', inplace=True)
print(all_neighb_pharma_df.shape)

(3666, 7)
(375, 7)


In [314]:
# Now, save all pharmacies as .csv
all_neighb_pharma_df.to_csv("./project_data/all_neighb_pharma_df.csv", index=False)

#### Now, search for Hospitals in Paris!

In [294]:
CLIENT_ID=cfg['client_id']
CLIENT_SECRET=cfg['client_secret']
VERSION=cfg['version']
radius = 2000
LIMIT = 150
categoryId = '4bf58dd8d48988d196941735' # Hospitals Id

venues = []

for latitude, longitude, neighborhood in zip(p_quarters['Latitude'], p_quarters['Longitude'], p_quarters['Quarters']):
    
    url = "https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&categoryId={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        latitude, 
        longitude,
        radius, 
        LIMIT,
        categoryId
)

    results = requests.get(url).json()['response']['venues']
    
    for venue in results:
        venues.append((
            neighborhood,
            latitude, 
            longitude, 
            venue['name'], 
            venue['location']['lat'], 
            venue['location']['lng']))

In [298]:
hospitals_df = pd.DataFrame(venues)

hospitals_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude']

print(hospitals_df.shape)
hospitals_df.head()

(2782, 6)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude
0,Saint-Germain-l'Auxerrois,48.85971,2.34024,Clinique du Louvre,48.859652,2.341171
1,Saint-Germain-l'Auxerrois,48.85971,2.34024,American Hospital of Paris,48.870697,2.331678
2,Saint-Germain-l'Auxerrois,48.85971,2.34024,Hospital Nnecker,48.870183,2.351002
3,Saint-Germain-l'Auxerrois,48.85971,2.34024,Centre MST - Hôpital Saint-Louis,48.873544,2.365277
4,Saint-Germain-l'Auxerrois,48.85971,2.34024,Urgences Médico Judiciaires - Hôpital Hôtel-Dieu,48.854185,2.347411


In [299]:
hospitals_df['VenueCategory'] = 'Hospital'

In [315]:
# Drop duplicates
hospitals_df.drop_duplicates(subset='VenueName', keep='first', inplace=True)
hospitals_df.shape

(247, 7)

In [316]:
# Save hospitals as .csv
hospitals_df.to_csv("./project_data/hospitals_df.csv", index=False)

#### Now, let's merge the dataframes

In [317]:
all_venues_pharma_hosp = pd.concat([all_neighb_pharma_df, hospitals_df, venues_df])

In [318]:
print(all_venues_pharma_hosp.shape)
print(all_venues_pharma_hosp.head())

(8425, 7)
                Neighborhood  Latitude  Longitude  \
0  Saint-Germain-l'Auxerrois  48.85971    2.34024   
1  Saint-Germain-l'Auxerrois  48.85971    2.34024   
2  Saint-Germain-l'Auxerrois  48.85971    2.34024   
3  Saint-Germain-l'Auxerrois  48.85971    2.34024   
4  Saint-Germain-l'Auxerrois  48.85971    2.34024   

                                VenueName  VenueLatitude  VenueLongitude  \
0                  Pharmacie du Pont Neuf      48.860037        2.345189   
1  Pharmacie de la Place de la République      48.866460        2.364269   
2                    Plus Pharmacie Bader      48.852454        2.343392   
3                             City Pharma      48.852754        2.333343   
4                  Pharmacie des Galeries      48.873959        2.331094   

  VenueCategory  
0      Pharmacy  
1      Pharmacy  
2      Pharmacy  
3      Pharmacy  
4      Pharmacy  


#### After understanding the investor intention and the importance of optimal conditions of the buildings locations we have to:

   - Defining a large area - the city, the country
   - Establishing the layers according to the investor criteria
       - areas with heavy pedestrian traffic like large intersections, malls, markets etc. Foursquare request  
       - hospitals, existing clinics. Foursquare request
       - pharmacies in the area. Foursquare request
       - operating hours. Foursquare request
   - Data Exploration, Feature engineering for optimal definition of features (criteria)
   - Classification of layers  
   - Clustering according to criteria
       - of traffic
       - of distance
           - proximity - hospitals, markets
           - away - existing pharmacies
   - View with Folium Maps
   - Presentation of the conclusions to the investor
