# The notebook:
 * Load city equipment data from <a href="https://api.paris.fr">Paris API</a> and <a href="http://donnees.ville.montreal.qc.ca/dataset?res_format=JSON">Montreal API</a>
 * Clean the data into CSV files with the header ("id","name","lat","lon","address","zipCode","websiteUrl","id_cat","cat","city","type")
 * Concatenate the datasets
 * Save the data in a CSV file

**Table of contents**
 * [1) Paris API](#parisapi)
 * [2) Montreal API](#montrealapi)
 * [3) Concatenation](#concatenation)

# <a id="parisapi">1) Paris API</a>

In [1]:
import os

import pandas as pd
import requests

In [2]:
def get_equipement(id_):
    """Fetch the detail payload of a single equipment from the Paris API.

    The request is authenticated with the module-level ``token``.

    Parameters:
        id_: equipment identifier (int or str); it is converted with ``str``
            before being appended to the query string.

    Returns:
        The value stored under the ``"data"`` key of the JSON response.
        Callers in this notebook index it with ``[0]``.
    """
    endpoint = "https://api.paris.fr/api/data/1.0/Equipements/get_equipement/?token="
    response = requests.get(url=endpoint + token + "&id=" + str(id_))
    payload = response.json()
    return payload["data"]

def get_websiteurl(name,id_):
    """Build the equipement.paris.fr page URL for an equipment.

    The name is lower-cased and every space or apostrophe is replaced by a
    hyphen; the identifier is appended after a final hyphen.

    Parameters:
        name: display name of the equipment.
        id_: equipment identifier. Both ``str`` and ``int`` are accepted
            (the original implementation raised ``TypeError`` on an ``int``).

    Returns:
        The URL as a string.
    """
    slug = name.lower().replace(" ","-").replace("'","-")
    # %-formatting converts a numeric id to text, so callers no longer have to
    # wrap the id in str() themselves.
    return "http://equipement.paris.fr/%s-%s" % (slug, id_)

def get_dataframe_equipements(id_cat,cat):
    """Build a DataFrame describing every equipment of one Paris API category.

    The category listing is requested first (authenticated with the
    module-level ``token``), then one detail request per listed equipment is
    issued through ``get_equipement``.

    Parameters:
        id_cat: category identifier as a string (it is concatenated into the
            request URL and stored in the ``id_cat`` column).
        cat: label stored unchanged in the ``cat`` column.

    Returns:
        A ``pandas.DataFrame`` with the columns
        id, name, lat, lon, address, zipCode, websiteUrl, id_cat, cat.
    """
    listing_url = ("https://api.paris.fr/api/data/1.0/Equipements/get_equipements/?token="
                   + token + "&cid=" + id_cat + "&offset=1&limit=1000")
    listing = requests.get(url=listing_url).json()["data"]

    # Resolve every listed id to the first record of its detail payload
    # before any row is assembled.
    details = [get_equipement(int(item["id"]))[0] for item in listing]

    column_names = ["id","name","lat","lon","address","zipCode","websiteUrl","id_cat","cat"]
    rows = []
    for detail in details:
        website = get_websiteurl(detail["name"], str(detail["idequipements"]))
        rows.append([detail["idequipements"], detail["name"], detail["lat"], detail["lon"],
                     detail["address"], detail["zipCode"], website, id_cat, cat])
    return pd.DataFrame(data=rows, columns=column_names)

In [3]:
# Paris API token. Prefer supplying it through the PARIS_API_TOKEN environment
# variable so the credential does not have to live in the notebook.
# NOTE(review): the fallback below is the key that was committed with this
# notebook; it is kept only so existing runs keep working and should be
# rotated and removed.
token = os.environ.get("PARIS_API_TOKEN",
                       "fbce1d372430cfac0c1abbd73323fdf840d76b44b697b039576815a278b8f883")

In [4]:
# Download the category list, then print the id and name of each category
# whose lower-cased name contains "piscine" or "tennis" (one line per match).
categories = requests.get(url="https://api.paris.fr/api/data/1.0/Equipements/get_categories/?token="+token)
for category in categories.json()["data"]:
    lowered_name = category["name"].lower()
    for keyword in ("piscine", "tennis"):
        if keyword in lowered_name:
            print ("id", category["idcategories"],category["name"])

('id', 27, u'Piscines')
('id', 280, u'Tennis')


In [5]:
# Peek at the first three category records of the response.
categories.json()["data"][0:3]

[{u'idcategories': 47, u'name': u'Ateliers Beaux-Arts'},
 {u'idcategories': 329, u'name': u'Autres lieux'},
 {u'idcategories': 332, u'name': u'Bains-douches'}]

In [6]:
# Raw equipment listings for category 27 (swimming pools) and 280 (tennis),
# requested in that order.
piscines_paris, tennis_paris = [
    requests.get(url="https://api.paris.fr/api/data/1.0/Equipements/get_equipements/?token="+
                     token+"&cid="+category_id+"&offset=1&limit=1000").json()["data"]
    for category_id in ("27", "280")
]

In [7]:
# Spot-check the detail endpoint: print the name of two known equipments.
for equipement_id in ("2919", "2968"):
    print(get_equipement(equipement_id)[0]["name"])

Piscine Saint-Germain
Tennis Château des Rentiers


In [8]:
# Report how many entries each raw listing contains.
for label, listing in (("number of piscines", piscines_paris),
                       ("number of tennis", tennis_paris)):
    print (label, len(listing))

('number of piscines', 37)
('number of tennis', 39)


## Create dataframe

In [9]:
# One DataFrame per Paris category of interest.
df_piscines_paris = get_dataframe_equipements(id_cat="27", cat="Swimming Pool")
df_tennis_paris = get_dataframe_equipements(id_cat="280", cat="Tennis")

In [10]:
# Persist each Paris DataFrame to its own CSV file (no index column, UTF-8).
for frame, csv_path in ((df_piscines_paris, "../data_api/paris_api/piscines.csv"),
                        (df_tennis_paris, "../data_api/paris_api/tennis.csv")):
    frame.to_csv(csv_path, index=False, encoding="utf-8")

## Create the `state` column (Visited / Not yet visited)

In [11]:
# Ids of the Paris equipments that are flagged as already visited.
l_visited = [
    2916, 2918, 2919, 2920, 2921, 2923, 2924, 2925, 2926,
    2927, 2928, 2929, 2930, 2931, 2932, 2933, 2934,
    2936, 2937, 2939, 2941,
    2945, 2946, 2947, 2948, 2949, 2950, 3324, 3325, 4012,
    5041, 17349, 2940, 2935, 2967,
]

# Stack pools and tennis courts, then label every row with its visit state
# and its city.
df_concat_paris = pd.concat([df_piscines_paris, df_tennis_paris])
df_concat_paris["state"] = [
    "Visited" if was_visited else "Not yet visited"
    for was_visited in df_concat_paris["id"].isin(l_visited)
]
df_concat_paris["city"] = "Paris"

# <a id='montrealapi'>2) Montreal API</a>

In [12]:
# Load the Montreal swimming-pool export from its CSV file.
df_piscines_montreal = pd.read_csv(filepath_or_buffer="../data_api/montreal_api/piscines.csv")

In [13]:
# Keep only the rows whose TYPE is one of the three pool types below, and keep
# a single row per (LONG, LAT) pair.
pool_types = ["Piscine intérieure","Piscine extérieure",
              "Piscine extérieure, Jeux d'eau"]
# .copy() makes the result an independent frame, so the column assignments
# below do not act on a slice of df_piscines_montreal
# (avoids pandas' SettingWithCopyWarning).
df_piscines_mnew = (df_piscines_montreal[df_piscines_montreal["TYPE"].isin(pool_types)]
                    .drop_duplicates(["LONG","LAT"])
                    .copy())
# Rewrite each id as "<ID_UEV>m<row position>" -- presumably to keep ids
# distinct when ID_UEV repeats; confirm against the source data.
df_piscines_mnew["ID_UEV"]= [str(i)+"m"+str(idx) for idx,i in enumerate(df_piscines_mnew["ID_UEV"].values)]
# Select the needed columns and rename them to the header used for the Paris
# data. Note that the "zipCode" column receives the ARRONDISSE values.
df_piscines_mnew= df_piscines_mnew[["ID_UEV","NOM","LAT","LONG","ADRESSE","ARRONDISSE","TYPE"]].copy()
df_piscines_mnew.columns=["id","name","lat","lon","address","zipCode","type"]
df_piscines_mnew["cat"]="Swimming Pool"
df_piscines_mnew["state"]="Not yet visited"
df_piscines_mnew["city"]="Montréal"

# <a id='concatenation'>3) Concatenation</a>

In [14]:
# Stack the Paris and Montreal frames and put the columns in their final order.
output_columns = ["id","name","lat","lon","address",
                  "zipCode","websiteUrl","id_cat","cat","type","city","state"]
df_concat_cityequip = pd.concat([df_concat_paris,df_piscines_mnew])[output_columns]

In [16]:
# Write the combined dataset to the CSV file (no index column, UTF-8).
df_concat_cityequip.to_csv(path_or_buf="../data_website_equipments/csv/equipments.csv",
                           index=False, encoding="utf-8")
