## Imports

In [1]:
import requests
import pandas as pd
import numpy as np
import folium
from folium.plugins import MarkerCluster

import matplotlib.pyplot as plt

In [2]:
# One-off download of the boardings dataset (Parquet). `-o` overwrites the target file;
# a `>>` redirect would append to an existing file and corrupt it on a second download.
# !curl "https://opendata.tpg.ch/api/explore/v2.1/catalog/datasets/montees-par-arret-par-ligne/exports/parquet?lang=fr&timezone=Europe%2FBerlin" -o "../data/montees-par-arret-par-ligne.parquet"

In [3]:
# One-off download of the boardings dataset (JSON). `-o` overwrites the target file;
# a `>>` redirect would append to an existing file and corrupt it on a second download.
# !curl "https://opendata.tpg.ch/api/explore/v2.1/catalog/datasets/montees-par-arret-par-ligne/exports/json?lang=fr&timezone=Europe%2FBerlin" -o "../data/montees-par-arret-par-ligne.json"

In [4]:
# One-off download of the stops dataset (JSON). `-o` overwrites the target file;
# a `>>` redirect would append to an existing file and corrupt it on a second download.
# !curl "https://opendata.tpg.ch/api/explore/v2.1/catalog/datasets/arrets/exports/json?lang=fr&timezone=Europe%2FBerlin" -o "../data/arrets.json"

## Montées par arrêt

In [5]:
# Load the per-stop, per-line boardings export (Parquet) and preview the first rows.
data_montees = pd.read_parquet('../data/montees-par-arret-par-ligne.parquet')
data_montees.head()

Unnamed: 0,date,ligne,ligne_type_act,jour_semaine,horaire_type,arret,arret_code_long,indice_semaine,indice_jour_semaine,nb_de_montees,nb_de_descentes,mois_annee,coordonnees
0,2023-03-20,55,SECONDAIRE,1-Lundi,NORMAL,Vireloup,VRLP01,12,1,9.63,0.0,2023-03,b'\x01\x01\x00\x00\x00\xfbw}\xe6\xac\x7f\x18@w...
1,2023-03-20,56,SECONDAIRE,1-Lundi,NORMAL,Bergère,BRGE01,12,1,16.37,1.0,2023-03,b'\x01\x01\x00\x00\x00dw\x81\x92\x02;\x18@\xc0...
2,2023-03-20,56,SECONDAIRE,1-Lundi,NORMAL,Blandonnet,BLDO01,12,1,64.38,12.81,2023-03,b'\x01\x01\x00\x00\x00\x0b\x99+\x83jc\x18@/\x8...
3,2023-03-20,56,SECONDAIRE,1-Lundi,NORMAL,Hôpital de La Tour,HTOU04,12,1,23.16,2.07,2023-03,b'\x01\x01\x00\x00\x00\xa8\xc8!\xe2\xe6D\x18@\...
4,2023-03-20,56,SECONDAIRE,1-Lundi,NORMAL,ICC,ICC00,12,1,0.0,2.96,2023-03,b'\x01\x01\x00\x00\x00\xe7p\xad\xf6\xb0g\x18@\...


In [6]:
# Summarise the column dtypes and the memory usage of the boardings table.
data_montees.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3847795 entries, 0 to 3847794
Data columns (total 13 columns):
 #   Column               Dtype  
---  ------               -----  
 0   date                 object 
 1   ligne                object 
 2   ligne_type_act       object 
 3   jour_semaine         object 
 4   horaire_type         object 
 5   arret                object 
 6   arret_code_long      object 
 7   indice_semaine       int64  
 8   indice_jour_semaine  int64  
 9   nb_de_montees        float64
 10  nb_de_descentes      float64
 11  mois_annee           object 
 12  coordonnees          object 
dtypes: float64(2), int64(2), object(9)
memory usage: 381.6+ MB


In [7]:
# Count the missing values in each column of the boardings table.
data_montees.isnull().sum()

date                    0
ligne                   0
ligne_type_act          0
jour_semaine            0
horaire_type            0
arret                   0
arret_code_long         0
indice_semaine          0
indice_jour_semaine     0
nb_de_montees           0
nb_de_descentes         0
mois_annee              0
coordonnees            29
dtype: int64

## Arrêts

In [8]:
# Load the stops reference export (JSON) and preview the first rows.
data_arrets = pd.read_json('../data/arrets.json')
data_arrets.head()

Unnamed: 0,arretcodelong,nomarret,commune,pays,codedidoc,coordonnees,actif
0,_BADNF,Bardonnex Douane - F,SAINT-JULIEN-EN-GENEVOIS,FR,,"{'lon': 6.096618, 'lat': 46.142014}",N
1,_CANDF,Bois Candide-Dne - F,FERNEY-VOLTAIRE,FR,,"{'lon': 6.092343, 'lat': 46.243755}",Y
2,_CZDNF,Croix-de-Rozon-Dne - F,COLLONGES-SOUS-SALÈVE,FR,,"{'lon': 6.137984, 'lat': 46.143688}",N
3,_DOSOF,Soral-Dne - F,VIRY,FR,,"{'lon': 6.03604, 'lat': 46.136703}",Y
4,_GSDNS,Grand-Saconnex-Dne - CH,LE GRAND-SACONNEX,CH,,"{'lon': 6.120933, 'lat': 46.24839}",Y


In [9]:
# transform coordinates
# Split the {'lon': ..., 'lat': ...} dict column into two numeric columns.
# The guard makes the cell safe to re-run: once `coordonnees` has been expanded
# and dropped there is nothing left to transform (previously a second run
# raised KeyError on the missing column).
if "coordonnees" in data_arrets.columns:
    coordonnees_brutes = data_arrets["coordonnees"]
    data_arrets = data_arrets.assign(
        coordonnees_lon=coordonnees_brutes.apply(lambda x: x['lon'] if x is not None else None),
        coordonnees_lat=coordonnees_brutes.apply(lambda x: x['lat'] if x is not None else None),
    ).drop(columns='coordonnees')

# replace actif by boolean
# True/False map to themselves so that a second run keeps the booleans instead
# of turning every value into NaN (Series.map yields NaN for unmapped keys).
data_arrets["actif"] = data_arrets["actif"].map({"Y": True, "N": False, True: True, False: False})

In [10]:
# Display the stops table with the split coordinate columns and the boolean `actif` flag.
data_arrets

Unnamed: 0,arretcodelong,nomarret,commune,pays,codedidoc,actif,coordonnees_lon,coordonnees_lat
0,_BADNF,Bardonnex Douane - F,SAINT-JULIEN-EN-GENEVOIS,FR,,False,6.096618,46.142014
1,_CANDF,Bois Candide-Dne - F,FERNEY-VOLTAIRE,FR,,True,6.092343,46.243755
2,_CZDNF,Croix-de-Rozon-Dne - F,COLLONGES-SOUS-SALÈVE,FR,,False,6.137984,46.143688
3,_DOSOF,Soral-Dne - F,VIRY,FR,,True,6.036040,46.136703
4,_GSDNS,Grand-Saconnex-Dne - CH,LE GRAND-SACONNEX,CH,,True,6.120933,46.248390
...,...,...,...,...,...,...,...,...
4396,ZIPL03,ZIPLO,PLAN-LES-OUATES,CH,8593073.0,True,6.101779,46.165997
4397,ZIPL99,ZIPLO,PLAN-LES-OUATES,CH,8593073.0,False,6.103465,46.167000
4398,ZIPLO02,,,CH,,False,,
4399,ZMON01,ZI de Montréal,VILLE-LA-GRAND,FR,8595870.0,False,6.277687,46.203590


In [11]:
# Fill missing entries with NaN, in place.
# NOTE(review): presumably meant to normalise None placeholders to NaN so that all
# columns share a single missing-value marker — confirm this is the intent.
data_arrets.fillna(value=np.nan, inplace=True)

In [12]:
# Summarise the non-null counts and dtypes of the stops table.
data_arrets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4401 entries, 0 to 4400
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   arretcodelong    4401 non-null   object 
 1   nomarret         4382 non-null   object 
 2   commune          4326 non-null   object 
 3   pays             4401 non-null   object 
 4   codedidoc        3746 non-null   float64
 5   actif            4401 non-null   bool   
 6   coordonnees_lon  4253 non-null   float64
 7   coordonnees_lat  4253 non-null   float64
dtypes: bool(1), float64(3), object(4)
memory usage: 245.1+ KB


In [13]:
# Count the missing values in each column of the stops table.
data_arrets.isnull().sum()

arretcodelong        0
nomarret            19
commune             75
pays                 0
codedidoc          655
actif                0
coordonnees_lon    148
coordonnees_lat    148
dtype: int64

In [14]:
# Map of the active stops, clustered, with name and stop code in the popup.

# Keep only the stops flagged as active.
data_arrets_actif = data_arrets[data_arrets["actif"].eq(True)]

# Centre the map on the mean latitude / longitude of the active stops.
coordonnees_centre = [
    data_arrets_actif["coordonnees_lat"].mean(),
    data_arrets_actif["coordonnees_lon"].mean(),
]

m = folium.Map(location=coordonnees_centre, zoom_start=12)  # initialize map centered on the mean of coordinates
marker_cluster = MarkerCluster().add_to(m)  # add clustering

# A stop without a position cannot be drawn: filter those rows out once instead
# of testing every row inside the loop.
arrets_localises = data_arrets_actif.dropna(subset=["coordonnees_lon", "coordonnees_lat"])

# itertuples() yields one lightweight record per stop, avoiding the repeated
# `.iloc[i][column]` lookups that each rebuild a full row Series.
for arret in arrets_localises.itertuples(index=False):
    # NOTE(review): values are interpolated into the popup HTML as-is — confirm
    # the stop names never contain markup.
    popup_html = "<b>Nom:</b> {}<br>".format(arret.nomarret)
    popup_html += "<b>Code Arret:</b> {}<br>".format(arret.arretcodelong)

    folium.Marker(
        location=[arret.coordonnees_lat, arret.coordonnees_lon],
        tooltip=arret.nomarret,
        popup=folium.Popup(popup_html, max_width=300),
    ).add_to(marker_cluster)

m

In [15]:
# Map of the active stops with a bus icon coloured by country
# (red for 'CH', blue for any other value of `pays`).

# Keep only the stops flagged as active.
data_arrets_actif = data_arrets[data_arrets["actif"].eq(True)]

# Centre the map on the mean latitude / longitude of the active stops.
coordonnees_centre = [
    data_arrets_actif["coordonnees_lat"].mean(),
    data_arrets_actif["coordonnees_lon"].mean(),
]

m = folium.Map(location=coordonnees_centre, zoom_start=12)
marker_cluster = MarkerCluster().add_to(m)

# A stop without a position cannot be drawn: filter those rows out once instead
# of testing every row inside the loop.
arrets_localises = data_arrets_actif.dropna(subset=["coordonnees_lon", "coordonnees_lat"])

# itertuples() yields one lightweight record per stop, avoiding the repeated
# `.iloc[i][column]` lookups that each rebuild a full row Series.
for arret in arrets_localises.itertuples(index=False):
    # NOTE(review): values are interpolated into the popup HTML as-is — confirm
    # the stop names / communes never contain markup.
    popup_html = "<b>Nom :</b> {}<br>".format(arret.nomarret)
    popup_html += "<b>Commune :</b> {}<br>".format(arret.commune)
    popup_html += "<b>Pays :</b> {}<br>".format(arret.pays)
    popup_html += "<b>Code Arret :</b> {}<br>".format(arret.arretcodelong)

    marker_color = 'red' if arret.pays == 'CH' else 'blue'

    folium.Marker(
        location=[arret.coordonnees_lat, arret.coordonnees_lon],
        tooltip=arret.nomarret,
        icon=folium.Icon(color=marker_color, icon="bus", prefix="fa"),
        popup=folium.Popup(popup_html, max_width=300),
    ).add_to(marker_cluster)

m