In [353]:
import json
import math
import os
from datetime import datetime as dt

import pandas as pd
import requests

# Notebook display settings: allow up to 999 columns and a 1000-character wide
# console so that wide frames are shown in full when displayed.
pd.set_option('display.max_columns', 999)
pd.set_option('display.width', 1000)


In [231]:
import copy

def pandas_explode(df, column_to_explode):
    """
    Similar to Hive's EXPLODE function: take a column holding iterable elements and flatten it so that every
    element of the iterable becomes its own observation in the output table.

    All other columns are repeated (deep-copied) for each element. Rows whose iterable is empty produce no
    output rows. The output always contains the untouched columns in their original order followed by the
    exploded column, even when no observation is produced at all.

    :param df: A dataframe to explode
    :type df: pandas.DataFrame
    :param column_to_explode: Name of the column whose values are iterables
    :type column_to_explode: str
    :return: An exploded data frame
    :rtype: pandas.DataFrame
    """

    # Output schema: every untouched column first, the exploded column last
    output_columns = [column for column in df.columns if column != column_to_explode]
    output_columns.append(column_to_explode)

    # Create a list of new observations
    new_observations = list()

    # Iterate through existing observations
    for row in df.to_dict(orient='records'):

        # Take out the exploding iterable
        explode_values = row.pop(column_to_explode)

        # Create a new observation for every entry in the exploding iterable & add all of the other columns
        for explode_value in explode_values:

            # Deep copy so that nested values are not shared between the new observations
            new_observation = copy.deepcopy(row)

            # Add one (newly flattened) value from exploding iterable
            new_observation[column_to_explode] = explode_value

            # Add to the list of new observations
            new_observations.append(new_observation)

    # Passing the columns explicitly keeps the schema when there is nothing to explode
    # (an empty list of records would otherwise give a frame without any column)
    return pd.DataFrame(new_observations, columns=output_columns)


In [366]:
# Get all bus stations available for OuiBus / Needs to be updated regularly
def update_stop_list():
    """
    Download the Ouibus stop list and attach each stop to its meta station ("meta gare").

    The v1 endpoint is used as the list of actual stops. The v2 endpoint additionally returns meta stations
    (like "Paris - All stations"), each carrying a nested ``stops`` list; that list is exploded so every
    actual stop can be mapped to the id of the meta station it belongs to.

    The returned frame holds the v1 columns plus:
      * ``id_meta_gare``: id of the meta station, or the stop's own id when it has none
      * ``stops``: the stop id as found in the v2 nested list (NaN when the stop has no meta station)
      * ``geoloc``: ``[latitude, longitude]``

    The API token is read from the ``OUIBUS_API_TOKEN`` environment variable.

    :return: One row per stop / meta station association
    :rtype: pandas.DataFrame
    :raises requests.HTTPError: if one of the two endpoints answers with an error status
    """
    # NOTE(review): the fallback is the token that used to be hardcoded here, kept only so existing
    # environments keep working. It is committed in clear text: rotate it and drop the fallback.
    api_token = os.environ.get('OUIBUS_API_TOKEN', 'rvZD7TlqePBokwl0T02Onw')
    headers = {
        'Authorization': f'Token {api_token}',
    }

    # Get v1 stops (all actual stops)
    response = requests.get('https://api.idbus.com/v1/stops', headers=headers, timeout=30)
    response.raise_for_status()
    stops_df_v1 = pd.DataFrame.from_dict(response.json()['stops'])

    # Get v2 stops (with meta_station like "Paris - All stations")
    response = requests.get('https://api.idbus.com/v2/stops', headers=headers, timeout=30)
    response.raise_for_status()
    stops_df_v2 = pd.DataFrame.from_dict(response.json()['stops'])

    # Enrich stops list with meta gare infos: one row per (meta station id, nested stop)
    stops_rich = pandas_explode(stops_df_v2[['id', 'stops']], 'stops')
    # Each nested stop is a dict, only its id is needed for the join
    stops_rich['stops'] = stops_rich['stops'].map(lambda stop: stop['id'])
    stops_rich = stops_df_v1.merge(stops_rich, how='left', left_on='id', right_on='stops',
                                   suffixes=('', '_meta_gare'))

    # If no meta gare, the id is used
    stops_rich['id_meta_gare'] = stops_rich.id_meta_gare.combine_first(stops_rich.id)
    stops_rich['geoloc'] = stops_rich[['latitude', 'longitude']].values.tolist()
    print(f'{stops_rich.shape[0]} Ouibus stops were found, here is an example:\n {stops_rich.sample()}')
    return stops_rich

# Build the stop table once; get_stops_from_geo_loc and compute_trips read this module-level frame.
stops_rich = update_stop_list()

736 Ouibus stops were found, here is an example:
     _carrier_id destinations_ids   id   latitude                   long_name  longitude                  short_name               short_name_de               short_name_en               short_name_es               short_name_fr               short_name_it               short_name_nl      time_zone  id_meta_gare  stops                  geoloc
297         FDA               []  346  41.813917  Feces de Abaixo - Portugal  -7.413791  Feces de Abaixo - Portugal  Feces de Abaixo - Portugal  Feces de Abaixo - Portugal  Feces de Abaixo - Portugal  Feces de Abaixo - Portugal  Feces de Abaixo - Portugal  Feces de Abaixo - Portugal  Europe/Lisbon         346.0    NaN  [41.813917, -7.413791]


In [360]:
# Preview the first rows of the enriched stop table
stops_rich.head()

Unnamed: 0,_carrier_id,destinations_ids,id,latitude,long_name,longitude,short_name,short_name_de,short_name_en,short_name_es,short_name_fr,short_name_it,short_name_nl,time_zone,id_meta_gare,stops
0,XPB,"[3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18...",1,48.835687,Paris Centre - Bercy-Seine,2.380161,Paris Centre - Bercy-Seine,Paris Bercy-Seine (Zentrum),Paris City Centre - Bercy Seine,Paris Bercy-Seine (centro de la ciudad),Paris Centre - Bercy-Seine,Parigi Bercy-Seine (centro città),Parijs Bercy-Seine (stadscentrum),Europe/Paris,90.0,1.0
1,CDG,"[5, 7, 9, 11, 12, 15, 16, 18, 19, 20, 21, 22, ...",2,49.010894,Paris - Gare routière Aéroport Roissy CDG,2.558934,Paris - Gare routière Aéroport Roissy CDG,Paris - Flughafen Charles de Gaulle,Paris - Roissy Charles De Gaulle Airport,Paris - Aeropuerto Charles de Gaulle,Paris - Gare routière Aéroport Roissy CDG,Parigi - Aeroporto Charles de Gaulle,Parijs - Luchthaven Charles de Gaulle,Europe/Paris,90.0,2.0
2,GIA,"[13, 16, 18, 21, 29, 34, 35, 41, 76, 95, 97, 1...",3,41.978969,Gérone,2.817311,Gérone,Girona,Girona,Girona,Gérone,Girona,Gerona,Europe/Madrid,3.0,
3,GOA,"[1, 5, 7, 10, 13, 14, 15, 16, 18, 34, 35, 52, ...",4,44.416356,Gênes - Via Fanti d'Italia,8.919053,Gênes - Via Fanti d'Italia,Genua - Via Fanti d'Italia,Genoa - Via Fanti d'Italia,Génova - Via Fanti d'Italia,Gênes - Via Fanti d'Italia,Genova - Via Fanti d'Italia,Genoa - Via Fanti d'Italia,Europe/Rome,4.0,
4,NCE,"[1, 2, 4, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18...",5,43.66484,Nice - Aéroport,7.20958,Nice - Aéroport,Nizza - Flughafen,Nice - Airport,Niza - Aeropuerto,Nice - Aéroport,Nizza - Aeroporto,Nice - Luchthaven,Europe/Paris,210.0,5.0


In [326]:
# Function to call the Ouibus API
def search_for_all_fares(date, origin_id, destination_id, passengers):
    """
    Query the Ouibus search endpoint for the trips between two stops (or meta stations) on a given date.

    The duration of the HTTP call is printed. The API token is read from the ``OUIBUS_API_TOKEN``
    environment variable.

    :param date: Travel date, sent as-is to the API (the notebook uses 'YYYY-MM-DD' strings)
    :type date: str
    :param origin_id: Id of the origin stop or meta station
    :type origin_id: int or float
    :param destination_id: Id of the destination stop or meta station
    :type destination_id: int or float
    :param passengers: Passenger descriptions, sent as-is to the API
    :type passengers: list
    :return: The trips of the response, or None when the response holds no usable 'trips' entry
    :rtype: pandas.DataFrame or None
    """
    # NOTE(review): the fallback is the token that used to be hardcoded here, kept only so existing
    # environments keep working. It is committed in clear text: rotate it and drop the fallback.
    api_token = os.environ.get('OUIBUS_API_TOKEN', 'rvZD7TlqePBokwl0T02Onw')
    headers = {
        'Authorization': f'Token {api_token}',
        'Content-Type': 'application/json',
    }
    data = {
        # Ids usually come from a pandas column and are numpy scalars: floats such as 90.0, or
        # integers that json cannot serialise. Plain ints are sent in both cases.
        "origin_id": int(origin_id),
        "destination_id": int(destination_id),
        "date": date,
        "passengers": passengers
    }
    timestamp = dt.now()
    r = requests.post('https://api.idbus.com/v1/search', headers=headers, data=json.dumps(data), timeout=30)
    print(dt.now()-timestamp)
    try:
        return pd.DataFrame.from_dict(r.json()['trips'])
    except (ValueError, KeyError, TypeError):
        # Body is not JSON, has no 'trips' entry or cannot be turned into a frame:
        # callers treat None as "no trip for this pair"
        return None
    

In [275]:
# Find the stops close to a geo point
def get_stops_from_geo_loc(lat, long, max_distance_km = 55, stops=None):
    """
    Return the stops located around a geo point, closest first.

    The distance is only a proxy: the euclidean distance between (latitude, longitude) pairs expressed in
    degrees, with 0.5 degree taken as roughly 55 km. It is stored in the 'distance_proxi' column of the result.
    NOTE(review): a degree of longitude is shorter than a degree of latitude away from the equator, so the
    search area is narrower east-west than max_distance_km suggests; confirm this is acceptable.

    :param lat: Latitude of the reference point, in degrees
    :type lat: float
    :param long: Longitude of the reference point, in degrees
    :type long: float
    :param max_distance_km: Approximate search radius in km
    :type max_distance_km: float
    :param stops: Stops to search in, with 'latitude' and 'longitude' columns; defaults to the
        module-level stops_rich frame
    :type stops: pandas.DataFrame
    :return: The stops within the radius, sorted by increasing 'distance_proxi' (the input is not modified)
    :rtype: pandas.DataFrame
    """
    if stops is None:
        stops = stops_rich
    # compute proxi for distance (since we only need to compare no need to take the earth curve into account...)
    # Column arithmetic instead of a row-by-row apply
    distance_proxi = ((stops.latitude - lat) ** 2 + (stops.longitude - long) ** 2) ** 0.5
    # assign returns a new frame, the input frame is left untouched
    stops_tmp = stops.assign(distance_proxi=distance_proxi)
    # We get all stations within approx 55 km (<=> 0.5 of distance proxi), scaled to the requested radius
    ratio_distance = max_distance_km/55
    return stops_tmp[stops_tmp.distance_proxi < 0.5*ratio_distance].sort_values(by='distance_proxi')
    

In [362]:

def compute_trips(date, passengers, lat_origin, long_origin, lat_destination, long_destination):
    """
    Meta function: from a departure and an arrival geo point,
       1 find the Ouibus stops close to the departure and to the arrival
       2 call the API for every pair of (departure meta station, arrival meta station)
       3 return all available trips

    The trips are enriched with stop information. Because of the merge suffixes, the origin stop
    columns are 'id_origin', 'geoloc' and 'short_name', and the destination stop columns are
    'id_destination', 'geoloc_destination' and 'short_name_destination'.

    :param date: Travel date, passed to search_for_all_fares
    :type date: str
    :param passengers: Passenger descriptions, passed to search_for_all_fares
    :type passengers: list
    :param lat_origin: Latitude of the departure point
    :type lat_origin: float
    :param long_origin: Longitude of the departure point
    :type long_origin: float
    :param lat_destination: Latitude of the arrival point
    :type lat_destination: float
    :param long_destination: Longitude of the arrival point
    :type long_destination: float
    :return: The available trips, or an empty frame when no trip was found
    :rtype: pandas.DataFrame
    """
    # Get all stops close to the origin and destination locations
    origin_stops = get_stops_from_geo_loc(lat_origin, long_origin)
    destination_stops = get_stops_from_geo_loc(lat_destination, long_destination)
    # Get the meta gare ids to reduce number of request to API
    origin_meta_gare_ids = origin_stops.id_meta_gare.unique()
    destination_meta_gare_ids = destination_stops.id_meta_gare.unique()

    # Call API for all scenarios; frames are collected and concatenated once after the loop
    trip_frames = []
    for origin_meta_gare_id in origin_meta_gare_ids:
        for destination_meta_gare_id in destination_meta_gare_ids:
            print(f'from {origin_meta_gare_id} to {destination_meta_gare_id}')
            if origin_meta_gare_id == destination_meta_gare_id:
                continue
            trips = search_for_all_fares(date, origin_meta_gare_id, destination_meta_gare_id, passengers)
            # None means the API gave no usable answer for this pair
            if trips is not None and not trips.empty:
                trip_frames.append(trips)

    # Nothing found: the merges below need the trip columns, so stop here
    if not trip_frames:
        return pd.DataFrame()
    all_trips = pd.concat(trip_frames, ignore_index=True)

    # Enrich with stops info
    stops_info = stops_rich[['id', 'geoloc', 'short_name']]
    all_trips = all_trips.merge(stops_info,
                                left_on='origin_id', right_on='id', suffixes=('', '_origin'))
    all_trips = all_trips.merge(stops_info,
                                left_on='destination_id', right_on='id', suffixes=('', '_destination'))
    return all_trips[all_trips.available]

    

In [363]:
# Test the meta function: search trips on 2019-10-09 for one 30-year-old passenger from the
# point (49.0, 2.0) to the point (44.8, -0.5), and print how long the whole search takes
timeu=dt.now()
tmp = compute_trips('2019-10-09', [{  "id": 1,  "age": 30,  "price_currency": "EUR"}], 49.0, 2.0, 44.8, -0.5)
print(dt.now()-timeu)


from 341.0 to 134.0
0:00:00.090319
from 793.0 to 134.0
0:00:00.077598
from 90.0 to 134.0
0:00:00.525214
from 216.0 to 134.0
0:00:00.066909
from 618.0 to 134.0
0:00:00.072370
from 173.0 to 134.0
0:00:00.075080
0:00:01.027543


In [364]:
# View the trips returned by compute_trips in the test cell
tmp

Unnamed: 0,arrival,available,departure,destination_id,id,legs,origin_id,passengers,price_cents,price_currency,id_origin,geoloc,short_name,id_destinatio,geoloc_destinatio,short_name_destinatio
0,2019-10-09T16:45:00.000+02:00,True,2019-10-09T07:20:00.000+02:00,29,47562419,"[{'origin_id': 1, 'destination_id': 29, 'depar...",1,"[{'id': 1, 'fare_name': 'Adult', 'fare_descrip...",1499,EUR,1,"[48.83568689, 2.380160747]",Paris Centre - Bercy-Seine,29,"[44.82987049, -0.555302468]",Bordeaux
1,2019-10-09T20:50:00.000+02:00,True,2019-10-09T11:00:00.000+02:00,29,47562421,"[{'origin_id': 1, 'destination_id': 29, 'depar...",1,"[{'id': 1, 'fare_name': 'Adult', 'fare_descrip...",1499,EUR,1,"[48.83568689, 2.380160747]",Paris Centre - Bercy-Seine,29,"[44.82987049, -0.555302468]",Bordeaux
2,2019-10-09T21:55:00.000+02:00,True,2019-10-09T14:00:00.000+02:00,29,54472155,"[{'origin_id': 1, 'destination_id': 29, 'depar...",1,"[{'id': 1, 'fare_name': 'Adult', 'fare_descrip...",1999,EUR,1,"[48.83568689, 2.380160747]",Paris Centre - Bercy-Seine,29,"[44.82987049, -0.555302468]",Bordeaux
3,2019-10-10T05:30:00.000+02:00,True,2019-10-09T21:00:00.000+02:00,29,47562425,"[{'origin_id': 1, 'destination_id': 29, 'depar...",1,"[{'id': 1, 'fare_name': 'Adult', 'fare_descrip...",1499,EUR,1,"[48.83568689, 2.380160747]",Paris Centre - Bercy-Seine,29,"[44.82987049, -0.555302468]",Bordeaux
4,2019-10-10T07:40:00.000+02:00,True,2019-10-09T23:00:00.000+02:00,29,47562431,"[{'origin_id': 1, 'destination_id': 29, 'depar...",1,"[{'id': 1, 'fare_name': 'Adult', 'fare_descrip...",1499,EUR,1,"[48.83568689, 2.380160747]",Paris Centre - Bercy-Seine,29,"[44.82987049, -0.555302468]",Bordeaux
5,2019-10-09T20:50:00.000+02:00,True,2019-10-09T11:45:00.000+02:00,29,47576983,"[{'origin_id': 52, 'destination_id': 29, 'depa...",52,"[{'id': 1, 'fare_name': 'Adult', 'fare_descrip...",1799,EUR,52,"[48.7315682, 2.373561859]",Paris - Aéroport Orly - Parking P4C,29,"[44.82987049, -0.555302468]",Bordeaux
6,2019-10-10T07:40:00.000+02:00,True,2019-10-09T23:30:00.000+02:00,29,47576997,"[{'origin_id': 52, 'destination_id': 29, 'depa...",52,"[{'id': 1, 'fare_name': 'Adult', 'fare_descrip...",1799,EUR,52,"[48.7315682, 2.373561859]",Paris - Aéroport Orly - Parking P4C,29,"[44.82987049, -0.555302468]",Bordeaux
7,2019-10-10T05:30:00.000+02:00,True,2019-10-09T21:40:00.000+02:00,29,47574713,"[{'origin_id': 488, 'destination_id': 29, 'dep...",488,"[{'id': 1, 'fare_name': 'Adult', 'fare_descrip...",1499,EUR,488,"[48.725759, 2.256828]",Paris Sud - Massy Palaiseau,29,"[44.82987049, -0.555302468]",Bordeaux
8,2019-10-10T07:40:00.000+02:00,True,2019-10-09T22:10:00.000+02:00,29,47558353,"[{'origin_id': 2, 'destination_id': 29, 'depar...",2,"[{'id': 1, 'fare_name': 'Adult', 'fare_descrip...",1799,EUR,2,"[49.0108938358, 2.5589343505]",Paris - Gare routière Aéroport Roissy CDG,29,"[44.82987049, -0.555302468]",Bordeaux
9,2019-10-09T17:45:00.000+02:00,True,2019-10-09T07:20:00.000+02:00,92,47562357,"[{'origin_id': 1, 'destination_id': 92, 'depar...",1,"[{'id': 1, 'fare_name': 'Adult', 'fare_descrip...",1499,EUR,1,"[48.83568689, 2.380160747]",Paris Centre - Bercy-Seine,92,"[44.804366, -0.632593]",Bordeaux Pessac
