### Rome2Rio 

#### Importing packages that are neccessary

In [282]:
import pandas as pd
from pymongo import MongoClient
import requests as req
import json
from itertools import permutations
import random
import time
import json

#### All the utility functions are defined below.
* They can connect to any DB , given that the URI string is given (Including authentication).
* Return a MongoDB collection given a pymongo database object and a string mentioning the collection name.
* Pluck - From an array of objects return a list containing the elements of a certain attribute present in the given array.

In [283]:
#Defining utility functions over here

# Connect to a database.
def connectToDb(uri, dbName):
    client = MongoClient(uri)
    db = client[dbName]
    return db

#Returns a specific collection.
def getACollection(db, collectionName):
    return db[collectionName]

# Python Implementation of Underscore - Utiltiy Functions
def pluck(array, property):
    return [x[property] for x in array]

#### Rome2Rio Main API call.
To view the rome2rio search api documentation, [click here](https://www.rome2rio.com/documentation/1-4/search/).  
When the api was tested out, we were getting 401 authentication error for few api calls. Not sure as to why though. Sometimes responses comes at the first go, sometimes they don't. That is why we have a retry mechanism with a upper retry count of 50 (Too large a limit. Reduce to 10 or less, if you feel the api fetches responses within few tries.)


In [343]:
# Rome2Rio - api call and parsing functions

#Get a rome2Rio route and return it as a json
def callRome2Rio(oName, dName, oPosLat, oPosLong, dPosLat, dPosLong):
    orgCo = str(oPosLat)+","+str(oPosLong)
    destCo = str(dPosLat)+","+str(dPosLong)
    url = 'http://free.rome2rio.com/api/1.4/json/Search?key=IRlABFW8&oName='+oName+'&dName='+dName+'&oPos='+orgCo+'&dPos='+destCo
    print("The url is ", url)
    retryCount = 0
    re = req.get(url)
    data = {}
    stopProcess = False
    if re.status_code==200:
        data = re.json()
    else:
        print("Something went wrong. The status we got is ", re.status_code)
        retryPass = False
        while retryPass==False and retryCount < 100:
            retryCount+=1
            print("Trying for the ",retryCount," time")
            re = req.get(url)
            if(re.status_code==200):
                retryPass = True
                data = re.json()
            if(re.status_code==444):
                retryPass=True
                print("Wrong destination name");
                data = {}
            if(re.status_code==429):
                retryPass=True
                print("Too-Many requests per hour")
                stopProcess = True
            if(re.status_code==402):
                retryPass=True
                print("Payment Required")
                stopProcess = True
    print("Got data in ",retryCount," retry/retries")  
    return data, stopProcess


#### Rome2Rio main parsing function and its corresponding helper functions.

In [332]:
#Main Parsing function call. 
def parse_rome2rio(data, fromCity, toCity,stagingDb): 
    """ This is the main rome2rio parsing functions.
    Inputs: data -> response from rome2rio which needs to be parsed
            fromCity -> City object from pyt database whose name is the fromCity to rome2rio api call.
            toCity -> City object from pyt database whose name is the toCity to rome2rio api call.
    Outputs: parsedJson -> This is the parsed data 
             notParseblePreferredRouteCount -> This is the count of routes which cannot be parsed due to missing data etc.
    """
    routes = data["routes"]
    vehicles = data["vehicles"]
    places = data["places"]
    airlines = data["airlines"]
    #Zeroth index is always the preferred route w.r.t combination of distance, time and price. Store the rest as alternative routes.
    routeFormed = False
    indexToLook = 0
    preferredRoute = {}
    notParseblePreferredRouteCount = 0
    totalDurationToNotExceed = int(routes[0]["totalDuration"]) * 1.5
    while routeFormed!=True:
        if routes[indexToLook]["totalDuration"] < totalDurationToNotExceed:
            preferredRoute, routeFormed = formRoute(routes[indexToLook], vehicles, places, airlines, fromCity, toCity)
            if routeFormed!=True:
                if indexToLook < len(routes):
                    indexToLook+=1
                    notParseblePreferredRouteCount+=1
                else:
                    print("No route has an indicative price for ", fromCity["name"], " and ", toCity["name"], "route")
                    notParseblePreferredRouteCount+=1
        else:
            #Try to get the preferredRoute from city connection database.
            preferredRoute, routeFormed = getExistingCityConnection(fromCity, toCity, stagingDb)
            if routeFormed!=True:
                print("We can't form a viable route for", fromCity["name"], " and ", toCity["name"])
                routeFormed=True
    alternateRoutes = []
    for route in routes[1:]:
        routeJson, routeFormed = formRoute(route, vehicles, places, airlines, fromCity, toCity)
        alternateRoutes.append(routeJson)
    parsedJson = {
        "fromCity": fromCity["planningid"],
        "toCity": toCity["planningid"],
        "preferredRoute": preferredRoute,
        "alternateRoutes": alternateRoutes,
        "timestamp": time.time()
    }
    # Need to compute cost function for each preferred route
    return parsedJson, notParseblePreferredRouteCount
#--------------------------------------------------------------------------------------------------------------------#
## Forming route 
def formRoute(route, vehicles, places, airlines, fromCity, toCity):
    """ This a part of parsing function where i take an individual route option and form my desired JSON structure.
    Inputs: route - Route for which i need to parse into my desired structure. (This can be a preferred route or an alternate route. Both have same structure)
            vehicles - Array that contains all vehicle types
            places - Array that contains all places within the fromCity and toCity that a route can traverse. This is for all routes
            airlines - Array that contains all airlines that ply within the 2 cities.
    Outputs: routeJson - My desired JSON Structure
             routeFormed - Boolean that returns True if desired structure is formed and False if not.
    """
    routeFormed = False
    allSegments = route["segments"]
    try:
        routePrice = getPrice(route["indicativePrices"], route["name"])
    except KeyError:
        routeFormed = False
        return {}, routeFormed
    preferredMode,flights, trains, bus, car, transfers = parseSegment(route["name"], allSegments, vehicles, places, airlines)
    routeJson = {
        "title": route["name"],
        "fromCity": fromCity["planningid"],
        "toCity": toCity["planningid"],
        "totalDuration": route["totalDuration"],
        "transitDuration": route["totalTransitDuration"],
        "transferDuration": route["totalTransferDuration"],
        "allPrice": route["indicativePrices"],
        "price": routePrice,
        "currencyCode": route["indicativePrices"][0]["currency"],
        "preferredMode": list(set(preferredMode)),
        "flights": flights,
        "trains": trains,
        "bus": bus,
        "car": car,
        "transfers": transfers
    }
    routeFormed = True
    return routeJson, routeFormed
#---------------------------------------------------------------------------------------------------------------------#
#Parsing Segment.
def parseSegment(routeName, allSegments, vehicles, places, airlines):
    """ Given all segments within a route, parse it into my desired JSON Structure.
    Inputs: routeName -> String, that tells me as to the nature of mode of transport (Fly to some place, Train, rideshare etc..)
            allSegments -> Segment array that i need to parse
            vehicles - Array that contains all vehicle types
            places - Array that contains all places within the fromCity and toCity that a route can traverse. This is for all routes
            airlines - Array that contains all airlines that ply within the 2 cities.
    Outputs: preferredMode: contains an array of preferred mode of travel (that covers majority of the distance)
             flights: contains an array of flights in my desired format present within the route, empty if there isn't
             trains:  contains an array of trains in my desired format present within the route, empty if there isn't.
             bus: contains an array of bus in my desired format present within the route, empty if there isn't.
             car: contains an array of flights in my desired format present within the route, empty if there isn't.
             transfers: contains an array of transfers in my desired format present within the route, empty if there isn't(Transfers cover for small distances and it can be in BUS, CAR or TRAIN)
    """
    flights= []
    trains=[]
    bus=[]
    cars=[]
    transfers=[]
    preferredMode=[]
    isAirSegment=False
    car_types = ["rideshare", "car", "shuttle", "taxi", "towncar"]
    for segment in allSegments:
        depPlaceKeys = list(places[segment["depPlace"]])
        arrPlaceKeys = list(places[segment["arrPlace"]])
        segmentKeys = list(segment.keys())
        if segment["segmentKind"] == "air":
            #This has flight data.
            preferredMode.append("flight")
            for flightOption in segment["outbound"]:
                assert places[segment["arrPlace"]]["kind"]=="airport"   
                flight = {
                    "vehicleType": "FLIGHT",
                    "depCountryCode": places[segment["depPlace"]]["countryCode"],
                    "arrCountryCode": places[segment["arrPlace"]]["countryCode"],
                    "noOfStops": len(flightOption["hops"])-1,
                    "operatingDays": flightOption["operatingDays"],
                    "indicativePrice": flightOption["indicativePrices"][0]["price"],
                    "indicativeMaxPrice": flightOption["indicativePrices"][0]["priceHigh"],
                    "indicativeMinPrice": flightOption["indicativePrices"][0]["priceLow"],
                    "currencyCode": flightOption["indicativePrices"][0]["currency"],
                    "distance": segment["distance"],
                    "transitDuration": segment["transitDuration"],
                    "transferDuration": segment["transferDuration"],
                    "totalDuration": segment["transitDuration"] + segment["transferDuration"]
                }
                if "code" in depPlaceKeys:
                    flight["depAirportCode"] = places[segment["depPlace"]]["code"]
                if "code" in arrPlaceKeys:
                    flight["arrAirportCode"] = places[segment["arrPlace"]]["code"]
                flights.append(flight)
        else:
            #This includes surface data (either train, bus, car. also check for comma as it indicates multiple modes of transport)       
            if  vehicles[segment["vehicle"]]["kind"]=="bus":
                busSegment = {
                    "vehicleType": "BUS", 
                    "depPlaceCountryCode": places[segment["depPlace"]]["countryCode"],
                    "depPlaceTitle": places[segment["depPlace"]]["shortName"],    
                    "arrPlaceCountryCode": places[segment["arrPlace"]]["countryCode"],
                    "arrPlaceTitle": places[segment["arrPlace"]]["shortName"],
                    "distance": segment["distance"],  
                    "transitDuration": segment["transitDuration"],
                    "transferDuration": segment["transferDuration"],
                    "totalDuration": segment["transitDuration"] + segment["transferDuration"]
                    }
                if "code" in depPlaceKeys:
                    busSegment["depPlaceCode"] = places[segment["depPlace"]]["code"]
                if "code" in arrPlaceKeys:
                    busSegment["arrPlaceCode"] = places[segment["arrPlace"]]["code"]
                if "indicativePrices" in segmentKeys:
                    busSegment["allPrices"] = segment["indicativePrices"]
                    busSegment["indicativePrice"] = segment["indicativePrices"][0]["price"]
                    busSegment["currencyCode"] = segment["indicativePrices"][0]["currency"]
                #It can be a primary mode of transport or transfer.
                if "bus" in routeName.lower():
                    preferredMode.append("bus")
                    bus.append(busSegment)
                else:
                    transfers.append(busSegment)
                    
            if vehicles[segment["vehicle"]]["kind"] == "train":
                trainSegment = {
                        "vehicleType": "TRAIN",
                        "depPlaceCountryCode": places[segment["depPlace"]]["countryCode"],
                        "depPlaceTitle": places[segment["depPlace"]]["shortName"],
                        "arrPlaceCountryCode": places[segment["arrPlace"]]["countryCode"],
                        "arrPlaceTitle": places[segment["arrPlace"]]["shortName"],
                        "distance": segment["distance"],
                        "transitDuration": segment["transitDuration"],
                        "transferDuration": segment["transferDuration"],
                        "totalDuration": segment["transitDuration"] + segment["transferDuration"]
                    }
                if vehicles[segment["vehicle"]]["name"]!="RER" and "indicativePrices" in segmentKeys:
                    if "priceHigh" in list(segment["indicativePrices"][0].keys()):
                        trainSegment["indicativeMaxPrice"] = segment["indicativePrices"][0]["priceHigh"]
                    if "priceLow" in list(segment["indicativePrices"][0].keys()):
                        trainSegment["indicativeMinPrice"] =  segment["indicativePrices"][0]["priceLow"]
                if "code" in depPlaceKeys:
                    trainSegment["depPlaceCode"] = places[segment["depPlace"]]["code"]
                if "code" in arrPlaceKeys:
                    trainSegment["arrPlaceCode"] = places[segment["arrPlace"]]["code"]
                if "indicativePrices" in segmentKeys:
                    trainSegment["allPrices"] = segment["indicativePrices"]
                    trainSegment["indicativePrice"] = segment["indicativePrices"][0]["price"]
                    trainSegment["currencyCode"] = segment["indicativePrices"][0]["currency"]
                if "train" in routeName.lower():
                    preferredMode.append("train")
                    trains.append(trainSegment)
                else:
                    transfers.append(trainSegment)
            if vehicles[segment["vehicle"]]["kind"] == "car":
                carSegment = {
                        "vehicleType": "CAR",
                        "depPlaceTitle": places[segment["depPlace"]]["shortName"],
                        "arrPlaceTitle": places[segment["arrPlace"]]["shortName"],
                        "distance": segment["distance"],
                        "transitDuration": segment["transitDuration"],
                        "transferDuration": segment["transferDuration"],
                        "totalDuration": segment["transitDuration"] + segment["transferDuration"]
                }
                if "regionCode" in  depPlaceKeys:
                    carSegment["depPlaceCode"] = places[segment["depPlace"]]["regionCode"]
                if "regionCode" in arrPlaceKeys:
                    carSegment["arrPlaceCode"] = places[segment["arrPlace"]]["regionCode"]
                if "indicativePrices" in segmentKeys:
                    carSegment["allPrices"] = segment["indicativePrices"]
                    carSegment["currencyCode"] = segment["indicativePrices"][0]["currency"]
                if "countryCode" in depPlaceKeys:
                    carSegment["depPlaceCountryCode"] = places[segment["depPlace"]]["countryCode"]
                if "countryCode" in arrPlaceKeys:
                    carSegment["arrPlaceCountryCode"] = places[segment["arrPlace"]]["countryCode"]
                if "drive" in routeName.lower():
                    preferredMode.append("car")
                    cars.append(carSegment)
                else:
                    transfers.append(carSegment)
    return preferredMode, flights, trains, bus, cars, transfers
#-------------------------------------------------------------------------------------------------------------------#
#Parsing price
def getPrice(indicativePrice, routeName):
    """ Returns price object if there is a median, max and min price available , else returns indicativePrice as it is.
        There is no max or min price if the mode of transport is one of the car_types.
        Inputs: indicativePrice -> Array that contains an indicative price for the route.
                routeName -> Used to figure out the mode of transport. 
        Output: A curated JSON containing the indicative price or the input indicativePrice array.
    
    """
    car_types = ["rideshare", "car", "shuttle", "taxi", "towncar", "drive"]
    if routeName.lower() in car_types:
        return indicativePrice
    else:
        return {
            "indicativeMedianPrice": indicativePrice[0]["price"],
            "indicativeMaxPrice": indicativePrice[0]["priceHigh"],
            "indicativeMinPrice": indicativePrice[0]["priceLow"],
            "currencyCode": indicativePrice[0]["currency"]
        }

In [325]:
def getExistingCityConnection(fromCity, toCity, db):
    cityConn = getACollection(db, 'city_connection')
    connection = cityConn.find_one({"fromCity": fromCity["planningid"], "toCity": toCity["planningid"]})
    print(connection)
    if connection != None and "_id" in list(connection.keys()):
        return connection, True
    else:
        return {}, False
    

In [351]:
def getAllEuropeanCities(db):
    """Returns all European cities present in the database."""
    region = getACollection(db, 'searchregion')
    europeanCountries = region.find_one({"regionCode": "eur"}, {"countryIds": 1})["countryIds"]
    country = getACollection(db, 'country')
    europeanCountriesData = country.find({"countryId": {"$in": europeanCountries}})
    countryCodes = []
    for country in europeanCountriesData:
        countryCodes.append(country["countryCode"])
    city = getACollection(db, 'city')
    europeanCities = city.find({"countryCode": {"$in": countryCodes}})
    return europeanCities


In [367]:
local = connectToDb("mongodb://oceanjar:wwmib3112@localhost:27017/localDb?authMechanism=SCRAM-SHA-1", "localDb")

## Rome2Rio Execution Starts here

In [370]:
db=connectToDb("mongodb://oceanjardb:oceanjardbwwmib3112#@35.154.159.75:27017/oceanjar?authMechanism=MONGODB-CR", "oceanjar")
europeanCities = getAllEuropeanCities(db)
europeanCitiesMap = {}
routeNotPresentCities = []
for city in europeanCities:
    europeanCitiesMap[city["planningid"]] = city
    isRoutePresent = checkIfRoutePresent(city, db)
    if isRoutePresent!=True:
        routeNotPresentCities.append(city)
defaultResponseTemplates=[]
for city1 in routeNotPresentCities and len(routeNotPresentCities) > 0:
    for city2 in list(europeanCitiesMap.keys()):
        if city1!=city2:
            responseTemplate1 = {
                "fromCity": europeanCitiesMap[city1],
                "toCity": europeanCitiesMap[city2],
                "response": {}
            }
            responseTemplate2 = {
                "fromCity": europeanCitiesMap[city2],
                "toCity": europeanCitiesMap[city1],
                "response": {}
            }
            defaultResponseTemplates.append(responseTemplate1)
            defaultResponseTemplates.append(responseTemplate2)
print("length", len(defaultResponseTemplates))
#write_to_db(db, defaultResponseTemplates)



length 0


In [369]:
def checkIfRoutePresent(city, db):
    rome2rio = getACollection(db, 'rome2rioResponses')
    route = rome2rio.find_one({"fromCity.planningid": city["planningid"]})
    if route!=None and "routes" in "routes" not in list(route["response"].keys()):
        return True
    else:
        return False

In [288]:
#Testing out for 10 cities.
sample_cities = []
all_keys = list(europeanCitiesMap.keys())
for i in range(1,11):
    sample_cities.append(random.choice(all_keys))


In [333]:
def write_to_db(db, arr):
    r2r = getACollection(db, 'rome2rioResponses')
    result = r2r.insert_many(arr)
    try:
        assert len(result.inserted_ids) == len(arr)
    except AssertionError:
        print("There is a mis-match in the number of documents inserted", len(result.inserted_ids), len(arr))
    return None

In [340]:
total_count = 0
start_time = time.time()
total_api_call_time = 0
parsedResponses = []
for city1 in list(europeanCitiesMap.keys())[0:5]:
    for city2 in list(europeanCitiesMap.keys())[0:5]:
        if city1!=city2:
            total_count+=1
            originCity = europeanCitiesMap[city1]
            destCity = europeanCitiesMap[city2]
            apiStTime = time.time()
            r2rResponse = callRome2Rio(originCity["name"], destCity["name"], originCity["latitude"], originCity["longitude"], destCity["latitude"], destCity["longitude"])
            r2rResponse["fromCityId"] = originCity["planningid"]
            r2rResponse["toCityId"] = destCity["planningid"]
            apiTime = time.time() - apiStTime
            total_api_call_time+=apiTime
            parsedResponses.append(r2rResponse)
            time.sleep(2)
elapsed_time = time.time() - start_time
print("----Total Count is ", total_count, "it is completed in ", elapsed_time ," seconds")
print("-------Writing to database------------")
write_to_db(local, parsedResponses)


The url is  http://free.rome2rio.com/api/1.4/json/Search?key=V67gPjlQ&oName=Padova&dName=Pisa&oPos=45.4064826965332,11.882489204406738&dPos=43.72283935546875,10.401689529418945
Something went wrong. The status we got is  401
Trying for the  1  time
Trying for the  2  time
Trying for the  3  time
Trying for the  4  time
Trying for the  5  time
Trying for the  6  time
Trying for the  7  time
Trying for the  8  time
Trying for the  9  time
Trying for the  10  time
Trying for the  11  time
Trying for the  12  time
Trying for the  13  time
Trying for the  14  time
Trying for the  15  time
Trying for the  16  time
Trying for the  17  time
Trying for the  18  time
Trying for the  19  time
Trying for the  20  time
Trying for the  21  time
Trying for the  22  time
Trying for the  23  time
Trying for the  24  time
Trying for the  25  time
Trying for the  26  time
Trying for the  27  time


KeyboardInterrupt: 

In [327]:
getExistingCityConnection({"planningid": 4}, {"planningid": 14}, db)

{'_id': ObjectId('58981ddecd9e4fa4a48b3bcf'), 'fromCity': 4.0, 'toCity': 14.0, 'directConnection': {'defaultConnection': {'mode': 'FLIGHT', 'travelTime': 85.0, 'transferType': 'SHARED', 'availability': 'MORNING', 'lcc': True, 'stops': 0.0, 'slot': 'MORNING_NOON'}, 'selfDrive': False, 'geographyFactor': 1.0}, 'geographyFactor': 1.0}


({'_id': ObjectId('58981ddecd9e4fa4a48b3bcf'),
  'directConnection': {'defaultConnection': {'availability': 'MORNING',
    'lcc': True,
    'mode': 'FLIGHT',
    'slot': 'MORNING_NOON',
    'stops': 0.0,
    'transferType': 'SHARED',
    'travelTime': 85.0},
   'geographyFactor': 1.0,
   'selfDrive': False},
  'fromCity': 4.0,
  'geographyFactor': 1.0,
  'toCity': 14.0},
 True)

In [317]:
setResponse = []
for resp in parsedResponses:
    resp["preferredRoute"]["preferredMode"] = list(set(resp["preferredRoute"]["preferredMode"]))
    for alt in resp["alternateRoutes"]:
        alt["preferredMode"] = list(set(alt["preferredMode"]))
    setResponse.append(resp)

KeyError: 'preferredMode'

In [299]:
print("API time", total_api_call_time, "Parsing time ", total_parsing_time)

API time 342.87971591949463 Parsing time  0.05852317810058594


In [None]:
print()

In [None]:
# Problems to be solved 
# 1. Identify whether a train/bus/car segment is a transfer if the preferred route is also the same.
# 2. How to reduce API time. -> (parallelization is not supported by rome2rio. Do we need to explore more on this option ?)
# 3. Sho


In [361]:
defaultResponseTemplates=[]
count=0
for city1 in list(europeanCitiesMap.keys()):
    for city2 in list(europeanCitiesMap.keys()):
        if city1!=city2:
            count+=1
            responseTemplate = {
                "fromCity": europeanCitiesMap[city1],
                "toCity": europeanCitiesMap[city2],
                "response": {}
            }
            defaultResponseTemplates.append(responseTemplate)

In [362]:
print(len(defaultResponseTemplates))

28730


In [363]:
write_to_db(local, defaultResponseTemplates)

In [360]:
print(len(list(europeanCitiesMap.keys())))

170


In [364]:
for i in range(1, 10):
    print(i)
    if i==4:
        break

1
2
3
4


In [365]:
import logging

In [371]:
r2r = getACollection(local, 'rome2rioResponses')
r2rAll = r2r.find()
data = []
for r in r2rAll:
    data.append(r)
write_to_db(db, data)