# API QoS Estimation

In [1]:
#First we import the required modules
import pandas as pd
from pandas.io.json import json_normalize
import json

import math
import numpy as np

import plotly.graph_objects as go
import plotly.io as pio
import plotly.express as px
pio.templates.default = 'plotly_white'
pd.set_option("display.precision", 3)

import datetime
from datetime import timedelta

from pandarallel import pandarallel
from joblib import Parallel, delayed
import multiprocessing
num_cores = multiprocessing.cpu_count()
pandarallel.initialize()

INFO: Pandarallel will run on 16 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [2]:
#Available colors
# Palette of ten hex colour codes. Traces pick an entry by indexing this list
# modulo its length, so the order of the entries determines which colour a
# given stop gets.
colors = [
    '#1f77b4',  # muted blue
    '#ff7f0e',  # safety orange
    '#2ca02c',  # cooked asparagus green
    '#d62728',  # brick red
    '#9467bd',  # muted purple
    '#8c564b',  # chestnut brown
    '#e377c2',  # raspberry yogurt pink
    '#7f7f7f',  # middle gray
    '#bcbd22',  # curry yellow-green
    '#17becf'   # blue-teal
]

In [3]:
# FUNCTIONS
def str_to_int(string) :
    """Return the sum of the Unicode code points of the characters in `string`.

    The result does not depend on character order, and an empty string
    yields 0.
    """
    return sum(ord(character) for character in string)

In [4]:
# Base directory of the input data; later cells build file paths by
# concatenating a relative file name onto this prefix (hence the trailing slash).
f = '../Data/'

In [5]:
#Load line_stops_dict
# lines_dict is indexed by the line id as a string; later cells read its
# 'destinations' entry and the per-direction 'stops' list.
# The encoding is given explicitly: JSON text is UTF-8 by specification,
# whereas open() would otherwise fall back to the platform default encoding
# and could mis-decode non-ASCII names on some systems.
with open(f+'Static/lines_dict.json', 'r', encoding='utf-8') as file:
    lines_dict = json.load(file)

## Last week's data

In [6]:
#Read the week's data, keep only the columns of interest and parse the dates
week_df = (
    pd.read_csv(
        f+'RealTime/buses_data_week_cleaned.csv',
        # Identifiers stay as strings; numeric fields use a compact unsigned dtype
        dtype={
            'line': 'uint16',
            'destination': 'str',
            'stop': 'str',
            'bus': 'str',
            'estimateArrive': 'uint16'
        }
    )
    [['line','destination','stop','bus','datetime','estimateArrive']]
    # Values that do not match the format become NaT instead of raising
    .assign(
        datetime=lambda frame: pd.to_datetime(
            frame['datetime'], errors = 'coerce', format='%Y-%m-%dT%H:%M:%S.%f'
        )
    )
)

In [7]:
def add_direction (row) :
    """Return the direction code (1 or 2) of a row.

    The two destinations of the row's line are looked up in ``lines_dict``.
    A row whose destination equals the second listed destination gets
    direction 1; every other row gets direction 2.
    """
    #The 'destinations' entry must hold exactly two values
    line_destinations = lines_dict[str(row.line)]['destinations']
    _, direction_one_dest = line_destinations

    if row.destination == direction_one_dest :
        return 1
    return 2

#Add direction field to df
# axis=1 makes apply call add_direction once per row; the returned values
# become the new 'direction' column.
week_df['direction'] = week_df.apply(add_direction, axis=1)

In [8]:
week_df.head()

Unnamed: 0,line,destination,stop,bus,datetime,estimateArrive,direction
0,82,PITIS,1689,8322,2021-02-15 12:15:55.574581,1273,1
1,132,HOSPITAL LA PAZ,1689,4835,2021-02-15 12:15:55.574581,403,1
2,82,PITIS,1689,4708,2021-02-15 12:15:55.574581,443,1
3,132,HOSPITAL LA PAZ,1643,4832,2021-02-15 12:15:55.583123,586,1
4,132,HOSPITAL LA PAZ,1643,4831,2021-02-15 12:15:55.583123,179,1


In [9]:
week_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10247 entries, 0 to 10246
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   line            10247 non-null  uint16        
 1   destination     10247 non-null  object        
 2   stop            10247 non-null  object        
 3   bus             10247 non-null  object        
 4   datetime        10247 non-null  datetime64[ns]
 5   estimateArrive  10247 non-null  uint16        
 6   direction       10247 non-null  int64         
dtypes: datetime64[ns](1), int64(1), object(3), uint16(2)
memory usage: 440.4+ KB


# Analysis of the time series belonging to a bus
Analyze all the data corresponding to the different trips of a bus to a stop.
We focus on the most recent part of each series: roughly the last TH values (the code keeps the last TH*30 seconds of data).

In [10]:
#Number of last occurrences which form the series we are going to analyze for QoS
# build_time_series_graph converts this count into a time window of TH*30 seconds.
TH = 30

In [11]:
# Order the readings chronologically within each (bus, stop) pair
th_df = week_df.sort_values(by=['bus','stop','datetime'], ascending = True)
# Keep one reading per (bus, stop, timestamp); when duplicated, the last row wins
th_df = th_df.drop_duplicates(['bus','stop','datetime'], keep = 'last')
# Keep only the most recent window. It is derived from TH (TH*30 seconds,
# i.e. 900 s for TH = 30) instead of a hard-coded literal, so it stays in sync
# with the window used by build_time_series_graph when TH is changed.
th_df = th_df[th_df.datetime > th_df.datetime.max() - timedelta(seconds=TH*30)]
th_df.tail(5)

Unnamed: 0,line,destination,stop,bus,datetime,estimateArrive,direction
7641,1,CRISTO REY,743,8327,2021-02-15 12:25:57.013542,753,2
8265,1,CRISTO REY,743,8327,2021-02-15 12:26:46.832539,668,2
8853,1,CRISTO REY,743,8327,2021-02-15 12:27:36.603846,626,2
9441,1,CRISTO REY,743,8327,2021-02-15 12:28:26.822609,576,2
9994,1,CRISTO REY,743,8327,2021-02-15 12:29:16.834310,557,2


In [12]:
th_df[th_df.line == 132]

Unnamed: 0,line,destination,stop,bus,datetime,estimateArrive,direction
777,132,MONCLOA,1331,4707,2021-02-15 12:16:46.274293,892,2
1440,132,MONCLOA,1331,4707,2021-02-15 12:17:36.341468,842,2
1991,132,MONCLOA,1331,4707,2021-02-15 12:18:26.224565,704,2
2596,132,MONCLOA,1331,4707,2021-02-15 12:19:16.274094,654,2
3241,132,MONCLOA,1331,4707,2021-02-15 12:20:06.319124,604,2
...,...,...,...,...,...,...,...
9166,132,MONCLOA,3291,5560,2021-02-15 12:28:25.614645,508,2
9690,132,MONCLOA,3291,5560,2021-02-15 12:29:15.677410,460,2
8702,132,MONCLOA,93,5560,2021-02-15 12:27:36.099158,915,2
9297,132,MONCLOA,93,5560,2021-02-15 12:28:26.321905,865,2


In [13]:
def build_time_series_graph(th_df,TH,bus_id) :
    """Build a plotly figure with the ETA time series of one bus, one trace per stop.

    Parameters
    ----------
    th_df : DataFrame
        Readings; the columns datetime, bus, line, direction, stop and
        estimateArrive are read.
    TH : number
        Size of the analysed window: only rows newer than the latest
        timestamp in `th_df` minus TH*30 seconds are kept.
    bus_id :
        Value compared against the `bus` column to select the bus.

    Returns
    -------
    plotly.graph_objects.Figure
        A figure with one 'lines+markers' trace per stop of the bus's line
        and direction that has readings in the window. When the bus has no
        rows in the window, a bare figure (no layout, no traces) is returned.
    """
    fig = go.Figure()

    #Restrict to the recent window, measured from the newest timestamp of the whole frame
    window_start = th_df.datetime.max() - timedelta(seconds=TH*30)
    recent_df = th_df[th_df.datetime > window_start]

    #Keep only the rows of the requested bus
    bus_df = recent_df[recent_df.bus == bus_id]
    if len(bus_df) == 0 :
        return fig

    #Line and direction are taken from the first row of the selection
    bus_line = bus_df.line.iloc[0]
    bus_direction = bus_df.direction.iloc[0]
    route_stops = lines_dict[str(bus_line)][str(bus_direction)]['stops']

    #Title and layout
    eta_axis = dict(
        title='ETA in Seconds',
        nticks=10,
        zerolinecolor='darkgrey'
    )
    fig.update_layout(
        title='<b>Bus {} : ETA Time Series</b> - Line: {}'.format(bus_id,bus_line),
        legend_title='<b>Destination Stop</b>',
        yaxis = eta_axis,
        margin=dict(r=0, l=0, t=40, b=0),
        hovermode='closest'
    )

    #Traces follow the order of the route's stop list; stops without readings are skipped
    stops_with_data = bus_df.stop.unique().tolist()
    for stop in route_stops :
        if stop in stops_with_data :
            #Position of the stop along the route (first occurrence in the list)
            stop_position = route_stops.index(stop)
            readings = bus_df[bus_df.stop == stop]

            #The colour is chosen deterministically from the stop identifier
            trace_color = colors[(str_to_int(stop))%len(colors)]

            #One hover label per reading
            hover_labels = []
            for row in readings.itertuples() :
                hover_labels.append(
                    '<b>Bus : ' + str(bus_id) + '</b> <br>' +
                    'Stop[' + str(stop_position) + ']: ' + str(stop) + '<br>' +
                    'Time : ' + row.datetime.strftime("%H:%M:%S") + '<br>' +
                    'ETA : ' + str(row.estimateArrive)
                )

            fig.add_trace(go.Scatter(
                name= '[' + str(stop_position) + '] ' + str(stop),
                x=readings.datetime,
                y=readings.estimateArrive,
                mode='lines+markers',
                line=dict(width=3,color=trace_color),
                text=hover_labels,
                hoverinfo='text'
            ))

    return fig

In [15]:
# Render the ETA time series of one sample bus. The id is a string because
# the 'bus' column is loaded with dtype 'str'.
bus_id = '4707'
build_time_series_graph(th_df, TH, bus_id).show()