# Setup and Defintions

In [38]:
import altair as alt
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from scipy.optimize import curve_fit
from sklearn import metrics
import matplotlib.pyplot as plt

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

__author__ = "Jakob Rehmann"
__email__ = "rehmann@vsp.tu-berlin.de"

In [39]:
def get_raw_files(file_number):
    if file_number == 'noZoomer':
        root= "./input/1-noZoomer/berlin-v5.5-1pct."
    if file_number == 'slowZoomer':
        root= "./input/2-slowZoomer/berlin-v5.5-1pct."
    if file_number == 'fastZoomer':
        root= "./input/3-fastZoomer/berlin-v5.5-1pct."
        
    modes_raw = pd.read_csv(root + "modestats.txt",sep='\t')
    mode_coverage_raw = pd.read_csv(root + "modeChoiceCoverage1x.txt",sep='\t')
    score_raw = pd.read_csv(root + "scorestats.txt",sep='\t')
    
    return modes_raw, mode_coverage_raw, score_raw
    

In [40]:
def find_convergence(metric_name, limit, df_raw):

    num_iter= np.size(df_raw.Iteration)
    slopes = np.empty(num_iter)
    slopes[:] = np.nan
    
    df = df_raw[['Iteration',metric_name]].copy()

    for x in range(analysis_begin,num_iter):
        
        if fixed_window:
            start_iter = x - window_size
        else:
            start_iter = np.min([int((1-pct_ret)*x),x-min_window_size])

        temp = df[start_iter:x].copy().reset_index(drop=True)
        it = temp['Iteration'].to_numpy().reshape(-1, 1)
        mode = temp[metric_name].to_numpy()
        
        transformer = PolynomialFeatures(degree=1, include_bias=False).fit(it)
        it_ = transformer.transform(it)

        model = LinearRegression().fit(it_,mode)
        y_pred = model.predict(it_).reshape(-1, 1)
        s = (y_pred[-1]-y_pred[-2])
        slopes[x]= s


    df['slopes'] = pd.DataFrame(slopes)
    df['negLim'] = pd.DataFrame(np.ones(num_iter)*limit*-1)
    df['posLim'] = pd.DataFrame(np.ones(num_iter)*limit)
    df['zeros'] = pd.DataFrame(np.zeros(num_iter))
    
    
    pointsInZone = np.zeros(num_iter)
    pointsInZone[:] = np.nan
    count = 0
    iterLimit = 50
    shutdown_iteration = np.nan
    
    for b in range(0,slopes.size):
        if slopes[b] > -1*limit and slopes[b]< limit:
            count+=1
        else:
            count = 0
        
        if count==iterLimit:
            pointsInZone[b-iterLimit:b]= slopes[b-iterLimit:b]
            shutdown_iteration = b
            break
    
    
    df['pointsInZone'] = pd.DataFrame(pointsInZone)
    
    return df,shutdown_iteration
    

In [41]:
def assemble_graph(df, zeroGraph,shutdown_iteration):
    base = alt.Chart(df).encode(x='Iteration')
    if zeroGraph:
        mode = base.mark_line(color='green').encode(alt.Y(metric_name+':Q', scale = alt.Scale(zero=False )))
    else:
        mode = base.mark_line(color='green').encode(alt.Y(metric_name+':Q'))

    zone = base.mark_line(opacity = 0.5).transform_fold(['negLim','posLim']).encode(y = 'value:Q')
    zero = base.mark_line(size= 0.5).encode(y='zeros')
    points = base.mark_circle().encode(y='pointsInZone')
    slopeGr = base.mark_line().transform_fold(
        ['slopes', 'zeros']
    ).encode(y = 'value:Q',color='key:N')
    zzz = alt.layer(slopeGr, zone, zero, points, data = df )
    finalG = alt.layer(zzz,mode, data = df ).resolve_scale(y='independent')

    return finalG

# Parameterization


In [42]:
fixed_window = False
window_size = 50
pct_ret = 0.25

analysis_begin = 50
min_window_size = 50

## Score Comparison

In [43]:
threshold_score = 0.001
run_array = ['noZoomer', 'slowZoomer', 'fastZoomer']
score_array = {'score_executed'} #,'score_best','score_avg','score_worst'
scores_from_Python = pd.DataFrame

for metric_name in score_array:
    if 'stacked' in locals():
        del stacked
    for run in run_array:
        modes_raw, mode_coverage_raw, score_raw = get_raw_files(run)
        score_mod = score_raw.rename(columns={'ITERATION': 'Iteration','avg. EXECUTED': 'score_executed', 'avg. WORST':'score_worst','avg. AVG':'score_avg','avg. BEST':'score_best'})
        if not metric_name in score_mod.columns:
            print(metric_name, 'is not contained in df')
            continue


        df,shutdown_iteration = find_convergence(metric_name, threshold_score, score_mod)
        print(metric_name + ';' + str(threshold_score) + ';' + str(run) + ';' + str(shutdown_iteration))
        
        finalG = assemble_graph(df,True, shutdown_iteration)
        finalG = finalG.properties(title='score convergence for ' + metric_name + ', Run: ' + str(run) + ', convergence at iter ' + str(shutdown_iteration))
        if not 'stacked' in locals():
            stacked = finalG
        else:
            stacked = stacked | finalG
    stacked.display()

score_executed;0.001;noZoomer;517
score_executed;0.001;slowZoomer;625
score_executed;0.001;fastZoomer;1144


## Mode Comparison

In [44]:
threshold_mode = 0.00001
run_array = ['noZoomer', 'slowZoomer', 'fastZoomer']
mode_array = {'zoomer','bicycle', 'car','pt','walk'}
for metric_name in mode_array:
    if 'stacked' in locals():
        del stacked
    zzz = metric_name + ';' + str(threshold_mode);
    for run in run_array:
        modes_raw, mode_coverage_raw, score_raw = get_raw_files(run)
        if not metric_name in modes_raw.columns:
            print(metric_name, 'is not contained in df')
            continue


        df,shutdown_iteration = find_convergence(metric_name, threshold_mode, modes_raw)
        zzz = zzz + ';' + str(shutdown_iteration)
        finalG = assemble_graph(df,False, shutdown_iteration)
        finalG = finalG.properties(title='mode convergence for ' + metric_name + ', Run:'' ' + str(run) + ', convergence at iter ' + str(shutdown_iteration))
        if not 'stacked' in locals():
            stacked = finalG
        else:
            stacked = stacked | finalG
    print(zzz)
    stacked.display()


car;1e-05;1021;1516;nan


pt;1e-05;1189;1104;1828


bicycle;1e-05;695;1283;1442


walk;1e-05;470;779;1553


zoomer is not contained in df
zoomer;1e-05;1313;nan


## Mode Choice Coverage  Comparison

In [45]:
threshold_mode_coverage=0.0003
run_array = ['noZoomer', 'slowZoomer', 'fastZoomer']
mode_array = {'zoomer','bicycle', 'car','pt','walk'}

for metric_name in mode_array:
    if 'stacked' in locals():
        del stacked
        zzz = metric_name + ';' + str(threshold_mode_coverage);
    for run in run_array:
        modes_raw, mode_coverage_raw, score_raw = get_raw_files(run)
        if not metric_name in mode_coverage_raw.columns:
            print(metric_name, 'is not contained in df')
            continue


        df,shutdown_iteration = find_convergence(metric_name, threshold_mode_coverage, mode_coverage_raw)
        zzz = zzz + ';' + str(shutdown_iteration)
        finalG = assemble_graph(df,False, shutdown_iteration)
        finalG = finalG.properties(title='mode choice coverage for ' + metric_name + ', Run:' + str(run) + ', convergence at iter ' + str(shutdown_iteration))
        if not 'stacked' in locals():
            stacked = finalG
        else:
            stacked = stacked | finalG
    stacked.display()
    print(zzz)
            

car;0.0003;503;566;568


pt;0.0003;417;464;476


bicycle;0.0003;559;647;654


walk;0.0003;465;536;537
zoomer is not contained in df


zoomer;0.0003;619;634
