# MuTraff - Unify Experiments
Takes several runs of the same experiment configuration, computes per-vehicle statistics across them, and saves the result as a single mean-value experiment.

In [12]:
import os
import re
import pandas as pd
import numpy as np

In [13]:
# Root of the MuTraff checkout. Set the MUTRAFF_HOME environment variable to
# point elsewhere; the fallback is the path this notebook was written with.
MUTRAFF_HOME=os.environ.get("MUTRAFF_HOME", "/Users/alvaro/Desktop/workspace/mutraff/uah-gist-mutraff-bastra")
# Directory that is scanned for experiment runs and that receives the MEANS output.
MUTRAFF_EXP_PATH=MUTRAFF_HOME + "/experiments/tmp"
# Loaded result frames keyed by run number (filled by load_results_file).
EXPERIMENTS={}
# All loaded runs stacked into one frame (assigned by the driver cell).
MEASURES=None
# Column names of MEASURES (assigned by the driver cell).
COLUMNS=[]
# Per-vehicle statistics returned by calculate_means (assigned by the driver cell).
VEHICLES=None

## Experiments

In [28]:
# Experiment name prefixes this notebook has been pointed at. Keeping them in
# one tuple avoids the previous pattern of assigning `experiment` twice, where
# the first value was silently overwritten.
AVAILABLE_EXPERIMENTS = (
    "alcalahenares_XL_mutraff_tele60_uni5x8_timeALL_fulltraffic_logit50",
    "alcalahenares_M_mutraff_tele60_uni5x16_timeALL_fulltraffic_logit50",
)
# The experiment whose runs are unified below; change the index to switch.
experiment=AVAILABLE_EXPERIMENTS[1]


## Code

In [29]:
def load_results_file( n, filename):
    """Read one run's results CSV and register it in EXPERIMENTS under key n.

    Parameters:
        n: run number; used both as the EXPERIMENTS key and as the value of
           the added 'STATS_experiment' column.
        filename: path of the CSV file to read.
    """
    global EXPERIMENTS
    # Tag every row with the run it came from before storing the frame.
    run_measures = pd.read_csv(filename).assign(STATS_experiment=n)
    EXPERIMENTS[n] = run_measures

In [30]:
def calculate_means(measures=None):
    """Build one summary row per vehicle id from the stacked run measures.

    For each vehicle the row starts from the values of its first measure,
    then every numeric column is replaced by its mean over the runs in which
    the vehicle finished ('has_finished' is True). If the vehicle finished in
    no run, those numeric columns are NaN. Three counters are then set:
    'STATS_exp_total' (number of measures), 'has_finished' and 'is_attended'
    (number of measures with the respective flag set). The per-run
    'STATS_experiment' tag is dropped.

    Parameters:
        measures: DataFrame with at least the columns 'id', 'is_attended' and
            'has_finished'. Defaults to the notebook-level MEASURES frame.

    Returns:
        DataFrame with one row per non-null vehicle id, in order of first
        appearance.
    """
    if measures is None:
        # Backward-compatible default: the frame assembled by the driver cell.
        measures = MEASURES
    veh_list=[]
    print( "Found {} trips corresponding to {} measures".format( len(measures['id'].unique()), len(measures['id']) ))
    print( "Creating statistics... please be patient, it takes some time")
    # A single groupby pass replaces one full-frame boolean scan per vehicle.
    # groupby skips rows whose id is missing (works for non-numeric ids too)
    # and sort=False keeps the order of first appearance.
    for veh, veh_measures in measures.groupby('id', sort=False):
        veh_attended = veh_measures.loc[veh_measures['is_attended']==True]
        veh_finished = veh_measures.loc[veh_measures['has_finished']==True]
        values = veh_measures.iloc[0].to_dict()
        # numeric_only=True: text columns cannot be averaged and make
        # DataFrame.mean raise in pandas >= 2.
        values.update( veh_finished.mean( axis = 0, numeric_only = True ).to_dict() )
        # The mean over an empty "finished" subset is NaN for every numeric
        # column, including the id itself; restore the real id.
        values['id']=veh
        values.pop('STATS_experiment', None)
        values['STATS_exp_total']=len(veh_measures)
        values['has_finished']=len(veh_finished)
        values['is_attended']=len(veh_attended)
        veh_list.append( values)
    print( "Analyzed {} vehicles".format(len(veh_list)) )
    return pd.DataFrame( veh_list )


In [31]:
def save_file_means(experiment, vehicles=None, exp_path=None):
    """Write the per-vehicle statistics to <exp_path>/<experiment>_MEANS/<experiment>_MEANS.csv.

    Parameters:
        experiment: experiment name used to build the output directory and
            file names.
        vehicles: DataFrame to save. Defaults to the notebook-level VEHICLES.
        exp_path: base experiments directory. Defaults to MUTRAFF_EXP_PATH.

    Raises:
        OSError: if the output directory cannot be created. The previous bare
            `except:` reported every failure as "Directory exists".
    """
    if vehicles is None:
        vehicles = VEHICLES
    if exp_path is None:
        exp_path = MUTRAFF_EXP_PATH
    out_dir = "{}/{}_{}".format( exp_path, experiment, "MEANS")
    out_file_means = "{}/{}_{}".format( out_dir, experiment, "MEANS.csv")
    print("creating directory "+out_dir)
    already_there = os.path.isdir(out_dir)
    # exist_ok tolerates an existing directory only; permission problems and
    # other errors still propagate instead of being hidden.
    os.makedirs(out_dir, exist_ok=True)
    if already_there:
        print( "(Directory exists: skipped)")
    print("Saving to file "+out_file_means)
    vehicles.to_csv( out_file_means )

In [32]:
# Run directories are named "<experiment>_NN_<suffix>" with NN in 00-29.
# The experiment name is escaped so it is matched literally, and the second
# character of NN must be a digit (the previous "." accepted any character).
experiment_regexp=re.escape(experiment)+r"_[012][0-9]_.*"
regexp = re.compile(experiment_regexp)
files = sorted(filter( lambda f: regexp.match(f), os.listdir(MUTRAFF_EXP_PATH) ))
max_files = 30

# Forget runs loaded by an earlier execution of this cell; otherwise a re-run
# that finds fewer runs (or another experiment) mixes stale frames into MEASURES.
EXPERIMENTS.clear()
n=0
for name in files[:max_files]:
    print("Loading ",name)
    # Each run directory holds a CSV named after the directory itself.
    filename = "{}/{}/{}.csv".format(MUTRAFF_EXP_PATH,name,name)
    load_results_file(n,filename)
    n=n+1

print("Read {} files".format(n))
if n == 0:
    # pd.concat on an empty dict fails with an unhelpful message; say what is missing.
    raise FileNotFoundError(
        "No runs matching '{}' found in {}".format(experiment_regexp, MUTRAFF_EXP_PATH))
MEASURES = pd.concat(EXPERIMENTS)
print("Obtained {} experiment measures".format(len(MEASURES)))
# MEASURES.rename( columns={'is_attended':'is_rerouted', 'id':'veh_id'}, inplace=True)
COLUMNS=MEASURES.columns.values.tolist()

VEHICLES=calculate_means()
save_file_means(experiment)
# Last expression of the cell so the notebook actually displays the preview.
VEHICLES.head()


Loading  alcalahenares_M_mutraff_tele60_uni5x16_timeALL_fulltraffic_logit50_01_190502_234402
Loading  alcalahenares_M_mutraff_tele60_uni5x16_timeALL_fulltraffic_logit50_02_190503_083619
Loading  alcalahenares_M_mutraff_tele60_uni5x16_timeALL_fulltraffic_logit50_03_190503_085940
Loading  alcalahenares_M_mutraff_tele60_uni5x16_timeALL_fulltraffic_logit50_04_190503_160402
Loading  alcalahenares_M_mutraff_tele60_uni5x16_timeALL_fulltraffic_logit50_05_190503_162735
Loading  alcalahenares_M_mutraff_tele60_uni5x16_timeALL_fulltraffic_logit50_06_190503_165102
Loading  alcalahenares_M_mutraff_tele60_uni5x16_timeALL_fulltraffic_logit50_07_190503_171425
Loading  alcalahenares_M_mutraff_tele60_uni5x16_timeALL_fulltraffic_logit50_08_190503_173752
Loading  alcalahenares_M_mutraff_tele60_uni5x16_timeALL_fulltraffic_logit50_09_190503_180119
Loading  alcalahenares_M_mutraff_tele60_uni5x16_timeALL_fulltraffic_logit50_10_190503_182443
Loading  alcalahenares_M_mutraff_tele60_uni5x16_timeALL_fulltraffic_lo

# GARBAGE AND TESTS

In [6]:
# Show the column names captured from MEASURES by the driver cell.
print(COLUMNS)

['id', 'veh_type', 't_depart_secs', 't_arrival_secs', 't_traveltime_secs', 'origin', 'destiny', 'route_calc_num', 'route_detail', 'route_path_num', 'route_distance', 'is_attended', 'has_finished', 'maps', 'tripfile', 'STATS_experiment']


In [329]:
# Scratch check: inspect every measure of vehicle 63 across the loaded runs.
veh_63_all = MEASURES.loc[MEASURES['id']==63]
# The "finished" subset must filter on has_finished (as calculate_means does);
# it previously filtered on is_attended, contradicting its name and heading.
veh_63_finished = veh_63_all.loc[veh_63_all['has_finished']==True]
print( "============ VEHICLE 63 ===========")
print( veh_63_all )
print( "============ VEHICLE 63 finished ===========")
print( veh_63_finished )
# numeric_only=True: text columns cannot be averaged and make
# DataFrame.mean raise in pandas >= 2.
print('Mean values:\n{df}'.format(df=veh_63_finished.mean(axis=0, numeric_only=True)))


     experiment  veh_id veh_type  t_depart_secs  t_arrival_secs  \
0 0           0      63      car              0               0   
1 0           1      63      car              0             299   
2 0           2      63      car              0             297   
3 0           3      63      car              0             336   
4 0           4      63      car              0             297   
5 0           5      63      car              0             306   
6 0           6      63      car              0             301   
7 0           7      63      car              0             309   
8 0           8      63      car              0             330   
9 0           9      63      car              0             300   

     t_traveltime_secs    origin     destiny  route_calc_num  \
0 0                  0  35576028  76508304#3               1   
1 0                299  35576028  76508304#3               0   
2 0                297  35576028  76508304#3               0   
3 0   

In [122]:
# Scratch check: average three single-column frames row label by row label.
row_labels = ['a','b','c']
df1, df2, df3 = (
    pd.DataFrame(column, row_labels)
    for column in ([2,4,6], [4,6,8], [0,2,4])
)
# Stack the frames, then group the rows that share an index label.
df_concat = pd.concat([df1, df2, df3])
by_row_index = df_concat.groupby(level=0)
df_means = by_row_index.mean()
print(df_means)

   0
a  2
b  4
c  6


In [264]:
# Scratch check: build four toy "experiment" frames and stack them.
nrows = 4
ncols = 3
dfs = {}
for i in range(4):
    # Frames 0 and 1 both count 0..11; frame i > 1 counts in steps of i.
    step = max(i, 1)
    x = np.arange(0, nrows*ncols*step, step=step).reshape(nrows, ncols)
    frame = pd.DataFrame(x, columns=['age','weight','points'])
    frame['name'] = ['Peter', 'John', 'Mary', 'Luke']
    frame.insert(0,'experiment',i)
    dfs[i] = frame
    print('DF{i}:\n{df}\n'.format(i=i, df=dfs[i]))
# Concatenating a dict stacks the frames with the dict key as outer index level.
panel = pd.concat(dfs)
print(panel)

DF0:
   experiment  age  weight  points   name
0           0    0       1       2  Peter
1           0    3       4       5   John
2           0    6       7       8   Mary
3           0    9      10      11   Luke

DF1:
   experiment  age  weight  points   name
0           1    0       1       2  Peter
1           1    3       4       5   John
2           1    6       7       8   Mary
3           1    9      10      11   Luke

DF2:
   experiment  age  weight  points   name
0           2    0       2       4  Peter
1           2    6       8      10   John
2           2   12      14      16   Mary
3           2   18      20      22   Luke

DF3:
   experiment  age  weight  points   name
0           3    0       3       6  Peter
1           3    9      12      15   John
2           3   18      21      24   Mary
3           3   27      30      33   Luke

     experiment  age  weight  points   name
0 0           0    0       1       2  Peter
  1           0    3       4       5   John
  2 

In [266]:
# Lookups: select the rows of one person across all stacked frames.
john_rows = panel['name'].eq('John')
print( panel.loc[john_rows])


     experiment  age  weight  points  name
0 1           0    3       4       5  John
1 1           1    3       4       5  John
2 1           2    6       8      10  John
3 1           3    9      12      15  John


In [268]:
# numeric_only=True: the 'name' column holds strings, which cannot be averaged
# and make DataFrame.mean raise in pandas >= 2.
print('Mean of stacked DFs:\n{df}'.format(df=panel.mean(axis=0, numeric_only=True)))

Mean of stacked DFs:
experiment     1.500
age            7.875
weight         9.625
points        11.375
dtype: float64
