_Data comes from **MIG/DualData**_

In [46]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from collections import namedtuple
from argparse import ArgumentParser
from path import Path
from IPython.display import display
from xilio import dump

In [47]:
plt.style.use('ggplot')
plt.figure()
pd.set_option('display.max_columns', 50)

# File name / Environment / Crowd / AI
Simset = namedtuple("Simset", ["Name","Envs","Crowd","AI"])
boolmap = lambda x:bool(int(x))
Situs = [
    Simset("All", *map(boolmap,"111")),
    Simset("Environment", *map(boolmap,"100")),
    Simset("Crowd", *map(boolmap,"010")),
    Simset("AI", *map(boolmap,"001")),
]

current = Situs[1] 

In [48]:
def parseLogfile(filename, multi=False):
    benchmark = pd.concat(
    [pd.read_table(file, sep=' ', skiprows=1) for file in filename],
    ignore_index = True) if multi else pd.read_table(filename, sep=' ', skiprows=1)
    benchmark = benchmark[benchmark["frames"] < benchmark["frames"].max()]
    
    value_col_names = ["hashing", "time_avg", "len_avg", "ple_avg", "cls_avg"]
    col_names = [col_name for col_name in benchmark.columns if 
        (col_name.endswith("th_obstacle") and current.Envs) or
        (col_name.endswith("th_region") and current.Crowd) or
        (col_name.endswith("th_ai") and current.AI)]
    
    # Modifing & extracting datas
    benchmark.loc[:, "hashing"] = benchmark[col_names].apply(
        lambda x: hash(tuple(x)),axis=1)
    benchmark.loc[:, "time_avg"] = (
        benchmark["agent_time_enableds"].apply(
        lambda x: pd.Series(x.strip("( )").split(','), dtype=float).mean()))
    benchmark.loc[:, "len_avg"] = (
        benchmark["agent_distance_traveleds"].apply(
        lambda x: pd.Series(x.strip("( )").split(','), dtype=float).mean()))
    benchmark.loc[:, "ple_avg"] = (
        benchmark["agent_ple_energys"].apply(
        lambda x: pd.Series(x.strip("( )").split(','), dtype=float).mean()))
    benchmark.loc[:, "cls_avg"] = (
        benchmark["collisionTimes"].apply(
        lambda x: pd.Series(x.strip("( )").split(','), dtype=float).mean()))

    sampleSet = benchmark[col_names + value_col_names]
    sample1, sample2 = sampleSet.iloc[::2], sampleSet.iloc[1::2]

    sample1.set_index('hashing', inplace=True)
    sample2.set_index('hashing', inplace=True)
    
    print("Dual trail")
    dual = ((sample1-sample2).dropna().apply(np.std, ddof=1)
      .loc[value_col_names[1:]])
    print(dual)
    print(len(dual))
    
    print("Varience test")
    print(sampleSet.apply(np.std, ddof=1).loc[value_col_names[1:]].to_frame())
    
    print("Mean Value test")
    print(sampleSet.apply(np.mean).loc[value_col_names[1:]].to_frame())
    
    return sampleSet

In [49]:
f = Path("/Users/kaidong/logfiles").files()
f  # Those three files are used for the env gen

[Path('/Users/kaidong/logfiles/MapB-ORCA-Env-4.log'),
 Path('/Users/kaidong/logfiles/MapB-ORCA-Env-2.log'),
 Path('/Users/kaidong/logfiles/MapB-ORCA-Env-3.log'),
 Path('/Users/kaidong/logfiles/MapB-ORCA-Env-1.log')]

In [50]:
ss = parseLogfile(f, True)

Dual trail
time_avg    1.393265
len_avg     1.252040
ple_avg     5.280296
cls_avg     1.458689
dtype: float64
4
Varience test
                  0
time_avg   3.666536
len_avg    4.158294
ple_avg   14.720363
cls_avg    2.249365
Mean Value test
                   0
time_avg  107.397961
len_avg   136.730050
ple_avg   464.706657
cls_avg    14.145722


In [51]:
# DATA Generation

col_names = [col_name for col_name in ss.columns if 
        (col_name.endswith("th_obstacle") and current.Envs) or
        (col_name.endswith("th_region") and current.Crowd) or
        (col_name.endswith("th_ai") and current.AI)]

sn = ss.as_matrix(col_names)
for i in [x for x in ss.columns if x.endswith("avg")]:
    st = ss.as_matrix((i,))
    dump(
        Path(".")/i,
        {
            "X": sn,
            "Y": st,
            "info": "ORCA Map2 Env " + i[:-4]
        }
    )

In [52]:
len(sn)

32014