In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import yaml

from argparse import ArgumentParser
from collections import namedtuple
from matplotlib import mlab
from os import makedirs
from pickle import dump
from path import Path
from pdb import set_trace
from time import time

# Let pandas print up to 50 rows and 50 columns before truncating.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, 50)

# Apply the ggplot style globally, then open the figure that later
# plotting calls draw on.
plt.style.use('ggplot')
plt.figure()

# File name / Environment / Crowd / AI
# Each Simset names a scenario sub-directory and says which parameter groups
# (environment, crowd, AI) were varied in it.
Simset = namedtuple("Simset", ["Name","Envs","Crowd","AI"])
# The flags are written as literal booleans: bool() of any non-empty string,
# including "0", is True, so deriving them with map(bool, "100") would switch
# every group on for every scenario.
Situs = [
    Simset("All", True, True, True),
    Simset("Environment", True, False, False),
    Simset("Crowd", False, True, False),
    Simset("AI", False, False, True),
]

# Command-line interface:
#   path    root directory that holds one sub-directory per scenario
#   --type  index into Situs selecting the scenario to process
#   --info  free-text label stored with the pickled learning sets
parser = ArgumentParser()
# nargs='?' is what lets the default apply: without it argparse treats a
# positional argument as mandatory and never uses `default`.
parser.add_argument('path', nargs='?', default='.')
parser.add_argument('--type', type=int, choices=range(4), required=True)
parser.add_argument('--info', type=str)


def _split_tuple_column(column):
    """Expand strings such as "(1.0,2.0,3.0)" into one float column per item."""
    return column.apply(
        lambda cell: pd.Series(cell.strip("( )").split(','), dtype=float)
    )


def _summarise(prefix, table):
    """Return row-wise max/min/mean of *table* keyed <prefix>_max/_min/_avg."""
    return {
        prefix + "_max": table.max(axis=1),
        prefix + "_min": table.min(axis=1),
        prefix + "_avg": table.mean(axis=1),
    }


def main(p):
    """Load one scenario's benchmark files, summarise them and pickle the result.

    Args:
        p: parsed command-line namespace. ``p.path`` is the data root,
            ``p.type`` indexes ``Situs`` and ``p.info`` is passed through to
            the output step.

    Raises:
        ValueError: if no ``bench*`` file is found for the scenario.

    The scenario directory is ``p.path / <scenario name>``. Every ``bench*``
    file found directly in it, or matched by ``**/bench*`` below it, is read
    as a space-separated table (first line skipped) and concatenated.
    Elapsed seconds since the start of this call are printed after each
    stage.
    """
    started = time()
    print("new process")
    current = Situs[p.type]
    path = Path(p.path) / current.Name
    # A forward slash is understood by glob on every platform, whereas a
    # backslash is a path separator only on Windows.
    files = path.glob("bench*") + path.glob("**/bench*")
    if not files:
        raise ValueError("no bench* files found under {}".format(path))
    info = {}
    print(time() - started)

    benchmark = pd.concat(
        [pd.read_table(file, sep=' ', skiprows=1) for file in files],
        ignore_index=True,
    )
    info["All_data_number"] = benchmark.shape[0]
    # Drop the rows whose frame count equals the largest one observed
    # (presumably runs that hit the frame limit -- TODO confirm).
    benchmark = benchmark[benchmark["frames"] < benchmark["frames"].max()]
    # The key spelling is kept as-is because it ends up in the output.
    info["Vaild_data_number"] = benchmark.shape[0]

    # Keep only the parameter columns of the groups enabled for this scenario.
    enabled_suffixes = tuple(
        suffix
        for suffix, enabled in (
            ("th_obstacle", current.Envs),
            ("th_region", current.Crowd),
            ("th_ai", current.AI),
        )
        if enabled
    )
    col_names = [col_name for col_name in benchmark.columns
                 if col_name.endswith(enabled_suffixes)]
    info["Parameter_number"] = len(col_names)
    print(time() - started)

    # (output prefix, source column); each source cell is a tuple-like string.
    metrics = (
        ("time", "agent_time_enableds"),
        ("len", "agent_distance_traveleds"),
        ("ple", "agent_ple_energys"),
        ("cls", "collisionTimes"),
    )
    confs = [("param", benchmark[col_names])]
    for prefix, column in metrics:
        values = _split_tuple_column(benchmark[column])
        confs.append((prefix, _summarise(prefix, values)))
        print(time() - started)

    # Pass the results by name; the output functions in this file pick the
    # keywords they need and ignore the rest.
    pickle_output2(confs=confs, path=path, p=p, info=info)


def pickle_output2(confs, path, p, **_):
    """Pickle one {"X", "Y", "info"} learning set per metric.

    Args:
        confs: list of ``(name, data)`` pairs. The first pair carries the
            parameter DataFrame; every later pair carries a mapping that has
            a ``"<name>_avg"`` entry.
        path: directory under which ``Processed/learn_<name>.dat`` is written;
            must support the ``/`` operator.
        p: object whose ``info`` attribute (a string or None) is prefixed to
            the metric name in the stored ``"info"`` field.
        **_: ignored, so callers may pass extra keywords.

    ``X`` holds only the parameter columns where at least one value exceeds
    the column mean by more than 0.01, which drops columns that stay
    constant. ``Y`` is the metric average as a single-column array.
    """
    out_dir = path / "Processed"
    makedirs(out_dir, exist_ok=True)

    params = confs[0][1]
    varying = params.columns[(params - params.mean() > 0.01).any()]
    # .values replaces DataFrame.as_matrix(), which pandas 1.0 removed.
    x = params[varying].values
    # --info is optional on the command line, so p.info may be None.
    label = p.info or ""
    for name, dic in confs[1:]:
        y = pd.DataFrame(dic[name + "_avg"]).values.reshape((-1, 1))
        with open(out_dir / ("learn_" + name + ".dat"), "wb") as f:
            dump({"X": x, "Y": y, "info": label + name}, f)
    
def pickle_output(confs, path, **_):
    """Pickle each entry of *confs* as a 2-D array in Processed/<name>.dat.

    Args:
        confs: iterable of ``(name, data)`` pairs; ``data`` is anything
            ``pd.DataFrame`` accepts.
        path: directory under which ``Processed`` is created; must support
            the ``/`` operator.
        **_: ignored, so callers may pass extra keywords.
    """
    out_dir = path / "Processed"
    makedirs(out_dir, exist_ok=True)
    for name, dic in confs:
        # .values replaces DataFrame.as_matrix(), which pandas 1.0 removed.
        conf = pd.DataFrame(dic).values
        with open(out_dir / (name + ".dat"), "wb") as f:
            dump(conf, f)
            
def table_output(confs, path, **_):
    """Write every entry of *confs* side by side to Processed/data.dat.

    Args:
        confs: iterable of ``(name, data)`` pairs; ``data`` is anything
            ``pd.DataFrame`` accepts. The names are not used.
        path: directory under which ``Processed`` is created; must support
            the ``/`` operator.
        **_: ignored, so callers may pass extra keywords.

    The file is space-separated with floats formatted to six decimals.
    """
    out_dir = path / "Processed"
    makedirs(out_dir, exist_ok=True)
    # Concatenate once instead of growing the table inside a loop, which
    # would copy the accumulated data on every step.
    frames = [pd.DataFrame(dic) for _name, dic in confs]
    dat = pd.concat(frames, axis=1) if frames else pd.DataFrame()
    dat.to_csv(out_dir / "data.dat", sep=" ", float_format='%.6f')
            
def dist_output(confs, path, **_):
    """Save a distribution picture of each metric's average.

    Args:
        confs: list of ``(name, data)`` pairs. The first pair is skipped;
            each later ``data`` must have a ``"<name>_avg"`` entry.
        path: directory under which ``Processed/<name>.jpg`` is written;
            must support the ``/`` operator.
        **_: ignored, so callers may pass extra keywords.
    """
    out_dir = path / "Processed"
    # Create the target directory up front so saving the figures cannot fail
    # just because it does not exist yet.
    makedirs(out_dir, exist_ok=True)
    for name, dic in confs[1:]:
        _pic_output(dic[name + "_avg"], out_dir / (name + ".jpg"))
        
def info_output(info, path, **_):
    """Dump *info* as block-style YAML to Processed/info.txt.

    Args:
        info: the object to serialise (a dict of run statistics in this file).
        path: directory under which ``Processed`` is created; must support
            the ``/`` operator.
        **_: ignored, so callers may pass extra keywords.
    """
    out_dir = path / "Processed"
    # Create the target directory up front so opening the file cannot fail
    # just because it does not exist yet.
    makedirs(out_dir, exist_ok=True)
    with open(out_dir / "info.txt", "w") as f:
        yaml.dump(info, f, default_flow_style=False)

def _pic_output(data, path):
    """Save a normalised histogram of *data* with a fitted Gaussian overlay.

    Args:
        data: one-dimensional numeric sequence (a pandas Series in this file).
        path: file the current figure is saved to.

    The current figure is cleared first. The x-axis label reports the mean
    and standard deviation of the data.
    """
    lo, hi = np.min(data), np.max(data)
    mean = np.mean(data)
    variance = np.var(data)
    sigma = np.sqrt(variance)

    plt.clf()
    plt.xlim((lo, hi))
    # `density` replaces the `normed` keyword, which Matplotlib 3.1 removed.
    plt.hist(data, bins=1000,
             density=True, color="gray",
             label="Raw Data")

    # A zero spread has no finite Gaussian density, so skip the overlay then.
    if sigma > 0:
        x = np.linspace(lo, hi, 1000)
        # Normal pdf written out with NumPy; matplotlib.mlab.normpdf was
        # removed in Matplotlib 3.1.
        gaussian = (np.exp(-0.5 * ((x - mean) / sigma) ** 2)
                    / (sigma * np.sqrt(2 * np.pi)))
        plt.plot(x, gaussian,
                 color="b", label="Gaussian Dist.")
    # Without a legend the labels given above are never drawn.
    plt.legend()
    plt.xlabel("mean: {:.2f}, sigma: {:.2f}"
                  .format(mean, sigma))
    plt.savefig(path)

def whole_output(confs, path, **_):
    """Pickle every entry of *confs*, joined column-wise, to path/df.dat.

    Args:
        confs: iterable of ``(name, data)`` pairs; ``data`` is anything
            ``pd.DataFrame`` accepts. The names are not used.
        path: directory the file is written into; must support the ``/``
            operator.
        **_: ignored, so callers may pass extra keywords.
    """
    frames = [pd.DataFrame(dic) for _name, dic in confs]
    dat = pd.concat(frames, axis=1) if frames else pd.DataFrame()
    # Write once, after the table is complete, rather than re-pickling the
    # partial table on every loop iteration.
    dat.to_pickle(path / "df.dat")

In [None]:
# Timestamp taken immediately before the run starts.
itime = time()

# Stand-in for a real command line: data root, scenario index, info label.
cli_args = [
    r"C:\Users\kaidonghu\Google Drive\MIG\SF\Map2",
    "--type", "0",
    "--info", "SF Map2 All ",
]
p = parser.parse_args(cli_args)
main(p)
