In [1]:
import os, time, datetime, glob, copy, shutil, math
import numpy as np
import pandas as pd
import pickle as pk
from ult import *
from matplotlib import pyplot as plt

# Wall-clock "now" in seconds; short alias because every timed cell calls it.
gct = time.time


def runtime(et, st):
    """Format the elapsed time between ``st`` and ``et`` (both in seconds) as ``H:MM:SS``.

    The difference is rounded to the nearest whole second before formatting.
    """
    elapsed = datetime.timedelta(seconds=round(et - st))
    return str(elapsed)

# Read road network

In [2]:
noUTurn = 1
st = gct()
# read road network
allSelectedRids = np.genfromtxt('/Shared/xunzhou/Shenzhen_new/haoyi/CongProg/selectedRoadsCC%s' %\
                                ('_noUTurn' if noUTurn else ''), dtype=np.int32)

gridLen = 500.0 #500 meter grid
f_road ='/Shared/xunzhou/Shenzhen_new/haoyi/RoadNetwork/road_projected_utm50.geojson'
# select road types for analysis

RoadSelection = True
typeOfRoads=["motorway","motorway_link","primary",
             "primary_link", "trunk", "trunk_link"]

roads, topLeftOrigin, xyNGrids, grid_to_road, road_to_grid = RoadNet.readGeojson(f_road, gridLen,
                                                                                 selectedRoadTypes=typeOfRoads,
                                                                                 selectedRoadIDs=allSelectedRids.tolist(),
                                                                                 noUTurn = True)
# find largest connected component
selectedRids, linkRoads = RoadNet.find_LCC_update_Adj(roads, linkRoadMaxLength = 30)
print "Runtime %s" % runtime(gct(), st)

original # of roads: 99486
U-Turn is ignored in network connectivity
NO. X-grids: 190, NO. Y-grids: 88
# of roads in the largest connected component: 6570
# of link roads: 690
Runtime 0:00:06


# Optional: Display link roads and visualize selected roads

In [None]:
# write out Link level road list:
# one text line per dictionary key, formatted as "<key[0]>,<key[1]>:<v0>,<v1>,...".
# NOTE(review): modAdjPrePath is not defined in any cell visible in this notebook;
# it must already exist in the kernel for this cell to run.
outDict = modAdjPrePath
with open('/Shared/xunzhou/Shenzhen_new/haoyi/CongProg/Result/Primary/adjLinkPath', 'w') as wrt:
    # Build the whole payload first and write it with a single call.
    linkLines = ['%d,%d:%s\n' % (pair[0], pair[1], ','.join(str(item) for item in outDict[pair]))
                 for pair in outDict]
    wrt.write(''.join(linkLines))

In [4]:
# Visualise the selected roads through the project helper Snapshot.visual (from `ult`).
# selectedRids is passed as both the first and the third positional argument, an empty
# list as the second, and the figure is written to figPath.
# NOTE(review): the meaning of the three positional ID lists is defined in ult.Snapshot
# and is not visible here; confirm there before changing the arguments.
Snapshot.visual(selectedRids, [], selectedRids, roads,
               figPath='/Shared/xunzhou/Shenzhen_new/haoyi/CongProg/Result/Primary/roads.pdf')

# Optional: Compute Free Flow speed for selected roads 

In [3]:
# Directory that receives the processed outputs of this notebook.
ProcDataDir = '/Shared/xunzhou/Shenzhen_new/haoyi/CongProg/FinalSubmissionResult'

# Free-flow estimation parameters, as used by the cells below:
#   nObsDays        - number of observation days to keep (30 keeps everything)
#   noisePercentile - top share of speed samples (in percent) discarded as noise
#   nMinVehicles    - minimum number of distinct devices a road needs to get an estimate
nObsDays, noisePercentile, nMinVehicles = 30, 5, 100

# The output file name encodes the U-turn setting and all three parameters.
freeFlowFileName = 'obs_free_flow%s_%d_%d_%d' % (
    '_noUTurn' if noUTurn else '', nMinVehicles, noisePercentile, nObsDays)

In [4]:
# read all GPS data
st = gct()
road_GPS_spd = pd.read_csv('/Shared/xunzhou/Shenzhen_new/haoyi/CongProg/RoadSpd/free_flow/road_GPS_spd_sort',
                           dtype={'device':np.uint32,'spd':np.uint16,'road':np.int32},
                           sep=',',
                           converters={'time': lambda x: pd.Timestamp(x)},
                           error_bad_lines=False)
road_GPS_spd_sel = road_GPS_spd[road_GPS_spd['road'].isin(selectedRids)]
road_GPS_spd_sel['day'] = road_GPS_spd_sel['time'].apply(lambda x: x.day)
if nObsDays < 30:
    road_GPS_spd_sel = road_GPS_spd_sel[road_GPS_spd_sel['day'] < nObsDays]
print 'runtime of reading all GPS data and select corresponding ones for selected roads and time period: %s' \
% runtime(gct(), st)

runtime of reading all GPS data and select corresponding ones for selected roads and time period: 1:09:32


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [8]:
# find free flow
st = gct()
seg_st_index = road_GPS_spd_sel['road'].diff()
seg_st_index = seg_st_index[seg_st_index != 0].index.values
if seg_st_index[-1] < road_GPS_spd_sel.index.values[-1]:
    seg_st_index = np.append(seg_st_index, road_GPS_spd_sel.index.values[-1]+1)
print 'runtime: find road index starting row index %s' % runtime(gct(), st)
st = gct()
road_free_flow_speed = {}
for i in xrange(seg_st_index.shape[0] - 1):
    road_spd_pts = road_GPS_spd_sel.loc[seg_st_index[i] : seg_st_index[i+1] - 1]
    road_spd_pts = road_spd_pts[road_spd_pts['spd'] > 0.]
    nVeh = road_spd_pts['device'].unique().shape[0]
    if nVeh >= nMinVehicles:
        spds = road_spd_pts['spd'].values
        free_flow_spd = np.max(spds[spds < np.percentile(spds, 100 - noisePercentile)])
        roadID = road_GPS_spd_sel['road'].loc[seg_st_index[i]]
        road_free_flow_speed[roadID] = free_flow_spd
print 'runtime: get free flow speed for roads satisfying conditions %s' % runtime(gct(), st)
print '# of roads having free flow: %d (%.3f%%)' % (len(road_free_flow_speed), 
                                                    len(road_free_flow_speed) * 100.0/selectedRids.shape[0])
road_free_flow_speed = pd.Series(road_free_flow_speed)
df_free_flow_spd = road_free_flow_speed.to_frame()
df_free_flow_spd.columns = ['ff_spd']
df_free_flow_spd.index.name = 'roadID'
df_free_flow_spd.to_csv(os.path.join(ProcDataDir, freeFlowFileName))

runtime: find road index starting row index 0:00:02
runtime: get free flow speed for roads satisfying conditions 0:01:40
# of roads having free flow: 6453 (98.219%)


# Optional (run one time): extract road speed for selected roads in prediction

In [23]:
# Re-aggregate the 5-minute per-road speed files into coarser time slots
# (5, 10, ..., 50 minutes) for the selected roads and write one CSV per day and interval.
ProcDataDir = '/Shared/xunzhou/Shenzhen_new/haoyi/CongProg/FinalSubmissionResult'
roadSpeedDir = '/Shared/xunzhou/Shenzhen_new/haoyi/CongProg/RoadSpd/5min_allR_allT'
dayRange = range(1, 31)
for tInterval in np.arange(5, 51, 5):
    # the output directory depends only on the interval, so create it once per interval
    outDir = os.path.join(ProcDataDir ,'selRoads_roadSpd_%d' % tInterval)
    if not os.path.exists(outDir):
        os.mkdir(outDir)
    for testDay in dayRange:
        f = os.path.join(roadSpeedDir, 'roadSpd_tInterval_5_min_2014_11_%d' % testDay)
        rdSpdDf = pd.read_csv(f, dtype={'timeSlot':np.uint16, 'road':np.int32, 'avgGPS':np.float64, 
                                        'nGPSPts':np.uint32, 'avgVehicles':np.float64, 'nVehicles':np.uint32})
        # .copy() so the helper columns added below go to an independent frame
        # rather than to a slice of the frame that was just read
        rdSpdDf = rdSpdDf[rdSpdDf['road'].isin(selectedRids)].copy()
        if tInterval > 5:
            # Map each 5-minute slot to its coarser slot. Explicit floor division keeps
            # the result an integer regardless of the interpreter's '/' semantics; the
            # cast avoids overflowing the uint16 slot column when multiplying by 5.
            rdSpdDf['newTSlot'] = rdSpdDf['timeSlot'].astype(np.int64) * 5 // tInterval
            # vehicle-weighted speed sum, so the merged average is weighted by nVehicles
            rdSpdDf['cumuVehSpd'] = rdSpdDf['avgVehicles'] * rdSpdDf['nVehicles']
            gp = rdSpdDf.groupby(['road', 'newTSlot'])
            newRdSpdDf = gp[['cumuVehSpd', 'nVehicles']].sum()
            newRdSpdDf['avgVehicles'] = newRdSpdDf['cumuVehSpd'] / newRdSpdDf['nVehicles'] 
        else:
            newRdSpdDf = rdSpdDf
            newRdSpdDf = newRdSpdDf.set_index(['road', 'timeSlot'])
        newRdSpdDf = newRdSpdDf['avgVehicles'].to_frame()
        # the output file is named after the last three '_'-separated parts of the
        # input path, i.e. '2014_11_<day>'
        newRdSpdDf.to_csv(os.path.join(outDir, '_'.join(f.split('_')[-3:])), index=True)

# Build congestion propagation index

In [3]:
# sanity check and evaluate preprocessed data
ProcDataDir = '/Shared/xunzhou/Shenzhen_new/haoyi/CongProg/FinalSubmissionResult'
nFreeFlowObsDays = 23
congestionStateRid = np.genfromtxt(os.path.join(ProcDataDir, 'congestionMatrixRoadIDByIndex'), dtype=np.uint32)
print 'Selected road ID is congestion state matrix road ID index: ', np.all(congestionStateRid==selectedRids)
obsFreeFlow = pd.read_csv(os.path.join(ProcDataDir, 'obs_free_flow_noUTurn_100_5_%d') % nFreeFlowObsDays)
print 'Free flow: %.2f%%' % (obsFreeFlow.index.shape[0]* 100.0 / selectedRids.shape[0])

selectedRids_index = np.array([np.argwhere(congestionStateRid==rid).flatten()[0] for rid in selectedRids])

Selected road ID is congestion state matrix road ID index:  True
Free flow: 97.79%


In [4]:
# parameters for building
tInt = 5
nTrainDays = 23
tb_wd = [6, 10, 15, 20, 24]
def getTimeSlot(tDelta, tInt):
    return int(math.ceil(tDelta/pd.Timedelta(tInt, unit='m')))
tb_wd = [pd.Timedelta(v, 'h') for v in tb_wd]
tb_wd = [getTimeSlot(tb_wd[i] - tb_wd[0], tInt) for i in xrange(len(tb_wd))]
indices_congProg = ['weekday_%d-%d' % (tb_wd[i], tb_wd[i+1]-1) for i in xrange(len(tb_wd)-1)]
indices_congProg.extend(['Saturday', 'Sunday'])
print tb_wd, indices_congProg

[0, 48, 108, 168, 216] ['weekday_0-47', 'weekday_48-107', 'weekday_108-167', 'weekday_168-215', 'Saturday', 'Sunday']


In [None]:
# # determine all congestion state of all road in all time slot in the entire dataset
# st = gct()
# d = os.path.join(ProcDataDir, 'selRoads_roadSpd_%d' % (tInt))
# nTSlots = tb_wd[-1]-1
# congStates = []
# roadFreeFlowSpd = obsFreeFlowTrain.set_index('roadID')
# for f in sorted(glob.glob('%s/2014_11_*' % d), key=lambda x: int(x.split('_')[-1]))[:nTrainDays]:
#     roadSpdT = pd.read_csv(f)
#     roadSpdT = roadSpdT.set_index(['road', 'timeSlot'])
#     CongStat = []
#     for rid in selectedRids:
#         CongStatRow = []
#         if rid not in roadFreeFlowSpd.index:
#             CongStatRow = [0 for t in xrange(nTSlots)]
#         else:
#             for t in xrange(nTSlots):
#                 if (rid, t) not in roadSpdT.index:
#                     CongStatRow.append(0)
#                 else:
#                     ff_spd = roadFreeFlowSpd.loc[rid]['ff_spd']
#                     if 1 - roadSpdT.loc[(rid,t)]['avgVehicles'] / ff_spd < 0.5:
#                         CongStatRow.append(0)
#                     else:
#                         CongStatRow.append(1)
#         CongStat.append(CongStatRow)
#     CongStat = np.array(CongStat, dtype=np.uint8) # roadID X tSLotID
#     congStates.append(CongStat) # allSelectedRids X tSlot
#     print os.path.basename(f), ' is done'
# print 'runtime: get all congestion states for %d days %s' % (len(congStates), runtime(gct(), st))

In [5]:
# read all congestion state of all road in all time slot in the entire dataset
st = gct()
d = os.path.join(ProcDataDir, 'selRoads_congState_%d_%d' % (nFreeFlowObsDays, tInt))
congStates = []
for f in sorted(glob.glob('%s/%s2014_11_*' % (d, 'noUTurn' if noUTurn else '')), 
                key=lambda x: int(x.split('_')[-1]))[:nTrainDays]:
    congState = np.genfromtxt(f, delimiter=',')
    congStates.append(congState[selectedRids_index,:]) # allSelectedRids X tSlot
print 'runtime: read all congestion states for %d days %s' % (len(congStates), runtime(gct(), st))

runtime: read all congestion states for 23 days 0:00:30


In [6]:
# naive index updating
def updateCongNaive(congStatM, congProgDict, indexRids, roads, linkRoads):
    """Accumulate congestion-propagation counts by scanning every time slot.

    congStatM    -- 2-D array (road row x time slot); 1 marks a congested slot
    congProgDict -- congProgDict[rid][preRid] is a two-item list
                    [nPropagated, nPossible]; it is updated in place
    indexRids    -- array of road IDs giving the row order of congStatM
    roads        -- mapping road ID -> object exposing ``preRids``
    linkRoads    -- container of road IDs to skip

    For every non-link road that is congested at slot t (last slot excluded) and
    every predecessor that is not congested at t, nPossible is incremented;
    nPropagated is incremented as well when that predecessor is congested at t+1.
    """
    for rowPos, rid in enumerate(indexRids):
        if rid in linkRoads:
            continue
        nSlots = congStatM[rowPos].shape[0]
        for slot in range(nSlots - 1):
            if congStatM[rowPos, slot] != 1:
                continue
            for upRid in roads[rid].preRids:
                # row of the predecessor in the congestion matrix
                upPos = np.argwhere(indexRids == upRid).flatten()[0]
                if congStatM[upPos, slot] == 0:
                    counts = congProgDict[rid][upRid]
                    counts[1] += 1
                    if congStatM[upPos, slot + 1] == 1:
                        counts[0] += 1

def updateCongSmart(congStatM, congProgDict, indexRids, roads, linkRoads):
    """Accumulate congestion-propagation counts, visiting only congested slots.

    Produces the same counts as updateCongNaive but first builds, for every road,
    a "jump" table that points from any slot to the next congested slot at or
    after it, so non-congested slots are skipped.

    congStatM    -- 2-D array (road row x time slot); 1 marks a congested slot
    congProgDict -- congProgDict[rid][preRid] is a two-item list
                    [nPropagated, nPossible]; it is updated in place
    indexRids    -- array of road IDs giving the row order of congStatM
    roads        -- mapping road ID -> object exposing ``preRids``
    linkRoads    -- container of road IDs to skip
    """
    nRoads, nSlots = congStatM.shape
    if nSlots < 2:
        # a propagation needs slots t and t+1, so there is nothing to count
        return

    # jumpIndex[ri, t] = first congested slot >= t of road ri, or -1 when none is left.
    # The dtype must be signed: the table used to be uint32, where the -1 sentinel only
    # worked through wrap-around (and raises OverflowError on recent NumPy versions).
    jumpIndex = np.full((nRoads, nSlots), -1, dtype=np.int64)
    for ri in range(nRoads):
        mt = 0  # first slot not yet assigned a jump target
        for t in range(nSlots):
            if congStatM[ri, t] == 1:
                jumpIndex[ri, mt:t + 1] = t
                mt = t + 1

    for ri in range(indexRids.shape[0]):
        rid = indexRids[ri]
        if rid in linkRoads:
            continue
        for pRid in roads[rid].preRids:
            tInd = jumpIndex[ri, 0]
            pRid_index = np.argwhere(indexRids == pRid).flatten()[0]
            # stop at the sentinel, or at the last slot (it has no t+1 to inspect)
            while 0 <= tInd < nSlots - 1:
                if congStatM[pRid_index, tInd] == 0:
                    congProgDict[rid][pRid][1] += 1
                    if congStatM[pRid_index, tInd + 1] == 1:
                        congProgDict[rid][pRid][0] += 1
                tInd = jumpIndex[ri, tInd + 1]

In [7]:
# build index for each time period of weekday and each day of weekends given road connectivity
updateCong = updateCongSmart
                        
st = gct()
# weekday time period boundary
index_data_congProg = []
cDay = datetime.date(2014, 11, 1)
# set up index structure
for i in xrange(len(indices_congProg)):
    CongProg = {}
    for rid in selectedRids:
        if rid not in linkRoads:
            CongProg[rid] = {preRid:[0, 0] for preRid in roads[rid].preRids} # nProg, nPossibleProg
    index_data_congProg.append(CongProg)
# compute values for the index
for congState in congStates:
    if cDay.weekday() >= 5: # Saturday & Sunday
        CongProg = index_data_congProg[cDay.weekday()-1]
        congStatM = congState
        updateCong(congStatM, CongProg, selectedRids, roads, linkRoads)
    else: # weekday
        for j in xrange(len(tb_wd)-1):
            if j < len(tb_wd)-2:
                #add 1 more time slot for counting propagation
                congStatM = congState[:, tb_wd[j] : tb_wd[j+1] + 1]
            else:
                #end time of the day, cannot add more time slot to the end
                congStatM = congState[:, tb_wd[j] : tb_wd[j+1]]
            CongProg = index_data_congProg[j]
            updateCong(congStatM, CongProg, selectedRids, roads, linkRoads)
    print 'runtime: process %s days %s' % (str(cDay), runtime(gct(), st))
    cDay += datetime.timedelta(days=1)
for i in xrange(len(indices_congProg)):
    CongProg = index_data_congProg[i]
    for rid in CongProg:
        for preRid in CongProg[rid]:
            if CongProg[rid][preRid][1] == 0:
                CongProg[rid][preRid] = 0.
            else:
                CongProg[rid][preRid] = CongProg[rid][preRid][0] * 1.0 / CongProg[rid][preRid][1]
print 'runtime: build index %s' % runtime(gct(), st)

runtime: process 2014-11-01 days 0:00:05
runtime: process 2014-11-02 days 0:00:10
runtime: process 2014-11-03 days 0:00:15
runtime: process 2014-11-04 days 0:00:19
runtime: process 2014-11-05 days 0:00:24
runtime: process 2014-11-06 days 0:00:29
runtime: process 2014-11-07 days 0:00:34
runtime: process 2014-11-08 days 0:00:39
runtime: process 2014-11-09 days 0:00:43
runtime: process 2014-11-10 days 0:00:47
runtime: process 2014-11-11 days 0:00:52
runtime: process 2014-11-12 days 0:00:57
runtime: process 2014-11-13 days 0:01:03
runtime: process 2014-11-14 days 0:01:09
runtime: process 2014-11-15 days 0:01:14
runtime: process 2014-11-16 days 0:01:19
runtime: process 2014-11-17 days 0:01:23
runtime: process 2014-11-18 days 0:01:27
runtime: process 2014-11-19 days 0:01:31
runtime: process 2014-11-20 days 0:01:37
runtime: process 2014-11-21 days 0:01:42
runtime: process 2014-11-22 days 0:01:47
runtime: process 2014-11-23 days 0:01:50
runtime: build index 0:01:51


# Compute traffic bottleneck score

In [8]:
def getCongIndexPos(day, tSlot, tb_wd):
    """Return the position of the propagation index that applies to a day and time slot.

    day   -- day of month in November 2014 (year and month are fixed in the code)
    tSlot -- time slot within the day
    tb_wd -- ascending weekday period boundaries in slots; ``len(tb_wd) - 1`` periods

    Weekdays map to the first period whose upper boundary is greater than ``tSlot``
    (0 .. len(tb_wd) - 2). Saturday and Sunday map to the two positions that follow
    the weekday periods. This equals the previous hard-coded ``weekday - 1`` when
    there are four weekday periods, and stays correct for any other number of
    periods. Returns None for a weekday slot at or beyond the last boundary.
    """
    nWeekdayPeriods = len(tb_wd) - 1
    w = datetime.date(2014, 11, day).weekday()
    if w >= 5:
        # 5 = Saturday, 6 = Sunday
        return nWeekdayPeriods + (w - 5)
    for tb_i in range(nWeekdayPeriods):
        if tSlot < tb_wd[tb_i + 1]:
            return tb_i
    # tSlot lies outside every weekday period
    return None

def checkProg(cRid, nRid, ct, CongStateM, rid_index):
    """Return 1 if congestion propagated from road ``cRid`` to road ``nRid`` at slot ``ct``, else 0.

    A propagation means: ``cRid`` is congested at ``ct``, ``nRid`` is not congested
    at ``ct``, and ``nRid`` is congested at ``ct + 1``. ``rid_index`` maps a road ID
    to its row in ``CongStateM``.
    """
    srcPos, dstPos = rid_index[cRid], rid_index[nRid]
    if not (CongStateM[srcPos][ct] == 1):
        return 0
    if not (CongStateM[dstPos][ct] == 0):
        return 0
    return 1 if CongStateM[dstPos][ct + 1] == 1 else 0
    
def getConnCongSet(congRids, linkRoads, roads):
    """Group congested roads into connected congested sets.

    congRids  -- iterable of congested road IDs
    linkRoads -- container of road IDs that are ignored
    roads     -- mapping road ID -> object exposing ``preRids``

    A road joins an existing set when one of its predecessors is in the set, when
    it is itself in the set, or when it is a predecessor of a member of the set.
    All sets it joins are merged into one. Returns a list of sets of road IDs; the
    set containing the most recently processed road is always last.
    """
    components = []
    for congRid in congRids:
        if congRid in linkRoads:
            continue
        untouched = []
        merged = set([congRid])
        for comp in components:
            # members of the set plus all of their predecessors
            reach = set(comp)
            for member in comp:
                reach.update(roads[member].preRids)
            touches = (len(set(roads[congRid].preRids) & comp) > 0) or (congRid in reach)
            if touches:
                merged |= comp
            else:
                untouched.append(comp)
        untouched.append(merged)
        components = untouched
    return components

In [9]:
# set parameter and read data
testDays = range(24, 31)
d = os.path.join(ProcDataDir, 'selRoads_congState_%d_%d' % (nFreeFlowObsDays, tInt))

st = gct()
rid_index = {selectedRids[i]:i for i in xrange(selectedRids.shape[0])}

evalFlag = True
if evalFlag:
    p_thd = 10**(-32)
    delta_t = 12+1 # how many future time slots to be predicted
    predictProgProb = [{} for i in xrange(delta_t-1)]
else:
    delta_t = 0
    p_thd = 0.01

if not evalFlag:
    outDir = os.path.join(ProcDataDir, '%d_prog' % testDay)
    if os.path.exists(outDir):
        shutil.rmtree(outDir)
    os.mkdir(outDir)

for testDay in testDays:
    f = '%s/%s2014_11_%d' % (d, 'noUTurn' if noUTurn else '', testDay)
    testCongState = np.genfromtxt(f, delimiter=',') # allSelectedRids X tSlot
    testCongState = testCongState[selectedRids_index, :]
    # set up parameters to make prediction for each day
    nTSlots = testCongState.shape[1]
    tMax = nTSlots - 1 if delta_t > 0 else nTSlots-delta_t-1 
    for t in xrange(tMax):
        # get current congested segments
        congRids = selectedRids[testCongState[:,t]==1]
        # set up evaluation parameters
        if delta_t == 0:
            t_thd = nTSlots-1
        else:
            if t + delta_t >= nTSlots - 1:
                t_thd = nTSlots - 1
            else:
                t_thd = t + delta_t
        # find connected congested sets, and roots and leaves
        CCSs = getConnCongSet(congRids, linkRoads, roads)
        # Naive solution
        roots_CCSs, paths_CCSs, visited_CCSs, rank_CCSs  = [], [], [], []
        for CCS in CCSs:
            roots, allPaths, allVisited = [], [], set()
            for congRid in CCS:
                # starting a BFS search from a given congested segment and compute the score for all segments
                isRoot = True
                for preRid in roads[congRid].preRids:
                    if preRid not in CCS:
                        isRoot = False
                progIndex = index_data_congProg[getCongIndexPos(testDay, t, tb_wd)]
                if isRoot:
                    roots.append(congRid)
                else:
                    paths, path_prob, ext_flags = [[congRid]], [1.0], [True]
                    maxTF = t+1
                    visitedRids = set([congRid]) # for preventing cycling propagation
                    while (np.any(ext_flags) and maxTF < t_thd):
                        progIndex = index_data_congProg[getCongIndexPos(testDay, maxTF-1, tb_wd)]
                        new_paths, new_path_prob, new_ext_flags = [], [], []
                        for m in xrange(len(paths)):
                            updated = False
                            if ext_flags[m]:
                                cl_rid, p = paths[m][-1], path_prob[m]
                                for pre_cl_rid in roads[cl_rid].preRids:
                                    if (pre_cl_rid not in visitedRids) and (pre_cl_rid not in CCS):
                                        new_p = p * progIndex[cl_rid][pre_cl_rid]
                                        if (p_thd > 0 and new_p >= p_thd) or (p_thd == 0):
                                            subPath = copy.copy(paths[m])
                                            subPath.append(pre_cl_rid)
                                            new_paths.append(subPath)
                                            new_path_prob.append(new_p)
                                            new_ext_flags.append(True)
                                            updated = True
                                            if evalFlag:
                                                tDist, cpEntry = maxTF-(t+1), (cl_rid, pre_cl_rid, maxTF-1, testDay)
                                                if cpEntry in predictProgProb[tDist]:
                                                    if new_p > predictProgProb[tDist][cpEntry][0]:
                                                        # predictProgProb:
                                                        # tDist: time slot distance to current given time
                                                        # cpEntry: entry containing elements describing a propagation
                                                        # recording the largest probability of propagation
                                                        # and true propagation label
                                                        predictProgProb[tDist][cpEntry][0] = new_p
                                                else:
                                                    trueProg = checkProg(cl_rid, pre_cl_rid, maxTF-1, testCongState, rid_index)
                                                    predictProgProb[tDist][cpEntry] = [new_p, trueProg]
                            # if the path is not extended, keep it for final result and mark as unextendable
                            if not updated:
                                new_paths.append(paths[m])
                                new_path_prob.append(path_prob[m])
                                new_ext_flags.append(False)
                        paths, path_prob, ext_flags = new_paths, new_path_prob, new_ext_flags
                        visitedRids |= set([pa[-1] for pa in paths])
                        maxTF += 1
                    maxTF -= 1
                    allVisited |= visitedRids
                    allPaths.append(paths)
            roots_CCSs.append(roots)
            paths_CCSs.append(allPaths)
            visited_CCSs.append(allVisited)
        # rank the propagation pattern of each connected congested segments by impact length and output top-10
        if not evalFlag:
            for j in xrange(len(visited_CCSs)):
                impactedRids = visited_CCSs[j] - CCSs[j]
                score = sum([roads[rid].length for rid in impactedRids])
                rank_CCSs.append(score)
            topK = sorted([j for j in xrange(len(rank_CCSs))], key=lambda x: rank_CCSs[x], reverse=True)[:10]
            with open(os.path.join(outDir, 't%d_congProg' % t), 'w') as wrt:
                s = ''
                for k in topK:
                    s += '%s;' % (','.join(str(rid) for rid in roots_CCSs[k]))
                    s += '%s;' % (','.join(str(rid) for rid in (CCSs[k] - set(roots_CCSs[k]))))
                    s += ','.join(' '.join('_'.join(str(rid) for rid in pa) for pa in paths) for paths in paths_CCSs[k])
                    s += '\n'
                wrt.write(s)
        if t > 0 and t % 20 == 0:
            print 'runtime: day %d, t%d, %s' % (testDay, t, runtime(gct(), st))
    print 'runtime: day %d %s' % (testDay, runtime(gct(), st))

runtime: day 24, t20, 0:00:21
runtime: day 24, t40, 0:01:10
runtime: day 24, t60, 0:01:50
runtime: day 24, t80, 0:02:21
runtime: day 24, t100, 0:02:52
runtime: day 24, t120, 0:03:38
runtime: day 24, t140, 0:04:30
runtime: day 24, t160, 0:05:45
runtime: day 24, t180, 0:06:19
runtime: day 24, t200, 0:06:42
runtime: day 24 0:06:52
runtime: day 25, t20, 0:07:24
runtime: day 25, t40, 0:08:25
runtime: day 25, t60, 0:09:09
runtime: day 25, t80, 0:09:39
runtime: day 25, t100, 0:10:08
runtime: day 25, t120, 0:10:48
runtime: day 25, t140, 0:11:37
runtime: day 25, t160, 0:12:43
runtime: day 25, t180, 0:13:11
runtime: day 25, t200, 0:13:35
runtime: day 25 0:13:44
runtime: day 26, t20, 0:14:08
runtime: day 26, t40, 0:14:58
runtime: day 26, t60, 0:15:35
runtime: day 26, t80, 0:16:02
runtime: day 26, t100, 0:16:31
runtime: day 26, t120, 0:17:05
runtime: day 26, t140, 0:17:53
runtime: day 26, t160, 0:19:01
runtime: day 26, t180, 0:19:33
runtime: day 26, t200, 0:19:54
runtime: day 26 0:20:03
runtime: d

# Output probability and true label for evaluation

In [10]:
st = gct()
pk.dump(predictProgProb, open(os.path.join(ProcDataDir, 'pred'),'wb'))
print 'Runtime: %s' % (gct()-st)

Done
