In [1]:
import pandas as pd
from pandas import *

from functools import reduce

import json

import math

import itertools

import datetime

In [2]:
footnotes = {}

with open('../incoming/ns-latest/footnote.dat', 'r') as f:
    id = None
    
    for line in f:
        if line.startswith('#'):
            id = line[1:].strip()
        else:
            footnotes[id] = line

In [3]:
kilonet = []

with open('../incoming/ns-latest/kilonetnew.dat', 'r') as f:
    for line in f:
        kilonet.append({
            'from': line.split(',')[0].strip(),
            'to': line.split(',')[1].strip(),
            'tarif-unit': int(line.split(',')[3].strip())
        })
        
kilonet[:5], len(kilonet)

([{'from': 'ac', 'tarif-unit': 2, 'to': 'ashd'},
  {'from': 'ac', 'tarif-unit': 12, 'to': 'bkl'},
  {'from': 'acdg', 'tarif-unit': 240, 'to': 'brusz'},
  {'from': 'acdg', 'tarif-unit': 22, 'to': 'marne'},
  {'from': 'acdg', 'tarif-unit': 483, 'to': 'valtgv'}],
 1404)

In [4]:
X0      = 155000
Y0      = 463000
phi0    = 52.15517440
lam0    = 5.38720621

def fromRdToWgs(coords):
    global phi0, lam0, X0, Y0

    Kp = [0,2,0,2,0,2,1,4,2,4,1]
    Kq = [1,0,2,1,3,2,0,0,3,1,1]
    Kpq = [3235.65389,-32.58297,-0.24750,-0.84978,-0.06550,-0.01709,-0.00738,0.00530,-0.00039,0.00033,-0.00012]

    Lp = [1,1,1,3,1,3,0,3,1,0,2,5]
    Lq = [0,1,2,0,3,1,1,2,4,2,0,0]
    Lpq = [5260.52916,105.94684,2.45656,-0.81885,0.05594,-0.05607,0.01199,-0.00256,0.00128,0.00022,-0.00022,0.00026]

    dX = 1E-5 * ( coords[0] - X0 )
    dY = 1E-5 * ( coords[1] - Y0 )

    phi = 0
    lam = 0
    
    for k in range(len(Kpq)):
        phi = phi + ( Kpq[k] * dX**Kp[k] * dY**Kq[k] )
    phi = phi0 + phi / 3600

    for l in range(len(Lpq)):
        lam = lam + ( Lpq[l] * dX**Lp[l] * dY**Lq[l] )
    lam = lam0 + lam / 3600

    return [phi,lam]

def fromWgsToRd(coords):
    global phi0, lam0, X0, Y0
    
    Rp = [0,1,2,0,1,3,1,0,2]
    Rq = [1,1,1,3,0,1,3,2,3]
    Rpq = [190094.945,-11832.228,-114.221,-32.391,-0.705,-2.340,-0.608,-0.008,0.148]

    Sp = [1,0,2,1,3,0,2,1,0,1]
    Sq = [0,2,0,2,0,1,2,1,4,4]
    Spq = [309056.544,3638.893,73.077,-157.984,59.788,0.433,-6.439,-0.032,0.092,-0.054]

    dPhi = 0.36 * ( coords[0] - phi0 )
    dLam = 0.36 * ( coords[1] - lam0 )

    X = 0
    Y = 0

    for r in range( len( Rpq ) ):
        X = X + ( Rpq[r] * dPhi**Rp[r] * dLam**Rq[r] ) 
    X = X0 + X

    for s in range( len( Spq ) ):
        Y = Y + ( Spq[s] * dPhi**Sp[s] * dLam**Sq[s] )
    Y = Y0 + Y

    return [X,Y]

In [22]:
stations = []

with open('../incoming/ns-latest/stations.dat', 'r') as f:
    for i, line in enumerate(f):
        if i == 0: continue

        stations.append({
            'short-name': line.split(',')[1].strip(),
            'country': line.split(',')[4].strip(),
            'x': int(line.split(',')[7].strip()),
            'y': int(line.split(',')[8].strip()),
            'full-name': line.split(',')[9].strip(),
            'lat': fromRdToWgs([int(line.split(',')[7].strip()), int(line.split(',')[8].strip())])[0],
            'lon': fromRdToWgs([int(line.split(',')[7].strip()), int(line.split(',')[8].strip())])[1]
        })
        
# stations[:1], len(stations)
st = pd.DataFrame(stations)
st = st[st.country == 'NL']
st[['full-name', 'short-name', 'lon', 'lat']].to_csv('../public/data/stations.csv', index=False)

In [8]:
rollup = 0

def stops(acc, cur):
    global rollup
    
    rollup += cur['tarif-unit']
    
    if cur['type'] in ['start', 'end', 'av-stop', 'short-stop']:
        cur['tarif-unit'] = rollup
        acc.append(cur)
        rollup = 0
    

    return acc

trips = []
skip_line = False

with open('../incoming/ns-latest/timetbls.dat', 'r') as f:
    trip = { 'legs': [], 'trains': [] }
    
    skip = False
    current = None

    for i, line in enumerate(f):
#         if line.startswith('#00000003'): skip_line = True
        
        if skip_line: continue

        try:
            if line.startswith('#'):
                trip = {
                    'legs': [],
                    'id': line.strip(),
                    'trains': []
                }
                current = line
                skip = False
            elif line.startswith('%'):
                trip['trains'].append({
                    'operator': int(line.split(',')[0].strip()[1:]), # this takes will be the latest operator in the list
                    'serie': int(100 * math.floor(int(line.split(',')[1].strip()) / 100)),
                    'id': int(line.split(',')[1].strip()),
                    'first-stop': int(line.split(',')[3].strip()),
                    'last-stop': int(line.split(',')[4].strip())
                })
            elif line.startswith('-'):
                trip['footnote'] = footnotes[line.split(',')[0].strip()[1:]].strip()
                trip['footnote-id'] = line.split(',')[0].strip()[1:]
            elif line.startswith('&'):
                trip['mode'] = line.split(',')[0].strip()[1:]
            elif line.startswith('>'):
                station = line.split(',')[0].strip()[1:]
                
                if filter(lambda x: x['short-name'] == station, stations)[0]['country'] != 'NL':
                    skip = True
                
                trip['legs'].append({
                    'station': station,
                    'depart': int(line.split(',')[1].strip()),
                    'arrival': None,
                    'type': 'start',
                    'tarif-unit': 0
                })
            elif line.startswith(';'):
                station = line.split(',')[0].strip()[1:]
                
                if filter(lambda x: x['short-name'] == station, stations)[0]['country'] != 'NL':
                    skip = True
                    
                previousStation = trip['legs'][-1:][0]['station']
                
                trip['legs'].append({
                    'station': station,
                    'depart': None,
                    'arrival': None,
                    'type': 'pass',
                    'tarif-unit': filter(lambda x: x['from'] == previousStation and x['to'] == station, kilonet)[0]['tarif-unit']
                })
            elif line.startswith('.'):
                station = line.split(',')[0].strip()[1:]
                
                if filter(lambda x: x['short-name'] == station, stations)[0]['country'] != 'NL':
                    skip = True
                    
                previousStation = trip['legs'][-1:][0]['station']
                
                trip['legs'].append({
                    'station': station,
                    'depart': int(line.split(',')[1].strip()),
                    'arrival': int(line.split(',')[1].strip()) - 1,
                    'type': 'short-stop',
                    'tarif-unit': filter(lambda x: x['from'] == previousStation and x['to'] == station, kilonet)[0]['tarif-unit']
                })
            elif line.startswith('+'):
                station = line.split(',')[0].strip()[1:]
                
                if filter(lambda x: x['short-name'] == station, stations)[0]['country'] != 'NL':
                    skip = True
                    
                previousStation = trip['legs'][-1:][0]['station']
                
                trip['legs'].append({
                    'station': station,
                    'depart': int(line.split(',')[2].strip()),
                    'arrival': int(line.split(',')[1].strip()),
                    'type': 'av-stop',
                    'tarif-unit': filter(lambda x: x['from'] == previousStation and x['to'] == station, kilonet)[0]['tarif-unit']
                })
            elif line.startswith('<'):
                station = line.split(',')[0].strip()[1:]
                
                if filter(lambda x: x['short-name'] == station, stations)[0]['country'] != 'NL':
                    skip = True
                    
                previousStation = trip['legs'][-1:][0]['station']
                
                trip['legs'].append({
                    'station': station,
                    'depart': None,
                    'arrival': int(line.split(',')[1].strip()),
                    'type': 'end',
                    'tarif-unit': filter(lambda x: x['from'] == previousStation and x['to'] == station, kilonet)[0]['tarif-unit']
                })
                
                if not skip:
                    trip['legs'] = reduce(stops, trip['legs'], [])
                    
                    trips.append(trip)
                else:
                    print 'skipping %s' % current
        except:
            print 'ERROR', line, trip['id']
            
            
print json.dumps(trips[:10], indent=1)

skipping #00000433

skipping #00000434

skipping #00000435

skipping #00000436

skipping #00000437

skipping #00000438

skipping #00000439

skipping #00000440

skipping #00000441

skipping #00000442

skipping #00000443

skipping #00000444

skipping #00000445

skipping #00000446

skipping #00000447

skipping #00000448

skipping #00000449

skipping #00000450

skipping #00000451

skipping #00000452

skipping #00000453

skipping #00000454

skipping #00000455

skipping #00000456

skipping #00000457

skipping #00000458

skipping #00000459

skipping #00000460

skipping #00000461

skipping #00000462

skipping #00000463

skipping #00000464

skipping #00000465

skipping #00000466

skipping #00000467

skipping #00000468

skipping #00000469

skipping #00000470

skipping #00000471

skipping #00000472

skipping #00000473

skipping #00000474

skipping #00000475

skipping #00000476

skipping #00000477

skipping #00000478

skipping #00000479

skipping #00000480

skipping #00000481

skipping #00000482



skipping #00015666

skipping #00015667

skipping #00015668

skipping #00015669

skipping #00015670

skipping #00015671

skipping #00015672

skipping #00015673

skipping #00015674

skipping #00015675

skipping #00015676

skipping #00015677

skipping #00015678

skipping #00015679

skipping #00015680

skipping #00015681

skipping #00015682

skipping #00015683

skipping #00015684

skipping #00015685

skipping #00015686

skipping #00015687

skipping #00015688

skipping #00015689

skipping #00015690

skipping #00015691

skipping #00015692

skipping #00015693

skipping #00015694

skipping #00015695

skipping #00015696

skipping #00015697

skipping #00015698

skipping #00015699

skipping #00015700

skipping #00015701

skipping #00015702

skipping #00015703

skipping #00015704

skipping #00015705

skipping #00015706

skipping #00015707

skipping #00015708

skipping #00015709

skipping #00015710

skipping #00015711

skipping #00015712

skipping #00015713

skipping #00015714

skipping #00015715




skipping #00017990

skipping #00017991

skipping #00017992

skipping #00017993

skipping #00017994

skipping #00017995

skipping #00017996

skipping #00017997

skipping #00017998

skipping #00017999

skipping #00018000

skipping #00018001

skipping #00018002

skipping #00018003

skipping #00018004

skipping #00018005

skipping #00018006

skipping #00018007

skipping #00018008

skipping #00018009

skipping #00018010

skipping #00018011

skipping #00018012

skipping #00018013

skipping #00018014

skipping #00018015

skipping #00018016

skipping #00018017

skipping #00018018

skipping #00018019

skipping #00018020

skipping #00018021

skipping #00018022

skipping #00018023

skipping #00018024

skipping #00018025

skipping #00018026

skipping #00018027

skipping #00018028

skipping #00018029

skipping #00018030

skipping #00018031

skipping #00018032

skipping #00018033

skipping #00018034

skipping #00018035

skipping #00018036

skipping #00018037

skipping #00018038

skipping #00018039



skipping #00022972

skipping #00022973

skipping #00022974

skipping #00022975

skipping #00022976

skipping #00022977

skipping #00022978

skipping #00022979

skipping #00022980

skipping #00022981

skipping #00022982

skipping #00022983

skipping #00022984

skipping #00022985

skipping #00022986

skipping #00022987

skipping #00022988

skipping #00022989

skipping #00022990

skipping #00022991

skipping #00022992

skipping #00022993

skipping #00022994

skipping #00022995

skipping #00022996

skipping #00022997

skipping #00022998

skipping #00022999

skipping #00023000

skipping #00023001

skipping #00023002

skipping #00023003

skipping #00023004

skipping #00023005

skipping #00023006

skipping #00023007

skipping #00023008

skipping #00023009

skipping #00023010

skipping #00023011

skipping #00023012

skipping #00023013

skipping #00023014

skipping #00023015

skipping #00023016

skipping #00023017

skipping #00023018

skipping #00023019

skipping #00023020

skipping #00023021


skipping #00035416

skipping #00035417

ERROR .kdffh  ,0633
#00035418
skipping #00035418

ERROR .kdffh  ,0725
#00035419
skipping #00035419

ERROR .kdffh  ,0725
#00035420
skipping #00035420

ERROR .kdffh  ,0733
#00035421
skipping #00035421

ERROR .kdffh  ,0825
#00035422
skipping #00035422

ERROR .kdffh  ,0825
#00035423
skipping #00035423

ERROR .kdffh  ,0833
#00035424
skipping #00035424

ERROR .kdffh  ,0925
#00035425
skipping #00035425

ERROR .kdffh  ,0933
#00035426
skipping #00035426

ERROR .kdffh  ,1025
#00035427
skipping #00035427

ERROR .kdffh  ,1033
#00035428
skipping #00035428

ERROR .kdffh  ,1125
#00035429
skipping #00035429

ERROR .kdffh  ,1133
#00035430
skipping #00035430

ERROR .kdffh  ,1225
#00035431
skipping #00035431

ERROR .kdffh  ,1233
#00035432
skipping #00035432

ERROR .kdffh  ,1325
#00035433
skipping #00035433

ERROR .kdffh  ,1333
#00035434
skipping #00035434

ERROR .kdffh  ,1425
#00035435
skipping #00035435

ERROR .kdffh  ,1433
#00035436
skipping #00035436

ERROR .kdf

skipping #00038553

ERROR .kdffh  ,1933
#00038554
skipping #00038554

ERROR .kdffh  ,2036
#00038555
skipping #00038555

ERROR .kdffh  ,2033
#00038556
skipping #00038556

ERROR .kdffh  ,2125
#00038557
skipping #00038557

ERROR .kdffh  ,2133
#00038558
skipping #00038558

ERROR .kdffh  ,2236
#00038559
skipping #00038559

ERROR .kdffh  ,2233
#00038560
skipping #00038560

skipping #00038561

skipping #00038562

skipping #00038588

skipping #00038589

skipping #00038590

skipping #00038591

skipping #00038592

skipping #00038593

skipping #00038594

skipping #00038595

skipping #00038596

skipping #00038597

skipping #00038598

skipping #00038599

skipping #00038600

skipping #00038601

skipping #00038602

skipping #00038603

skipping #00038604

skipping #00038605

skipping #00038606

skipping #00038607

skipping #00038608

skipping #00038609

skipping #00038610

skipping #00038611

skipping #00038612

skipping #00038613

skipping #00038614

skipping #00038615

skipping #00038616

skipping #

In [9]:
legs = []

for trip in trips:
    for train in trip['trains']:
        for i in range(train['first-stop'], train['last-stop']):
            legs.append({
                'trip-id': trip['id'],
                'operator': train['operator'],
                'serie': train['serie'],
                'train-id': train['id'],
                'mode': trip['mode'],
                'footnote-id': trip['footnote-id'],
                'footnote': trip['footnote'],
                'from': trip['legs'][i - 1]['station'],
                'depart': trip['legs'][i - 1]['depart'],
                'to': trip['legs'][i]['station'],
                'arrival': trip['legs'][i]['arrival'],
                'tarif-unit': trip['legs'][i]['tarif-unit']
            })

len(legs)

329256

In [10]:
filter(lambda x: x['trip-id']  == '#00000077', legs)

[{'arrival': 2645,
  'depart': 2611,
  'footnote': '000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000',
  'footnote-id': '00003',
  'from': 'ut',
  'mode': 'IC',
  'operator': 100,
  'serie': 1400,
  'tarif-unit': 37,
  'to': 'asd',
  'train-id': 1409,
  'trip-id': '#00000077'},
 {'arrival': 2701,
  'depart': 2647,
  'footnote': '000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

In [11]:
groups = []
uniquekeys = []

keyfunc = lambda x: x['from'] + '_' + x['to'] + '_' + str(x['depart']) + '_' + str(x['train-id'])

data = sorted(legs, key=keyfunc)

for k, g in itertools.groupby(data, keyfunc):
    groups.append(list(g))
    uniquekeys.append(k)
    
len(uniquekeys)

75583

In [12]:
datetime.datetime(2019, 8, 4) - datetime.datetime(2018, 7, 2)

datetime.timedelta(398)

In [13]:
g = groups[0]

zipit = lambda x: zip(*x)
toInt = lambda x: int(x)
merge = lambda x, y: x or y
footnoteslist = lambda x: x['footnote']
leg_active = lambda z: reduce(merge, map(toInt, z), False)

uniq_legs = []

for g in groups:
    g[0]['footnote'] = map(leg_active, zipit(map(footnoteslist, g)))
    uniq_legs.append(g[0])

In [14]:
#flatten = lambda x: [a for b in x for a in b]

daily_active_legs = map(lambda x: sum(list(x)), zipit(map(lambda x: x['footnote'], uniq_legs)))

most_active_day_index = daily_active_legs.index(max(daily_active_legs))
least_active_day_index = daily_active_legs.index(min(daily_active_legs))

most_active_day_legs = filter(lambda x: x['footnote'][most_active_day_index] == 1, uniq_legs)
least_active_day_legs = filter(lambda x: x['footnote'][least_active_day_index] == 1, uniq_legs)

In [15]:
print len(most_active_day_legs), most_active_day_index, datetime.datetime(2018, 7, 2) + datetime.timedelta(days=most_active_day_index)
print len(least_active_day_legs), least_active_day_index, datetime.datetime(2018, 7, 2) + datetime.timedelta(days=least_active_day_index)

51933 165 2018-12-14 00:00:00
37802 6 2018-07-08 00:00:00


In [16]:
selected_index = most_active_day_index

In [17]:
df = pd.DataFrame(most_active_day_legs).sort_values('depart')

In [18]:
tmp = df[['from', 'to', 'tarif-unit']].drop_duplicates().reset_index(drop=True)
tmp = tmp.set_index(['from', 'to'])
tmp['ix'] = range(0, tmp.size)

tmp[['from', 'to', 'tarif-unit']].to_csv('../public/data/legs.csv', index=True)

In [19]:
df[:3]

Unnamed: 0,arrival,depart,footnote,footnote-id,from,mode,operator,serie,tarif-unit,to,train-id,trip-id
43295,445,411,"[0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, ...",2,ut,IC,100,1400,37,asd,1417,#00002514
37832,440,418,"[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, ...",2193,rtd,NSS,100,0,14,dt,0,#00021727
17951,444,442,"[1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, ...",1118,ekz,SPR,100,3300,4,bkf,3317,#00006382


In [146]:
df.to_csv('../data/raw.csv', index=False)

In [21]:
df = pd.DataFrame(most_active_day_legs).sort_values('depart')

df['trip-id'] = df['trip-id'].apply(lambda x: int(x[1:]))
df['leg-index'] = df.apply(lambda x: tmp.loc[x['from'], x['to']]['ix'], axis=1)
df['duration'] = df['arrival'] - df['depart']
df['tarif-dur-ratio'] = df['tarif-unit'] / df['duration']

df[['arrival', 'depart', 'leg-index', 'train-id', 'trip-id']] \
    .sort_values(['depart'], ascending=[True]) \
    .to_csv('../public/data/data.csv', index=False)
    
!head ../public/data/data.csv

arrival,depart,leg-index,train-id,trip-id
445,411,0,1417,2514
440,418,1,0,21727
444,442,2,3317,6382
502,442,3,0,21734
455,444,4,4308,8329
448,445,5,3317,6382
449,446,6,7213,13265
453,446,7,1714,2989
501,448,8,1417,2514


In [158]:
df[df['leg-index'] == 1]

Unnamed: 0,arrival,depart,footnote,footnote-id,from,mode,operator,serie,tarif-unit,to,train-id,trip-id,leg-index,duration,tarif-dur-ratio
37832,440,418,"[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, ...",2193,rtd,NSS,100,0,14,dt,0,21727,1,22,0.636364
37833,658,648,"[1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, ...",501,rtd,IC,100,1100,14,dt,1114,2338,1,10,1.4
37834,728,718,"[1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, ...",22,rtd,IC,100,1100,14,dt,1116,2340,1,10,1.4
37835,758,748,"[1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, ...",466,rtd,IC,100,1100,14,dt,1118,2346,1,10,1.4
37836,828,818,"[1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, ...",22,rtd,IC,100,1100,14,dt,1120,2355,1,10,1.4
37837,858,848,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",466,rtd,IC,100,1100,14,dt,1122,2360,1,10,1.4
37838,928,918,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",508,rtd,IC,100,1100,14,dt,1124,2365,1,10,1.4
37839,958,948,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",510,rtd,IC,100,1100,14,dt,1126,2369,1,10,1.4
37800,1028,1018,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",172,rtd,IC,100,1100,14,dt,1128,2373,1,10,1.4
37801,1058,1048,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",241,rtd,IC,100,1100,14,dt,1130,2375,1,10,1.4


# BELOW IS OLD STUFF

In [97]:
groups2 = []
uniquekeys2 = []

keyfunc2 = lambda x: '_'.join(map(lambda x: x['station'], x['legs'])) + '_' + str(x['legs'][-1:][0]['arrival'] - x['legs'][:1][0]['depart'])

data2 = sorted(active_day_legs, key=keyfunc2)

for k, g in itertools.groupby(data2, keyfunc2):
    groups2.append(list(g))
    uniquekeys2.append(k)

KeyError: 'legs'

In [96]:
to_elapsed = lambda x: int(math.floor(x / 100) * 60 + x % 100) # time int to minutes elapsed since midnight
to_time = lambda x: int(x / 60 * 100 + x % 60) # minutes elapsed since midnight to time int 

In [564]:
tmp = filter(lambda x: len(x) > 0, map(lambda x: list(set(map(lambda x: x['legs'][-1:][0]['arrival'] - x['legs'][:1][0]['depart'], x))), groups))
filter(lambda x: x[0] > 0, map(lambda x: [max(x) - min(x), x], tmp))

[]

In [596]:
map(lambda x: None if x['depart'] == None else to_elapsed(x['depart']), groups2[0][0]['legs'])

[358, 360, 364, 368, 372, 380, 385, 394, 407, 415, 420, None]

In [646]:
ix = 1
sorted(groups2[ix], key=lambda x: x['legs'][0]['depart'])[0]['legs'][0]['depart'], map(lambda x: to_elapsed(x['legs'][0]['depart']) - to_elapsed(sorted(groups2[ix], key=lambda x: x['legs'][0]['depart'])[0]['legs'][0]['depart']), sorted(groups2[ix], key=lambda x: x['legs'][0]['depart']))

(617, [0, 60, 120, 180, 240, 300, 360, 420, 480, 540, 600, 660, 720])

In [617]:
map(lambda x: x['legs'][0]['depart'], sorted(groups2[1], key=lambda x: x['legs'][0]['depart']))

[617, 717, 817, 917, 1017, 1117, 1217, 1317, 1417, 1517, 1617, 1717, 1817]

In [663]:
groups2[1][0]

{'footnote': [1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  0,
  0,
  1,
  1,
  0,
  0,
  1,
  0,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1

In [801]:
# station_names = map(lambda x: x['short-name'], stations)

legs = []

for g in groups2:
    for trip in g:
        legs_enum = enumerate(trip['legs'])
        for i, leg in legs_enum:
            if i > 0:
                legs.append({
                    'id': int(trip['id'][1:]),
#                     'operator': trip['operator'],
#                     'mode': trip['mode'],
                    'from': trip['legs'][i - 1]['station'],
                    'depart': to_elapsed(trip['legs'][i - 1]['depart']),
                    'to': leg['station'],
                    'arrive': to_elapsed(leg['arrival']),
                    'tarif-unit': leg['tarif-unit'],
                    'serie': trip['serie']
                })

len(legs)

44190

In [810]:
df = pd.DataFrame(legs)
# df['duration'] = df['arrive'] - df['depart']

In [779]:
to_time(710)

1150

In [818]:
all_legs = df[['from', 'to', 'tarif-unit']].drop_duplicates().reset_index(drop=True)
all_legs['id'] = all_legs.apply(lambda x: x['from'] + '_' + x['to'], axis=1)
all_legs

Unnamed: 0,from,to,tarif-unit,id
0,ah,ahp,1,ah_ahp
1,ahp,wtv,4,ahp_wtv
2,wtv,dvn,4,wtv_dvn
3,dvn,zv,5,dvn_zv
4,zv,did,5,zv_did
5,did,wl,6,did_wl
6,wl,dtch,3,wl_dtch
7,dtch,dtc,2,dtch_dtc
8,dtc,gdr,5,dtc_gdr
9,gdr,tbg,1,gdr_tbg


In [816]:
df.apply(lambda x: all_legs[x['from'] + '_' + x['to']].index, axis=1)

KeyError: ('ah_ahp', u'occurred at index 0')

In [804]:
def convert(x):
    try:
        return x.astype(int)
    except:
        return x

# df.arr = df.arr.fillna(-1)
# df.arr = pd.to_numeric(df.arr, errors='ignore', downcast='integer')    

# df.dep = df.dep.fillna(-1)
# df.dep = pd.to_numeric(df.dep, errors='ignore', downcast='integer')

df.to_csv('../data/data.csv', index=False)

!head ../data/data.csv

arrive,depart,from,id,serie,tarif-unit,to
605,604,ah,18630,30900,1,ahp
609,606,ahp,18630,30900,4,wtv
613,610,wtv,18630,30900,4,dvn
617,614,dvn,18630,30900,5,zv
622,618,zv,18630,30900,5,did
628,623,did,18630,30900,6,wl
631,629,wl,18630,30900,3,dtch
636,632,dtch,18630,30900,2,dtc
665,664,ah,18643,30900,1,ahp
