In [22]:
import csv
import datetime
import operator
import os

In [2]:
def datetime_from_text(text_date):
    try:
        day = int(text_date[0:2])
        month = int(text_date[3:5])
        year = int(text_date[6:10])
        hour = int(text_date[11:13])
        minute = int(text_date[14:16])
        date = datetime.datetime(year,month,day,hour,minute)
    except ValueError:
        date = datetime.datetime(2000,1,1)
    return date

def date_from_text(text_date):
    try:
        day = int(text_date[0:2])
        month = int(text_date[3:5])
        year = int(text_date[6:10])
        date = datetime.date(year,month,day)
    except ValueError:
        date = datetime.date(2000,1,1)
    return date

def print_time(date_time):
    hour = ""
    minute = ""
    
    if (date_time.hour < 10):
        hour = "0"
    hour += str(date_time.hour)
    
    if (date_time.minute < 10):
        minute = "0"
    minute += str(date_time.minute)
    
    return hour + ":" + minute

In [3]:
class TrainStop:
    def __init__(self, 
                 date,
                 ID,
                 operator_ID,
                 operator,
                 operator_name,
                 transport_type,
                 line_type,
                 line_text,
                 detour_ID,
                 vehicle_text,
                 is_extra,
                 is_cancelled,
                 bpuic,
                 stop_name, 
                 arrival_time,
                 arrival_predicted,
                 arrival_predicted_status,
                 departure_time,
                 departure_predicted,
                 departure_predicted_status,
                 is_coming_through):
        
        self.date = date_from_text(date)
        self.ID = ID
        self.operator_ID = operator_ID
        self.operator = operator
        self.operator_name = operator_name
        self.transport_type = transport_type
        self.line_type = line_type
        self.line_text = line_text
        self.detour_ID = detour_ID
        self.vehicle_text = vehicle_text
        self.is_extra = bool(is_extra)
        self.is_cancelled = bool(is_cancelled)
        self.bpuic = bpuic
        self.stop_name = stop_name
        self.arrival_time = datetime_from_text(arrival_time)
        self.arrival_predicted = datetime_from_text(arrival_predicted)
        self.arrival_predicted_status = arrival_predicted_status
        self.departure_time = datetime_from_text(departure_time)
        self.departure_predicted = datetime_from_text(departure_predicted)
        self.departure_predicted_status = departure_predicted_status
        self.is_coming_through = is_coming_through
    
    def __str__(self):
        s = ""
        s += self.date.isoformat() + " "
        s += print_time(self.arrival_time) + " "
        s += self.stop_name
        return s
    
    def arrival_delay(self):
        delay = self.arrival_predicted - self.arrival_time
        delay_minutes = delay.total_seconds() / 60
        if delay_minutes < -600:
            delay_minutes = 0
        return delay_minutes
    
    def departure_delay(self):
        delay = self.departure_predicted - self.departure_time
        delay_minutes = delay.total_seconds() / 60
        if delay_minutes < -600:
            delay_minutes = 0
        return delay_minutes
        

In [4]:
def create_train_stop(train):
    return TrainStop(train["BETRIEBSTAG"],
              train["FAHRT_BEZEICHNER"],
              train["BETREIBER_ID"],
              train["BETREIBER_ABK"],
              train["BETREIBER_NAME"],
              train["PRODUKT_ID"],
              train["LINIEN_ID"],
              train["LINIEN_TEXT"],
              train["UMLAUF_ID"],
              train["VERKEHRSMITTEL_TEXT"],
              train["ZUSATZFAHRT_TF"],
              train["FAELLT_AUS_TF"],
              train["BPUIC"],
              train["HALTESTELLEN_NAME"],
              train["ANKUNFTSZEIT"],
              train["AN_PROGNOSE"],
              train["AN_PROGNOSE_STATUS"],
              train["ABFAHRTSZEIT"],
              train["AB_PROGNOSE"],
              train["AB_PROGNOSE_STATUS"],
              train["DURCHFAHRT_TF"])

In [32]:
data_by_date = {}

for filename in os.listdir('./data'):
    date = datetime.date(int(filename[0:4]),int(filename[5:7]),int(filename[8:10]))
    file = open("data/" + date.isoformat() + "_IstDaten.csv")
    data_by_date[date] = csv.DictReader(file, delimiter=';')
    

In [33]:
#import data from files

train_stops = []
    
for date in data_by_date:
    for train in data_by_date[date]:
        train_stops.append(create_train_stop(train))

In [34]:
#count delay per stop

train_stops_delay = {}

for train_stop in train_stops:
    if train_stop not in train_stops_delay:
        train_stops_delay[train_stop.stop_name] = 0
    train_stops_delay[train_stop.stop_name] += train_stop.arrival_delay()
    
    

In [35]:
train_stops_delay

{'': 0.0,
 'Saalfelden': 0,
 'Schwanden GL': -1.0,
 'Stans': 3.0,
 'Besançon-Mouillère': 0,
 'Küngoldingen': -2.0,
 'Mannenbach-Salenstein': 14.0,
 'Liestal': 1.0,
 'Zürich Flughafen': 1.0,
 'Orschweier': 7.0,
 'St. Erhard-Knutwil': 0.0,
 'Otelfingen Golfpark': 3.0,
 'Cortébert': 3.0,
 'Sevelen': 0.0,
 'Dottikon-Dintikon': 1.0,
 'Uetendorf Allmend': 4.0,
 'Killwangen-Spreitenbach': 5.0,
 'Minden (Westf)': 3.0,
 'Hindelbank': 7.0,
 'Feldkirch': 0,
 'Riedbach BE': 2.0,
 'Bretonnières': 0,
 'Bäch': 0.0,
 'Wil': -1.0,
 'Chambésy': 0,
 'Raron': 4.0,
 'Locarno': 0.0,
 'Birr': 13.0,
 'Heimberg': 9.0,
 'Winterthur': 0.0,
 'Thayngen': 0.0,
 'Bassecourt': 0.0,
 'Mels': 1.0,
 'Wassen': 5.0,
 'Besnate': 0,
 'Siegershausen': 5.0,
 'Linthal Braunwaldbahn': 0.0,
 'Rüti ZH': 0.0,
 'Amriswil': 1.0,
 'Bad Krozingen': 2.0,
 'Rosé': -1.0,
 'Lyss': 0,
 'Wallisellen': 1.0,
 'Bad Hersfeld': 12.0,
 'Olten Hammer': 2.0,
 'Thurnen': 5.0,
 'Berlin-Spandau': 5.0,
 'Andelfingen': 2.0,
 'Padova': 0,
 'Eifeld': 3.0,

In [36]:
sorted_by_delay = sorted(train_stops_delay.items(), key=operator.itemgetter(1))
sorted_by_delay

[('Sedrun SMF', -12.0),
 ('Buchloe', -4.0),
 ('Eisenach', -3.0),
 ('La Heutte', -3.0),
 ('Solothurn Allmend', -3.0),
 ('Goppenstein', -3.0),
 ('Crémines-Zoo', -3.0),
 ('Arnex', -3.0),
 ('Küngoldingen', -2.0),
 ('Payerne', -2.0),
 ('Oberdorf SO', -2.0),
 ('Marthalen', -2.0),
 ('Beringerfeld', -2.0),
 ('Sennhof-Kyburg', -2.0),
 ('Lyss Grien', -2.0),
 ('Murg', -2.0),
 ('Giubiasco', -2.0),
 ('Porrentruy', -2.0),
 ('Neuchâtel', -2.0),
 ('Frutigen', -2.0),
 ('Hochdorf', -2.0),
 ('Gänsbrunnen', -2.0),
 ('Ausserberg', -2.0),
 ('Cully', -2.0),
 ('Grandval', -2.0),
 ('Uzwil', -2.0),
 ('Tenero', -2.0),
 ('Schwanden GL', -1.0),
 ('Wil', -1.0),
 ('Rosé', -1.0),
 ('Auvernier', -1.0),
 ('S. Nazzaro', -1.0),
 ('Lutry', -1.0),
 ('Zofingen', -1.0),
 ('Zell', -1.0),
 ('Thun', -1.0),
 ('Wünnewil', -1.0),
 ('Pully', -1.0),
 ('Bischofszell Nord', -1.0),
 ('Meiringen', -1.0),
 ('Les Hauts-Geneveys', -1.0),
 ('Wangen an der Aare', -1.0),
 ('Lungern', -1.0),
 ('Birmensdorf ZH', -1.0),
 ('Lausen', -1.0),
 ('Lei

In [37]:
train_stops_delay["Genève"]

-1.0