# Filter gtfs-feed
A notebook for reducing a GTFS feed to the timetables of a selected set of routes.

In [None]:
import os
import csv
import codecs
import zipfile
import tempfile
import shutil

In [None]:
# The input GTFS feed. This is the only path that has to be edited; the two
# output paths below are derived from it and end up next to the input file.
input_gtfs = r"D:\data\90_divers\gtfsfp20192018-12-05.zip"

# Input path without its ".zip" extension, used to build the output names.
gtfs_base = os.path.splitext(input_gtfs)[0]

# A temporary GTFS feed encoded in plain utf-8 instead of utf-8-sig.
# The feeds from https://opentransportdata.swiss/de/ are utf-8 with a BOM.
output_gtfs_without_bom = gtfs_base + "_without_bom.zip"

# The GTFS feed after filtering.
output_gtfs_small = gtfs_base + "_small.zip"

## Change encoding
Re-encode the files of the input GTFS feed from utf-8-sig (UTF-8 with a byte order mark) to plain utf-8.
This step is only necessary if the files of the input GTFS feed start with a byte order mark (BOM).

In [None]:
# Re-encode every member of the input feed from utf-8-sig to plain utf-8 and
# pack the result into `output_gtfs_without_bom`.
#
# The scratch directory is created with a unique name next to the output
# archive, so an unrelated, already existing "tmp" folder is never deleted.
path_tmp_dict = tempfile.mkdtemp(
    prefix="tmp_",
    dir=os.path.dirname(os.path.abspath(output_gtfs_without_bom)),
)
try:
    with zipfile.ZipFile(input_gtfs, "r") as f:
        for filename in f.namelist():
            # Directory entries carry no data and cannot be opened as files.
            if filename.endswith("/"):
                continue
            # Decoding as utf-8-sig drops a leading BOM when there is one and
            # is a no-op otherwise.
            data = f.read(filename).decode("utf-8-sig").encode("utf-8")
            tmp_filename = os.path.join(path_tmp_dict, filename)
            parent_dir = os.path.dirname(tmp_filename)
            if not os.path.isdir(parent_dir):
                os.makedirs(parent_dir)
            # `data` is bytes: binary mode is required on Python 3 and keeps
            # line endings untouched on Windows.
            with open(tmp_filename, "wb") as tmp:
                tmp.write(data)
    shutil.make_archive(os.path.splitext(output_gtfs_without_bom)[0], "zip", path_tmp_dict)
finally:
    # Remove the scratch directory even if re-encoding or zipping failed.
    shutil.rmtree(path_tmp_dict)

## Filter gtfs-feed
Given a list of route ids, we keep only the information in the input GTFS feed that is connected to at least one of these routes.

In [None]:
# ids of the routes (values of `route_id` in routes.txt) to keep in the
# filtered feed
route_ids = [
    "1-85-j19-1",
    "61-265-Y-j19-1",
    "26-759-j19-1",
    "90-73-Y-j19-1",
    "6-1-j19-1",
]

In [None]:
def filter_gtfs_feed(input_gtfs_feed, output_gtfs_feed, routes_to_filter):
    """Write a copy of a GTFS feed that only contains the given routes.

    Starting from ``routes_to_filter`` the dependent records are followed
    through the feed: trips of the routes, stop times of those trips, stops
    used by those stop times, calendar entries of the trips' services, the
    routes themselves and their agencies. Transfers are kept only when both
    ends are kept stops. ``feed_info.txt`` is copied unchanged.

    Args:
        input_gtfs_feed: Path of the zipped GTFS feed to read.
        output_gtfs_feed: Path of the zip archive to create. The archive is
            written to this path with its extension replaced by ``.zip``.
        routes_to_filter: Iterable of ``route_id`` values to keep.

    ``trips.txt``, ``stop_times.txt``, ``stops.txt``, ``routes.txt`` and
    ``agency.txt`` must be present in the feed (and ``routes.txt`` must have
    an ``agency_id`` column); ``calendar.txt``, ``calendar_dates.txt``,
    ``transfers.txt`` and ``feed_info.txt`` are skipped when absent.
    """
    import sys  # local import: only needed to pick the csv file modes

    is_py3 = sys.version_info[0] >= 3
    wanted_routes = set(routes_to_filter)

    def open_csv(path, mode):
        # The csv module expects text files opened with newline="" on
        # Python 3 and binary files on Python 2.
        if is_py3:
            # Reading as utf-8-sig transparently drops a BOM if there is one.
            encoding = "utf-8-sig" if mode == "r" else "utf-8"
            return open(path, mode, newline="", encoding=encoding)
        return open(path, mode + "b")

    def filter_rows(keep, feed_file, new_keys=()):
        """Copy the rows accepted by ``keep`` and collect their ``new_keys``.

        Returns a list with one set per entry of ``new_keys`` (in the same
        order), holding the values of that column over all copied rows. The
        sets are empty, not missing, when no row matches.
        """
        collected = [set() for _ in new_keys]
        with open_csv(os.path.join(dir_unzipped, feed_file), "r") as src:
            reader = csv.DictReader(src)
            fieldnames = reader.fieldnames or []
            with open_csv(os.path.join(path_tmp_dict, feed_file), "w") as dst:
                # The header line is written unquoted, the data rows quoted.
                dst.write(",".join(fieldnames) + "\r\n")
                writer = csv.DictWriter(dst, fieldnames=fieldnames, quotechar='"', quoting=csv.QUOTE_ALL)
                # Rows are streamed so large files are never held in memory.
                for row in reader:
                    if keep(row):
                        writer.writerow(row)
                        for bucket, new_key in zip(collected, new_keys):
                            bucket.add(row[new_key])
        return collected

    def filter_by_id(key, values, feed_file, new_keys=(), required=True):
        """Keep the rows of ``feed_file`` whose ``key`` column is in ``values``."""
        if not required and not os.path.isfile(os.path.join(dir_unzipped, feed_file)):
            return [set() for _ in new_keys]
        return filter_rows(lambda row: row[key] in values, feed_file, new_keys)

    # Both working directories get unique names next to the output archive so
    # that no existing directory is overwritten or deleted.
    work_root = os.path.dirname(os.path.abspath(output_gtfs_feed))
    dir_unzipped = tempfile.mkdtemp(prefix="gtfs_unzipped_", dir=work_root)
    path_tmp_dict = tempfile.mkdtemp(prefix="gtfs_filtered_", dir=work_root)
    try:
        # unzip gtfs-feed
        with zipfile.ZipFile(input_gtfs_feed) as f:
            f.extractall(dir_unzipped)

        trip_ids, service_ids = filter_by_id("route_id", wanted_routes, "trips.txt", ["trip_id", "service_id"])
        stop_ids = filter_by_id("trip_id", trip_ids, "stop_times.txt", ["stop_id"])[0]
        filter_by_id("stop_id", stop_ids, "stops.txt")
        filter_by_id("service_id", service_ids, "calendar.txt", required=False)
        filter_by_id("service_id", service_ids, "calendar_dates.txt", required=False)
        agency_ids = filter_by_id("route_id", wanted_routes, "routes.txt", ["agency_id"])[0]
        filter_by_id("agency_id", agency_ids, "agency.txt")

        # A transfer is only meaningful if both of its stops survived.
        if os.path.isfile(os.path.join(dir_unzipped, "transfers.txt")):
            filter_rows(
                lambda row: row["from_stop_id"] in stop_ids and row["to_stop_id"] in stop_ids,
                "transfers.txt",
            )

        feed_info = os.path.join(dir_unzipped, "feed_info.txt")
        if os.path.isfile(feed_info):
            shutil.copyfile(feed_info, os.path.join(path_tmp_dict, "feed_info.txt"))

        # zip it
        shutil.make_archive(os.path.splitext(output_gtfs_feed)[0], "zip", path_tmp_dict)
    finally:
        # remove tmp-data, also when filtering failed half-way
        shutil.rmtree(path_tmp_dict, ignore_errors=True)
        shutil.rmtree(dir_unzipped, ignore_errors=True)
    
    

In [None]:
# Run the filter on the BOM-free feed and write the reduced feed.
filter_gtfs_feed(
    input_gtfs_feed=output_gtfs_without_bom,
    output_gtfs_feed=output_gtfs_small,
    routes_to_filter=route_ids,
)