In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import contextily as ctx
import pyproj
from shapely.geometry import Point, LineString
from zipfile import ZipFile, Path
import datetime

In [2]:
with ZipFile("C:\\Users\\zare\\GEO\\geopanda\\gtfs.zip") as myzip:
    stops_df = pd.read_csv(myzip.open("stops.txt"), dtype={ 
    'stop_id': 'str', 
    'stop_code': 'str',
    'stop_name': 'str',
    'stop_desc' : 'str',                                              
    'stop_lat': 'float',
    'stop_lon': 'float',
    'location_type': 'Int64',
    'parent_station': 'str',
    'wheelchair_boarding': 'str', 
    'platform_code': 'str',
    'zone_id': 'str',
    'level_id' : 'str'
    })
    
    
    stop_times_df = pd.read_csv(myzip.open("stop_times.txt"), dtype={
        'trip_id': 'str',
        'arrival_time': 'str',
        'stop_id': 'str', 
        'departure_time': 'str', 
        'stop_id': 'str',
        'stop_sequence': 'Int64',
        'stop_headsign': 'str',
        'pickup_type': 'Int64',
        'drop_off_type': 'Int64',
    })
    
    
    
    routes_df = pd.read_csv(myzip.open("routes.txt"), dtype={
        'route_id': 'str',  
        'agency_id': 'str',  
        'route_short_name': 'str',  
        'route_long_name': 'str', 
        'route_desc': 'str', 
        'route_type': 'Int64',
        'route_color': 'str',  
        'route_text_color': 'str', 
        'rout_desc': 'str'
    })
    
    trips_df = pd.read_csv(myzip.open("trips.txt"), dtype={
        'route_id': 'str', 
        'service_id': 'str',  
        'trip_id': 'str',
        'shape_id': 'str', 
        'trip_headsign': 'str',
        'trip_short_name': 'str',
        'direction_id': 'Int64',  
        'block_id': 'str',
        'shape_id': 'str',
        'wheelchair_accessible': 'str',  
        'bikes_allowed': 'str'
    })
    
    shapes_df = pd.read_csv(myzip.open("shapes.txt"), dtype={
        'shape_id': 'str', 
        'shape_pt_lat': 'float', 
        'shape_pt_lon': 'float',  
        'shape_pt_sequence': 'Int64'
    })
    
    calendar_df = pd.read_csv(myzip.open("calendar.txt"), dtype={
        'service_id': 'str',  
        'monday': 'bool',  
        'tuesday': 'bool',  
        'wednesday': 'bool',  
        'thursday': 'bool',  
        'friday': 'bool', 
        'saturday': 'bool',  
        'sunday': 'bool',  
        'start_date': 'str', 
        'end_date': 'str',
    })
    
    calendar_dates_df = pd.read_csv(myzip.open("calendar_dates.txt"), dtype={
        'service_id': 'str',  
        'date': 'str',
        'exception_type': 'Int64',
    })
    
    agency_df = pd.read_csv(myzip.open("agency.txt"), dtype={
        'agency_id': 'str', 
        'agency_name': 'str', 
        'agency_url': 'str',  
        'agency_timezone': 'str',
        'agency_lang': 'str', 
        'agency_phone': 'str',
    })

In [3]:
show_date_str = "2023-11-27"

date = datetime.datetime.strptime(show_date_str, "%Y-%m-%d")
date_string = date.strftime("%Y%m%d")
day_of_week_name = date.strftime('%A').lower()

services_for_day_1 = calendar_df[(calendar_df[day_of_week_name]) & (date_string >= calendar_df.start_date) & (date_string <= calendar_df.end_date)].service_id.to_numpy()

services_added_for_day = calendar_dates_df[(calendar_dates_df.date == date_string) & (calendar_dates_df.exception_type == 1)].service_id.to_numpy()
services_removed_for_day = calendar_dates_df[(calendar_dates_df.date == date_string) & (calendar_dates_df.exception_type == 2)].service_id.to_numpy()
services_for_day_2 = np.concatenate([services_for_day_1, services_added_for_day])
services_for_day = np.setdiff1d(services_for_day_2, services_removed_for_day)

trips_for_day = trips_df[trips_df.service_id.isin(services_for_day)]
berlin_bus_route_ids = routes_df[(routes_df['route_type'] == 700) | (routes_df['route_type'] == 3) ].route_id.unique()
day_trip_buses = trips_for_day[trips_for_day.route_id.isin(berlin_bus_route_ids)]

In [4]:
#creating stop_gdf
stops_gdf = gpd.GeoDataFrame(stops_df, geometry = gpd.points_from_xy(stops_df.stop_lon, stops_df.stop_lat)).set_crs(epsg=4326)

In [5]:
#creating shape_gdf called shapes
shapes = shapes_df[["shape_id", "shape_pt_lat", "shape_pt_lon"]].groupby("shape_id").agg(list).apply(lambda x: LineString(zip(x.iloc[1], x.iloc[0])), axis=1)

In [6]:
shapes = gpd.GeoDataFrame( data=shapes.index, geometry = shapes.values, crs=4326)

In [7]:
shapes['shape_id'] = shapes.shape_id.astype(str)

In [8]:
shapes = shapes.rename(columns={'geometry': 'geometry_shapes'})
stops_gdf = stops_gdf.rename(columns={'geometry': 'geometry_stops'})

In [9]:
#merging data to get all info for shape_stop
stop_data_shape = pd.merge(day_trip_buses, stop_times_df[['trip_id','stop_id','stop_sequence']], on='trip_id')
stop_data_shape1 = pd.merge(stop_data_shape, stops_gdf[['stop_id','stop_name','geometry_stops']], on='stop_id')
stop_data_shape2 = pd.merge(stop_data_shape1, routes_df[['route_id','route_short_name']], on='route_id')

req_columns = ["shape_id", "stop_sequence", "stop_id", "geometry_stops"]
add_columns = ["route_id", "route_short_name","direction_id", "stop_name"]

df_shape_stop = stop_data_shape2[req_columns + add_columns].drop_duplicates()


In [10]:
#getting finall shapes of stops
df_shape_stop = pd.merge(df_shape_stop, shapes[['shape_id','geometry_shapes']], on='shape_id')


In [11]:
df_shape_stop

Unnamed: 0,shape_id,stop_sequence,stop_id,geometry_stops,route_id,route_short_name,direction_id,stop_name,geometry_shapes
0,137,0,de:12051:900275125::4,POINT (12.53589 52.41884),21947_700,2,0,"Brandenburg, Fontanestr.","LINESTRING (12.53589 52.41884, 12.53568 52.418..."
1,137,1,de:12051:900275226::1,POINT (12.51434 52.41764),21947_700,2,0,"Brandenburg, August-Sonntag-Str.","LINESTRING (12.53589 52.41884, 12.53568 52.418..."
2,137,2,de:12051:900275224::1,POINT (12.52295 52.41319),21947_700,2,0,"Brandenburg, Dreifertstr.","LINESTRING (12.53589 52.41884, 12.53568 52.418..."
3,137,3,de:12051:900275225::1,POINT (12.51621 52.41355),21947_700,2,0,"Brandenburg, Südtor","LINESTRING (12.53589 52.41884, 12.53568 52.418..."
4,137,4,de:12051:900275869::3,POINT (12.51130 52.41389),21947_700,2,0,"Brandenburg, Frankenstr.","LINESTRING (12.53589 52.41884, 12.53568 52.418..."
...,...,...,...,...,...,...,...,...,...
165805,13822,1,de:12062:900415005:1:50,POINT (13.71030 51.63692),19715_700,RB43,0,"Finsterwalde, Bahnhof","LINESTRING (13.56419 51.62061, 13.56429 51.620..."
165806,13824,1,de:12062:900415112:1:50,POINT (13.56416 51.62053),19715_700,RB43,1,"Doberlug-Kirchhain, Bahnhof","LINESTRING (13.71045 51.63700, 13.70806 51.636..."
165807,13824,0,de:12062:900415005:1:50,POINT (13.71030 51.63692),19715_700,RB43,1,"Finsterwalde, Bahnhof","LINESTRING (13.71045 51.63700, 13.70806 51.636..."
165808,13802,0,de:12070:900215696:1:50,POINT (11.85094 53.07086),19706_700,RE6,1,"Perleberg, Bahnhof","LINESTRING (11.85178 53.07105, 11.85094 53.070..."


In [12]:
#getting_distance
df_shape_stop["cut_distance_stop_point"] = df_shape_stop[["geometry_stops", "geometry_shapes"]].apply(lambda x: x.iloc[1].project(x.iloc[0], normalized=True), axis=1)

  return lib.line_locate_point_normalized(line, other)


In [13]:
df_shape_stop["projected_stop_point"] = df_shape_stop[["geometry_shapes", "cut_distance_stop_point"]].apply(lambda x: x.iloc[0].interpolate(x.iloc[1], normalized=True), axis=1)


In [14]:
#calculate distances
from shapely.geometry import LineString, MultiPoint

df_shape = shapes[shapes.shape_id.isin(stop_data_shape2.shape_id.unique())]
df_shape["list_of_points"] = df_shape.geometry_shapes.apply(lambda x: list(MultiPoint(x.coords).geoms))
df_shape_exp = df_shape.explode("list_of_points")
df_shape_exp["projected_line_points"] = df_shape_exp[["geometry_shapes", "list_of_points"]].apply(lambda x: x.iloc[0].project(x.iloc[1], normalized=True), axis=1)


  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
  return lib.line_locate_point_normalized(line, other)


In [15]:
#renaming dataframes and concatenating
df_shape_stop.rename({ "projected_stop_point": "geometry", "cut_distance_stop_point": "normalized_distance_along_shape"},axis=1,inplace=True)
df_shape_stop["cut_flag"] = True

df_shape_exp = df_shape_exp[["shape_id", "list_of_points", "projected_line_points"]]
df_shape_exp.rename({ "list_of_points": "geometry", "projected_line_points": "normalized_distance_along_shape"}, axis=1, inplace=True)
df_shape_exp["cut_flag"] = False

In [16]:
# combine stops and shape points

gdf = pd.concat([df_shape_stop, df_shape_exp], ignore_index=False)
gdf.sort_values(["shape_id", "normalized_distance_along_shape"], inplace=True)
gdf.reset_index(inplace=True, drop=True)




In [17]:
 # drop all non stops

cuts = gdf.where(gdf.cut_flag).dropna(subset="cut_flag")
cuts = cuts.astype({"shape_id": str, "stop_sequence": int, "direction_id": int})
cuts[["end_stop_id", "end_stop_name"]] = cuts.groupby("shape_id")[['stop_id', "stop_name"]].shift(-1)

In [18]:
#create segments for buses

segment_geometries = []
for shape_id in cuts.shape_id.drop_duplicates():
    cut_idx = cuts[cuts.shape_id == shape_id].index
    for i, cut in enumerate(cut_idx[:-1]):
        segment_geometries.append(LineString(gdf.iloc[cut_idx[i]:cut_idx[i+1]+1].geometry))

In [19]:
#creating bus_segments_gdf

segment_df = cuts.dropna(subset="end_stop_id", axis=0)
segment_gdf = gpd.GeoDataFrame(segment_df, geometry=segment_geometries)
segment_gdf.drop(["geometry_shapes", "cut_flag", "normalized_distance_along_shape", "geometry_stops"], axis=1, inplace=True)
segment_gdf.crs = "EPSG:4326"

segment_gdf['segment_id'] = segment_gdf.stop_id.astype(str) + ' - ' + segment_gdf.end_stop_id.astype(str)
segment_gdf['segment_name'] = segment_gdf.stop_name + ' - ' + segment_gdf.end_stop_name



In [20]:
segment_gdf

Unnamed: 0,shape_id,stop_sequence,stop_id,route_id,route_short_name,direction_id,stop_name,geometry,end_stop_id,end_stop_name,segment_id,segment_name
1,1,0,de:12051:900275308::1,327_700,B/522,0,"Brandenburg, Betriebshof Ausfahrt","LINESTRING (12.53409 52.43042, 12.53411 52.430...",de:12051:900275301::3,"Brandenburg, Betriebshof Hohenstücken",de:12051:900275308::1 - de:12051:900275301::3,"Brandenburg, Betriebshof Ausfahrt - Brandenbur..."
10,1,1,de:12051:900275301::3,327_700,B/522,0,"Brandenburg, Betriebshof Hohenstücken","LINESTRING (12.53071 52.42696, 12.53071 52.426...",de:12051:900275126::3,"Brandenburg, August-Bebel-Str.",de:12051:900275301::3 - de:12051:900275126::3,"Brandenburg, Betriebshof Hohenstücken - Brande..."
26,1,2,de:12051:900275126::3,327_700,B/522,0,"Brandenburg, August-Bebel-Str.","LINESTRING (12.53180 52.42265, 12.53180 52.422...",de:12051:900275123::5,"Brandenburg, Altstadt Bhf",de:12051:900275126::3 - de:12051:900275123::5,"Brandenburg, August-Bebel-Str. - Brandenburg, ..."
68,1,3,de:12051:900275123::5,327_700,B/522,0,"Brandenburg, Altstadt Bhf","LINESTRING (12.53053 52.41221, 12.53053 52.412...",de:12051:900275143::1,"Brandenburg, Wilhelmsdorfer Str.",de:12051:900275123::5 - de:12051:900275143::1,"Brandenburg, Altstadt Bhf - Brandenburg, Wilhe..."
114,1,4,de:12051:900275143::1,327_700,B/522,0,"Brandenburg, Wilhelmsdorfer Str.","LINESTRING (12.54856 52.39938, 12.54856 52.399...",de:12051:900275601::1,"Brandenburg, Göttiner Str.",de:12051:900275143::1 - de:12051:900275601::1,"Brandenburg, Wilhelmsdorfer Str. - Brandenburg..."
...,...,...,...,...,...,...,...,...,...,...,...,...
3306809,9999,25,de:12064:900320876::1,21750_700,889,1,"Prötzel, Schule","LINESTRING (13.97917 52.63359, 13.97788 52.632...",de:12064:900320874::1,"Prötzel, Forsthaus",de:12064:900320876::1 - de:12064:900320874::1,"Prötzel, Schule - Prötzel, Forsthaus"
3306818,9999,26,de:12064:900320874::1,21750_700,889,1,"Prötzel, Forsthaus","LINESTRING (13.96895 52.62538, 13.96888 52.625...",de:12064:900320877::1,"Prötzel, Sägewerk",de:12064:900320874::1 - de:12064:900320877::1,"Prötzel, Forsthaus - Prötzel, Sägewerk"
3306848,9999,27,de:12064:900320877::1,21750_700,889,1,"Prötzel, Sägewerk","LINESTRING (13.94445 52.60516, 13.94157 52.604...",de:12064:900320564::1,"Strausberg, Gesundheitszentrum",de:12064:900320877::1 - de:12064:900320564::1,"Prötzel, Sägewerk - Strausberg, Gesundheitszen..."
3306862,9999,28,de:12064:900320564::1,21750_700,889,1,"Strausberg, Gesundheitszentrum","LINESTRING (13.92058 52.59449, 13.92053 52.594...",de:12064:900320559::1,"Strausberg, Bundeswehr",de:12064:900320564::1 - de:12064:900320559::1,"Strausberg, Gesundheitszentrum - Strausberg, B..."


In [21]:
segment_gdf.columns

Index(['shape_id', 'stop_sequence', 'stop_id', 'route_id', 'route_short_name',
       'direction_id', 'stop_name', 'geometry', 'end_stop_id', 'end_stop_name',
       'segment_id', 'segment_name'],
      dtype='object')

In [22]:

segment_gdf.rename( columns=dict(stop_name='start_stop_name', stop_id='start_stop_id'),inplace=True)

In [26]:
index_ = ['route_id', 'route_short_name', 'stop_id']

col= 'window'

index_list = index_ + ['direction_id', col]

In [27]:
time_windows = [0,24]
cutoffs = time_windows

stop_times_df['arrival_time'] = pd.to_timedelta(stop_times_df['arrival_time'])
stop_times_df['departure_time'] = pd.to_timedelta(stop_times_df['departure_time'])

stop_times_df['arrival_time_in_seconds'] = stop_times_df['arrival_time'].dt.total_seconds()
stop_times_df['departure_time_in_seconds'] = stop_times_df['departure_time'].dt.total_seconds()

def fix_departure_time(times_to_fix):
    
    next_day = times_to_fix >= 24*3600
    times_to_fix[next_day] = times_to_fix[next_day] - 24 * 3600
    
    return times_to_fix

if max(cutoffs) <= 24:
    stop_times_df['departure_time'] = fix_departure_time(stop_times_df.departure_time_in_seconds.values)
    stop_times_df['arrival_time'] = fix_departure_time(stop_times_df.arrival_time_in_seconds.values)
    
def label_creation(cutoffs):
    
    labels = []
    if max(cutoffs) <= 24:
        for w in cutoffs:
            if float(w).is_integer():
                label = str(w) + ':00'
            else:
                n = math.modf(w)
                label = str(int(n[1])) + ':' + str(int(n[0]*60))
            labels.append(label)
    else:
        labels = []
        for w in cutoffs:
            if float(w).is_integer():
                if w > 24:
                    w1 = w-24
                    label = str(w1) + ':00'
                else:
                    label = str(w) + ':00'
                labels.append(label)
            else:
                if w > 24:
                    w1 = w-24
                    n = math.modf(w1)
                    label = str(int(n[1])) + ':' + str(int(n[0]*60))
                else:
                    n = math.modf(w)
                    label = str(int(n[1])) + ':' + str(int(n[0]*60))
                labels.append(label)

    labels = [labels[i] + '-' + labels[i+1] for i in range(0, len(labels)-1)]

    return labels

labels = label_creation(cutoffs)

departure_time = stop_times_df.departure_time / 3600
stop_times_df['window'] = pd.cut(departure_time, bins=cutoffs, right=False, labels=labels)

stop_times_df = stop_times_df.loc[~stop_times_df.window.isnull()]
stop_times_df['window'] = stop_times_df.window.astype(str)

In [28]:
day_trip_buses1 = pd.merge(day_trip_buses, stop_times_df[['trip_id','stop_id','arrival_time','departure_time','window']], on='trip_id')

day_trip_buses2 = pd.merge(day_trip_buses1, routes_df[['route_id','route_short_name']], on='route_id')

trips_agg = day_trip_buses2.pivot_table('trip_id', index=index_list,aggfunc='count').reset_index()

trips_agg.rename(columns={'trip_id': 'ntrips'}, inplace=True)

start_time = trips_agg.window.apply(lambda x: cutoffs[labels.index(x)])

end_time = trips_agg.window.apply(lambda x: cutoffs[labels.index(x) + 1])

trips_agg['frequency'] = (trips_agg.ntrips / (end_time - start_time))\
.astype(float)


In [29]:
line_frequencies = trips_agg

In [30]:
keep_these = [
            'route_id', 'route_short_name',  'segment_name', 
            'start_stop_name', 'end_stop_name',
            'segment_id', 'start_stop_id', 'end_stop_id',
            'direction_id', 'geometry']

In [31]:
line_frequencies = pd.merge(
            line_frequencies,
            segment_gdf[keep_these],
            left_on=['route_id', 'route_short_name', 'stop_id', 'direction_id'],
            right_on=['route_id', 'route_short_name', 'start_stop_id', 'direction_id'],
            how='left')

In [32]:
line_frequencies.drop('stop_id', axis=1, inplace=True)

In [33]:
 # Remove duplicates after merging
line_frequencies.drop_duplicates(inplace=True)

In [34]:
line_frequencies

Unnamed: 0,route_id,route_short_name,direction_id,window,ntrips,frequency,segment_name,start_stop_name,end_stop_name,segment_id,start_stop_id,end_stop_id,geometry
0,10296_3,657,1,0:00-24:00,4,0.166667,"Brieselang, Bahnhof/A.-Bebel-Str. - Brieselang...","Brieselang, Bahnhof/A.-Bebel-Str.","Brieselang, Bahnhof/A.-Bebel-Str.",de:12063:900210012::1 - de:12063:900210012::1,de:12063:900210012::1,de:12063:900210012::1,"LINESTRING (13.00186 52.58202, 13.00186 52.58202)"
1,10296_3,657,0,0:00-24:00,1,0.041667,,,,,,,
2,10296_3,657,1,0:00-24:00,4,0.166667,"Brieselang, Hölderlinstr. - Brieselang, Hölder...","Brieselang, Hölderlinstr.","Brieselang, Hölderlinstr.",de:12063:900210055::2 - de:12063:900210055::2,de:12063:900210055::2,de:12063:900210055::2,"LINESTRING (12.98366 52.57591, 12.98366 52.57591)"
3,10296_3,657,0,0:00-24:00,5,0.208333,"Brieselang, Hans-Klakow-Str. - Brieselang, Sch...","Brieselang, Hans-Klakow-Str.","Brieselang, Schillerstr./Forstweg",de:12063:900210061::1 - de:12063:900210062::1,de:12063:900210061::1,de:12063:900210062::1,"LINESTRING (12.99819 52.58134, 12.99811 52.581..."
4,10296_3,657,1,0:00-24:00,4,0.166667,"Brieselang, Hans-Klakow-Str. - Brieselang, Han...","Brieselang, Hans-Klakow-Str.","Brieselang, Hans-Klakow-Str.",de:12063:900210061::1 - de:12063:900210061::1,de:12063:900210061::1,de:12063:900210061::1,"LINESTRING (12.99819 52.58134, 12.99819 52.58134)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
159379,9813_700,599,0,0:00-24:00,10,0.416667,"Senftenberg, Klettwitzer Str. - Senftenberg, S...","Senftenberg, Klettwitzer Str.","Senftenberg, Straße des Bergmanns",de:12066:900435406::1 - de:12066:900435295::1,de:12066:900435406::1,de:12066:900435295::1,"LINESTRING (13.97406 51.52465, 13.97415 51.524..."
159382,9813_700,599,1,0:00-24:00,10,0.416667,"Senftenberg, Klettwitzer Str. - Schipkau, Heiz...","Senftenberg, Klettwitzer Str.","Schipkau, Heizhaus",de:12066:900435406::2 - de:12066:900435033::1,de:12066:900435406::2,de:12066:900435033::1,"LINESTRING (13.97361 51.52470, 13.89945 51.523..."
159383,9813_700,599,1,0:00-24:00,10,0.416667,"Senftenberg, Klettwitzer Str. - Hörlitz, Kinde...","Senftenberg, Klettwitzer Str.","Hörlitz, Kindergarten",de:12066:900435406::2 - de:12066:900435392::2,de:12066:900435406::2,de:12066:900435392::2,"LINESTRING (13.97360 51.52481, 13.97351 51.524..."
159385,9813_700,599,1,0:00-24:00,9,0.375000,"Hörlitz, Klubhaus - Hörlitz, Schipkauer Str.","Hörlitz, Klubhaus","Hörlitz, Schipkauer Str.",de:12066:900435480::1 - de:12066:900435142::1,de:12066:900435480::1,de:12066:900435142::1,"LINESTRING (13.95216 51.52851, 13.95130 51.528..."


In [35]:
def add_all_lines(
            line_frequencies,
            segments_gdf,
            labels,
            cutoffs):
    
    logging.info('adding data for all lines.')
    
    # Calculate sum of trips per segment with all lines
    all_lines = line_frequencies.pivot_table(
        ['ntrips'],
        index=['segment_id', 'window'],
        aggfunc='sum').reset_index()

    sort_these = ['direction_id', 'window', 'stop_sequence']

    data_all_lines = pd.merge(
        all_lines,
        segments_gdf.drop_duplicates(subset=['segment_id']),
        left_on=['segment_id'], right_on=['segment_id'],
        how='left').reset_index().sort_values(by=sort_these, ascending=True)

    data_all_lines.drop(['index'], axis=1, inplace=True)
    data_all_lines['route_id'] = 'ALL_LINES'
    data_all_lines['route_name'] = 'All lines'
    data_all_lines['direction_id'] = 'NA'

    # Add frequency for all lines
    start_time = data_all_lines.window.apply(lambda x: cutoffs[labels.index(x)])
    end_time = data_all_lines.window.apply(lambda x: cutoffs[labels.index(x) + 1])

    data_all_lines['min_per_trip'] = ((end_time - start_time)*60 / data_all_lines.ntrips)\
        .astype(int)

    # Append data for all lines to the input df
    data_complete = pd.concat([line_frequencies, data_all_lines]).reset_index(drop=True)

    return data_complete

NameError: name 'add_all_lines' is not defined

In [36]:
all_lines = line_frequencies.pivot_table(['ntrips'], index=['segment_id', 'window'], aggfunc='sum').reset_index()

In [37]:
sort_these = ['direction_id', 'window', 'stop_sequence']

In [44]:
data_all_lines = pd.merge( all_lines, segment_gdf.drop_duplicates(subset=['segment_id']),
        left_on=['segment_id'], right_on=['segment_id'],
        how='left').reset_index().sort_values(by=sort_these, ascending=True)

In [39]:
segment_gdf.columns

Index(['shape_id', 'stop_sequence', 'start_stop_id', 'route_id',
       'route_short_name', 'direction_id', 'start_stop_name', 'geometry',
       'end_stop_id', 'end_stop_name', 'segment_id', 'segment_name'],
      dtype='object')

In [45]:
data_all_lines

Unnamed: 0,index,segment_id,window,ntrips,shape_id,stop_sequence,start_stop_id,route_id,route_short_name,direction_id,start_stop_name,geometry,end_stop_id,end_stop_name,segment_name
4,4,000005170002 - 000005100145,0:00-24:00,19,13818,0,000005170002,19712_700,RB66,0,"Szczecin, Krzysztofa Kolumba","LINESTRING (14.54847 53.41732, 14.54847 53.417...",000005100145,"Szczecin, Gumience","Szczecin, Krzysztofa Kolumba - Szczecin, Gumience"
5,5,000005170002 - de:12073:900340004:1:50,0:00-24:00,19,13819,0,000005170002,19712_700,RB66,0,"Szczecin, Krzysztofa Kolumba","LINESTRING (14.54847 53.41732, 14.54847 53.417...",de:12073:900340004:1:50,"Angermünde, Bahnhof","Szczecin, Krzysztofa Kolumba - Angermünde, Bah..."
17,17,000301049002 - de:12052:900470225::1,0:00-24:00,6,13469,0,000301049002,21619_700,2N,0,"Cottbus, Jessener Str.","LINESTRING (14.30675 51.74755, 14.30724 51.747...",de:12052:900470225::1,"Cottbus, Platz der Freundschaft","Cottbus, Jessener Str. - Cottbus, Platz der Fr..."
144,144,710000999901 - 710009550190,0:00-24:00,7,2638,0,710000999901,25352_700,RB15,0,Inselstadt Malchow,"LINESTRING (12.42417 53.48211, 12.42417 53.482...",710009550190,"Nossentin, Bahnhof","Inselstadt Malchow - Nossentin, Bahnhof"
145,145,710008011799 - 710009990074,0:00-24:00,38,2610,0,710008011799,17047_700,EV65,0,"Hagenwerder, Bahnhof","LINESTRING (14.96013 51.07022, 14.96013 51.070...",710009990074,"Ostritz, Görlitzer Str.","Hagenwerder, Bahnhof - Ostritz, Görlitzer Str."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4545,4545,de:11000:900085207::2 - de:11000:900085259::1,0:00-24:00,62,10302,56,de:11000:900085207::2,17306_700,125,1,Pankower Allee/Reginhardstr. (Berlin),"LINESTRING (13.37339 52.56468, 13.37347 52.564...",de:11000:900085259::1,Mickestr. (Berlin),Pankower Allee/Reginhardstr. (Berlin) - Mickes...
4568,4568,de:11000:900085259::1 - de:11000:900006103::1,0:00-24:00,62,10302,57,de:11000:900085259::1,17306_700,125,1,Mickestr. (Berlin),"LINESTRING (13.37330 52.56233, 13.37321 52.562...",de:11000:900006103::1,Residenzstr./Reginhardstr. (Berlin),Mickestr. (Berlin) - Residenzstr./Reginhardstr...
431,431,de:11000:900006103::1 - de:11000:900009202::13,0:00-24:00,124,10302,58,de:11000:900006103::1,17306_700,125,1,Residenzstr./Reginhardstr. (Berlin),"LINESTRING (13.37021 52.56001, 13.37018 52.559...",de:11000:900009202::13,U Osloer Str./Tromsöer Str. (Berlin),Residenzstr./Reginhardstr. (Berlin) - U Osloer...
562,562,de:11000:900009202::13 - de:11000:900009202::9,0:00-24:00,165,10302,59,de:11000:900009202::13,17306_700,125,1,U Osloer Str./Tromsöer Str. (Berlin),"LINESTRING (13.37296 52.55722, 13.37324 52.557...",de:11000:900009202::9,U Osloer Str. (Berlin),U Osloer Str./Tromsöer Str. (Berlin) - U Osloe...


In [46]:
    data_all_lines.drop(['index'], axis=1, inplace=True)
    data_all_lines['route_id'] = 'ALL_LINES'
    data_all_lines['route_short_name'] = 'All lines'
    data_all_lines['direction_id'] = 'NA'

In [47]:
data_all_lines

Unnamed: 0,segment_id,window,ntrips,shape_id,stop_sequence,start_stop_id,route_id,route_short_name,direction_id,start_stop_name,geometry,end_stop_id,end_stop_name,segment_name
4,000005170002 - 000005100145,0:00-24:00,19,13818,0,000005170002,ALL_LINES,All lines,,"Szczecin, Krzysztofa Kolumba","LINESTRING (14.54847 53.41732, 14.54847 53.417...",000005100145,"Szczecin, Gumience","Szczecin, Krzysztofa Kolumba - Szczecin, Gumience"
5,000005170002 - de:12073:900340004:1:50,0:00-24:00,19,13819,0,000005170002,ALL_LINES,All lines,,"Szczecin, Krzysztofa Kolumba","LINESTRING (14.54847 53.41732, 14.54847 53.417...",de:12073:900340004:1:50,"Angermünde, Bahnhof","Szczecin, Krzysztofa Kolumba - Angermünde, Bah..."
17,000301049002 - de:12052:900470225::1,0:00-24:00,6,13469,0,000301049002,ALL_LINES,All lines,,"Cottbus, Jessener Str.","LINESTRING (14.30675 51.74755, 14.30724 51.747...",de:12052:900470225::1,"Cottbus, Platz der Freundschaft","Cottbus, Jessener Str. - Cottbus, Platz der Fr..."
144,710000999901 - 710009550190,0:00-24:00,7,2638,0,710000999901,ALL_LINES,All lines,,Inselstadt Malchow,"LINESTRING (12.42417 53.48211, 12.42417 53.482...",710009550190,"Nossentin, Bahnhof","Inselstadt Malchow - Nossentin, Bahnhof"
145,710008011799 - 710009990074,0:00-24:00,38,2610,0,710008011799,ALL_LINES,All lines,,"Hagenwerder, Bahnhof","LINESTRING (14.96013 51.07022, 14.96013 51.070...",710009990074,"Ostritz, Görlitzer Str.","Hagenwerder, Bahnhof - Ostritz, Görlitzer Str."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4545,de:11000:900085207::2 - de:11000:900085259::1,0:00-24:00,62,10302,56,de:11000:900085207::2,ALL_LINES,All lines,,Pankower Allee/Reginhardstr. (Berlin),"LINESTRING (13.37339 52.56468, 13.37347 52.564...",de:11000:900085259::1,Mickestr. (Berlin),Pankower Allee/Reginhardstr. (Berlin) - Mickes...
4568,de:11000:900085259::1 - de:11000:900006103::1,0:00-24:00,62,10302,57,de:11000:900085259::1,ALL_LINES,All lines,,Mickestr. (Berlin),"LINESTRING (13.37330 52.56233, 13.37321 52.562...",de:11000:900006103::1,Residenzstr./Reginhardstr. (Berlin),Mickestr. (Berlin) - Residenzstr./Reginhardstr...
431,de:11000:900006103::1 - de:11000:900009202::13,0:00-24:00,124,10302,58,de:11000:900006103::1,ALL_LINES,All lines,,Residenzstr./Reginhardstr. (Berlin),"LINESTRING (13.37021 52.56001, 13.37018 52.559...",de:11000:900009202::13,U Osloer Str./Tromsöer Str. (Berlin),Residenzstr./Reginhardstr. (Berlin) - U Osloer...
562,de:11000:900009202::13 - de:11000:900009202::9,0:00-24:00,165,10302,59,de:11000:900009202::13,ALL_LINES,All lines,,U Osloer Str./Tromsöer Str. (Berlin),"LINESTRING (13.37296 52.55722, 13.37324 52.557...",de:11000:900009202::9,U Osloer Str. (Berlin),U Osloer Str./Tromsöer Str. (Berlin) - U Osloe...


In [48]:
    # Add frequency for all lines
    start_time = data_all_lines.window.apply(lambda x: cutoffs[labels.index(x)])
    end_time = data_all_lines.window.apply(lambda x: cutoffs[labels.index(x) + 1])

    data_all_lines['frequency'] = ( data_all_lines.ntrips / (end_time - start_time) )\
        .astype(float)

In [49]:
# Append data for all lines to the input df
data_complete = pd.concat([line_frequencies, data_all_lines]).reset_index(drop=True)

In [50]:
data_complete

Unnamed: 0,route_id,route_short_name,direction_id,window,ntrips,frequency,segment_name,start_stop_name,end_stop_name,segment_id,start_stop_id,end_stop_id,geometry,shape_id,stop_sequence
0,10296_3,657,1,0:00-24:00,4,0.166667,"Brieselang, Bahnhof/A.-Bebel-Str. - Brieselang...","Brieselang, Bahnhof/A.-Bebel-Str.","Brieselang, Bahnhof/A.-Bebel-Str.",de:12063:900210012::1 - de:12063:900210012::1,de:12063:900210012::1,de:12063:900210012::1,"LINESTRING (13.00186 52.58202, 13.00186 52.58202)",,
1,10296_3,657,0,0:00-24:00,1,0.041667,,,,,,,,,
2,10296_3,657,1,0:00-24:00,4,0.166667,"Brieselang, Hölderlinstr. - Brieselang, Hölder...","Brieselang, Hölderlinstr.","Brieselang, Hölderlinstr.",de:12063:900210055::2 - de:12063:900210055::2,de:12063:900210055::2,de:12063:900210055::2,"LINESTRING (12.98366 52.57591, 12.98366 52.57591)",,
3,10296_3,657,0,0:00-24:00,5,0.208333,"Brieselang, Hans-Klakow-Str. - Brieselang, Sch...","Brieselang, Hans-Klakow-Str.","Brieselang, Schillerstr./Forstweg",de:12063:900210061::1 - de:12063:900210062::1,de:12063:900210061::1,de:12063:900210062::1,"LINESTRING (12.99819 52.58134, 12.99811 52.581...",,
4,10296_3,657,1,0:00-24:00,4,0.166667,"Brieselang, Hans-Klakow-Str. - Brieselang, Han...","Brieselang, Hans-Klakow-Str.","Brieselang, Hans-Klakow-Str.",de:12063:900210061::1 - de:12063:900210061::1,de:12063:900210061::1,de:12063:900210061::1,"LINESTRING (12.99819 52.58134, 12.99819 52.58134)",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102444,ALL_LINES,All lines,,0:00-24:00,62,2.583333,Pankower Allee/Reginhardstr. (Berlin) - Mickes...,Pankower Allee/Reginhardstr. (Berlin),Mickestr. (Berlin),de:11000:900085207::2 - de:11000:900085259::1,de:11000:900085207::2,de:11000:900085259::1,"LINESTRING (13.37339 52.56468, 13.37347 52.564...",10302,56.0
102445,ALL_LINES,All lines,,0:00-24:00,62,2.583333,Mickestr. (Berlin) - Residenzstr./Reginhardstr...,Mickestr. (Berlin),Residenzstr./Reginhardstr. (Berlin),de:11000:900085259::1 - de:11000:900006103::1,de:11000:900085259::1,de:11000:900006103::1,"LINESTRING (13.37330 52.56233, 13.37321 52.562...",10302,57.0
102446,ALL_LINES,All lines,,0:00-24:00,124,5.166667,Residenzstr./Reginhardstr. (Berlin) - U Osloer...,Residenzstr./Reginhardstr. (Berlin),U Osloer Str./Tromsöer Str. (Berlin),de:11000:900006103::1 - de:11000:900009202::13,de:11000:900006103::1,de:11000:900009202::13,"LINESTRING (13.37021 52.56001, 13.37018 52.559...",10302,58.0
102447,ALL_LINES,All lines,,0:00-24:00,165,6.875000,U Osloer Str./Tromsöer Str. (Berlin) - U Osloe...,U Osloer Str./Tromsöer Str. (Berlin),U Osloer Str. (Berlin),de:11000:900009202::13 - de:11000:900009202::9,de:11000:900009202::13,de:11000:900009202::9,"LINESTRING (13.37296 52.55722, 13.37324 52.557...",10302,59.0


In [59]:
data_complete_gdf = gpd.GeoDataFrame( data=data_complete.drop('geometry', axis=1), geometry=data_complete.geometry)

In [60]:
keep_these = [
            'route_id', 'route_short_name',
            'direction_id',
            'segment_name', 'start_stop_name', 'end_stop_name',
            'window', 'frequency', 'ntrips', 
            'start_stop_id', 'end_stop_id', 'segment_id',
            'geometry'
        ]

In [61]:
data_complete_gdf = data_complete_gfd.loc[~data_complete_gfd.geometry.isnull()][keep_these]

In [62]:
data_complete_gdf

Unnamed: 0,route_id,route_short_name,direction_id,segment_name,start_stop_name,end_stop_name,window,frequency,ntrips,start_stop_id,end_stop_id,segment_id,geometry
0,10296_3,657,1,"Brieselang, Bahnhof/A.-Bebel-Str. - Brieselang...","Brieselang, Bahnhof/A.-Bebel-Str.","Brieselang, Bahnhof/A.-Bebel-Str.",0:00-24:00,0.166667,4,de:12063:900210012::1,de:12063:900210012::1,de:12063:900210012::1 - de:12063:900210012::1,"LINESTRING (13.00186 52.58202, 13.00186 52.58202)"
2,10296_3,657,1,"Brieselang, Hölderlinstr. - Brieselang, Hölder...","Brieselang, Hölderlinstr.","Brieselang, Hölderlinstr.",0:00-24:00,0.166667,4,de:12063:900210055::2,de:12063:900210055::2,de:12063:900210055::2 - de:12063:900210055::2,"LINESTRING (12.98366 52.57591, 12.98366 52.57591)"
3,10296_3,657,0,"Brieselang, Hans-Klakow-Str. - Brieselang, Sch...","Brieselang, Hans-Klakow-Str.","Brieselang, Schillerstr./Forstweg",0:00-24:00,0.208333,5,de:12063:900210061::1,de:12063:900210062::1,de:12063:900210061::1 - de:12063:900210062::1,"LINESTRING (12.99819 52.58134, 12.99811 52.581..."
4,10296_3,657,1,"Brieselang, Hans-Klakow-Str. - Brieselang, Han...","Brieselang, Hans-Klakow-Str.","Brieselang, Hans-Klakow-Str.",0:00-24:00,0.166667,4,de:12063:900210061::1,de:12063:900210061::1,de:12063:900210061::1 - de:12063:900210061::1,"LINESTRING (12.99819 52.58134, 12.99819 52.58134)"
5,10296_3,657,1,"Brieselang, Hans-Klakow-Str. - Brieselang, Han...","Brieselang, Hans-Klakow-Str.","Brieselang, Hans-Klakow-Str.",0:00-24:00,0.166667,4,de:12063:900210061::2,de:12063:900210061::2,de:12063:900210061::2 - de:12063:900210061::2,"LINESTRING (12.99854 52.58118, 12.99854 52.58118)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
102444,ALL_LINES,All lines,,Pankower Allee/Reginhardstr. (Berlin) - Mickes...,Pankower Allee/Reginhardstr. (Berlin),Mickestr. (Berlin),0:00-24:00,2.583333,62,de:11000:900085207::2,de:11000:900085259::1,de:11000:900085207::2 - de:11000:900085259::1,"LINESTRING (13.37339 52.56468, 13.37347 52.564..."
102445,ALL_LINES,All lines,,Mickestr. (Berlin) - Residenzstr./Reginhardstr...,Mickestr. (Berlin),Residenzstr./Reginhardstr. (Berlin),0:00-24:00,2.583333,62,de:11000:900085259::1,de:11000:900006103::1,de:11000:900085259::1 - de:11000:900006103::1,"LINESTRING (13.37330 52.56233, 13.37321 52.562..."
102446,ALL_LINES,All lines,,Residenzstr./Reginhardstr. (Berlin) - U Osloer...,Residenzstr./Reginhardstr. (Berlin),U Osloer Str./Tromsöer Str. (Berlin),0:00-24:00,5.166667,124,de:11000:900006103::1,de:11000:900009202::13,de:11000:900006103::1 - de:11000:900009202::13,"LINESTRING (13.37021 52.56001, 13.37018 52.559..."
102447,ALL_LINES,All lines,,U Osloer Str./Tromsöer Str. (Berlin) - U Osloe...,U Osloer Str./Tromsöer Str. (Berlin),U Osloer Str. (Berlin),0:00-24:00,6.875000,165,de:11000:900009202::13,de:11000:900009202::9,de:11000:900009202::13 - de:11000:900009202::9,"LINESTRING (13.37296 52.55722, 13.37324 52.557..."


In [63]:
data_complete_gdf.to_file("C:\\Users\\zare\\GEO\\geopanda\\bus_segments_frequency.shp", driver="ESRI Shapefile")

  data_complete_gdf.to_file("C:\\Users\\zare\\GEO\\geopanda\\bus_segments_frequency.shp", driver="ESRI Shapefile")


In [30]:
#saving bus_segments_gdf

bus_segments_gdf = gpd.GeoDataFrame(pd.DataFrame(segment_gdf), geometry='geometry')

bus_segments_gdf.to_file("C:\\Users\\zare\\GEO\\geopanda\\bus_segments.shp", driver="ESRI Shapefile")

In [19]:
segment_gdf

Unnamed: 0,Bus_Num,dir,seq,seg_nm,start_nm,end_nm,seg_id,start_id,end_id,geometry
1,B/522,0,0,"Brandenburg, Betriebshof Ausfahrt - Brandenbur...","Brandenburg, Betriebshof Ausfahrt","Brandenburg, Betriebshof Hohenstücken",de:12051:900275308::1 - de:12051:900275301::3,de:12051:900275308::1,de:12051:900275301::3,"LINESTRING (12.53409 52.43042, 12.53411 52.430..."
10,B/522,0,1,"Brandenburg, Betriebshof Hohenstücken - Brande...","Brandenburg, Betriebshof Hohenstücken","Brandenburg, August-Bebel-Str.",de:12051:900275301::3 - de:12051:900275126::3,de:12051:900275301::3,de:12051:900275126::3,"LINESTRING (12.53071 52.42696, 12.53071 52.426..."
26,B/522,0,2,"Brandenburg, August-Bebel-Str. - Brandenburg, ...","Brandenburg, August-Bebel-Str.","Brandenburg, Altstadt Bhf",de:12051:900275126::3 - de:12051:900275123::5,de:12051:900275126::3,de:12051:900275123::5,"LINESTRING (12.53180 52.42265, 12.53180 52.422..."
68,B/522,0,3,"Brandenburg, Altstadt Bhf - Brandenburg, Wilhe...","Brandenburg, Altstadt Bhf","Brandenburg, Wilhelmsdorfer Str.",de:12051:900275123::5 - de:12051:900275143::1,de:12051:900275123::5,de:12051:900275143::1,"LINESTRING (12.53053 52.41221, 12.53053 52.412..."
114,B/522,0,4,"Brandenburg, Wilhelmsdorfer Str. - Brandenburg...","Brandenburg, Wilhelmsdorfer Str.","Brandenburg, Göttiner Str.",de:12051:900275143::1 - de:12051:900275601::1,de:12051:900275143::1,de:12051:900275601::1,"LINESTRING (12.54856 52.39938, 12.54856 52.399..."
...,...,...,...,...,...,...,...,...,...,...
3306809,889,1,25,"Prötzel, Schule - Prötzel, Forsthaus","Prötzel, Schule","Prötzel, Forsthaus",de:12064:900320876::1 - de:12064:900320874::1,de:12064:900320876::1,de:12064:900320874::1,"LINESTRING (13.97917 52.63359, 13.97788 52.632..."
3306818,889,1,26,"Prötzel, Forsthaus - Prötzel, Sägewerk","Prötzel, Forsthaus","Prötzel, Sägewerk",de:12064:900320874::1 - de:12064:900320877::1,de:12064:900320874::1,de:12064:900320877::1,"LINESTRING (13.96895 52.62538, 13.96888 52.625..."
3306848,889,1,27,"Prötzel, Sägewerk - Strausberg, Gesundheitszen...","Prötzel, Sägewerk","Strausberg, Gesundheitszentrum",de:12064:900320877::1 - de:12064:900320564::1,de:12064:900320877::1,de:12064:900320564::1,"LINESTRING (13.94445 52.60516, 13.94157 52.604..."
3306862,889,1,28,"Strausberg, Gesundheitszentrum - Strausberg, B...","Strausberg, Gesundheitszentrum","Strausberg, Bundeswehr",de:12064:900320564::1 - de:12064:900320559::1,de:12064:900320564::1,de:12064:900320559::1,"LINESTRING (13.92058 52.59449, 13.92053 52.594..."


In [21]:
index = ['route_id', 'route_name', 'stop_id']

col=window

SyntaxError: EOL while scanning string literal (1033661825.py, line 3)

In [20]:
time_windows = [0,24]
cutoffs = time_windows

stop_times_df['arrival_time'] = pd.to_timedelta(stop_times_df['arrival_time'])
stop_times_df['departure_time'] = pd.to_timedelta(stop_times_df['departure_time'])

stop_times_df['arrival_time_in_seconds'] = stop_times_df['arrival_time'].dt.total_seconds()
stop_times_df['departure_time_in_seconds'] = stop_times_df['departure_time'].dt.total_seconds()

def fix_departure_time(times_to_fix):
    
    next_day = times_to_fix >= 24*3600
    times_to_fix[next_day] = times_to_fix[next_day] - 24 * 3600
    
    return times_to_fix

if max(cutoffs) <= 24:
    stop_times_df['departure_time'] = fix_departure_time(stop_times_df.departure_time_in_seconds.values)
    stop_times_df['arrival_time'] = fix_departure_time(stop_times_df.arrival_time_in_seconds.values)
    
def label_creation(cutoffs):
    
    labels = []
    if max(cutoffs) <= 24:
        for w in cutoffs:
            if float(w).is_integer():
                label = str(w) + ':00'
            else:
                n = math.modf(w)
                label = str(int(n[1])) + ':' + str(int(n[0]*60))
            labels.append(label)
    else:
        labels = []
        for w in cutoffs:
            if float(w).is_integer():
                if w > 24:
                    w1 = w-24
                    label = str(w1) + ':00'
                else:
                    label = str(w) + ':00'
                labels.append(label)
            else:
                if w > 24:
                    w1 = w-24
                    n = math.modf(w1)
                    label = str(int(n[1])) + ':' + str(int(n[0]*60))
                else:
                    n = math.modf(w)
                    label = str(int(n[1])) + ':' + str(int(n[0]*60))
                labels.append(label)

    labels = [labels[i] + '-' + labels[i+1] for i in range(0, len(labels)-1)]

    return labels

labels = label_creation(cutoffs)

departure_time = stop_times_df.departure_time / 3600
stop_times_df['window'] = pd.cut(departure_time, bins=cutoffs, right=False, labels=labels)

stop_times_df = stop_times_df.loc[~stop_times_df.window.isnull()]
stop_times_df['window'] = stop_times_df.window.astype(str)

In [None]:
        # Aggregate trips
        line_frequencies = add_frequency(
            stop_times, labels, index_=['route_id', 'route_name', 'stop_id'],
            col='window', cutoffs=cutoffs)

        keep_these = [
            'route_id', 'route_name',  'segment_name', 
            'start_stop_name', 'end_stop_name',
            'segment_id', 'start_stop_id', 'end_stop_id',
            'direction_id', 'geometry']

        line_frequencies = pd.merge(
            line_frequencies,
            segment_gdf[keep_these],
            left_on=['route_id', 'route_name', 'stop_id', 'direction_id'],
            right_on=['route_id', 'route_name', 'start_stop_id', 'direction_id'],
            how='left')
        
        line_frequencies.drop('stop_id', axis=1, inplace=True)

        # Remove duplicates after merging
        line_frequencies.drop_duplicates(inplace=True)

        # Aggregate for all lines
        data_complete = add_all_lines(
            line_frequencies, segment_gdf, labels, cutoffs)

In [28]:
index_ = ['route_id', 'route_short_name', 'stop_id']

col= 'window'

index_list = index_ + ['direction_id', col]

In [29]:
day_trip_buses1 = pd.merge(day_trip_buses, stop_times_df[['trip_id','stop_id','arrival_time','departure_time','window']], on='trip_id')

In [30]:
day_trip_buses2 = pd.merge(day_trip_buses1, routes_df[['route_id','route_short_name']], on='route_id')

In [31]:
trips_agg = day_trip_buses2.pivot_table('trip_id', index=index_list,aggfunc='count').reset_index()

In [32]:
trips_agg

Unnamed: 0,route_id,route_short_name,stop_id,direction_id,window,trip_id
0,10296_3,657,de:12063:900210012::1,1,0:00-24:00,4
1,10296_3,657,de:12063:900210012::2,0,0:00-24:00,1
2,10296_3,657,de:12063:900210055::2,1,0:00-24:00,4
3,10296_3,657,de:12063:900210061::1,0,0:00-24:00,5
4,10296_3,657,de:12063:900210061::1,1,0:00-24:00,4
...,...,...,...,...,...,...
53012,9813_700,599,de:12066:900435392::2,1,0:00-24:00,9
53013,9813_700,599,de:12066:900435406::1,0,0:00-24:00,10
53014,9813_700,599,de:12066:900435406::2,1,0:00-24:00,10
53015,9813_700,599,de:12066:900435480::1,1,0:00-24:00,9


In [33]:
trips_agg.rename(columns={'trip_id': 'ntrips'}, inplace=True)

In [34]:
start_time = trips_agg.window.apply(lambda x: cutoffs[labels.index(x)])

end_time = trips_agg.window.apply(lambda x: cutoffs[labels.index(x) + 1])

In [37]:
trips_agg['frequency'] = (trips_agg.ntrips / (end_time - start_time))\
.astype(float)

In [38]:
trips_agg

Unnamed: 0,route_id,route_short_name,stop_id,direction_id,window,ntrips,frequency
0,10296_3,657,de:12063:900210012::1,1,0:00-24:00,4,0.166667
1,10296_3,657,de:12063:900210012::2,0,0:00-24:00,1,0.041667
2,10296_3,657,de:12063:900210055::2,1,0:00-24:00,4,0.166667
3,10296_3,657,de:12063:900210061::1,0,0:00-24:00,5,0.208333
4,10296_3,657,de:12063:900210061::1,1,0:00-24:00,4,0.166667
...,...,...,...,...,...,...,...
53012,9813_700,599,de:12066:900435392::2,1,0:00-24:00,9,0.375000
53013,9813_700,599,de:12066:900435406::1,0,0:00-24:00,10,0.416667
53014,9813_700,599,de:12066:900435406::2,1,0:00-24:00,10,0.416667
53015,9813_700,599,de:12066:900435480::1,1,0:00-24:00,9,0.375000


In [39]:
segment_gdf.columns

Index(['Bus_Num', 'dir', 'seq', 'seg_nm', 'start_nm', 'end_nm', 'seg_id',
       'start_id', 'end_id', 'geometry'],
      dtype='object')

In [40]:
keep_these = [
            'route_id', 'route_short_name',  'seg_nm', 
            'start_nm', 'end_nm',
            'seg_id', 'start_id', 'end_id',
            'dir', 'geometry']

In [41]:
line_frequencies = trips_agg

In [43]:
segment_gdf = segment_gdf.rename(columns={'Bus_Num': 'route_short_name'})


In [None]:
segment_gdf = pd.merge(segment_gdf, day_trip_buses2[['route_short_name','route_id']], on='route_short_name')

In [42]:
line_frequencies = pd.merge( line_frequencies, segment_gdf[keep_these],
            left_on=['route_id', 'route_short_name', 'stop_id', 'direction_id'],
            right_on=['route_id', 'Bus_Num', 'start_id', 'dir'],
            how='left')

KeyError: "['route_id', 'route_short_name'] not in index"