# 21_5 Descriptive Analysis - Delay Spans

Let us now take a closer look on the characterisitcs of our aggregrated data.

In [1]:
# import libraries
import pandas as pd
import numpy as np
import sklearn
from datetime import datetime
import os
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
import shapely
import hashlib
import holidays
from shapely import Point, wkt
import scipy.stats
import seaborn as sns

## Load and prepare data

In [2]:
data_month_double = '07'

In [3]:
# read final data
data = pd.read_csv("../20_Final_Data.csv")

  data = pd.read_csv("../../../20_Final_Data_NEW_TEST.csv")


In [4]:
tier = pd.read_parquet("../Data/12_tier_part2.parquet")
next = pd.read_parquet("../Data/15_nextbike_part2.parquet")

In [5]:
tier['trip_duration_seconds'] = tier['trip_duration'].dt.total_seconds()
next['trip_duration_seconds'] = next['trip_duration'].dt.total_seconds()

In [6]:
# transform geometry
data['station_point'] = data['station_point'].apply(wkt.loads)

In [7]:
data['buffer_zone'] = shapely.wkt.loads(data['buffer_zone'])

In [8]:
data = gpd.GeoDataFrame(data, geometry='station_point')

In [9]:
data = data.loc[:, ~data.columns.str.contains('Unnamed')]

In [10]:
#data.drop(columns=['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.2'], inplace=True)

In [11]:
data['actual_arrival_time'] = pd.to_datetime(data['actual_arrival_time'])
data['actual_departure_time'] = pd.to_datetime(data['actual_departure_time'])
data['scheduled_arrival_time'] = pd.to_datetime(data['scheduled_arrival_time'])
data['scheduled_departure_time'] = pd.to_datetime(data['scheduled_departure_time'])

In [12]:
data = gpd.GeoDataFrame(data, geometry='buffer_zone')

In [13]:
data.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 2564861 entries, 0 to 2564860
Data columns (total 56 columns):
 #   Column                               Dtype         
---  ------                               -----         
 0   route_id                             int64         
 1   agency_id                            int64         
 2   route_short_name                     object        
 3   route_type                           int64         
 4   route_type_name                      object        
 5   agency_name                          object        
 6   service_id                           int64         
 7   trip_id                              object        
 8   trip_headsign                        object        
 9   direction_id                         int64         
 10  shape_id                             int64         
 11  stop_id                              int64         
 12  actual_arrival_time                  datetime64[ns]
 13  actual_departure_ti

In [14]:
data['date'] = pd.to_datetime(data['date'])

In [15]:
data['arrival_delay'] = pd.to_timedelta(data['arrival_delay'])
data['departure_delay'] = pd.to_timedelta(data['departure_delay'])

In [16]:
data['arrival_delay_float'] = data['arrival_delay'].astype(str)
data['arrival_delay_float'] = data['arrival_delay_float'].str.split(' ', n=2).str[-1]
data['arrival_delay_float'] = data['arrival_delay_float'].astype(str)
data['arrival_delay_float'] = pd.to_timedelta(data['arrival_delay_float'])
data['arrival_delay_float'] = pd.to_timedelta(data['arrival_delay_float']).dt.total_seconds() / 60

In [17]:
data['departure_delay_float'] = data['departure_delay'].astype(str)
data['departure_delay_float'] = data['departure_delay_float'].str.split(' ', n=2).str[-1]
data['departure_delay_float'] = data['departure_delay_float'].astype(str)
data['departure_delay_float'] = pd.to_timedelta(data['departure_delay_float'])
data['departure_delay_float'] = pd.to_timedelta(data['departure_delay_float']).dt.total_seconds() / 60

In [18]:

#NEW FEATURES
data['delay_category'] = data['arrival_delay'].apply(lambda x: 1 if x > pd.Timedelta(0) else 0 if x == pd.Timedelta(0) else -1)
data['cancelled_trip'] = np.where((data['scheduled_arrival_time'].notna() & data['actual_arrival_time'].isna() & data['scheduled_departure_time'].notna() & data['actual_departure_time'].isna()), 1, 0)

## Data Description

### Definition of functions

In [19]:
def calc_stats(data, column_name):

    #generated by Copilot
    rows = len(data.index)

    sum = data[column_name].sum()

#Central Tendency

    # Calculate mean (average)
    mean = data[column_name].mean()
    # Calculate median (middle value)
    median = data[column_name].median()
    # Calculate mode (most frequent value)
    mode = data[column_name].mode()
    #calc maximum
    max = data[column_name].max()
    #calc minimum
    min = data[column_name].min()

    # Calculate range
    data_range = data[column_name].max() - data[column_name].min()
    # Calculate coefficient of variation of range
    cvr = data_range / mean
    # Calculate coefficient of variation of mean
    cvm = mean / mean
    # Calculate coefficient of variation of median
    cvmed = median / mean
    # Calculate coefficient of variation of mode
    cvmode = mode / mean
    # Calculate coefficient of variation of standard deviation
   

# Variability
# Measure of how spread out the values in a data set are
    
    # Calculate standard deviation (average amount of variability in a dataset; tells how far each score lies from the mean; the larger the standard deviation, the more variable the data)
    std_dev = data[column_name].std()
    # Calculate variance (average squared deviation from the mean; square of standard deviation; units of varriance are much larger than those of a typical value in the dataset)
    variance = data[column_name].var()
    # Calculate coefficient of variation
    cv = std_dev / mean # standard deviation divided by the mean; measures the relative variability of a dataset; allows comparison of variability of datasets with different units of measurement
    # Calculate 25th percentile
    percentile_25 = data[column_name].quantile(0.25)
    # Calculate 75th percentile
    percentile_75 = data[column_name].quantile(0.75)
    # Calculate interquartile range
    iqr = percentile_75 - percentile_25 # difference between the 75th and 25th percentiles; measures the spread of the middle 50% of values in a dataset
    # Calculate coefficient of quartile deviation
    qd = iqr / (percentile_75 + percentile_25)
    # Calculate coefficient of variation of coefficient of variation
    cvcv = std_dev / mean
    # Calculate coefficient of variation of coefficient of quartile deviation
    cvqd = iqr / (percentile_75 + percentile_25)
    # Calculate coefficient of variation of range

#Measure of Shape

    # Calculate skewness - measure of how much the probability distribution of a random variable deviates from the normal distribution
    skewness = data[column_name].skew()
    
    # Calculate kurtosis
    kurtosis = data[column_name].kurtosis()

    # Calculate coefficient of skewness
    cs = skewness / std_dev
    # Calculate coefficient of kurtosis
    ck = kurtosis / std_dev
    # Calculate coefficient of variation of skewness
    cvs = skewness / std_dev
    # Calculate coefficient of variation of kurtosis
    cvk = kurtosis / std_dev
    # Calculate coefficient of variation of coefficient of skewness
    cvcs = skewness / std_dev
    # Calculate coefficient of variation of coefficient of kurtosis
    cvck = kurtosis / std_dev
    
    
    
    #return "column: " + str(column_name), "mean: " + str(mean), "median: " + str(median), "mode: " + str(mode), "std_dev: " + str(std_dev), "variance: " + str(variance), "skewness: "+ str(skewness), "kurtosis: " + str(kurtosis), "percentile_25: "+ str(percentile_25), "percentile_75: "+ str(percentile_75), "iqr: " + str(iqr), "range: "+ str(data_range), "cv"+ str(cv), "qd: "+str(qd), "cs: "+ str(cs), "ck: " + str(ck), "csv: "+ str(cvs), "cvk: "+ str(cvk), "cvcs: "+ str(cvcs), "cvck: " +str(cvck), "cvcv: "+ str(cvcv), "cvqd: "+ str (cvqd), "cvr: "+ str(cvr), "cvm: " + str(cvm), "cvmed: " + str(cvmed), "cvmode: "+ str(cvmode)

    return pd.DataFrame({'column': [column_name],
                            'rows': [rows],
                         'sum': [sum],
                         'mean': [mean],
                         'median': [median],
                         'mode': [mode],
                         'max': [max],
                         'min': [min],
                         'std_dev': [std_dev],
                         'variance': [variance],
                         'skewness': [skewness],
                         'kurtosis': [kurtosis],
                         'percentile_25': [percentile_25],
                         'percentile_75': [percentile_75],
                         'iqr': [iqr],
                         'range': [data_range],
                         'cv': [cv],
                         'qd': [qd],
                         'cs': [cs],
                         'ck': [ck],
                         'cvs': [cvs],
                         'cvk': [cvk],
                         'cvcs': [cvcs],
                         'cvck': [cvck],
                         'cvcv': [cvcv],
                         'cvqd': [cvqd],
                         'cvr': [cvr],
                         'cvm': [cvm],
                         'cvmed': [cvmed],
                         'cvmode': [cvmode]}).T

In [20]:
# create a shorter dataframe with most interesting columns - for better overview
def show_short_df(dataframe):
    delay_short = dataframe[['route_id', 'trip_id', 'stop_id', 'stop_name', 'scheduled_arrival_time', 'actual_arrival_time', 'scheduled_departure_time', 'actual_departure_time', 'arrival_delay', 'departure_delay', 'tier_trips_count', 'tier_trips_end_at_station_count', 'nextbike_trips_count', 'nextbike_trips_end_at_station_count', 'buffer_zone', 'arrival_delay_float', 'departure_delay_float']]
    return delay_short

In [21]:
# transform geometry
point_start = gpd.GeoDataFrame(geometry=gpd.GeoSeries.from_wkb(tier["start_location"], crs=4326))
point_end = gpd.GeoDataFrame(geometry=gpd.GeoSeries.from_wkb(tier["end_location"], crs=4326))
# drop geometry columns in wrong format
tier = tier.drop(columns=["start_location", "end_location"])
# replace geometry columns with correct format
tier['start_location'] = point_start
tier['end_location'] = point_end

In [22]:
# get the data from the tier / nextbike dataset
def check_micromobility_datasets(type, data):
    
    if type == 'tier':

        return_data = tier.copy()
        return_data = return_data.iloc[0:0] # empty dataframe

        for item in data['tier_trips_id'].str.split(' ').explode().dropna():   # iterate over all tier ids in the dataframe
            
            id = tier[tier['tier_trips_id'] == int(float(item))]   # get the rows with the specific id
            return_data = pd.concat([return_data, id])  # add the rows to the return dataframe

            return_data.drop_duplicates(inplace=True)
            
    if type == 'next':

        return_data = next.copy()
        return_data = return_data.iloc[0:0] # empty dataframe

        for item in data['nextbike_trips_id'].str.split(' ').explode().dropna():   # iterate over all nextbike ids in the dataframe
            
            id = next[next['nextbike_trips_id'] == int(float(item))]  # get the rows with the specific id
            return_data = pd.concat([return_data, id]) # add the rows to the return dataframe

            return_data.drop_duplicates(inplace=True)

    return return_data

In [23]:
def get_station_to_station(data):
    t = check_micromobility_datasets('tier', data)
    n = check_micromobility_datasets('nextbike', data)

    t = t[t['end_stop_id'].notna() & t['start_stop_id'].notna()]
    n = n[n['end_stop_id'].notna() & n['start_stop_id'].notna()]
    
    return t, n

In [24]:
# map the start and end locations of the trips on a map and connect them with a line

def map_buffer_zones(dataset):
    
    micromobility_data = check_micromobility_datasets('tier', dataset)
    
    # Create a folium map object
    m = folium.Map(location=[50.73743, 7.09821], zoom_start=12)

    # Iterate over each row in the micromobility data
    for index, row in micromobility_data.iterrows():
        # Get the start and end locations
        start_location = row['start_location']
        end_location = row['end_location']
        
        # Get the start and end stop names
        start_stop_name = row['start_stop_name']
        end_stop_name = row['end_stop_name']
        # Add markers for the start and end locations
        folium.Marker(location=[start_location.y, start_location.x], popup=start_stop_name).add_to(m)
        folium.Marker(location=[end_location.y, end_location.x], popup=end_stop_name).add_to(m)
        # Create a line connecting the start and end locations
        folium.PolyLine(locations=[[start_location.y, start_location.x], [end_location.y, end_location.x]], color='blue').add_to(m)
        
    # Display the map
    return m

In [25]:
def get_stats(data, var):
    
    df = pd.DataFrame()

    for i in var:
        df = pd.concat([df, calc_stats(data, i)], axis=1)

    df.columns = df.iloc[0]
    df = df[1:]
    
    return df

## Create Delay, No_Delay, and Nan-Delay Datasets

In [26]:
delay = data[data['departure_delay'] > pd.Timedelta(0)]


In [27]:
#no_delay = data[data['arrival_delay'] == pd.Timedelta(0)]
no_delay = data[
    (data['departure_delay'] == pd.Timedelta(0)) | 
    (data['scheduled_arrival_time'].notna() & 
     data['actual_arrival_time'].isna() & 
     data['scheduled_departure_time'].notna() & 
     data['actual_departure_time'].notna())
]

In [28]:
#nan_delay = data[data['arrival_delay'].isna()]
#nan_delay = data[data['arrival_delay'].isna()]
nan_delay = data[data['scheduled_arrival_time'].notna() & data['actual_arrival_time'].isna() & data['scheduled_departure_time'].notna() & data['actual_departure_time'].isna()]
#nan_delay = data[data['arrival_delay'].isna()]
nan_delay

Unnamed: 0,route_id,agency_id,route_short_name,route_type,route_type_name,agency_name,service_id,trip_id,trip_headsign,direction_id,...,delay_span,arrival_delay_span,weather_int,time_span_arrival,time_span_int_arrival,weekend,time_span,time_span_int,delay_category,cancelled_trip
41,551,8,551,3,Bus,RSVG Rhein-Sieg-Verkehrsgesellschaft mbH,125,255-551-008-2071.2.22:033200-34-157_C0741D60-B...,Bonn Hbf,1,...,0,0,0,0,0,1,0,0,-1,1
42,551,8,551,3,Bus,RSVG Rhein-Sieg-Verkehrsgesellschaft mbH,125,159-551-008-687.2.36:044000-33-157_FAA787CB-18...,Troisdorf Bf,0,...,0,0,0,0,0,1,0,0,-1,1
43,551,8,551,3,Bus,RSVG Rhein-Sieg-Verkehrsgesellschaft mbH,125,159-551-008-687.2.36:044000-33-157_FAA787CB-18...,Troisdorf Bf,0,...,0,0,0,0,0,1,0,0,-1,1
44,551,8,551,3,Bus,RSVG Rhein-Sieg-Verkehrsgesellschaft mbH,125,159-551-008-687.2.36:044000-33-157_FAA787CB-18...,Troisdorf Bf,0,...,0,0,0,0,0,1,0,0,-1,1
45,551,8,551,3,Bus,RSVG Rhein-Sieg-Verkehrsgesellschaft mbH,125,159-551-008-687.2.36:044000-33-157_FAA787CB-18...,Troisdorf Bf,0,...,0,0,0,0,0,1,0,0,-1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564854,689,6,N9,3,Bus,SWB Stadtwerke Bonn Verkehrs GmbH,43,6890005-689-006-687.2.34:263500-53-1_2AFE8F0F-...,Bonn Hbf,0,...,0,0,1,0,0,0,0,0,-1,1
2564856,689,6,N9,3,Bus,SWB Stadtwerke Bonn Verkehrs GmbH,43,6890005-689-006-687.2.34:263500-53-1_2AFE8F0F-...,Bonn Hbf,0,...,0,0,1,0,0,0,0,0,-1,1
2564857,689,6,N9,3,Bus,SWB Stadtwerke Bonn Verkehrs GmbH,43,6890005-689-006-687.2.34:263500-53-1_2AFE8F0F-...,Bonn Hbf,0,...,0,0,1,0,0,0,0,0,-1,1
2564858,689,6,N9,3,Bus,SWB Stadtwerke Bonn Verkehrs GmbH,43,6890005-689-006-687.2.34:263500-53-1_2AFE8F0F-...,Bonn Hbf,0,...,0,0,1,0,0,0,0,0,-1,1


### Delay Intervalls

#### 0 - 5 min

In [29]:
delay_0_5 = delay[(delay['departure_delay'] > pd.Timedelta(0, unit='m')) & (delay['departure_delay'] <= pd.Timedelta(5, unit='m'))]

In [30]:
delay_0_5.describe()

Unnamed: 0,route_id,agency_id,route_type,service_id,direction_id,shape_id,stop_id,actual_arrival_time,actual_departure_time,vrs_timestamp,...,departure_delay_float,actual_arrival_time_float,actual_departure_time_float,arrival_delay_span,weather_int,time_span_int_arrival,weekend,time_span_int,delay_category,cancelled_trip
count,941443.0,941443.0,941443.0,941443.0,941443.0,941443.0,941443.0,914715,941443,941443.0,...,941443.0,941443.0,941443.0,941443.0,941443.0,941443.0,941443.0,941443.0,941443.0,941443.0
mean,564.917622,6.004588,2.75199,2884.732623,0.511554,3612.564502,2322.587039,2023-07-16 17:58:57.146587648,2023-07-16 18:33:57.598276864,135671.949962,...,0.567516,136173.810929,140022.301674,0.0,0.168639,3.009753,0.716391,3.094147,0.636404,0.0
min,18.0,1.0,0.0,3.0,0.0,6.0,43.0,2023-07-01 01:24:50,2023-07-01 01:25:10,804.0,...,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0
25%,602.0,6.0,3.0,64.0,0.0,1918.0,1214.0,2023-07-09 06:08:45,2023-07-09 06:18:30,90305.0,...,0.25,93415.0,95745.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
50%,607.0,6.0,3.0,64.0,1.0,2717.0,1434.0,2023-07-17 12:02:15,2023-07-17 12:36:45,134305.0,...,0.5,140730.0,142115.0,0.0,0.0,3.0,0.0,3.0,1.0,0.0
75%,611.0,6.0,3.0,130.0,1.0,5633.0,1681.0,2023-07-24 12:17:15,2023-07-24 13:02:30,182304.0,...,0.75,183315.0,183930.0,0.0,0.0,5.0,1.0,5.0,1.0,0.0
max,843.0,12.0,3.0,27690.0,1.0,10396.0,9780.0,2023-08-01 03:19:00,2023-08-01 03:19:15,235807.0,...,5.0,235945.0,235950.0,0.0,2.0,8.0,3.0,8.0,1.0,0.0
std,153.561847,0.159702,0.826148,8207.249693,0.499867,2796.235457,2418.912927,,,55589.486617,...,0.44026,58158.497968,54003.438068,0.0,0.439332,2.082109,1.02504,2.043134,0.53682,0.0


In [31]:
get_stats(delay_0_5, ['tier_trips_count', 'tier_trips_end_at_station_count', 'nextbike_trips_count', 'nextbike_trips_end_at_station_count', 'current_temp', 'current_precipitation_volume'])

  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)


column,tier_trips_count,tier_trips_end_at_station_count,nextbike_trips_count,nextbike_trips_end_at_station_count,current_temp,current_precipitation_volume
rows,941443,941443,941443,941443,941443,941443
sum,12358,10086,33367,28067,20032158.55,76778.8
mean,0.013127,0.010713,0.035442,0.029813,21.278143,0.607874
median,0.0,0.0,0.0,0.0,20.8,0.36
mode,"0 0 Name: tier_trips_count, dtype: int64","0 0 Name: tier_trips_end_at_station_count, ...","0 0 Name: nextbike_trips_count, dtype: int64",0 0 Name: nextbike_trips_end_at_station_cou...,"0 21.48 Name: current_temp, dtype: float64","0 0.49 Name: current_precipitation_volume, ..."
max,6,6,8,8,35.37,3.89
min,0,0,0,0,11.13,0.11
std_dev,0.12436,0.111575,0.21291,0.192549,4.513271,0.694177
variance,0.015465,0.012449,0.045331,0.037075,20.369614,0.481882
skewness,11.355317,12.274075,7.609691,7.948524,0.711068,3.011071


In [32]:
delay_0_5_tier = check_micromobility_datasets('tier', delay_0_5)

In [33]:
get_stats(delay_0_5_tier, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,8532,8532
sum,7224420.0,14777.932122
mean,846.744023,1.73206
median,600.0,1.336277
mode,"0 600.0 Name: trip_duration_seconds, dtype:...",0 0.100174 1 0.100290 2 0...
max,6900.0,13.908704
min,300.0,0.100174
std_dev,576.358232,1.426021
variance,332188.81127,2.033537
skewness,3.86863,1.859431


In [34]:
delay_0_5_tier_station_station = delay_0_5_tier[delay_0_5_tier['end_stop_id'].notna() & delay_0_5_tier['start_stop_id'].notna()]
delay_0_5_tier_station_station

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds,start_location,end_location
3209527,782391260,2023-07-01 01:45:00,2023-07-01 01:50:00,e-scooter,0 days 00:05:00,0.670973,670.973151,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,687.0,Bonn Hbf,801621 ...,300.0,POINT (7.10180 50.73728),POINT (7.09852 50.73217)
3210269,782391943,2023-07-01 01:45:00,2023-07-01 01:50:00,e-scooter,0 days 00:05:00,0.838693,838.692953,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,687.0,Bonn Hbf,801621 ...,300.0,POINT (7.10410 50.73699),POINT (7.09815 50.73232)
3234236,782413798,2023-07-01 06:40:00,2023-07-01 06:45:00,e-scooter,0 days 00:05:00,0.844529,844.529088,1131.0,Bonn Weberstr.,801621 POLYGON ((7.157885983913106 50.7227...,687.0,Bonn Hbf,801621 ...,300.0,POINT (7.10205 50.72540),POINT (7.09679 50.73093)
3164060,782350073,2023-07-01 07:10:00,2023-07-01 07:15:00,e-scooter,0 days 00:05:00,0.941159,941.158971,1182.0,Bonn Chlodwigplatz,801621 POLYGON ((7.157885983913106 50.7227...,688.0,Bonn West,801621 ...,300.0,POINT (7.08933 50.74278),POINT (7.08230 50.73803)
3109531,782329093,2023-07-01 07:25:00,2023-07-01 07:45:00,e-scooter,0 days 00:20:00,1.863754,1863.753840,1102.0,Bonn Friedensplatz,801621 POLYGON ((7.157885983913106 50.7227...,1221.0,Bonn Kaufmannstr.,801621 ...,1200.0,POINT (7.09925 50.73753),POINT (7.08612 50.72703)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3420883,832203680,2023-07-31 23:00:00,2023-07-31 23:05:00,e-scooter,0 days 00:05:00,0.369227,369.226899,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 ...,300.0,POINT (7.10130 50.73766),POINT (7.10442 50.73650)
3318827,832120317,2023-07-31 23:10:00,2023-08-01 00:20:00,e-scooter,0 days 01:10:00,0.117849,117.848655,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 ...,4200.0,POINT (7.10346 50.73716),POINT (7.10245 50.73747)
3422531,832205256,2023-07-31 23:25:00,2023-08-01 00:00:00,e-scooter,0 days 00:35:00,1.787106,1787.106290,1153.0,Bonn Heerstr.,801621 POLYGON ((7.157885983913106 50.7227...,1190.0,Bonn Zeisigweg,801621 ...,2100.0,POINT (7.09015 50.73736),POINT (7.07489 50.74243)
3485603,832265823,2023-07-31 23:25:00,2023-08-01 00:00:00,e-scooter,0 days 00:35:00,1.774445,1774.445116,1153.0,Bonn Heerstr.,801621 POLYGON ((7.157885983913106 50.7227...,1190.0,Bonn Zeisigweg,801621 ...,2100.0,POINT (7.09011 50.73738),POINT (7.07496 50.74244)


In [35]:
get_stats(delay_0_5_tier_station_station, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,3859,3859
sum,2983560.0,5477.640594
mean,773.143301,1.419446
median,600.0,1.101235
mode,"0 600.0 Name: trip_duration_seconds, dtype:...",0 0.100174 1 0.100290 2 0.10...
max,6300.0,9.093761
min,300.0,0.100174
std_dev,504.730903,1.123655
variance,254753.28454,1.2626
skewness,4.039655,1.629763


In [36]:
delay_0_5_next = check_micromobility_datasets('next', delay_0_5)

In [37]:
get_stats(delay_0_5_next, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,22128,22128
sum,19720560.0,38344.374792
mean,891.203905,1.732844
median,900.0,1.346696
mode,"0 600.0 Name: trip_duration_seconds, dtype:...","0 0.870199 Name: trip_distance_kilometers, ..."
max,7200.0,14.143293
min,300.0,0.100057
std_dev,591.413283,1.440025
variance,349769.671803,2.073673
skewness,3.946126,2.191074


In [38]:
delay_0_5_next_station_station = delay_0_5_next[delay_0_5_next['end_stop_id'].notna() & delay_0_5_next['start_stop_id'].notna()]
delay_0_5_next_station_station

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,nextbike_trips_start_at_station,nextbike_trips_end_at_station,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_location,end_location,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds
1417189,37961351,2023-07-01 05:41:00,2023-07-01 05:56:00,False,False,bike,0 days 00:15:00,1.671334,1671.334291,"b'\x01\x01\x00\x00\x00\xd9\n\x9a\x96XY\x1c@""\x...",b'\x01\x01\x00\x00\x00\x82\x8d\xeb\xdf\xf5I\x1...,1151.0,Bonn Frankenbad/Kunstverein,215356 ...,43.0,Bonn Propsthof Nord,215356 POLYGON ((7.120193524138334 50.7395...,900.0
1220393,37717932,2023-07-01 05:56:00,2023-07-01 06:11:00,False,False,bike,0 days 00:15:00,1.377444,1377.444442,b'\x01\x01\x00\x00\x00\xcfj\x81=&R\x1c@\x7f\xa...,b'\x01\x01\x00\x00\x00\x9f\xe4\x0e\x9b\xc8L\x1...,8437.0,Bonn An Der Josefshöhe,215356 ...,1190.0,Bonn Zeisigweg,215356 POLYGON ((7.120193524138334 50.7395...,900.0
1235649,37726317,2023-07-01 06:06:00,2023-07-01 06:21:00,False,False,bike,0 days 00:15:00,2.055591,2055.591133,"b""\x01\x01\x00\x00\x00\xf0\xf8\xf6\xaeA_\x1c@\...",b'\x01\x01\x00\x00\x00B\xd1<\x80En\x1c@\x84*5{...,1182.0,Bonn Chlodwigplatz,215356 ...,685.0,Bonn Juridicum,215356 POLYGON ((7.120193524138334 50.7395...,900.0
1267255,37827523,2023-07-01 06:06:00,2023-07-01 06:16:00,False,False,bike,0 days 00:10:00,1.000358,1000.357907,b'\x01\x01\x00\x00\x00\xc0\x06D\x88+g\x1c@\xef...,b'\x01\x01\x00\x00\x00\x8f\x89\x94f\xf3h\x1c@f...,1160.0,Bonn Nordstr.,215356 ...,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,215356 POLYGON ((7.120193524138334 50.7395...,600.0
1638081,37769500,2023-07-01 06:11:00,2023-07-01 06:21:00,False,True,bike,0 days 00:10:00,0.914302,914.302426,b'\x01\x01\x00\x00\x00\xc5V\xd0\xb4\xc4Z\x1c@\...,b'\x01\x01\x00\x00\x00\x06.\x8f5#c\x1c@\x9dJ\x...,1153.0,Bonn Heerstr.,215356 ...,1102.0,Bonn Friedensplatz,215356 POLYGON ((7.120193524138334 50.7395...,600.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1333820,37861619,2023-07-31 22:16:00,2023-07-31 22:26:00,False,False,bike,0 days 00:10:00,0.864220,864.220203,b'\x01\x01\x00\x00\x00h\x06\xf1\x81\x1d_\x1c@\...,b'\x01\x01\x00\x00\x00\x1fh\x05\x86\xac^\x1c@\...,1104.0,Bonn Stadthaus,215356 ...,1175.0,Bonn Lvr-Klinik,215356 POLYGON ((7.120193524138334 50.7395...,600.0
1476595,38040111,2023-07-31 22:26:00,2023-07-31 22:41:00,False,True,bike,0 days 00:15:00,1.658928,1658.927837,b'\x01\x01\x00\x00\x00\xb1\xa3q\xa8\xdfe\x1c@r...,b'\x01\x01\x00\x00\x00\x1c`\xe6;\xf8Y\x1c@.\xe...,1143.0,Bonn Poppelsdorfer Allee,215356 ...,1240.0,Bonn Poppelsdorfer Platz,215356 POLYGON ((7.120193524138334 50.7395...,900.0
1443891,38014658,2023-07-31 22:51:00,2023-07-31 23:06:00,True,False,bike,0 days 00:15:00,2.202721,2202.720792,b'\x01\x01\x00\x00\x00\x06.\x8f5#c\x1c@\x9dJ\x...,b'\x01\x01\x00\x00\x00\x0fc\xd2\xdfKQ\x1c@~\x1...,1102.0,Bonn Friedensplatz,215356 ...,1620.0,Bonn Max-Bruch-Str.,215356 POLYGON ((7.120193524138334 50.7395...,900.0
1441769,38012536,2023-07-31 23:21:00,2023-07-31 23:41:00,False,False,bike,0 days 00:20:00,3.058475,3058.475311,b'\x01\x01\x00\x00\x00}\xca1Y\xdc\x7f\x1c@ILP\...,b'\x01\x01\x00\x00\x00\x8b\xc0X\xdf\xc0d\x1c@&...,1502.0,Bonn Beueler Bahnhofsplatz,215356 ...,687.0,Bonn Hbf,215356 POLYGON ((7.120193524138334 50.7395...,1200.0


In [39]:
get_stats(delay_0_5_next_station_station, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,10377,10377
sum,8439540.0,14854.21983
mean,813.292859,1.431456
median,600.0,1.178042
mode,"0 600.0 Name: trip_duration_seconds, dtype:...","0 0.834842 Name: trip_distance_kilometers, ..."
max,7200.0,9.640671
min,300.0,0.100057
std_dev,502.613868,1.07655
variance,252620.699981,1.15896
skewness,4.286541,1.835179


#### 5 - 10 min

In [40]:
delay_5_10 = delay[(delay['departure_delay'] > pd.Timedelta(5, unit='m')) & (delay['departure_delay'] <= pd.Timedelta(10, unit='m'))]

In [41]:
delay_5_10.describe()

Unnamed: 0,route_id,agency_id,route_type,service_id,direction_id,shape_id,stop_id,actual_arrival_time,actual_departure_time,vrs_timestamp,...,departure_delay_float,actual_arrival_time_float,actual_departure_time_float,arrival_delay_span,weather_int,time_span_int_arrival,weekend,time_span_int,delay_category,cancelled_trip
count,2454.0,2454.0,2454.0,2454.0,2454.0,2454.0,2454.0,2322,2454,2454.0,...,2454.0,2454.0,2454.0,2454.0,2454.0,2454.0,2454.0,2454.0,2454.0,2454.0
mean,610.746944,6.0,2.996333,1214.081907,0.854523,2956.93643,1465.579055,2023-07-16 02:53:29.437984512,2023-07-16 03:03:49.987774976,136350.244091,...,5.873472,126634.58639,133849.107579,0.0,0.147514,3.06357,0.458843,3.255909,0.646699,0.0
min,66.0,6.0,0.0,3.0,0.0,138.0,692.0,2023-07-01 09:39:00,2023-07-01 09:12:45,13803.0,...,5.25,0.0,1300.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0
25%,604.0,6.0,3.0,64.0,1.0,138.0,1183.0,2023-07-08 13:12:41.249999872,2023-07-08 13:57:45,100807.0,...,5.75,100715.0,104400.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0
50%,605.0,6.0,3.0,64.0,1.0,1927.0,1183.0,2023-07-15 11:22:15,2023-07-15 11:37:45,135305.0,...,5.75,135700.0,141345.0,0.0,0.0,3.0,0.0,3.0,1.0,0.0
75%,610.0,6.0,3.0,64.0,1.0,5633.0,1711.0,2023-07-22 16:09:45,2023-07-22 17:57:45,165304.0,...,6.0,165233.75,170245.0,0.0,0.0,4.0,1.0,5.0,1.0,0.0
max,690.0,6.0,3.0,26835.0,1.0,10390.0,2576.0,2023-08-01 03:05:15,2023-08-01 03:05:30,235807.0,...,10.0,231630.0,232500.0,0.0,2.0,8.0,3.0,8.0,1.0,0.0
std,25.956892,0.0,0.10485,5385.800247,0.352653,2949.167632,555.744482,,,49166.127677,...,0.474197,52942.347738,45282.139456,0.0,0.427642,1.737583,0.800363,1.640957,0.579824,0.0


In [42]:
get_stats(delay_5_10, ['tier_trips_count', 'tier_trips_end_at_station_count', 'nextbike_trips_count', 'nextbike_trips_end_at_station_count', 'current_temp', 'current_precipitation_volume'])

  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)


column,tier_trips_count,tier_trips_end_at_station_count,nextbike_trips_count,nextbike_trips_end_at_station_count,current_temp,current_precipitation_volume
rows,2454,2454,2454,2454,2454,2454
sum,22,19,52,43,54503.69,182.47
mean,0.008965,0.007742,0.02119,0.017522,22.210143,0.651679
median,0.0,0.0,0.0,0.0,21.83,0.37
mode,"0 0 Name: tier_trips_count, dtype: int64","0 0 Name: tier_trips_end_at_station_count, ...","0 0 Name: nextbike_trips_count, dtype: int64",0 0 Name: nextbike_trips_end_at_station_cou...,"0 26.41 Name: current_temp, dtype: float64","0 0.21 Name: current_precipitation_volume, ..."
max,2,2,1,1,35.37,3.89
min,0,0,0,0,11.35,0.11
std_dev,0.098507,0.092201,0.144046,0.131234,4.531153,0.685458
variance,0.009704,0.008501,0.020749,0.017222,20.531344,0.469852
skewness,11.677419,12.760758,6.653425,7.358926,0.511788,2.655764


In [43]:
delay_5_10_tier = check_micromobility_datasets('tier', delay_5_10)

In [44]:
delay_5_10_tier.describe()

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,22.0,22,22,22,22.0,22.0,18.0,11.0,22.0
mean,795278000.0,2023-07-13 23:28:51.818181632,2023-07-13 23:42:57.272727296,0 days 00:14:05.454545454,2.410794,2410.793874,1269.555556,1090.181818,845.454545
min,782280100.0,2023-07-01 13:30:00,2023-07-01 13:45:00,0 days 00:05:00,0.390051,390.050898,692.0,687.0,300.0
25%,782374200.0,2023-07-08 09:27:30,2023-07-08 09:36:15,0 days 00:10:00,0.748948,748.94828,692.0,1110.5,600.0
50%,782420100.0,2023-07-12 15:57:30,2023-07-12 16:02:30,0 days 00:10:00,1.463215,1463.215029,1183.0,1133.0,600.0
75%,808720300.0,2023-07-17 15:46:15,2023-07-17 16:03:45,0 days 00:15:00,2.520781,2520.781474,1213.25,1155.5,900.0
max,832223000.0,2023-07-31 15:20:00,2023-07-31 15:30:00,0 days 00:40:00,10.123796,10123.795853,2576.0,1504.0,2400.0
std,17122780.0,,,0 days 00:09:27.976586310,2.797647,2797.647023,684.910404,227.940702,567.976586


In [45]:
get_stats(delay_5_10_tier, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,22,22
sum,18600.0,53.037465
mean,845.454545,2.410794
median,600.0,1.463215
mode,"0 600.0 Name: trip_duration_seconds, dtype:...",0 0.390051 1 0.436452 2 0.61939...
max,2400.0,10.123796
min,300.0,0.390051
std_dev,567.976586,2.797647
variance,322597.402597,7.826829
skewness,2.000618,2.167771


In [46]:
# get entries where trips start and end at station
delay_5_10_tier_station_station = delay_5_10_tier[delay_5_10_tier['end_stop_id'].notna() & delay_5_10_tier['start_stop_id'].notna()]
delay_5_10_tier_station_station

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds,start_location,end_location
2954193,782280105,2023-07-01 13:30:00,2023-07-01 13:45:00,e-scooter,0 days 00:15:00,1.62877,1628.770239,1183.0,Bonn Nonnstr.,801621 POLYGON ((7.157885983913106 50.7227...,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 ...,900.0,POINT (7.08681 50.74210),POINT (7.10086 50.73792)
3043431,782307012,2023-07-05 16:20:00,2023-07-05 16:40:00,e-scooter,0 days 00:20:00,2.556306,2556.305953,692.0,Bonn Heussallee/Museumsmeile,801621 POLYGON ((7.157885983913106 50.7227...,1504.0,Bonn Beuel Bf,801621 ...,1200.0,POINT (7.12001 50.71695),POINT (7.12632 50.73923)
3238421,782417636,2023-07-06 07:30:00,2023-07-06 07:40:00,e-scooter,0 days 00:10:00,0.93166,931.659837,1184.0,Bonn Gerhardsplatz,801621 POLYGON ((7.157885983913106 50.7227...,1151.0,Bonn Frankenbad/Kunstverein,801621 ...,600.0,POINT (7.08185 50.74277),POINT (7.08987 50.74031)
3201643,782383392,2023-07-07 17:55:00,2023-07-07 18:15:00,e-scooter,0 days 00:20:00,3.163479,3163.479249,692.0,Bonn Heussallee/Museumsmeile,801621 POLYGON ((7.157885983913106 50.7227...,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 ...,1200.0,POINT (7.12212 50.71662),POINT (7.10285 50.73771)
2968505,782284828,2023-07-08 09:10:00,2023-07-08 09:20:00,e-scooter,0 days 00:10:00,0.628472,628.472061,1184.0,Bonn Gerhardsplatz,801621 POLYGON ((7.157885983913106 50.7227...,1151.0,Bonn Frankenbad/Kunstverein,801621 ...,600.0,POINT (7.08261 50.74321),POINT (7.08714 50.73980)
3206720,782388469,2023-07-08 10:20:00,2023-07-08 10:25:00,e-scooter,0 days 00:05:00,0.61939,619.389508,1183.0,Bonn Nonnstr.,801621 POLYGON ((7.157885983913106 50.7227...,1182.0,Bonn Chlodwigplatz,801621 ...,300.0,POINT (7.08662 50.74240),POINT (7.09212 50.74329)
3054264,782310806,2023-07-09 08:55:00,2023-07-09 09:05:00,e-scooter,0 days 00:10:00,0.436452,436.451715,1223.0,Bonn Beringstr.,801621 POLYGON ((7.157885983913106 50.7227...,687.0,Bonn Hbf,801621 ...,600.0,POINT (7.09387 50.72798),POINT (7.09537 50.73164)
3227710,782407836,2023-07-12 15:30:00,2023-07-12 15:35:00,e-scooter,0 days 00:05:00,0.739498,739.497583,1183.0,Bonn Nonnstr.,801621 POLYGON ((7.157885983913106 50.7227...,688.0,Bonn West,801621 ...,300.0,POINT (7.08665 50.74249),POINT (7.08265 50.73712)
2996518,808702668,2023-07-14 13:20:00,2023-07-14 13:30:00,e-scooter,0 days 00:10:00,1.435885,1435.88478,2576.0,Bonn An Den Markthallen,801621 POLYGON ((7.157885983913106 50.7227...,1106.0,Bonn Thomas-Mann-Str.,801621 ...,600.0,POINT (7.08386 50.73999),POINT (7.09471 50.73293)
3102489,808775258,2023-07-24 18:30:00,2023-07-24 18:40:00,e-scooter,0 days 00:10:00,1.490545,1490.545277,1183.0,Bonn Nonnstr.,801621 POLYGON ((7.157885983913106 50.7227...,1160.0,Bonn Nordstr.,801621 ...,600.0,POINT (7.08689 50.74202),POINT (7.09994 50.74511)


In [47]:
delay_5_10_tier_station_station.describe()

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,11.0,11,11,11,11.0,11.0,11.0,11.0,11.0
mean,789548400.0,2023-07-11 09:03:10.909090816,2023-07-11 09:14:32.727272704,0 days 00:11:21.818181818,1.458606,1458.60584,1179.545455,1090.181818,681.818182
min,782280100.0,2023-07-01 13:30:00,2023-07-01 13:45:00,0 days 00:05:00,0.436452,436.451715,692.0,687.0,300.0
25%,782308900.0,2023-07-07 00:42:30,2023-07-07 00:57:30,0 days 00:10:00,0.683985,683.984822,937.5,1110.5,600.0
50%,782388500.0,2023-07-08 10:20:00,2023-07-08 10:25:00,0 days 00:10:00,1.435885,1435.88478,1183.0,1133.0,600.0
75%,795560200.0,2023-07-13 14:25:00,2023-07-13 14:32:30,0 days 00:12:30,2.021489,2021.489137,1184.0,1155.5,750.0
max,808775300.0,2023-07-25 16:35:00,2023-07-25 16:45:00,0 days 00:20:00,3.163479,3163.479249,2576.0,1504.0,1200.0
std,12333060.0,,,0 days 00:05:02.714987465,0.910396,910.395557,516.252141,227.940702,302.714987


In [152]:
# get statistics for the tier trips that start and end at a station
get_stats(delay_5_10_tier_station_station, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,11,11
sum,7500.0,16.044664
mean,681.818182,1.458606
median,600.0,1.435885
mode,"0 600.0 Name: trip_duration_seconds, dtype:...",0 0.436452 1 0.619390 2 0.628472 3...
max,1200.0,3.163479
min,300.0,0.436452
std_dev,302.714987,0.910396
variance,91636.363636,0.82882
skewness,0.766869,0.713744


In [49]:
delay_5_10_next = check_micromobility_datasets('next', delay_5_10)

In [50]:
delay_5_10_next.describe()

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,52.0,52,52,52,52.0,52.0,45.0,29.0,52.0
mean,37950400.0,2023-07-17 01:29:04.615384320,2023-07-17 01:43:06.923077120,0 days 00:14:02.307692307,1.781021,1781.021339,1330.733333,1911.068966,842.307692
min,37719330.0,2023-07-01 12:11:00,2023-07-01 12:46:00,0 days 00:05:00,0.164955,164.954577,692.0,685.0,300.0
25%,37842890.0,2023-07-08 13:46:00,2023-07-08 14:07:15,0 days 00:10:00,0.902067,902.067479,1183.0,1102.0,600.0
50%,37964530.0,2023-07-18 03:43:30,2023-07-18 04:01:00,0 days 00:15:00,1.458764,1458.764211,1183.0,1161.0,900.0
75%,38088370.0,2023-07-25 08:37:15,2023-07-25 08:48:30,0 days 00:15:00,2.060162,2060.161988,1183.0,1192.0,900.0
max,38166900.0,2023-07-31 17:31:00,2023-07-31 18:06:00,0 days 00:35:00,5.762156,5762.156193,2576.0,8813.0,2100.0
std,139339.0,,,0 days 00:06:56.025147168,1.283411,1283.411326,616.333639,2384.068236,416.025147


In [51]:
get_stats(delay_5_10_next, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,52,52
sum,43800.0,92.61311
mean,842.307692,1.781021
median,900.0,1.458764
mode,0 600.0 1 900.0 Name: trip_duration_seco...,0 0.164955 1 0.175660 2 0.197400 3...
max,2100.0,5.762156
min,300.0,0.164955
std_dev,416.025147,1.283411
variance,173076.923077,1.647145
skewness,1.23074,1.574864


In [52]:
# get entries where trips start and end at station
delay_5_10_next_station_station = delay_5_10_next[delay_5_10_next['end_stop_id'].notna() & delay_5_10_next['start_stop_id'].notna()]
delay_5_10_next_station_station

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,nextbike_trips_start_at_station,nextbike_trips_end_at_station,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_location,end_location,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds
1444704,37970404,2023-07-01 12:11:00,2023-07-01 12:46:00,False,False,bike,0 days 00:35:00,0.564402,564.401609,b'\x01\x01\x00\x00\x00j\x89\x95\xd1\xc8W\x1c@\...,b'\x01\x01\x00\x00\x00\xf5\xb8o\xb5N\\\x1c@\xe...,1183.0,Bonn Nonnstr.,215356 ...,1151.0,Bonn Frankenbad/Kunstverein,215356 POLYGON ((7.120193524138334 50.7395...,2100.0
1556802,38102588,2023-07-05 07:36:00,2023-07-05 07:51:00,False,False,bike,0 days 00:15:00,2.710476,2710.476298,b'\x01\x01\x00\x00\x00\x06\xbba\xdb\xa2|\x1c@\...,b'\x01\x01\x00\x00\x00\xcf\x11\xf9.\xa5\x8e\x1...,692.0,Bonn Heussallee/Museumsmeile,215356 ...,698.0,Bonn Max-Löbner-Str./Friesdorf,215356 POLYGON ((7.120193524138334 50.7395...,900.0
1673562,37804981,2023-07-07 14:11:00,2023-07-07 14:16:00,False,False,bike,0 days 00:05:00,0.646438,646.437728,b'\x01\x01\x00\x00\x00\x1cA*\xc5\x8eV\x1c@d\xa...,b'\x01\x01\x00\x00\x00X\xc8\\\x19T[\x1c@B`\xe5...,2576.0,Bonn An Den Markthallen,215356 ...,1182.0,Bonn Chlodwigplatz,215356 POLYGON ((7.120193524138334 50.7395...,300.0
1489643,38050026,2023-07-08 09:51:00,2023-07-08 10:01:00,False,False,bike,0 days 00:10:00,1.248637,1248.636922,b'\x01\x01\x00\x00\x00N%\x03@\x15W\x1c@\x1e\xf...,b'\x01\x01\x00\x00\x00\xb3\x96\x02\xd2\xfeW\x1...,1183.0,Bonn Nonnstr.,215356 ...,1178.0,Bonn Heinrich-Hertz-Europakolleg,215356 POLYGON ((7.120193524138334 50.7395...,600.0
1361527,37934413,2023-07-08 10:31:00,2023-07-08 10:46:00,False,False,bike,0 days 00:15:00,2.020271,2020.271427,b'\x01\x01\x00\x00\x00;\xa9/K;U\x1c@_F\xb1\xdc...,b'\x01\x01\x00\x00\x00\x1b\x85$\xb3zg\x1c@\xca...,1183.0,Bonn Nonnstr.,215356 ...,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,215356 POLYGON ((7.120193524138334 50.7395...,900.0
1406318,37991178,2023-07-08 12:51:00,2023-07-08 13:06:00,False,False,bike,0 days 00:15:00,1.828186,1828.18603,b'\x01\x01\x00\x00\x00\x87\xfb\xc8\xadIW\x1c@%...,b'\x01\x01\x00\x00\x00\xd9\xb4R\x08\xe4b\x1c@\...,1183.0,Bonn Nonnstr.,215356 ...,1172.0,Bonn Bataverweg,215356 POLYGON ((7.120193524138334 50.7395...,900.0
1540470,38110598,2023-07-08 13:31:00,2023-07-08 13:41:00,False,False,bike,0 days 00:10:00,0.624678,624.677559,b'\x01\x01\x00\x00\x00\xec4\xd2Ry[\x1c@J\xec\x...,b'\x01\x01\x00\x00\x00N+\x85@.a\x1c@h?RD\x86]I@',1145.0,Bonn Haydnstr.,215356 ...,1142.0,Bonn Colmantstr./Hbf,215356 POLYGON ((7.120193524138334 50.7395...,600.0
1310464,37849272,2023-07-08 13:51:00,2023-07-08 14:16:00,False,False,bike,0 days 00:25:00,0.164955,164.954577,b'\x01\x01\x00\x00\x00\xc0]\xf6\xebNW\x1c@e\xc...,b'\x01\x01\x00\x00\x00\xeb\xe0`obX\x1c@\xbf\x8...,1183.0,Bonn Nonnstr.,215356 ...,1151.0,Bonn Frankenbad/Kunstverein,215356 POLYGON ((7.120193524138334 50.7395...,1500.0
1594630,38134425,2023-07-10 11:31:00,2023-07-10 11:46:00,False,False,bike,0 days 00:15:00,2.179834,2179.833668,b'\x01\x01\x00\x00\x00r\xa6\t\xdbOV\x1c@\xe6\\...,b'\x01\x01\x00\x00\x00\x1c\xb5\xc2\xf4\xbdV\x1...,1183.0,Bonn Nonnstr.,215356 ...,8813.0,Bonn Kranenweg,215356 POLYGON ((7.120193524138334 50.7395...,900.0
1360597,37933483,2023-07-10 13:41:00,2023-07-10 13:51:00,False,False,bike,0 days 00:10:00,1.455996,1455.995649,b'\x01\x01\x00\x00\x00\xcelW\xe8\x83U\x1c@\xd1...,b'\x01\x01\x00\x00\x00{\x13Cr2a\x1c@C\xe3\x89 ...,2576.0,Bonn An Den Markthallen,215356 ...,687.0,Bonn Hbf,215356 POLYGON ((7.120193524138334 50.7395...,600.0


In [53]:
delay_5_10_next_station_station.describe()

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,29.0,29,29,29,29.0,29.0,29.0,29.0,29.0
mean,37972180.0,2023-07-15 09:28:45.517241600,2023-07-15 09:42:12.413793024,0 days 00:13:26.896551724,1.566029,1566.028995,1436.034483,1911.068966,806.896552
min,37719330.0,2023-07-01 12:11:00,2023-07-01 12:46:00,0 days 00:05:00,0.164955,164.954577,692.0,685.0,300.0
25%,37824950.0,2023-07-08 13:51:00,2023-07-08 14:16:00,0 days 00:10:00,0.649046,649.045864,1183.0,1102.0,600.0
50%,37973060.0,2023-07-12 18:11:00,2023-07-12 18:26:00,0 days 00:15:00,1.455996,1455.995649,1183.0,1161.0,900.0
75%,38110600.0,2023-07-19 19:01:00,2023-07-19 19:06:00,0 days 00:15:00,1.937655,1937.654927,1183.0,1192.0,900.0
max,38166320.0,2023-07-31 15:51:00,2023-07-31 16:11:00,0 days 00:35:00,4.592004,4592.003743,2576.0,8813.0,2100.0
std,145537.0,,,0 days 00:06:25.385738562,1.022162,1022.161581,605.614592,2384.068236,385.385739


In [54]:
# get statistics for the tier trips that start and end at a station
get_stats(delay_5_10_next_station_station, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,29,29
sum,23400.0,45.414841
mean,806.896552,1.566029
median,900.0,1.455996
mode,0 600.0 1 900.0 Name: trip_duration_seco...,0 0.164955 1 0.175660 2 0.197400 3...
max,2100.0,4.592004
min,300.0,0.164955
std_dev,385.385739,1.022162
variance,148522.167488,1.044814
skewness,1.391253,1.067101


#### 10 - 15 min

In [55]:
delay_10_15 = delay[(delay['departure_delay'] > pd.Timedelta(10, unit='m')) & (delay['departure_delay'] <= pd.Timedelta(15, unit='m'))]

In [56]:
delay_10_15.describe()

Unnamed: 0,route_id,agency_id,route_type,service_id,direction_id,shape_id,stop_id,actual_arrival_time,actual_departure_time,vrs_timestamp,...,departure_delay_float,actual_arrival_time_float,actual_departure_time_float,arrival_delay_span,weather_int,time_span_int_arrival,weekend,time_span_int,delay_category,cancelled_trip
count,762.0,762.0,762.0,762.0,762.0,762.0,762.0,732,762,762.0,...,762.0,762.0,762.0,762.0,762.0,762.0,762.0,762.0,762.0,762.0
mean,642.12336,6.0,2.984252,78.496063,0.006562,2453.249344,6384.675853,2023-07-17 08:40:41.857923328,2023-07-17 06:36:14.639107584,144540.066929,...,12.719051,127707.296588,132346.870079,0.0,0.161417,2.981627,1.053806,3.095801,0.92126,0.0
min,63.0,6.0,0.0,3.0,0.0,1812.0,371.0,2023-07-01 05:51:00,2023-07-01 05:51:15,13803.0,...,10.5,0.0,11000.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0
25%,639.0,6.0,3.0,43.0,0.0,1812.0,1684.0,2023-07-09 07:51:56.249999872,2023-07-09 06:06:15,84930.5,...,12.25,65215.0,75115.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
50%,639.0,6.0,3.0,64.0,0.0,1812.0,8629.0,2023-07-17 23:21:37.500000,2023-07-17 21:13:45,151306.0,...,12.25,130600.0,133572.5,0.0,0.0,3.0,0.0,3.0,1.0,0.0
75%,639.0,6.0,3.0,130.0,0.0,1812.0,8629.0,2023-07-25 02:11:22.500000,2023-07-25 02:11:07.500000,205804.0,...,14.0,195016.25,195100.0,0.0,0.0,5.0,2.0,5.0,1.0,0.0
max,690.0,6.0,3.0,194.0,1.0,10390.0,8815.0,2023-08-01 02:11:45,2023-08-01 02:12:00,235807.0,...,14.5,235215.0,235230.0,0.0,2.0,8.0,3.0,8.0,1.0,0.0
std,45.265524,0.0,0.216928,57.225796,0.080791,2218.31987,3299.779339,,,65682.285104,...,1.18435,73209.521656,69284.757773,0.0,0.418284,2.594278,1.18399,2.552988,0.389203,0.0


In [57]:
get_stats(delay_10_15, ['tier_trips_count', 'tier_trips_end_at_station_count', 'nextbike_trips_count', 'nextbike_trips_end_at_station_count', 'current_temp', 'current_precipitation_volume'])

  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  cvr = data_range / mean
  cvm = mean / mean
  cvmed = median / mean
  cv = std_dev / mean # standard deviation divided by the mean; measures the relative variability of a dataset; allows comparison of variability of datasets with different units of measurement
  qd = iqr / (percentile_75 + percentile_25)
  cvcv = std_dev / mean
  cvqd = iqr / (percentile_75 + percentile_25)
  cs = skewness / std_dev
  ck = kurtosis / std_dev
  cvs = skewness / std_dev
  cvk = kurtosis / std_dev
  cvcs = skewness / std_dev
  cvck = kurtosis / std_dev


column,tier_trips_count,tier_trips_end_at_station_count,nextbike_trips_count,nextbike_trips_end_at_station_count,current_temp,current_precipitation_volume
rows,762,762,762,762,762,762
sum,3,3,1,0,15697.93,53.21
mean,0.003937,0.003937,0.001312,0.0,20.600958,0.521667
median,0.0,0.0,0.0,0.0,20.015,0.34
mode,"0 0 Name: tier_trips_count, dtype: int64","0 0 Name: tier_trips_end_at_station_count, ...","0 0 Name: nextbike_trips_count, dtype: int64",0 0 Name: nextbike_trips_end_at_station_cou...,"0 16.98 Name: current_temp, dtype: float64","0 0.24 Name: current_precipitation_volume, ..."
max,1,1,1,0,35.37,3.46
min,0,0,0,0,11.35,0.11
std_dev,0.062663,0.062663,0.036226,0.0,4.710215,0.517261
variance,0.003927,0.003927,0.001312,0.0,22.186127,0.267559
skewness,15.87437,15.87437,27.604347,0.0,0.804294,3.555893


In [58]:
delay_10_15_tier = check_micromobility_datasets('tier', delay_10_15)

In [59]:
delay_10_15_tier.describe()

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,3.0,3,3,3,3.0,3.0,0.0,2.0,3.0
mean,816520900.0,2023-07-21 17:10:00,2023-07-21 17:28:20,0 days 00:18:20,1.018496,1018.495953,,431.0,1100.0
min,808624200.0,2023-07-14 08:55:00,2023-07-14 09:30:00,0 days 00:10:00,0.630204,630.204221,,161.0,600.0
25%,808706800.0,2023-07-17 22:25:00,2023-07-17 22:47:30,0 days 00:10:00,0.845753,845.753169,,296.0,600.0
50%,808789300.0,2023-07-21 11:55:00,2023-07-21 12:05:00,0 days 00:10:00,1.061302,1061.302117,,431.0,600.0
75%,820469300.0,2023-07-25 09:17:30,2023-07-25 09:27:30,0 days 00:22:30,1.212642,1212.641819,,566.0,1350.0
max,832149300.0,2023-07-29 06:40:00,2023-07-29 06:50:00,0 days 00:35:00,1.363982,1363.981521,,701.0,2100.0
std,13534790.0,,,0 days 00:14:26.025403784,0.368757,368.756772,,381.837662,866.025404


In [153]:
get_stats(delay_10_15_tier, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,3,3
sum,3300.0,3.055488
mean,1100.0,1.018496
median,600.0,1.061302
mode,"0 600.0 Name: trip_duration_seconds, dtype:...",0 0.630204 1 1.061302 2 1.363982 Name...
max,2100.0,1.363982
min,600.0,0.630204
std_dev,866.025404,0.368757
variance,750000.0,0.135982
skewness,1.732051,-0.515332


In [61]:
# get entries where trips start and end at station
delay_10_15_tier_station_station = delay_10_15_tier[delay_10_15_tier['end_stop_id'].notna() & delay_10_15_tier['start_stop_id'].notna()]
delay_10_15_tier_station_station

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds,start_location,end_location


In [62]:
delay_10_15_tier_station_station.describe()

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0
mean,,NaT,NaT,NaT,,,,,
min,,NaT,NaT,NaT,,,,,
25%,,NaT,NaT,NaT,,,,,
50%,,NaT,NaT,NaT,,,,,
75%,,NaT,NaT,NaT,,,,,
max,,NaT,NaT,NaT,,,,,
std,,,,NaT,,,,,


In [63]:
# get statistics for the tier trips that start and end at a station
get_stats(delay_10_15_tier_station_station, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,0,0
sum,0.0,0.0
mean,,
median,,
mode,"Series([], Name: trip_duration_seconds, dtype:...","Series([], Name: trip_distance_kilometers, dty..."
max,,
min,,
std_dev,,
variance,,
skewness,,


In [64]:
delay_10_15_next = check_micromobility_datasets('next', delay_10_15)

In [65]:
delay_10_15_next.describe()

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,1.0,1,1,1,1.0,1.0,0.0,0.0,1.0
mean,37814106.0,2023-07-06 07:56:00,2023-07-06 08:01:00,0 days 00:05:00,0.15428,154.280229,,,300.0
min,37814106.0,2023-07-06 07:56:00,2023-07-06 08:01:00,0 days 00:05:00,0.15428,154.280229,,,300.0
25%,37814106.0,2023-07-06 07:56:00,2023-07-06 08:01:00,0 days 00:05:00,0.15428,154.280229,,,300.0
50%,37814106.0,2023-07-06 07:56:00,2023-07-06 08:01:00,0 days 00:05:00,0.15428,154.280229,,,300.0
75%,37814106.0,2023-07-06 07:56:00,2023-07-06 08:01:00,0 days 00:05:00,0.15428,154.280229,,,300.0
max,37814106.0,2023-07-06 07:56:00,2023-07-06 08:01:00,0 days 00:05:00,0.15428,154.280229,,,300.0
std,,,,NaT,,,,,


In [66]:
get_stats(delay_10_15_next, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,1,1
sum,300.0,0.15428
mean,300.0,0.15428
median,300.0,0.15428
mode,"0 300.0 Name: trip_duration_seconds, dtype:...","0 0.15428 Name: trip_distance_kilometers, d..."
max,300.0,0.15428
min,300.0,0.15428
std_dev,,
variance,,
skewness,,


In [67]:
# get entries where trips start and end at station
delay_10_15_next_station_station = delay_10_15_next[delay_10_15_next['end_stop_id'].notna() & delay_10_15_next['start_stop_id'].notna()]
delay_10_15_next_station_station

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,nextbike_trips_start_at_station,nextbike_trips_end_at_station,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_location,end_location,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds


In [68]:
delay_10_15_next_station_station.describe()

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0
mean,,NaT,NaT,NaT,,,,,
min,,NaT,NaT,NaT,,,,,
25%,,NaT,NaT,NaT,,,,,
50%,,NaT,NaT,NaT,,,,,
75%,,NaT,NaT,NaT,,,,,
max,,NaT,NaT,NaT,,,,,
std,,,,NaT,,,,,


In [69]:
# get statistics for the tier trips that start and end at a station
get_stats(delay_10_15_next_station_station, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,0,0
sum,0.0,0.0
mean,,
median,,
mode,"Series([], Name: trip_duration_seconds, dtype:...","Series([], Name: trip_distance_kilometers, dty..."
max,,
min,,
std_dev,,
variance,,
skewness,,


#### 15 - 20 min

In [70]:
delay_15_20 = delay[(delay['departure_delay'] > pd.Timedelta(15, unit='m')) & (delay['departure_delay'] <= pd.Timedelta(20, unit='m'))]

In [155]:
get_stats(delay_15_20, ['tier_trips_count', 'tier_trips_end_at_station_count', 'nextbike_trips_count', 'nextbike_trips_end_at_station_count', 'current_temp', 'current_precipitation_volume'])

  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)


column,tier_trips_count,tier_trips_end_at_station_count,nextbike_trips_count,nextbike_trips_end_at_station_count,current_temp,current_precipitation_volume
rows,1520,1520,1520,1520,1520,1520
sum,27,22,18,10,32061.25,118.49
mean,0.017763,0.014474,0.011842,0.006579,21.092928,0.553692
median,0.0,0.0,0.0,0.0,20.5,0.34
mode,"0 0 Name: tier_trips_count, dtype: int64","0 0 Name: tier_trips_end_at_station_count, ...","0 0 Name: nextbike_trips_count, dtype: int64",0 0 Name: nextbike_trips_end_at_station_cou...,"0 32.3 Name: current_temp, dtype: float64","0 0.24 Name: current_precipitation_volume, ..."
max,2,2,2,2,35.37,3.46
min,0,0,0,0,11.35,0.13
std_dev,0.137025,0.124861,0.119762,0.088638,4.856947,0.583974
variance,0.018776,0.01559,0.014343,0.007857,23.589936,0.341026
skewness,8.063746,9.132069,11.214788,14.922007,0.78043,3.291896


In [71]:
delay_15_20.describe()

Unnamed: 0,route_id,agency_id,route_type,service_id,direction_id,shape_id,stop_id,actual_arrival_time,actual_departure_time,vrs_timestamp,...,departure_delay_float,actual_arrival_time_float,actual_departure_time_float,arrival_delay_span,weather_int,time_span_int_arrival,weekend,time_span_int,delay_category,cancelled_trip
count,1520.0,1520.0,1520.0,1520.0,1520.0,1520.0,1520.0,1416,1520,1520.0,...,1520.0,1520.0,1520.0,1520.0,1520.0,1520.0,1520.0,1520.0,1520.0,1520.0
mean,642.667763,6.0,2.996053,73.898684,0.101974,2734.119079,1916.111842,2023-07-17 01:01:13.975988480,2023-07-16 22:04:45.101974016,143248.289474,...,17.807621,126319.440789,134867.286184,0.0,0.171711,2.933553,0.945395,3.128289,0.863158,0.0
min,63.0,6.0,0.0,3.0,0.0,1812.0,690.0,2023-07-01 05:53:00,2023-07-01 05:53:15,13803.0,...,15.666667,0.0,11300.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0
25%,639.0,6.0,3.0,64.0,0.0,1812.0,1604.0,2023-07-09 08:54:37.500000,2023-07-08 23:54:11.249999872,91305.0,...,16.25,73730.0,85315.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
50%,639.0,6.0,3.0,64.0,0.0,1812.0,1683.0,2023-07-17 20:38:00,2023-07-17 16:53:15,144304.0,...,17.75,133730.0,140745.0,0.0,0.0,3.0,0.0,3.0,1.0,0.0
75%,639.0,6.0,3.0,130.0,0.0,1927.0,1684.0,2023-07-24 16:46:00,2023-07-24 15:16:15,200304.0,...,18.75,185445.0,190845.0,0.0,0.0,5.0,2.0,5.0,1.0,0.0
max,690.0,6.0,3.0,194.0,1.0,10390.0,9477.0,2023-08-01 02:14:30,2023-08-01 02:14:45,235807.0,...,20.0,235445.0,235500.0,0.0,2.0,8.0,3.0,8.0,1.0,0.0
std,25.525049,0.0,0.108786,48.789456,0.302713,1797.317784,1366.642087,,,62072.09176,...,1.41611,71537.21483,64282.011369,0.0,0.429479,2.427933,1.142992,2.354661,0.5051,0.0


In [72]:
delay_15_20_tier = check_micromobility_datasets('tier', delay_15_20)

In [73]:
delay_15_20_tier.describe()

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,27.0,27,27,27,27.0,27.0,4.0,4.0,27.0
mean,805141200.0,2023-07-18 16:45:44.444444160,2023-07-18 17:01:28.888888832,0 days 00:15:44.444444444,1.527425,1527.424936,161.0,348.5,944.444444
min,782272800.0,2023-07-06 16:55:00,2023-07-06 17:05:00,0 days 00:05:00,0.131124,131.124011,161.0,161.0,300.0
25%,782453400.0,2023-07-10 01:35:00,2023-07-10 01:47:30,0 days 00:10:00,0.860767,860.766899,161.0,161.0,600.0
50%,808645700.0,2023-07-21 14:55:00,2023-07-21 15:10:00,0 days 00:10:00,1.402521,1402.521024,161.0,266.0,600.0
75%,808769900.0,2023-07-25 06:37:30,2023-07-25 06:47:30,0 days 00:17:30,2.223972,2223.97226,161.0,453.5,1050.0
max,832279700.0,2023-07-30 09:40:00,2023-07-30 10:00:00,0 days 01:35:00,3.126989,3126.989456,161.0,701.0,5700.0
std,18802730.0,,,0 days 00:16:51.219116989,0.873214,873.214056,0.0,255.0,1011.219117


In [74]:
get_stats(delay_15_20_tier, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,27,27
sum,25500.0,41.240473
mean,944.444444,1.527425
median,600.0,1.402521
mode,"0 600.0 Name: trip_duration_seconds, dtype:...",0 0.131124 1 0.157112 2 0.445286 3...
max,5700.0,3.126989
min,300.0,0.131124
std_dev,1011.219117,0.873214
variance,1022564.102564,0.762503
skewness,4.274083,0.201332


In [75]:
# get entries where trips start and end at station
delay_15_20_tier_station_station = delay_15_20_tier[delay_15_20_tier['end_stop_id'].notna() & delay_15_20_tier['start_stop_id'].notna()]
delay_15_20_tier_station_station

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds,start_location,end_location


In [76]:
delay_15_20_tier_station_station.describe()

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0
mean,,NaT,NaT,NaT,,,,,
min,,NaT,NaT,NaT,,,,,
25%,,NaT,NaT,NaT,,,,,
50%,,NaT,NaT,NaT,,,,,
75%,,NaT,NaT,NaT,,,,,
max,,NaT,NaT,NaT,,,,,
std,,,,NaT,,,,,


In [77]:
# get statistics for the tier trips that start and end at a station
get_stats(delay_15_20_tier_station_station, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,0,0
sum,0.0,0.0
mean,,
median,,
mode,"Series([], Name: trip_duration_seconds, dtype:...","Series([], Name: trip_distance_kilometers, dty..."
max,,
min,,
std_dev,,
variance,,
skewness,,


In [78]:
delay_15_20_next = check_micromobility_datasets('next', delay_15_20)

In [79]:
delay_15_20_next.describe()

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,18.0,18,18,18,18.0,18.0,3.0,3.0,18.0
mean,37955450.0,2023-07-16 11:42:56.666666752,2023-07-16 11:57:23.333333504,0 days 00:14:26.666666666,1.719562,1719.562186,161.0,231.0,866.666667
min,37778860.0,2023-07-06 12:21:00,2023-07-06 12:31:00,0 days 00:05:00,0.161265,161.264705,161.0,161.0,300.0
25%,37848550.0,2023-07-09 23:24:45,2023-07-09 23:38:30,0 days 00:10:00,0.6676,667.60022,161.0,161.0,600.0
50%,37938440.0,2023-07-16 14:21:00,2023-07-16 14:31:00,0 days 00:15:00,1.683806,1683.805822,161.0,161.0,900.0
75%,38090360.0,2023-07-21 04:58:30,2023-07-21 05:21:00,0 days 00:15:00,2.389014,2389.014245,161.0,266.0,900.0
max,38165730.0,2023-07-28 10:21:00,2023-07-28 10:41:00,0 days 00:40:00,5.170561,5170.560811,161.0,371.0,2400.0
std,135247.3,,,0 days 00:08:01.419469779,1.288825,1288.824586,0.0,121.243557,481.41947


In [80]:
get_stats(delay_15_20_next, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,18,18
sum,15600.0,30.952119
mean,866.666667,1.719562
median,900.0,1.683806
mode,"0 900.0 Name: trip_duration_seconds, dtype:...",0 0.161265 1 0.264935 2 0.413614 3...
max,2400.0,5.170561
min,300.0,0.161265
std_dev,481.41947,1.288825
variance,231764.705882,1.661069
skewness,2.027633,1.080226


In [81]:
# get entries where trips start and end at station
delay_15_20_next_station_station = delay_15_20_next[delay_15_20_next['end_stop_id'].notna() & delay_15_20_next['start_stop_id'].notna()]
delay_15_20_next_station_station

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,nextbike_trips_start_at_station,nextbike_trips_end_at_station,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_location,end_location,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds


In [82]:
delay_15_20_next_station_station.describe()

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0
mean,,NaT,NaT,NaT,,,,,
min,,NaT,NaT,NaT,,,,,
25%,,NaT,NaT,NaT,,,,,
50%,,NaT,NaT,NaT,,,,,
75%,,NaT,NaT,NaT,,,,,
max,,NaT,NaT,NaT,,,,,
std,,,,NaT,,,,,


In [83]:
# get statistics for the tier trips that start and end at a station
get_stats(delay_15_20_next_station_station, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,0,0
sum,0.0,0.0
mean,,
median,,
mode,"Series([], Name: trip_duration_seconds, dtype:...","Series([], Name: trip_distance_kilometers, dty..."
max,,
min,,
std_dev,,
variance,,
skewness,,


In [84]:
get_stats(delay_15_20, ['tier_trips_count', 'tier_trips_end_at_station_count', 'nextbike_trips_count', 'nextbike_trips_end_at_station_count', 'current_temp', 'current_precipitation_volume'])

  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)


column,tier_trips_count,tier_trips_end_at_station_count,nextbike_trips_count,nextbike_trips_end_at_station_count,current_temp,current_precipitation_volume
rows,1520,1520,1520,1520,1520,1520
sum,27,22,18,10,32061.25,118.49
mean,0.017763,0.014474,0.011842,0.006579,21.092928,0.553692
median,0.0,0.0,0.0,0.0,20.5,0.34
mode,"0 0 Name: tier_trips_count, dtype: int64","0 0 Name: tier_trips_end_at_station_count, ...","0 0 Name: nextbike_trips_count, dtype: int64",0 0 Name: nextbike_trips_end_at_station_cou...,"0 32.3 Name: current_temp, dtype: float64","0 0.24 Name: current_precipitation_volume, ..."
max,2,2,2,2,35.37,3.46
min,0,0,0,0,11.35,0.13
std_dev,0.137025,0.124861,0.119762,0.088638,4.856947,0.583974
variance,0.018776,0.01559,0.014343,0.007857,23.589936,0.341026
skewness,8.063746,9.132069,11.214788,14.922007,0.78043,3.291896


#### 20 - 30 min

In [85]:
delay_20_30 = delay[(delay['departure_delay'] > pd.Timedelta(20, unit='m')) & (delay['departure_delay'] <= pd.Timedelta(30, unit='m'))]

In [86]:
delay_20_30.describe()

Unnamed: 0,route_id,agency_id,route_type,service_id,direction_id,shape_id,stop_id,actual_arrival_time,actual_departure_time,vrs_timestamp,...,departure_delay_float,actual_arrival_time_float,actual_departure_time_float,arrival_delay_span,weather_int,time_span_int_arrival,weekend,time_span_int,delay_category,cancelled_trip
count,1602.0,1602.0,1602.0,1602.0,1602.0,1602.0,1602.0,1518,1602,1602.0,...,1602.0,1602.0,1602.0,1602.0,1602.0,1602.0,1602.0,1602.0,1602.0,1602.0
mean,635.348315,6.0,2.925094,747.900749,0.111735,3314.405119,3055.265293,2023-07-16 23:19:38.557312256,2023-07-16 23:28:21.891386112,148341.237203,...,23.225655,108716.360799,115416.972534,0.0,0.15231,2.914482,0.589263,3.070537,0.895131,0.0
min,66.0,6.0,0.0,3.0,0.0,1812.0,699.0,2023-07-01 02:13:45,2023-07-01 02:14:00,2304.0,...,20.25,0.0,10900.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0
25%,639.0,6.0,3.0,64.0,0.0,1812.0,1681.0,2023-07-09 01:31:00,2023-07-09 02:15:18.750000128,93303.0,...,22.0,31645.0,64100.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
50%,639.0,6.0,3.0,64.0,0.0,1812.0,1681.0,2023-07-17 14:11:15,2023-07-17 12:40:37.500000,151304.0,...,22.0,114045.0,121057.5,0.0,0.0,3.0,0.0,3.0,1.0,0.0
75%,639.0,6.0,3.0,64.0,0.0,5412.0,1687.0,2023-07-24 23:40:45,2023-07-25 01:15:30,220309.0,...,24.0,171000.0,171100.0,0.0,0.0,5.0,1.0,5.0,1.0,0.0
max,689.0,6.0,3.0,26835.0,1.0,8540.0,9706.0,2023-08-01 03:14:30,2023-08-01 03:14:45,235807.0,...,30.0,235545.0,235600.0,0.0,2.0,8.0,3.0,8.0,1.0,0.0
std,93.367246,0.0,0.468236,4176.1759,0.315139,2051.108931,3007.306145,,,65259.882443,...,2.837142,68868.807797,65589.666641,0.0,0.411299,2.459831,0.949616,2.41107,0.445942,0.0


In [87]:
get_stats(delay_20_30, ['tier_trips_count', 'tier_trips_end_at_station_count', 'nextbike_trips_count', 'nextbike_trips_end_at_station_count', 'current_temp', 'current_precipitation_volume'])

  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)


column,tier_trips_count,tier_trips_end_at_station_count,nextbike_trips_count,nextbike_trips_end_at_station_count,current_temp,current_precipitation_volume
rows,1602,1602,1602,1602,1602,1602
sum,8,7,20,13,33298.98,112.42
mean,0.004994,0.00437,0.012484,0.008115,20.78588,0.5621
median,0.0,0.0,0.0,0.0,20.43,0.36
mode,"0 0 Name: tier_trips_count, dtype: int64","0 0 Name: tier_trips_end_at_station_count, ...","0 0 Name: nextbike_trips_count, dtype: int64",0 0 Name: nextbike_trips_end_at_station_cou...,"0 21.84 Name: current_temp, dtype: float64","0 0.49 Name: current_precipitation_volume, ..."
max,1,1,2,2,35.37,3.46
min,0,0,0,0,11.35,0.11
std_dev,0.070512,0.065979,0.121798,0.096453,4.707146,0.574049
variance,0.004972,0.004353,0.014835,0.009303,22.157227,0.329532
skewness,14.057917,15.042779,10.767065,12.988661,0.692083,3.407679


In [88]:
delay_20_30_tier = check_micromobility_datasets('tier', delay_20_30)

In [89]:
delay_20_30_tier.describe()

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,7.0,7,7,7,7.0,7.0,3.0,1.0,7.0
mean,789895600.0,2023-07-10 06:47:08.571428608,2023-07-10 07:00:00,0 days 00:12:51.428571428,1.354909,1354.908579,607.333333,1500.0,771.428571
min,782270000.0,2023-07-05 20:25:00,2023-07-05 20:40:00,0 days 00:10:00,0.279503,279.50332,161.0,1500.0,600.0
25%,782342900.0,2023-07-07 03:17:30,2023-07-07 03:30:00,0 days 00:10:00,0.852838,852.838103,161.0,1500.0,600.0
50%,782445300.0,2023-07-08 01:55:00,2023-07-08 02:05:00,0 days 00:15:00,1.367302,1367.30227,161.0,1500.0,900.0
75%,795546400.0,2023-07-12 14:50:00,2023-07-12 15:05:00,0 days 00:15:00,1.916233,1916.232973,830.5,1500.0,900.0
max,808775600.0,2023-07-18 12:55:00,2023-07-18 13:05:00,0 days 00:15:00,2.299412,2299.412309,1500.0,1500.0,900.0
std,12852880.0,,,0 days 00:02:40.356745147,0.739804,739.803715,773.07201,,160.356745


In [90]:
get_stats(delay_20_30_tier, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,7,7
sum,5400.0,9.48436
mean,771.428571,1.354909
median,900.0,1.367302
mode,"0 900.0 Name: trip_duration_seconds, dtype:...",0 0.279503 1 0.628692 2 1.076985 3 ...
max,900.0,2.299412
min,600.0,0.279503
std_dev,160.356745,0.739804
variance,25714.285714,0.54731
skewness,-0.374166,-0.260086


In [91]:
# get entries where trips start and end at station
delay_20_30_tier_station_station = delay_20_30_tier[delay_20_30_tier['end_stop_id'].notna() & delay_20_30_tier['start_stop_id'].notna()]
delay_20_30_tier_station_station

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds,start_location,end_location
3268899,782445298,2023-07-06 11:45:00,2023-07-06 12:00:00,e-scooter,0 days 00:15:00,0.279503,279.50332,1500.0,Bonn Konrad-Adenauer-Platz,801621 POLYGON ((7.157885983913106 50.7227...,1500.0,Bonn Konrad-Adenauer-Platz,801621 ...,900.0,POINT (7.11722 50.73951),POINT (7.11960 50.74032)


In [92]:
delay_20_30_tier_station_station.describe()

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,1.0,1,1,1,1.0,1.0,1.0,1.0,1.0
mean,782445298.0,2023-07-06 11:45:00,2023-07-06 12:00:00,0 days 00:15:00,0.279503,279.50332,1500.0,1500.0,900.0
min,782445298.0,2023-07-06 11:45:00,2023-07-06 12:00:00,0 days 00:15:00,0.279503,279.50332,1500.0,1500.0,900.0
25%,782445298.0,2023-07-06 11:45:00,2023-07-06 12:00:00,0 days 00:15:00,0.279503,279.50332,1500.0,1500.0,900.0
50%,782445298.0,2023-07-06 11:45:00,2023-07-06 12:00:00,0 days 00:15:00,0.279503,279.50332,1500.0,1500.0,900.0
75%,782445298.0,2023-07-06 11:45:00,2023-07-06 12:00:00,0 days 00:15:00,0.279503,279.50332,1500.0,1500.0,900.0
max,782445298.0,2023-07-06 11:45:00,2023-07-06 12:00:00,0 days 00:15:00,0.279503,279.50332,1500.0,1500.0,900.0
std,,,,NaT,,,,,


In [93]:
# get statistics for the tier trips that start and end at a station
get_stats(delay_20_30_tier_station_station, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,1,1
sum,900.0,0.279503
mean,900.0,0.279503
median,900.0,0.279503
mode,"0 900.0 Name: trip_duration_seconds, dtype:...","0 0.279503 Name: trip_distance_kilometers, ..."
max,900.0,0.279503
min,900.0,0.279503
std_dev,,
variance,,
skewness,,


In [94]:
delay_20_30_next = check_micromobility_datasets('next', delay_20_30)

In [95]:
delay_20_30_next.describe()

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,20.0,20,20,20,20.0,20.0,20.0,3.0,20.0
mean,37954960.0,2023-07-14 23:21:15,2023-07-14 23:35:00.000000512,0 days 00:13:45,1.591278,1591.277943,227.95,837.0,825.0
min,37754430.0,2023-07-03 14:51:00,2023-07-03 14:56:00,0 days 00:05:00,0.376674,376.67378,161.0,699.0,300.0
25%,37839900.0,2023-07-07 15:21:00,2023-07-07 15:43:30,0 days 00:10:00,0.836017,836.017068,161.0,700.0,600.0
50%,37926480.0,2023-07-14 14:51:00,2023-07-14 15:06:00,0 days 00:15:00,1.252239,1252.23909,161.0,701.0,900.0
75%,38104130.0,2023-07-20 05:21:00,2023-07-20 05:34:45,0 days 00:20:00,2.315183,2315.183214,161.0,906.0,1200.0
max,38165090.0,2023-07-31 15:21:00,2023-07-31 15:36:00,0 days 00:25:00,4.141524,4141.523718,1500.0,1111.0,1500.0
std,141095.8,,,0 days 00:05:49.247310956,1.119358,1119.35754,299.409502,237.293068,349.247311


In [96]:
get_stats(delay_20_30_next, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,20,20
sum,16500.0,31.825559
mean,825.0,1.591278
median,900.0,1.252239
mode,"0 600.0 Name: trip_duration_seconds, dtype:...","0 0.376674 Name: trip_distance_kilometers, ..."
max,1500.0,4.141524
min,300.0,0.376674
std_dev,349.247311,1.119358
variance,121973.684211,1.252961
skewness,0.097297,1.085103


In [97]:
# get entries where trips start and end at station
delay_20_30_next_station_station = delay_20_30_next[delay_20_30_next['end_stop_id'].notna() & delay_20_30_next['start_stop_id'].notna()]
delay_20_30_next_station_station

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,nextbike_trips_start_at_station,nextbike_trips_end_at_station,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_location,end_location,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds
1371422,37914606,2023-07-06 10:26:00,2023-07-06 10:36:00,False,False,bike,0 days 00:10:00,1.368433,1368.432896,b'\x01\x01\x00\x00\x00l\xb4\x1c\xe8\xa1v\x1c@f...,b'\x01\x01\x00\x00\x00\x10\x94\xdb\xf6=j\x1c@:...,1500.0,Bonn Konrad-Adenauer-Platz,215356 ...,1111.0,Bonn Beethovenhalle Und Swb,215356 POLYGON ((7.120193524138334 50.7395...,600.0
1541292,38090207,2023-07-14 11:51:00,2023-07-14 12:11:00,True,True,bike,0 days 00:20:00,2.48111,2481.110474,b'\x01\x01\x00\x00\x00\xfb\\m\xc5\xfe\xa2\x1c@...,b'\x01\x01\x00\x00\x00\xaea\x86\xc6\x13\x91\x1...,161.0,Bonn Bad Godesberg Bf,215356 ...,699.0,Bonn Hochkreuz/Deutsches Museum Bonn,215356 POLYGON ((7.120193524138334 50.7395...,1200.0
1586283,38128552,2023-07-21 14:51:00,2023-07-21 15:01:00,True,False,bike,0 days 00:10:00,0.882866,882.866185,b'\x01\x01\x00\x00\x00\xfb\\m\xc5\xfe\xa2\x1c@...,b'\x01\x01\x00\x00\x00\xf0M\xd3g\x07\x9c\x1c@\...,161.0,Bonn Bad Godesberg Bf,215356 ...,701.0,Bonn Plittersdorfer Str.,215356 POLYGON ((7.120193524138334 50.7395...,600.0


In [98]:
delay_20_30_next_station_station.describe()

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,3.0,3,3,3,3.0,3.0,3.0,3.0,3.0
mean,38044460.0,2023-07-14 04:22:40,2023-07-14 04:36:00,0 days 00:13:20,1.57747,1577.469852,607.333333,837.0,800.0
min,37914610.0,2023-07-06 10:26:00,2023-07-06 10:36:00,0 days 00:10:00,0.882866,882.866185,161.0,699.0,600.0
25%,38002410.0,2023-07-10 11:08:30,2023-07-10 11:23:30,0 days 00:10:00,1.12565,1125.649541,161.0,700.0,600.0
50%,38090210.0,2023-07-14 11:51:00,2023-07-14 12:11:00,0 days 00:10:00,1.368433,1368.432896,161.0,701.0,600.0
75%,38109380.0,2023-07-18 01:21:00,2023-07-18 01:36:00,0 days 00:15:00,1.924772,1924.771685,830.5,906.0,900.0
max,38128550.0,2023-07-21 14:51:00,2023-07-21 15:01:00,0 days 00:20:00,2.48111,2481.110474,1500.0,1111.0,1200.0
std,114075.2,,,0 days 00:05:46.410161513,0.819371,819.370819,773.07201,237.293068,346.410162


In [99]:
# get statistics for the tier trips that start and end at a station
get_stats(delay_20_30_next_station_station, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,3,3
sum,2400.0,4.73241
mean,800.0,1.57747
median,600.0,1.368433
mode,"0 600.0 Name: trip_duration_seconds, dtype:...",0 0.882866 1 1.368433 2 2.481110 Name...
max,1200.0,2.48111
min,600.0,0.882866
std_dev,346.410162,0.819371
variance,120000.0,0.671369
skewness,1.732051,1.073314


#### 30 - 45 min

In [100]:
delay_30_45 = delay[(delay['departure_delay'] > pd.Timedelta(30, unit='m')) & (delay['departure_delay'] <= pd.Timedelta(45, unit='m'))]

In [101]:
delay_30_45.describe()

Unnamed: 0,route_id,agency_id,route_type,service_id,direction_id,shape_id,stop_id,actual_arrival_time,actual_departure_time,vrs_timestamp,...,departure_delay_float,actual_arrival_time_float,actual_departure_time_float,arrival_delay_span,weather_int,time_span_int_arrival,weekend,time_span_int,delay_category,cancelled_trip
count,1501.0,1501.0,1501.0,1501.0,1501.0,1501.0,1501.0,1478,1501,1501.0,...,1501.0,1501.0,1501.0,1501.0,1501.0,1501.0,1501.0,1501.0,1501.0,1501.0
mean,684.626915,6.0,2.996003,181.862092,0.0,5246.967355,1572.786809,2023-07-17 09:34:37.405277440,2023-07-17 10:49:08.337774848,201183.283811,...,37.503387,22686.479014,22947.458361,0.0,0.097268,2.11992,1.183877,2.13058,0.968688,0.0
min,66.0,6.0,0.0,43.0,0.0,1927.0,690.0,2023-07-01 02:15:45,2023-07-01 02:16:00,1805.0,...,30.25,0.0,11300.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0
25%,684.0,6.0,3.0,43.0,0.0,2734.0,1104.0,2023-07-09 02:21:30,2023-07-09 02:23:00,232304.0,...,34.25,12030.0,12045.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
50%,685.0,6.0,3.0,179.0,0.0,4731.0,1204.0,2023-07-18 02:17:45,2023-07-18 02:18:45,233808.0,...,36.5,21830.0,21845.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
75%,687.0,6.0,3.0,194.0,0.0,7803.0,1500.0,2023-07-25 02:19:26.249999872,2023-07-25 02:21:30,234804.0,...,41.25,31530.0,31600.0,0.0,0.0,8.0,2.0,8.0,1.0,0.0
max,690.0,6.0,3.0,26835.0,0.0,10390.0,8832.0,2023-08-01 03:15:45,2023-08-01 03:16:00,235807.0,...,45.0,123640.0,131850.0,0.0,2.0,8.0,3.0,8.0,1.0,0.0
std,22.716031,0.0,0.109472,975.931136,0.0,2506.233667,1737.966434,,,75580.765274,...,4.079171,12269.623029,12079.503251,0.0,0.324344,3.528967,1.238615,3.534629,0.24702,0.0


In [102]:
get_stats(delay_30_45, ['tier_trips_count', 'tier_trips_end_at_station_count', 'nextbike_trips_count', 'nextbike_trips_end_at_station_count', 'current_temp', 'current_precipitation_volume'])

  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)


column,tier_trips_count,tier_trips_end_at_station_count,nextbike_trips_count,nextbike_trips_end_at_station_count,current_temp,current_precipitation_volume
rows,1501,1501,1501,1501,1501,1501
sum,29,27,10,9,25881.05,68.95
mean,0.01932,0.017988,0.006662,0.005996,17.242538,0.518421
median,0.0,0.0,0.0,0.0,16.94,0.37
mode,"0 0 Name: tier_trips_count, dtype: int64","0 0 Name: tier_trips_end_at_station_count, ...","0 0 Name: nextbike_trips_count, dtype: int64",0 0 Name: nextbike_trips_end_at_station_cou...,"0 16.98 Name: current_temp, dtype: float64","0 0.95 Name: current_precipitation_volume, ..."
max,2,2,2,2,22.6,0.95
min,0,0,0,0,11.35,0.12
std_dev,0.164194,0.160238,0.089194,0.085425,2.783455,0.308692
variance,0.02696,0.025676,0.007956,0.007297,7.74762,0.095291
skewness,9.447148,9.883264,14.827389,15.851948,-0.037717,0.327082


In [103]:
delay_30_45_tier = check_micromobility_datasets('tier', delay_30_45)

In [104]:
delay_30_45_tier.describe()

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,20.0,20,20,20,20.0,20.0,19.0,16.0,20.0
mean,800227300.0,2023-07-15 08:01:45.000000256,2023-07-15 08:18:00,0 days 00:16:15,1.895352,1895.352117,1102.473684,987.6875,975.0
min,782283800.0,2023-07-01 01:45:00,2023-07-01 01:50:00,0 days 00:05:00,0.28803,288.030156,691.0,686.0,300.0
25%,782378300.0,2023-07-07 12:58:45,2023-07-07 13:16:15,0 days 00:10:00,0.820637,820.63717,1115.0,687.0,600.0
50%,795522100.0,2023-07-13 14:12:30,2023-07-13 14:27:30,0 days 00:15:00,1.503045,1503.044986,1115.0,1106.5,900.0
75%,808738400.0,2023-07-23 15:15:00,2023-07-23 15:28:45,0 days 00:20:00,2.308457,2308.457461,1139.0,1186.75,1200.0
max,832263100.0,2023-08-01 01:45:00,2023-08-01 02:05:00,0 days 00:55:00,5.401119,5401.1192,1620.0,1500.0,3300.0
std,20155150.0,,,0 days 00:11:34.243245707,1.529288,1529.287734,266.253089,290.680058,694.243246


In [105]:
get_stats(delay_30_45_tier, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,20,20
sum,19500.0,37.907042
mean,975.0,1.895352
median,900.0,1.503045
mode,0 600.0 1 1200.0 Name: trip_duration_se...,0 0.288030 1 0.308480 2 0.561271 3...
max,3300.0,5.401119
min,300.0,0.28803
std_dev,694.243246,1.529288
variance,481973.684211,2.338721
skewness,2.068609,1.17532


In [106]:
# get entries where trips start and end at station
delay_30_45_tier_station_station = delay_30_45_tier[delay_30_45_tier['end_stop_id'].notna() & delay_30_45_tier['start_stop_id'].notna()]
delay_30_45_tier_station_station

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds,start_location,end_location
3209527,782391260,2023-07-01 01:45:00,2023-07-01 01:50:00,e-scooter,0 days 00:05:00,0.670973,670.973151,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,687.0,Bonn Hbf,801621 ...,300.0,POINT (7.10180 50.73728),POINT (7.09852 50.73217)
3210269,782391943,2023-07-01 01:45:00,2023-07-01 01:50:00,e-scooter,0 days 00:05:00,0.838693,838.692953,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,687.0,Bonn Hbf,801621 ...,300.0,POINT (7.10410 50.73699),POINT (7.09815 50.73232)
2965768,782283933,2023-07-02 00:40:00,2023-07-02 01:10:00,e-scooter,0 days 00:30:00,0.887267,887.266893,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,1102.0,Bonn Friedensplatz,801621 ...,1800.0,POINT (7.10378 50.73714),POINT (7.09584 50.73637)
3238195,782417418,2023-07-02 03:45:00,2023-07-02 03:55:00,e-scooter,0 days 00:10:00,1.411129,1411.128592,699.0,Bonn Hochkreuz/Deutsches Museum Bonn,801621 POLYGON ((7.157885983913106 50.7227...,690.0,Bonn Olof-Palme-Allee,801621 ...,600.0,POINT (7.14178 50.69691),POINT (7.13241 50.70554)
3219403,782400255,2023-07-06 01:40:00,2023-07-06 01:50:00,e-scooter,0 days 00:10:00,2.059067,2059.066655,1620.0,Bonn Max-Bruch-Str.,801621 POLYGON ((7.157885983913106 50.7227...,687.0,Bonn Hbf,801621 ...,600.0,POINT (7.07754 50.72564),POINT (7.09504 50.73173)
3120146,782332692,2023-07-09 02:45:00,2023-07-09 02:50:00,e-scooter,0 days 00:05:00,1.941132,1941.131761,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,1500.0,Bonn Konrad-Adenauer-Platz,801621 ...,300.0,POINT (7.10134 50.73730),POINT (7.11867 50.73941)
3141621,782340035,2023-07-09 02:45:00,2023-07-09 03:05:00,e-scooter,0 days 00:20:00,0.852029,852.028961,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,687.0,Bonn Hbf,801621 ...,1200.0,POINT (7.10310 50.73743),POINT (7.09712 50.73259)
3209349,782391086,2023-07-12 00:45:00,2023-07-12 01:00:00,e-scooter,0 days 00:15:00,2.17465,2174.65037,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,1172.0,Bonn Bataverweg,801621 ...,900.0,POINT (7.10301 50.73752),POINT (7.09320 50.75457)
2969523,808684481,2023-07-15 03:40:00,2023-07-15 03:55:00,e-scooter,0 days 00:15:00,0.561271,561.270947,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,686.0,Bonn Universität/Markt,801621 ...,900.0,POINT (7.10351 50.73732),POINT (7.10052 50.73323)
2941852,808665648,2023-07-22 00:45:00,2023-07-22 00:55:00,e-scooter,0 days 00:10:00,0.91348,913.480425,1240.0,Bonn Poppelsdorfer Platz,801621 POLYGON ((7.157885983913106 50.7227...,1231.0,Bonn Sebastianstr.,801621 ...,600.0,POINT (7.08768 50.72224),POINT (7.07946 50.72229)


In [107]:
delay_30_45_tier_station_station.describe()

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,16.0,16,16,16,16.0,16.0,16.0,16.0,16.0
mean,799936500.0,2023-07-15 08:10:00,2023-07-15 08:23:45,0 days 00:13:45,1.534437,1534.437064,1100.6875,987.6875,825.0
min,782283900.0,2023-07-01 01:45:00,2023-07-01 01:50:00,0 days 00:05:00,0.28803,288.030156,692.0,686.0,300.0
25%,782391200.0,2023-07-05 02:11:15,2023-07-05 02:21:15,0 days 00:08:45,0.796763,796.763002,1115.0,687.0,525.0
50%,795522100.0,2023-07-13 14:12:30,2023-07-13 14:27:30,0 days 00:12:30,1.162305,1162.304509,1115.0,1106.5,750.0
75%,808704600.0,2023-07-23 15:15:00,2023-07-23 15:28:45,0 days 00:20:00,2.060241,2060.241352,1119.5,1186.75,1200.0
max,832263100.0,2023-08-01 01:45:00,2023-08-01 02:05:00,0 days 00:30:00,4.917155,4917.15521,1620.0,1500.0,1800.0
std,19960580.0,,,0 days 00:07:38.257569495,1.195833,1195.8327,251.241642,290.680058,458.257569


In [108]:
# get statistics for the tier trips that start and end at a station
get_stats(delay_30_45_tier_station_station, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,16,16
sum,13200.0,24.550993
mean,825.0,1.534437
median,750.0,1.162305
mode,0 300.0 1 600.0 Name: trip_duration_seco...,0 0.288030 1 0.308480 2 0.561271 3...
max,1800.0,4.917155
min,300.0,0.28803
std_dev,458.257569,1.195833
variance,210000.0,1.430016
skewness,0.609229,1.623208


In [109]:
delay_30_45_next = check_micromobility_datasets('next', delay_30_45)

In [110]:
delay_30_45_next.describe()

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,8.0,8,8,8,8.0,8.0,8.0,4.0,8.0
mean,37970160.0,2023-07-12 13:58:30,2023-07-12 14:12:15,0 days 00:13:45,2.227567,2227.567331,1011.5,2916.75,825.0
min,37804040.0,2023-07-04 00:46:00,2023-07-04 00:56:00,0 days 00:05:00,0.329491,329.491408,692.0,1102.0,300.0
25%,37877770.0,2023-07-07 12:42:15,2023-07-07 12:52:15,0 days 00:10:00,1.120273,1120.27298,696.5,1216.75,600.0
50%,38000600.0,2023-07-10 12:41:00,2023-07-10 12:53:30,0 days 00:12:30,1.805026,1805.025821,1115.0,1420.0,750.0
75%,38045550.0,2023-07-17 03:12:15,2023-07-17 03:31:00,0 days 00:20:00,3.06748,3067.48002,1140.0,3120.0,1200.0
max,38131790.0,2023-07-23 04:46:00,2023-07-23 05:06:00,0 days 00:20:00,4.714229,4714.228791,1500.0,7725.0,1200.0
std,117171.9,,,0 days 00:05:49.489423506,1.667007,1667.006928,291.675945,3211.830047,349.489424


In [111]:
get_stats(delay_30_45_next, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,8,8
sum,6600.0,17.820539
mean,825.0,2.227567
median,750.0,1.805026
mode,0 600.0 1 1200.0 Name: trip_duration_se...,0 0.329491 1 0.714223 2 1.255623 3 ...
max,1200.0,4.714229
min,300.0,0.329491
std_dev,349.489424,1.667007
variance,122142.857143,2.778912
skewness,-0.090357,0.754758


In [112]:
# get entries where trips start and end at station
delay_30_45_next_station_station = delay_30_45_next[delay_30_45_next['end_stop_id'].notna() & delay_30_45_next['start_stop_id'].notna()]
delay_30_45_next_station_station

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,nextbike_trips_start_at_station,nextbike_trips_end_at_station,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_location,end_location,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds
1405738,37990598,2023-07-06 00:46:00,2023-07-06 00:56:00,True,False,bike,0 days 00:10:00,2.5365,2536.499647,b'\x01\x01\x00\x00\x00/1\x96\xe9\x97x\x1c@\x86...,b'\x01\x01\x00\x00\x00\x01\xa46qr\x8f\x1c@\x90...,1500.0,Bonn Konrad-Adenauer-Platz,215356 ...,7725.0,Bonn Maria-Montessori-Allee,215356 POLYGON ((7.120193524138334 50.7395...,600.0
1590889,38131786,2023-07-08 00:41:00,2023-07-08 01:01:00,False,False,bike,0 days 00:20:00,4.660421,4660.42114,b'\x01\x01\x00\x00\x00[\xb2*\xc2Mf\x1c@\x98\x8...,b'\x01\x01\x00\x00\x00O\x04q\x1eN\x90\x1c@\x9b...,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,215356 ...,1585.0,Bonn Küdinghoven,215356 POLYGON ((7.120193524138334 50.7395...,1200.0
1672625,37804044,2023-07-13 00:41:00,2023-07-13 00:46:00,False,False,bike,0 days 00:05:00,0.329491,329.491408,b'\x01\x01\x00\x00\x00\xa2\r\xc0\x06Dh\x1c@\x1...,b'\x01\x01\x00\x00\x00\xceT\x88G\xe2e\x1c@c\x9...,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,215356 ...,1102.0,Bonn Friedensplatz,215356 POLYGON ((7.120193524138334 50.7395...,300.0
1261047,37825085,2023-07-23 04:46:00,2023-07-23 05:06:00,False,False,bike,0 days 00:20:00,1.978261,1978.261273,b'\x01\x01\x00\x00\x00\x9a\x96X\x19\x8d|\x1c@\...,b'\x01\x01\x00\x00\x00N\xed\x0cS[j\x1c@\xab\x9...,692.0,Bonn Heussallee/Museumsmeile,215356 ...,1255.0,Bonn Graf-Stauffenberg-Str.,215356 POLYGON ((7.120193524138334 50.7395...,1200.0


In [113]:
delay_30_45_next_station_station.describe()

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,4.0,4,4,4,4.0,4.0,4.0,4.0,4.0
mean,37937880.0,2023-07-12 13:43:30,2023-07-12 13:57:15,0 days 00:13:45,2.376168,2376.168367,1105.5,2916.75,825.0
min,37804040.0,2023-07-06 00:46:00,2023-07-06 00:56:00,0 days 00:05:00,0.329491,329.491408,692.0,1102.0,300.0
25%,37819820.0,2023-07-07 12:42:15,2023-07-07 12:59:45,0 days 00:08:45,1.566069,1566.068807,1009.25,1216.75,525.0
50%,37907840.0,2023-07-10 12:41:00,2023-07-10 12:53:30,0 days 00:15:00,2.25738,2257.38046,1115.0,1420.0,900.0
75%,38025900.0,2023-07-15 13:42:15,2023-07-15 13:51:00,0 days 00:20:00,3.06748,3067.48002,1211.25,3120.0,1200.0
max,38131790.0,2023-07-23 04:46:00,2023-07-23 05:06:00,0 days 00:20:00,4.660421,4660.42114,1500.0,7725.0,1200.0
std,153854.4,,,0 days 00:07:30,1.787991,1787.990817,330.046966,3211.830047,450.0


In [114]:
# get statistics for the tier trips that start and end at a station
get_stats(delay_30_45_next_station_station, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,4,4
sum,3300.0,9.504673
mean,825.0,2.376168
median,900.0,2.25738
mode,"0 1200.0 Name: trip_duration_seconds, dtype...",0 0.329491 1 1.978261 2 2.536500 3 ...
max,1200.0,4.660421
min,300.0,0.329491
std_dev,450.0,1.787991
variance,202500.0,3.196911
skewness,-0.37037,0.383321


In [115]:
check_micromobility_datasets('tier', delay_30_45)

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds,start_location,end_location
3209527,782391260,2023-07-01 01:45:00,2023-07-01 01:50:00,e-scooter,0 days 00:05:00,0.670973,670.973151,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,687.0,Bonn Hbf,801621 ...,300.0,POINT (7.10180 50.73728),POINT (7.09852 50.73217)
3210269,782391943,2023-07-01 01:45:00,2023-07-01 01:50:00,e-scooter,0 days 00:05:00,0.838693,838.692953,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,687.0,Bonn Hbf,801621 ...,300.0,POINT (7.10410 50.73699),POINT (7.09815 50.73232)
2965768,782283933,2023-07-02 00:40:00,2023-07-02 01:10:00,e-scooter,0 days 00:30:00,0.887267,887.266893,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,1102.0,Bonn Friedensplatz,801621 ...,1800.0,POINT (7.10378 50.73714),POINT (7.09584 50.73637)
3238195,782417418,2023-07-02 03:45:00,2023-07-02 03:55:00,e-scooter,0 days 00:10:00,1.411129,1411.128592,699.0,Bonn Hochkreuz/Deutsches Museum Bonn,801621 POLYGON ((7.157885983913106 50.7227...,690.0,Bonn Olof-Palme-Allee,801621 ...,600.0,POINT (7.14178 50.69691),POINT (7.13241 50.70554)
3219403,782400255,2023-07-06 01:40:00,2023-07-06 01:50:00,e-scooter,0 days 00:10:00,2.059067,2059.066655,1620.0,Bonn Max-Bruch-Str.,801621 POLYGON ((7.157885983913106 50.7227...,687.0,Bonn Hbf,801621 ...,600.0,POINT (7.07754 50.72564),POINT (7.09504 50.73173)
3032696,782303719,2023-07-08 00:45:00,2023-07-08 01:05:00,e-scooter,0 days 00:20:00,5.401119,5401.1192,1500.0,Bonn Konrad-Adenauer-Platz,801621 POLYGON ((7.157885983913106 50.7227...,,,801621 ...,1200.0,POINT (7.11791 50.73942),POINT (7.07044 50.72906)
2965658,782283823,2023-07-09 00:45:00,2023-07-09 01:40:00,e-scooter,0 days 00:55:00,2.709879,2709.878736,,,801621 POLYGON ((7.157885983913106 50.7227...,,,801621 ...,3300.0,POINT (7.05985 50.73586),POINT (7.04343 50.71771)
3120146,782332692,2023-07-09 02:45:00,2023-07-09 02:50:00,e-scooter,0 days 00:05:00,1.941132,1941.131761,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,1500.0,Bonn Konrad-Adenauer-Platz,801621 ...,300.0,POINT (7.10134 50.73730),POINT (7.11867 50.73941)
3141621,782340035,2023-07-09 02:45:00,2023-07-09 03:05:00,e-scooter,0 days 00:20:00,0.852029,852.028961,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,687.0,Bonn Hbf,801621 ...,1200.0,POINT (7.10310 50.73743),POINT (7.09712 50.73259)
3209349,782391086,2023-07-12 00:45:00,2023-07-12 01:00:00,e-scooter,0 days 00:15:00,2.17465,2174.65037,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,801621 POLYGON ((7.157885983913106 50.7227...,1172.0,Bonn Bataverweg,801621 ...,900.0,POINT (7.10301 50.73752),POINT (7.09320 50.75457)


In [116]:

check_micromobility_datasets('next', delay_30_45)

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,nextbike_trips_start_at_station,nextbike_trips_end_at_station,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_location,end_location,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds
1508396,38064629,2023-07-04 00:46:00,2023-07-04 00:56:00,False,False,bike,0 days 00:10:00,1.63179,1631.790369,b'\x01\x01\x00\x00\x00\x01\x85z\xfa\x08\x8c\x1...,b'\x01\x01\x00\x00\x00*\x90\xd9Y\xf4~\x1c@\xdf...,698.0,Bonn Max-Löbner-Str./Friesdorf,215356 ...,,,215356 POLYGON ((7.120193524138334 50.7395...,600.0
1405738,37990598,2023-07-06 00:46:00,2023-07-06 00:56:00,True,False,bike,0 days 00:10:00,2.5365,2536.499647,b'\x01\x01\x00\x00\x00/1\x96\xe9\x97x\x1c@\x86...,b'\x01\x01\x00\x00\x00\x01\xa46qr\x8f\x1c@\x90...,1500.0,Bonn Konrad-Adenauer-Platz,215356 ...,7725.0,Bonn Maria-Montessori-Allee,215356 POLYGON ((7.120193524138334 50.7395...,600.0
1475674,38039190,2023-07-08 00:41:00,2023-07-08 00:51:00,False,True,bike,0 days 00:10:00,0.714223,714.223073,b'\x01\x01\x00\x00\x004\xbb\xee\xadH\\\x1c@K\x...,b'\x01\x01\x00\x00\x00\xdfnI\x0e\xd8U\x1c@75\x...,1140.0,Bonn Mozartstr.,215356 ...,,,215356 POLYGON ((7.120193524138334 50.7395...,600.0
1590889,38131786,2023-07-08 00:41:00,2023-07-08 01:01:00,False,False,bike,0 days 00:20:00,4.660421,4660.42114,b'\x01\x01\x00\x00\x00[\xb2*\xc2Mf\x1c@\x98\x8...,b'\x01\x01\x00\x00\x00O\x04q\x1eN\x90\x1c@\x9b...,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,215356 ...,1585.0,Bonn Küdinghoven,215356 POLYGON ((7.120193524138334 50.7395...,1200.0
1672625,37804044,2023-07-13 00:41:00,2023-07-13 00:46:00,False,False,bike,0 days 00:05:00,0.329491,329.491408,b'\x01\x01\x00\x00\x00\xa2\r\xc0\x06Dh\x1c@\x1...,b'\x01\x01\x00\x00\x00\xceT\x88G\xe2e\x1c@c\x9...,1115.0,Bonn Bertha-Von-Suttner-Pl./Beethovenhaus,215356 ...,1102.0,Bonn Friedensplatz,215356 POLYGON ((7.120193524138334 50.7395...,300.0
1335534,37895338,2023-07-15 02:41:00,2023-07-15 03:01:00,False,False,bike,0 days 00:20:00,4.714229,4714.228791,b'\x01\x01\x00\x00\x00)v4\x0e\xf5[\x1c@4\xbcY\...,b'\x01\x01\x00\x00\x00\x7f\xdc~\xf9d5\x1c@G\x1...,1140.0,Bonn Mozartstr.,215356 ...,,,215356 POLYGON ((7.120193524138334 50.7395...,1200.0
1261047,37825085,2023-07-23 04:46:00,2023-07-23 05:06:00,False,False,bike,0 days 00:20:00,1.978261,1978.261273,b'\x01\x01\x00\x00\x00\x9a\x96X\x19\x8d|\x1c@\...,b'\x01\x01\x00\x00\x00N\xed\x0cS[j\x1c@\xab\x9...,692.0,Bonn Heussallee/Museumsmeile,215356 ...,1255.0,Bonn Graf-Stauffenberg-Str.,215356 POLYGON ((7.120193524138334 50.7395...,1200.0
1438785,38010595,2023-07-23 04:46:00,2023-07-23 05:01:00,False,False,bike,0 days 00:15:00,1.255623,1255.622949,b'\x01\x01\x00\x00\x00\x0c>\xcd\xc9\x8b|\x1c@\...,"b'\x01\x01\x00\x00\x00,I\x9e\xeb\xfbp\x1c@d\x9...",692.0,Bonn Heussallee/Museumsmeile,215356 ...,,,215356 POLYGON ((7.120193524138334 50.7395...,900.0


#### 45 - 60 min

In [117]:
delay_45_60 = delay[(delay['departure_delay'] > pd.Timedelta(45, unit='m')) & (delay['departure_delay'] <= pd.Timedelta(60, unit='m'))]

In [118]:
delay_45_60.describe()

Unnamed: 0,route_id,agency_id,route_type,service_id,direction_id,shape_id,stop_id,actual_arrival_time,actual_departure_time,vrs_timestamp,...,departure_delay_float,actual_arrival_time_float,actual_departure_time_float,arrival_delay_span,weather_int,time_span_int_arrival,weekend,time_span_int,delay_category,cancelled_trip
count,213.0,213.0,213.0,213.0,213.0,213.0,213.0,212,213,213.0,...,213.0,213.0,213.0,213.0,213.0,213.0,213.0,213.0,213.0,213.0
mean,688.28169,6.0,3.0,157.924883,0.0,9624.13615,1208.596244,2023-07-17 13:54:04.245283072,2023-07-17 13:16:55.704225024,226475.13615,...,48.665493,20595.962441,20675.422535,0.0,0.093897,1.164319,0.892019,1.164319,0.99061,0.0
min,681.0,6.0,3.0,43.0,0.0,5352.0,1106.0,2023-07-03 01:25:15,2023-07-03 01:25:15,22304.0,...,45.5,0.0,12230.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0
25%,690.0,6.0,3.0,179.0,0.0,10390.0,1123.0,2023-07-10 01:24:30,2023-07-10 01:25:15,232805.0,...,47.25,12545.0,12600.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
50%,690.0,6.0,3.0,179.0,0.0,10390.0,1124.0,2023-07-18 01:23:52.500000,2023-07-18 01:22:45,234305.0,...,49.0,22515.0,22515.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
75%,690.0,6.0,3.0,179.0,0.0,10390.0,1459.0,2023-07-24 08:11:00,2023-07-24 02:27:30,234804.0,...,51.5,22715.0,22730.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0
max,690.0,6.0,3.0,194.0,0.0,10390.0,1459.0,2023-08-01 02:27:15,2023-08-01 02:27:30,235807.0,...,51.5,52715.0,52730.0,0.0,1.0,8.0,3.0,8.0,1.0,0.0
std,3.394709,0.0,0.0,53.585006,0.0,1514.086939,148.265336,,,36254.932662,...,2.097819,8743.822571,8646.606288,0.0,0.292372,2.827801,1.190601,2.827801,0.137038,0.0


In [119]:
get_stats(delay_45_60, ['tier_trips_count', 'tier_trips_end_at_station_count', 'nextbike_trips_count', 'nextbike_trips_end_at_station_count', 'current_temp', 'current_precipitation_volume'])

  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)
  qd = iqr / (percentile_75 + percentile_25)
  cvqd = iqr / (percentile_75 + percentile_25)


column,tier_trips_count,tier_trips_end_at_station_count,nextbike_trips_count,nextbike_trips_end_at_station_count,current_temp,current_precipitation_volume
rows,213,213,213,213,213,213
sum,2,1,4,3,3656.58,10.15
mean,0.00939,0.004695,0.018779,0.014085,17.167042,0.5075
median,0.0,0.0,0.0,0.0,16.98,0.355
mode,"0 0 Name: tier_trips_count, dtype: int64","0 0 Name: tier_trips_end_at_station_count, ...","0 0 Name: nextbike_trips_count, dtype: int64",0 0 Name: nextbike_trips_end_at_station_cou...,"0 16.98 Name: current_temp, dtype: float64","0 0.95 Name: current_precipitation_volume, ..."
max,1,1,1,1,22.29,0.95
min,0,0,0,0,11.35,0.13
std_dev,0.096672,0.068519,0.136065,0.118117,2.725546,0.333022
variance,0.009345,0.004695,0.018514,0.013952,7.428598,0.110904
skewness,10.246259,14.59452,7.140457,8.305683,0.001428,0.523456


In [120]:
delay_45_60_tier = check_micromobility_datasets('tier', delay_45_60)

In [121]:
delay_45_60_tier.describe()

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,2.0,2,2,2,2.0,2.0,0.0,1.0,2.0
mean,795622200.0,2023-07-18 00:40:00,2023-07-18 00:45:00,0 days 00:05:00,0.48349,483.489633,,684.0,300.0
min,782455800.0,2023-07-11 00:40:00,2023-07-11 00:45:00,0 days 00:05:00,0.30958,309.580257,,684.0,300.0
25%,789039000.0,2023-07-14 12:40:00,2023-07-14 12:45:00,0 days 00:05:00,0.396535,396.534945,,684.0,300.0
50%,795622200.0,2023-07-18 00:40:00,2023-07-18 00:45:00,0 days 00:05:00,0.48349,483.489633,,684.0,300.0
75%,802205300.0,2023-07-21 12:40:00,2023-07-21 12:45:00,0 days 00:05:00,0.570444,570.444321,,684.0,300.0
max,808788500.0,2023-07-25 00:40:00,2023-07-25 00:45:00,0 days 00:05:00,0.657399,657.399009,,684.0,300.0
std,18620010.0,,,0 days 00:00:00,0.245945,245.944998,,,0.0


In [156]:
#get_stats(delay_45_60_tier, ['trip_duration_seconds', 'trip_distance_kilometers'])

ZeroDivisionError: float division by zero

In [123]:
# get entries where trips start and end at station
delay_45_60_tier_station_station = delay_45_60_tier[delay_45_60_tier['end_stop_id'].notna() & delay_45_60_tier['start_stop_id'].notna()]
delay_45_60_tier_station_station

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds,start_location,end_location


In [124]:
delay_45_60_tier_station_station.describe()

Unnamed: 0,tier_trips_id,tier_trips_start_time,tier_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0
mean,,NaT,NaT,NaT,,,,,
min,,NaT,NaT,NaT,,,,,
25%,,NaT,NaT,NaT,,,,,
50%,,NaT,NaT,NaT,,,,,
75%,,NaT,NaT,NaT,,,,,
max,,NaT,NaT,NaT,,,,,
std,,,,NaT,,,,,


In [125]:
# get statistics for the tier trips that start and end at a station
get_stats(delay_45_60_tier_station_station, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,0,0
sum,0.0,0.0
mean,,
median,,
mode,"Series([], Name: trip_duration_seconds, dtype:...","Series([], Name: trip_distance_kilometers, dty..."
max,,
min,,
std_dev,,
variance,,
skewness,,


In [126]:
delay_45_60_next = check_micromobility_datasets('next', delay_45_60)

In [127]:
delay_45_60_next.describe()

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,4.0,4,4,4,4.0,4.0,2.0,3.0,4.0
mean,37975000.0,2023-07-21 01:09:45,2023-07-21 01:24:45,0 days 00:15:00,1.718408,1718.4075,1124.5,1081.0,900.0
min,37727560.0,2023-07-10 00:41:00,2023-07-10 01:01:00,0 days 00:10:00,0.198349,198.348805,1106.0,687.0,600.0
25%,37905590.0,2023-07-18 07:26:00,2023-07-18 07:46:00,0 days 00:10:00,0.449883,449.882531,1115.25,918.5,600.0
50%,38019740.0,2023-07-22 13:41:00,2023-07-22 13:56:00,0 days 00:15:00,1.529495,1529.494845,1124.5,1150.0,900.0
75%,38089150.0,2023-07-25 07:24:45,2023-07-25 07:34:45,0 days 00:20:00,2.79802,2798.019814,1133.75,1278.0,1200.0
max,38132960.0,2023-07-29 00:36:00,2023-07-29 00:46:00,0 days 00:20:00,3.616292,3616.291507,1143.0,1406.0,1200.0
std,179062.2,,,0 days 00:05:46.410161513,1.629624,1629.62418,26.162951,364.432435,346.410162


In [128]:
get_stats(delay_45_60_next, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,4,4
sum,3600.0,6.87363
mean,900.0,1.718408
median,900.0,1.529495
mode,0 600.0 1 1200.0 Name: trip_duration_se...,0 0.198349 1 0.533727 2 2.525263 3 ...
max,1200.0,3.616292
min,600.0,0.198349
std_dev,346.410162,1.629624
variance,120000.0,2.655675
skewness,0.0,0.33682


In [129]:
# get entries where trips start and end at station
delay_45_60_next_station_station = delay_45_60_next[delay_45_60_next['end_stop_id'].notna() & delay_45_60_next['start_stop_id'].notna()]
delay_45_60_next_station_station

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,nextbike_trips_start_at_station,nextbike_trips_end_at_station,vehicle type,trip_duration,trip_distance_kilometers,trip_distance_meters,start_location,end_location,start_stop_id,start_stop_name,start_buffer_zone,end_stop_id,end_stop_name,end_buffer_zone,trip_duration_seconds
1424381,37964929,2023-07-21 01:41:00,2023-07-21 02:01:00,False,False,bike,0 days 00:20:00,3.616292,3616.291507,"b'\x01\x01\x00\x00\x00\xba\x15\xc2j,a\x1c@o/i\...",b'\x01\x01\x00\x00\x00\x92\xb1\xda\xfc\xbfJ\x1...,1106.0,Bonn Thomas-Mann-Str.,215356 ...,1406.0,Bonn Kopenhagener Str.,215356 POLYGON ((7.120193524138334 50.7395...,1200.0
1592554,38132957,2023-07-24 01:41:00,2023-07-24 01:51:00,False,False,bike,0 days 00:10:00,0.198349,198.348805,b'\x01\x01\x00\x00\x00\xd8\r\xdb\x16ef\x1c@x\x...,b'\x01\x01\x00\x00\x00n\xa5\xd7fce\x1c@Mjh\x03...,1143.0,Bonn Poppelsdorfer Allee,215356 ...,687.0,Bonn Hbf,215356 POLYGON ((7.120193524138334 50.7395...,600.0


In [130]:
delay_45_60_next_station_station.describe()

Unnamed: 0,nextbike_trips_id,nextbike_trips_start_time,nextbike_trips_end_time,trip_duration,trip_distance_kilometers,trip_distance_meters,start_stop_id,end_stop_id,trip_duration_seconds
count,2.0,2,2,2,2.0,2.0,2.0,2.0,2.0
mean,38048940.0,2023-07-22 13:41:00,2023-07-22 13:56:00,0 days 00:15:00,1.90732,1907.320156,1124.5,1046.5,900.0
min,37964930.0,2023-07-21 01:41:00,2023-07-21 02:01:00,0 days 00:10:00,0.198349,198.348805,1106.0,687.0,600.0
25%,38006940.0,2023-07-21 19:41:00,2023-07-21 19:58:30,0 days 00:12:30,1.052834,1052.83448,1115.25,866.75,750.0
50%,38048940.0,2023-07-22 13:41:00,2023-07-22 13:56:00,0 days 00:15:00,1.90732,1907.320156,1124.5,1046.5,900.0
75%,38090950.0,2023-07-23 07:41:00,2023-07-23 07:53:30,0 days 00:17:30,2.761806,2761.805831,1133.75,1226.25,1050.0
max,38132960.0,2023-07-24 01:41:00,2023-07-24 01:51:00,0 days 00:20:00,3.616292,3616.291507,1143.0,1406.0,1200.0
std,118813.7,,,0 days 00:07:04.264068711,2.41685,2416.850462,26.162951,508.409776,424.264069


In [131]:
# get statistics for the tier trips that start and end at a station
get_stats(delay_45_60_next_station_station, ['trip_duration_seconds', 'trip_distance_kilometers'])

column,trip_duration_seconds,trip_distance_kilometers
rows,2,2
sum,1800.0,3.81464
mean,900.0,1.90732
median,900.0,1.90732
mode,0 600.0 1 1200.0 Name: trip_duration_se...,0 0.198349 1 3.616292 Name: trip_distanc...
max,1200.0,3.616292
min,600.0,0.198349
std_dev,424.264069,2.41685
variance,180000.0,5.841166
skewness,,


In [132]:
# DELAY SPALTE (WENN ES SIE GIBT) CHECKEN

In [133]:
data.columns

Index(['route_id', 'agency_id', 'route_short_name', 'route_type',
       'route_type_name', 'agency_name', 'service_id', 'trip_id',
       'trip_headsign', 'direction_id', 'shape_id', 'stop_id',
       'actual_arrival_time', 'actual_departure_time', 'vrs_timestamp',
       'stop_sequence', 'stop_headsign', 'stop_name', 'scheduled_arrival_time',
       'scheduled_departure_time', 'arrival_delay', 'departure_delay',
       'service', 'date', 'weekday', 'transfer_stop', 'tier_trips_count',
       'tier_trips_id', 'tier_trips_end_at_station_count',
       'tier_trips_end_at_station_id', 'station_point', 'buffer_zone',
       'nextbike_trips_count', 'nextbike_trips_id',
       'nextbike_trips_end_at_station_count',
       'nextbike_trips_end_at_station_id', 'current_time', 'current_temp',
       'current_feels_like', 'current_cloudiness', 'current_visibility',
       'current_wind_speed', 'current_description',
       'current_precipitation_volume', 'arrival_delay_float',
       'departure_

In [134]:
unique_delays = delay['delay_span'].unique()

In [135]:
unique_delays

array(['0', '0-5', '30-45', '20-30', '15-20', '10-15', '5-10', '45-60'],
      dtype=object)

In [136]:
grouped_data = delay.groupby('delay_span').agg({'tier_trips_count': 'sum', 'nextbike_trips_count': 'sum'})

In [137]:
grouped_data

Unnamed: 0_level_0,tier_trips_count,nextbike_trips_count
delay_span,Unnamed: 1_level_1,Unnamed: 2_level_1
0,23,12
0-5,12367,33393
10-15,3,1
15-20,29,18
20-30,6,20
30-45,29,10
45-60,2,4
5-10,13,26


In [138]:
l = delay[delay['delay_span'] == '0']

In [139]:
l

Unnamed: 0,route_id,agency_id,route_short_name,route_type,route_type_name,agency_name,service_id,trip_id,trip_headsign,direction_id,...,delay_span,arrival_delay_span,weather_int,time_span_arrival,time_span_int_arrival,weekend,time_span,time_span_int,delay_category,cancelled_trip
0,551,8,551,3,Bus,RSVG Rhein-Sieg-Verkehrsgesellschaft mbH,125,93-551-008-687.2.36:014000-33-157_49E69401-92E...,Troisdorf Bf,0,...,0,0,0,0,0,1,0,0,-1,0
1,551,8,551,3,Bus,RSVG Rhein-Sieg-Verkehrsgesellschaft mbH,125,93-551-008-687.2.36:014000-33-157_49E69401-92E...,Troisdorf Bf,0,...,0,0,0,0,0,1,0,0,-1,0
2,551,8,551,3,Bus,RSVG Rhein-Sieg-Verkehrsgesellschaft mbH,125,93-551-008-687.2.36:014000-33-157_49E69401-92E...,Troisdorf Bf,0,...,0,0,0,0,0,1,0,0,-1,0
3,551,8,551,3,Bus,RSVG Rhein-Sieg-Verkehrsgesellschaft mbH,125,93-551-008-687.2.36:014000-33-157_49E69401-92E...,Troisdorf Bf,0,...,0,0,0,0,0,1,0,0,-1,0
4,551,8,551,3,Bus,RSVG Rhein-Sieg-Verkehrsgesellschaft mbH,125,93-551-008-687.2.36:014000-33-157_49E69401-92E...,Troisdorf Bf,0,...,0,0,0,0,0,1,0,0,-1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2340592,551,8,551,3,Bus,RSVG Rhein-Sieg-Verkehrsgesellschaft mbH,151,317-551-008-2071.2.22:053200-34-157_05F23F70-0...,Bonn Hbf,1,...,0,0,0,0,0,2,morning rush,1,-1,0
2340774,551,8,551,3,Bus,RSVG Rhein-Sieg-Verkehrsgesellschaft mbH,151,317-551-008-2071.2.22:053200-34-157_05F23F70-0...,Bonn Hbf,1,...,0,0,0,0,0,2,morning rush,1,-1,0
2340821,551,8,551,3,Bus,RSVG Rhein-Sieg-Verkehrsgesellschaft mbH,151,317-551-008-2071.2.22:053200-34-157_05F23F70-0...,Bonn Hbf,1,...,0,0,0,0,0,2,morning rush,1,-1,0
2340911,551,8,551,3,Bus,RSVG Rhein-Sieg-Verkehrsgesellschaft mbH,151,317-551-008-2071.2.22:053200-34-157_05F23F70-0...,Bonn Hbf,1,...,0,0,0,0,0,2,morning rush,1,-1,0


In [140]:
p = l[['arrival_delay', 'arrival_delay_float', 'delay_span']]

In [141]:
p

Unnamed: 0,arrival_delay,arrival_delay_float,delay_span
0,NaT,,0
1,NaT,,0
2,NaT,,0
3,NaT,,0
4,NaT,,0
...,...,...,...
2340592,NaT,,0
2340774,NaT,,0
2340821,NaT,,0
2340911,NaT,,0


In [142]:
data['arrival_delay'].unique().tolist()

[NaT,
 Timedelta('1 days 00:00:00'),
 Timedelta('0 days 00:00:00'),
 Timedelta('0 days 00:53:00'),
 Timedelta('0 days 00:52:00'),
 Timedelta('0 days 00:46:00'),
 Timedelta('0 days 00:00:15'),
 Timedelta('0 days 00:00:30'),
 Timedelta('0 days 00:00:20'),
 Timedelta('0 days 00:41:45'),
 Timedelta('0 days 00:40:00'),
 Timedelta('0 days 00:42:45'),
 Timedelta('0 days 00:37:45'),
 Timedelta('0 days 00:35:45'),
 Timedelta('0 days 00:39:30'),
 Timedelta('0 days 00:36:15'),
 Timedelta('0 days 00:35:30'),
 Timedelta('0 days 00:37:30'),
 Timedelta('0 days 00:33:30'),
 Timedelta('0 days 00:34:00'),
 Timedelta('0 days 00:00:10'),
 Timedelta('0 days 00:30:00'),
 Timedelta('0 days 00:29:45'),
 Timedelta('0 days 00:21:45'),
 Timedelta('0 days 00:50:00'),
 Timedelta('0 days 00:00:45'),
 Timedelta('0 days 00:01:00'),
 Timedelta('0 days 00:24:00'),
 Timedelta('0 days 00:19:45'),
 Timedelta('0 days 00:18:15'),
 Timedelta('0 days 00:16:00'),
 Timedelta('0 days 00:14:15'),
 Timedelta('0 days 00:03:00'),
 T

In [143]:
grouped_data = data.groupby('delay_span').agg({'tier_trips_count': 'sum', 'nextbike_trips_count': 'sum'})

In [144]:
grouped_data

Unnamed: 0_level_0,tier_trips_count,nextbike_trips_count
delay_span,Unnamed: 1_level_1,Unnamed: 2_level_1
0,36674,89023
0-5,13691,35569
10-15,3,1
15-20,30,20
20-30,31,42
30-45,33,11
45-60,118,80
5-10,13,26


In [145]:
nan_delay['arrival_delay']

41        NaT
42        NaT
43        NaT
44        NaT
45        NaT
           ..
2564854   NaT
2564856   NaT
2564857   NaT
2564858   NaT
2564860   NaT
Name: arrival_delay, Length: 1005613, dtype: timedelta64[ns]

In [146]:
nan = nan_delay.agg({'tier_trips_count': 'sum', 'nextbike_trips_count': 'sum'})
nan

tier_trips_count        19067
nextbike_trips_count    48198
dtype: int64

In [147]:
# CHECK WIE VIELE EINTRÄGE WELCHEN DELAY HABEN UND DANN WELCHE TIER TRIPS = 0 UND WELCHJE != 0

In [148]:
data['arrival_delay'].unique()

<TimedeltaArray>
[              NaT, '1 days 00:00:00', '0 days 00:00:00', '0 days 00:53:00',
 '0 days 00:52:00', '0 days 00:46:00', '0 days 00:00:15', '0 days 00:00:30',
 '0 days 00:00:20', '0 days 00:41:45',
 ...
 '0 days 00:48:15', '0 days 00:46:15', '0 days 00:27:15', '0 days 00:03:30',
 '0 days 00:42:00', '0 days 00:38:00', '0 days 00:33:15', '0 days 00:30:45',
 '0 days 00:26:45', '0 days 00:04:30']
Length: 184, dtype: timedelta64[ns]

In [149]:
d = data[data['arrival_delay'] > pd.Timedelta(0)]

In [150]:
e = d[d['tier_trips_count'] > 0]


In [151]:
e['tier_trips_count'].sum()

8832