In [1]:
import pandas as pd
from src.analyze.analyze_punctuality import *
from src.analyze.analyze_avg_speed import combine_bus_locations_within_hour, group_by_bus
from visualisation.common import *

To analyze bus punctuality, we will first take a look at the tables that we will be working with.

In [2]:
# Read the two JSON sources into DataFrames: the bus-stop coordinates and the
# per-stop timetables.
df_bus_stop_info_bus_stops_coord = pd.read_json(bus_stops_coordinates)
df_timetables = pd.read_json(timetables)

# Preview the first rows of each table, one after the other.
print(
    df_bus_stop_info_bus_stops_coord.head(),
    df_timetables.head(),
    sep="\n",
)

  zespol  slupek nazwa_zespolu  id_ulicy   szer_geo   dlug_geo
0   1001       1      Kijowska      2201  52.248455  21.044827
1   1001       2      Kijowska      2201  52.249078  21.044443
2   1001       3      Kijowska      2201  52.248928  21.044169
3   1001       4      Kijowska      2201  52.249969  21.041588
4   1001       5      Kijowska      1203  52.250319  21.043861
  busstopId  busstopNr                                            rozklad
0      1001          2  {'138': [{'czas': '08:08:00', 'brygada': '012'...
1      1001          8  {'120': [{'czas': '08:09:00', 'brygada': '4'},...
2      1002          1  {'120': [{'czas': '08:07:00', 'brygada': '4'},...
3      1001          7  {'120': [{'czas': '08:01:00', 'brygada': '2'},...
4      1001          1  {'102': [{'czas': '08:08:00', 'brygada': '543'...


In [3]:
# Join coordinates with timetables: (zespol, slupek) on the coordinates side is
# matched against (busstopId, busstopNr) on the timetable side. The columns
# that are redundant after the join, or not needed further, are dropped.
df_bus_stop_info = df_bus_stop_info_bus_stops_coord.merge(
    df_timetables,
    left_on=["zespol", "slupek"],
    right_on=["busstopId", "busstopNr"],
).drop(columns=["id_ulicy", "zespol", "nazwa_zespolu", "busstopNr"])
print(df_bus_stop_info)

      slupek   szer_geo   dlug_geo busstopId  \
0          1  52.248455  21.044827      1001   
1          2  52.249078  21.044443      1001   
2          7  52.250228  21.043690      1001   
3          8  52.249944  21.044087      1001   
4          1  52.251325  21.038457      1002   
...      ...        ...        ...       ...   
4704       1  52.252005  21.014461      7104   
4705       2  52.253089  21.013974      7104   
4706       2  52.244780  21.024285      7106   
4707       1  52.216206  21.043533      7107   
4708       0  52.196412  20.922925      R-11   

                                                rozklad  
0     {'102': [{'czas': '08:08:00', 'brygada': '543'...  
1     {'138': [{'czas': '08:08:00', 'brygada': '012'...  
2     {'120': [{'czas': '08:01:00', 'brygada': '2'},...  
3     {'120': [{'czas': '08:09:00', 'brygada': '4'},...  
4     {'120': [{'czas': '08:07:00', 'brygada': '4'},...  
...                                                 ...  
4704  {'185': [{'

We will take a quick look at the table's shape to see what it looks like.

In [4]:
# (number of rows, number of columns) of the merged bus-stop table.
bus_stop_table_shape = df_bus_stop_info.shape
print(bus_stop_table_shape)

(4709, 5)


Let's take another look at the bus locations table.

In [5]:
# Combine the location records collected for the 8 o'clock hour, then group
# them per bus (the printed table has one row per Lines/Brigade pair).
df_locations = group_by_bus(
    combine_bus_locations_within_hour(buses_locations_at_8)
)
print(df_locations)

      Lines  Brigade                                               Data
0       102      541  [{'Time': '2024-02-26 08:27:17', 'Lon': 21.041...
1       102      542  [{'Time': '2024-02-26 08:42:40', 'Lon': 21.042...
2       102      543  [{'Time': '2024-02-26 07:59:37', 'Lon': 21.041...
3       102      544  [{'Time': '2024-02-26 08:08:48', 'Lon': 21.042...
4       102      545  [{'Time': '2024-02-26 08:54:55', 'Lon': 21.042...
...     ...      ...                                                ...
1456    817        1  [{'Time': '2024-02-26 08:00:34', 'Lon': 20.800...
1457    817        2  [{'Time': '2024-02-26 07:59:33', 'Lon': 20.890...
1458    817        3  [{'Time': '2024-02-26 07:59:36', 'Lon': 20.950...
1459    850        1  [{'Time': '2024-02-26 08:22:08', 'Lon': 20.941...
1460    850        3  [{'Time': '2024-02-26 07:59:35', 'Lon': 20.938...

[1461 rows x 3 columns]


In [15]:
def analyze_punctuality_thread_executor(row, bus_stop_timetables, lat, lon):
    """Run the punctuality analysis for a single bus-stop row.

    Thin wrapper around ``analyze_punctuality_for_a_bus_stop`` that supplies
    the notebook-level ``df_locations`` table as the first argument, so the
    wrapper can be passed to ``DataFrame.apply(..., axis=1)``.

    This function used to create a fresh ``ThreadPoolExecutor`` on every call,
    submit exactly one task and immediately block on ``Future.result()``.
    That runs strictly serially while paying thread-pool start-up and
    shutdown costs for each row. The analysis is now called directly: the
    returned value and any exception raised by the analysis are the same as
    before. The function name is kept so existing callers keep working.

    Args:
        row: One row of the bus-stop table (as passed by ``apply`` with
            ``axis=1``).
        bus_stop_timetables: Forwarded unchanged to the analysis; the notebook
            passes the column name ``'rozklad'``.
        lat: Forwarded unchanged to the analysis; the notebook passes the
            column name ``'szer_geo'``.
        lon: Forwarded unchanged to the analysis; the notebook passes the
            column name ``'dlug_geo'``.

    Returns:
        Whatever ``analyze_punctuality_for_a_bus_stop`` returns for this row.
        The notebook stores it in the ``delay`` column.
    """
    # NOTE(review): relies on the global ``df_locations`` defined in an earlier
    # cell; that cell must have been run before this function is called.
    return analyze_punctuality_for_a_bus_stop(
        df_locations, row, bus_stop_timetables, lat, lon
    )

# Compute the delay only for the first 400 bus stops. Assigning the result back
# aligns on the index, so every other row receives NaN in 'delay'.
delay_for_first_stops = df_bus_stop_info.head(400).apply(
    lambda stop_row: analyze_punctuality_thread_executor(
        stop_row, 'rozklad', 'szer_geo', 'dlug_geo'
    ),
    axis=1,
)
df_bus_stop_info['delay'] = delay_for_first_stops

# Keep only rows that have a delay value and whose delay is not -1.
df_bus_stop_info = (
    df_bus_stop_info
    .dropna(subset=['delay'])
    .loc[lambda frame: frame['delay'] != -1]
)

print(df_bus_stop_info['delay'].head(55))


0     5.570833
1     1.888889
2     9.433333
3     1.483333
4     1.179167
5    29.105556
6    33.977778
7     6.962281
8    11.216667
9    19.416667
Name: delay, dtype: float64


Now we will see the mean delay (in minutes) of buses across the bus stops analyzed above.

In [16]:
# Average of the 'delay' column over the rows that survived the filtering.
overall_mean_delay = df_bus_stop_info['delay'].mean()
print(overall_mean_delay)

12.023450292397662
