In [388]:
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from collections import defaultdict



In [389]:
# Load the GTFS tables. Identifier columns are read as text explicitly:
# IDs such as "048506" or "0090" carry leading zeros, and numeric dtype
# inference on any of these columns would silently strip them and break the
# string comparisons and joins used further down.
df_trips = pd.read_csv("data/trips.txt", dtype={"trip_id": str, "route_id": str})
df_routes = pd.read_csv("data/routes.txt", dtype={"route_id": str})
df_stops = pd.read_csv("data/stops.txt", dtype={"stop_id": str})
df_stop_times = pd.read_csv("data/stop_times.txt", dtype={"trip_id": str, "stop_id": str})
df_pathways = pd.read_csv("data/pathways.txt")

# Egy megállóból elérhető szomszédok keresése

## Járatok

In [393]:
df_trips.head(5)

Unnamed: 0,route_id,trip_id,service_id,trip_headsign,direction_id,block_id,shape_id,wheelchair_accessible,bikes_allowed
0,8140,C0418310,C04183AHPMAA-011,"Soroksár, Molnár-sziget",0,C04183_8140_1_5,Y701,1.0,1.0
1,8140,C0418311,C04183AHPMAA-011,Csepel-Királyerdő,1,C04183_8140_1_6,Y702,1.0,1.0
2,8140,C0418312,C04183AHPMAA-011,"Soroksár, Molnár-sziget",0,C04183_8140_1_7,Y701,1.0,1.0
3,8140,C0418313,C04183AHPMAA-011,Csepel-Királyerdő,1,C04183_8140_1_8,Y702,1.0,1.0
4,8140,C0418314,C04183AHPMAA-011,"Soroksár, Molnár-sziget",0,C04183_8140_1_9,Y701,1.0,1.0


## Melyik járat, melyik megállóban mikor áll meg?

In [391]:
df_stop_times.head(5)


Unnamed: 0,trip_id,stop_id,arrival_time,departure_time,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
0,C0418310,F04181,08:03:00,08:03:00,0,,,,0.0
1,C0418310,F04526,08:06:00,08:06:00,1,,,,160.0
2,C0418311,F04526,08:30:00,08:30:00,0,,,,0.0
3,C0418311,F04181,08:33:00,08:33:00,1,,,,160.0
4,C0418312,F04181,08:33:00,08:33:00,0,,,,0.0


## Most a string arrival_time-ből csináljunk számokat
Mert így jóval gyorsabb lesz a lekérdezés

In [392]:
def str_time_to_min(astr):
    """Convert a clock string to whole minutes since the start of the service day.

    Accepts "H:MM:SS" / "HH:MM:SS" and also "HH:MM"; surrounding whitespace is
    ignored. Hours are not wrapped at 24, so a time such as "28:07:00" maps to
    1687. Seconds, when present, are discarded.

    Args:
        astr: time string with ':' separated fields.

    Returns:
        int: hours * 60 + minutes.

    Raises:
        ValueError: if the string does not have two or three ':' separated
            fields, or the hour/minute fields are not integers.
    """
    parts = astr.strip().split(":")
    if len(parts) not in (2, 3):
        raise ValueError(f"expected 'HH:MM[:SS]', got {astr!r}")
    # Only hours and minutes contribute; the seconds field is ignored.
    return int(parts[0]) * 60 + int(parts[1])


In [394]:
# Add an integer "<column>_min" companion for both clock-string columns so
# later filters can compare numbers instead of strings.
for time_col in ("arrival_time", "departure_time"):
    df_stop_times[f"{time_col}_min"] = df_stop_times[time_col].map(str_time_to_min)

preview_cols = ["trip_id", "stop_id", "arrival_time", "stop_sequence", "arrival_time_min", "departure_time_min"]
df_stop_times[preview_cols].head(5)

Unnamed: 0,trip_id,stop_id,arrival_time,stop_sequence,arrival_time_min,departure_time_min
0,C0418310,F04181,08:03:00,0,483,483
1,C0418310,F04526,08:06:00,1,486,486
2,C0418311,F04526,08:30:00,0,510,510
3,C0418311,F04181,08:33:00,1,513,513
4,C0418312,F04181,08:33:00,0,513,513


Mennyi ideig kell sétálni?

In [395]:
def getminutestowalk(dst, walkspeed=3.5):
    """Return the walking time in minutes for a distance of `dst` metres.

    Args:
        dst: distance to walk, in metres.
        walkspeed: walking speed in km/h (default 3.5).

    Returns:
        float: minutes needed to cover `dst` at `walkspeed`.
    """
    # km/h -> metres per minute
    metres_per_minute = walkspeed * 1000 / 60
    return dst / metres_per_minute

In [396]:
getminutestowalk(1000)

17.142857142857142

## Adott megállóban melyik járatok állnak meg X időpont után

"008152", # Mester utca / könyveskalman
"008163" # Újpest Városkapu
"F00191", # Margit híd, budai hídfő H
"F01083" # Király utca / Erzsébet körút
"F01755", # Örs vezér tere M+H'
"F01083" #  Oktogon M
"F02268", # KFKI
"009019" # Gödöllő

In [397]:
# Stop to inspect; the commented line is an alternative stop that was tried.
stop_id = "F00191"
#stop_id = "F00189"
# Cut-off time as minutes since midnight; only later arrivals are of interest.
arrival_time_min=str_time_to_min("9:01:00")
arrival_time_min

541

In [418]:
# Resolve the human-readable name of the selected stop.
matching_names = df_stops.loc[df_stops["stop_id"] == stop_id, "stop_name"]
stop_name = matching_names.values[0]
stop_name

'Margit híd, budai hídfő H'

In [399]:
# Stop-time rows at the selected stop that arrive after the cut-off.
later_calls = df_stop_times.query(f'stop_id == "{stop_id}" and arrival_time_min > {arrival_time_min}')

# Attach trip attributes, order by arrival and keep the earliest trip per route.
df_megallo_jaratok = (
    later_calls
    .merge(df_trips, on="trip_id")
    .sort_values(["arrival_time_min"])
    .drop_duplicates("route_id")
)
df_megallo_jaratok["stop_id"] = stop_id
df_megallo_jaratok["stop_name"] = stop_name

shown_cols = ["stop_id", "stop_name", "trip_id", "route_id", "arrival_time", "stop_headsign", "stop_sequence"]
df_megallo_jaratok[shown_cols]


Unnamed: 0,stop_id,stop_name,trip_id,route_id,arrival_time,stop_headsign,stop_sequence
36,F00191,"Margit híd, budai hídfő H",D0427910466,1910,09:02:00,,2
1102,F00191,"Margit híd, budai hídfő H",D063983577,2910,09:05:00,"Zugliget, Libegő",2
271,F00191,"Margit híd, budai hídfő H",D044333587,0910,09:15:00,Rózsadomb ► Széll Kálmán tér M,2
416,F00191,"Margit híd, budai hídfő H",D05242569,VP06,23:33:00,,11


In [400]:
# Add the route attributes (including the public route number) to each row.
df_megallo_jaratok2 = df_megallo_jaratok.merge(df_routes, on="route_id")

shown_cols = [
    "stop_id", "stop_name", "trip_id", "route_id", "route_short_name",
    "arrival_time", "arrival_time_min", "stop_headsign", "stop_sequence",
]
df_megallo_jaratok2[shown_cols]

Unnamed: 0,stop_id,stop_name,trip_id,route_id,route_short_name,arrival_time,arrival_time_min,stop_headsign,stop_sequence
0,F00191,"Margit híd, budai hídfő H",D0427910466,1910,191,09:02:00,542,,2
1,F00191,"Margit híd, budai hídfő H",D063983577,2910,291,09:05:00,545,"Zugliget, Libegő",2
2,F00191,"Margit híd, budai hídfő H",D044333587,0910,91,09:15:00,555,Rózsadomb ► Széll Kálmán tér M,2
3,F00191,"Margit híd, budai hídfő H",D05242569,VP06,6,23:33:00,1413,,11


## Kezdjük el keresni a megálló szomszédait

### stop_times mintha erre lenne kitalálva, mert valójában egy trip későbbi megállóit keresem!

In [403]:
# Trip whose later stops are inspected below; the commented lines are
# alternative trips that were tried.
trip_id="D063983577"
#trip_id = "D015152828"
#trip_id = "D057685654"
# 541 minutes = 09:01, the same cut-off that str_time_to_min("9:01:00") produced above.
arrival_time_min = 541

In [404]:
# Stops of the selected trip that are reached after the cut-off time.
on_selected_trip = df_stop_times["trip_id"] == trip_id
after_cutoff = df_stop_times["arrival_time_min"] > arrival_time_min
df_stop_times[on_selected_trip & after_cutoff]

Unnamed: 0,trip_id,stop_id,arrival_time,departure_time,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,arrival_time_min,departure_time_min
5512832,D063983577,F00929,09:02:00,09:02:00,1,"Zugliget, Libegő",,,560.0,542,542
5512833,D063983577,F00191,09:05:00,09:05:00,2,"Zugliget, Libegő",,,1640.0,545,545
5512834,D063983577,F00233,09:07:00,09:07:00,3,"Zugliget, Libegő",,,2131.0,547,547
5512835,D063983577,F00230,09:08:00,09:08:00,4,"Zugliget, Libegő",,,2414.0,548,548
5512836,D063983577,F00228,09:09:00,09:09:00,5,"Zugliget, Libegő",,,2756.0,549,549
5512837,D063983577,F00223,09:10:00,09:10:00,6,"Zugliget, Libegő",,,3130.0,550,550
5512838,D063983577,F00221,09:11:00,09:11:00,7,"Zugliget, Libegő",,,3497.0,551,551
5512839,D063983577,F00219,09:13:00,09:13:00,8,"Zugliget, Libegő",,,3942.0,553,553
5512840,D063983577,F00217,09:14:00,09:14:00,9,"Zugliget, Libegő",,,4190.0,554,554
5512841,D063983577,F00213,09:15:00,09:15:00,10,"Zugliget, Libegő",,,4427.0,555,555


# Járattal elérhető szomszédok meghatározása

Először valamilyen ciklusra gondoltam, de az inner join sokkal jobb

Előről kezdem és megpróbálom a részfolyamatokat összeilleszteni

In [405]:
# Stop under investigation. "F01034" used to be assigned here and then
# immediately overwritten; it is kept only as a commented alternative so the
# cell has a single effective assignment.
#stop_id = "F01034"
stop_id = "F00191"
#stop_id = "F00189"
# Cut-off time as minutes since midnight.
arrival_time_min = str_time_to_min("9:01:00")
arrival_time_min

541

In [406]:
# Name of the selected stop, taken from the first matching row of stops.txt.
is_selected_stop = df_stops["stop_id"] == stop_id
stop_name = df_stops[is_selected_stop]["stop_name"].values[0]
stop_name

'Margit híd, budai hídfő H'

Melyik járatok állnak meg itt X időpont után:

In [407]:
# Every trip that calls at the stop after the cut-off, earliest arrival first
# (no de-duplication per route yet).
calls_after_cutoff = df_stop_times.query(f'stop_id == "{stop_id}" and arrival_time_min > {arrival_time_min}')
df_megallo_jaratok = pd.merge(calls_after_cutoff, df_trips, on="trip_id")
df_megallo_jaratok = df_megallo_jaratok.sort_values(["arrival_time_min"])
df_megallo_jaratok


Unnamed: 0,trip_id,stop_id,arrival_time,departure_time,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,arrival_time_min,departure_time_min,route_id,service_id,trip_headsign,direction_id,block_id,shape_id,wheelchair_accessible,bikes_allowed
36,D0427910466,F00191,09:02:00,09:02:00,2,,,,1640.0,542,542,1910,D04279K1HPJOP-021,Szemlőhegy,0,D04279_0910_9_4,ZC97,1.0,2.0
699,D0576812124,F00191,09:02:00,09:02:00,2,Szemlőhegy,,,1640.0,542,542,1910,D05768AHCJOP-011,Szemlőhegy,0,D05768_0910_9_4,ZC97,1.0,2.0
564,D0576712124,F00191,09:02:00,09:02:00,2,Szemlőhegy,,,1640.0,542,542,1910,D05767AHCJOP-011,Szemlőhegy,0,D05767_0910_9_4,ZC97,1.0,2.0
1102,D063983577,F00191,09:05:00,09:05:00,2,"Zugliget, Libegő",,,1640.0,545,545,2910,D06398RAAVVRR-0011,"Zugliget, Libegő",0,D06398_0910_1_13,CZ60,1.0,2.0
159,D043853577,F00191,09:05:00,09:05:00,2,"Zugliget, Libegő",,,1640.0,545,545,2910,D04385ASZJOP-011,"Zugliget, Libegő",0,D04385_0910_1_13,CZ60,1.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33,C989061775,F00191,28:07:00,28:07:00,15,Széll Kálmán tér M,,,7146.0,1687,1687,VP06,C98906REHCBR-0011,Széll Kálmán tér M,0,C98906_VP06_33_6,RM56,1.0,2.0
450,D053951775,F00191,28:07:00,28:07:00,15,,,,7146.0,1687,1687,VP06,D05395RA2HCWA-0011,Széll Kálmán tér M,0,D05395_VP06_33_7,RM56,1.0,2.0
34,C989061779,F00191,28:22:00,28:22:00,15,Széll Kálmán tér M,,,7146.0,1702,1702,VP06,C98906REHCBR-0011,Széll Kálmán tér M,0,C98906_VP06_3_6,RM56,1.0,2.0
451,D053951779,F00191,28:22:00,28:22:00,15,,,,7146.0,1702,1702,VP06,D05395RA2HCWA-0011,Széll Kálmán tér M,0,D05395_VP06_21_7,RM56,1.0,2.0


In [408]:
# Same selection as before, reduced to the earliest arriving trip of each route.
df_megallo_jaratok = (
    df_stop_times
    .query(f'stop_id == "{stop_id}" and arrival_time_min > {arrival_time_min}')
    .merge(df_trips, on="trip_id")
    .sort_values(["arrival_time_min"])
    .drop_duplicates("route_id")
)


In [409]:
df_megallo_jaratok

Unnamed: 0,trip_id,stop_id,arrival_time,departure_time,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,arrival_time_min,departure_time_min,route_id,service_id,trip_headsign,direction_id,block_id,shape_id,wheelchair_accessible,bikes_allowed
36,D0427910466,F00191,09:02:00,09:02:00,2,,,,1640.0,542,542,1910,D04279K1HPJOP-021,Szemlőhegy,0,D04279_0910_9_4,ZC97,1.0,2.0
1102,D063983577,F00191,09:05:00,09:05:00,2,"Zugliget, Libegő",,,1640.0,545,545,2910,D06398RAAVVRR-0011,"Zugliget, Libegő",0,D06398_0910_1_13,CZ60,1.0,2.0
271,D044333587,F00191,09:15:00,09:15:00,2,Rózsadomb ► Széll Kálmán tér M,,,1640.0,555,555,0910,D04433AVVJOP-011,Széll Kálmán tér M,0,D04433_0910_5_6,RA90,1.0,2.0
416,D05242569,F00191,23:33:00,23:33:00,11,,,,5225.0,1413,1413,VP06,D05242RA2VVWA-0011,Széll Kálmán tér M,0,D05242_0850_42_0,SM71,1.0,2.0


In [410]:
# stop_id already comes from stop_times, so only the readable name is added.
df_megallo_jaratok["stop_name"] = stop_name

shown_cols = [
    "stop_id", "stop_name", "trip_id", "route_id",
    "arrival_time", "arrival_time_min", "stop_headsign", "stop_sequence",
]
df_megallo_jaratok[shown_cols]

Unnamed: 0,stop_id,stop_name,trip_id,route_id,arrival_time,arrival_time_min,stop_headsign,stop_sequence
36,F00191,"Margit híd, budai hídfő H",D0427910466,1910,09:02:00,542,,2
1102,F00191,"Margit híd, budai hídfő H",D063983577,2910,09:05:00,545,"Zugliget, Libegő",2
271,F00191,"Margit híd, budai hídfő H",D044333587,0910,09:15:00,555,Rózsadomb ► Széll Kálmán tér M,2
416,F00191,"Margit híd, budai hídfő H",D05242569,VP06,23:33:00,1413,,11


Kozmetika, hogy legyen járatszám amit értek is

In [411]:
# Bring in only the public route number from routes.txt.
route_numbers = df_routes[["route_id", "route_short_name"]]
df_megallo_jaratok2 = df_megallo_jaratok.merge(route_numbers, on="route_id")

shown_cols = [
    "stop_id", "stop_name", "trip_id", "route_id", "route_short_name",
    "arrival_time", "arrival_time_min", "stop_headsign", "stop_sequence",
]
df_megallo_jaratok2[shown_cols]

Unnamed: 0,stop_id,stop_name,trip_id,route_id,route_short_name,arrival_time,arrival_time_min,stop_headsign,stop_sequence
0,F00191,"Margit híd, budai hídfő H",D0427910466,1910,191,09:02:00,542,,2
1,F00191,"Margit híd, budai hídfő H",D063983577,2910,291,09:05:00,545,"Zugliget, Libegő",2
2,F00191,"Margit híd, budai hídfő H",D044333587,0910,91,09:15:00,555,Rózsadomb ► Széll Kálmán tér M,2
3,F00191,"Margit híd, budai hídfő H",D05242569,VP06,6,23:33:00,1413,,11


### Szomszédokat egy left join-nal fogok keresni

In [412]:
# For every selected trip, attach all of its stop_times rows. Columns that
# exist on both sides get the default _x (boarding stop) / _y (other stop) suffixes.
trip_calls = df_stop_times[
    ["trip_id", "stop_id", "arrival_time_min", "arrival_time", "departure_time", "departure_time_min"]
]
df_szomszedok = df_megallo_jaratok2.merge(trip_calls, how="left", on="trip_id")
df_szomszedok.columns

Index(['trip_id', 'stop_id_x', 'arrival_time_x', 'departure_time_x',
       'stop_sequence', 'stop_headsign', 'pickup_type', 'drop_off_type',
       'shape_dist_traveled', 'arrival_time_min_x', 'departure_time_min_x',
       'route_id', 'service_id', 'trip_headsign', 'direction_id', 'block_id',
       'shape_id', 'wheelchair_accessible', 'bikes_allowed', 'stop_name',
       'route_short_name', 'stop_id_y', 'arrival_time_min_y', 'arrival_time_y',
       'departure_time_y', 'departure_time_min_y'],
      dtype='object')

Túl korai járatokat törlöm

In [413]:
# Give the neighbour-side columns their plain names and mark the boarding
# stop's name with the _x suffix, matching the other boarding-side columns.
neighbour_column_names = {
    "stop_name": "stop_name_x",
    "arrival_time_y": "arrival_time",
    "arrival_time_min_y": "arrival_time_min",
    "stop_id_y": "stop_id",
}
df_szomszedok.rename(columns=neighbour_column_names, inplace=True)

In [356]:
#indices_to_drop = df_szomszedok [ df_szomszedok["arrival_time_min"] <= arrival_time_min ].index
#df_szomszedok.drop( indices_to_drop, inplace= True)

Saját magamat sem akarom látni mint szomszédot

In [357]:
#indices_to_drop = df_szomszedok [ df_szomszedok["stop_id_x"] == df_szomszedok["stop_id"] ].index
#df_szomszedok.drop( indices_to_drop, inplace= True)

In [414]:
df_szomszedok[["trip_id","stop_id_x","arrival_time_x","stop_name_x","stop_id","arrival_time_min"]]

Unnamed: 0,trip_id,stop_id_x,arrival_time_x,stop_name_x,stop_id,arrival_time_min
0,D0427910466,F00191,09:02:00,"Margit híd, budai hídfő H",F01028,537
1,D0427910466,F00191,09:02:00,"Margit híd, budai hídfő H",F00929,538
2,D0427910466,F00191,09:02:00,"Margit híd, budai hídfő H",F00191,542
3,D0427910466,F00191,09:02:00,"Margit híd, budai hídfő H",F00233,543
4,D0427910466,F00191,09:02:00,"Margit híd, budai hídfő H",049903,544
...,...,...,...,...,...,...
58,D05242569,F00191,23:33:00,"Margit híd, budai hídfő H",F00929,1411
59,D05242569,F00191,23:33:00,"Margit híd, budai hídfő H",F00191,1413
60,D05242569,F00191,23:33:00,"Margit híd, budai hídfő H",F00197,1415
61,D05242569,F00191,23:33:00,"Margit híd, budai hídfő H",F00306,1416


In [415]:
# Attach name and coordinates of each neighbouring stop.
stop_info = df_stops[["stop_id", "stop_name", "stop_lat", "stop_lon"]]
df_szomszedok2 = df_szomszedok.merge(stop_info, how="left", on="stop_id")
df_szomszedok2.columns

Index(['trip_id', 'stop_id_x', 'arrival_time_x', 'departure_time_x',
       'stop_sequence', 'stop_headsign', 'pickup_type', 'drop_off_type',
       'shape_dist_traveled', 'arrival_time_min_x', 'departure_time_min_x',
       'route_id', 'service_id', 'trip_headsign', 'direction_id', 'block_id',
       'shape_id', 'wheelchair_accessible', 'bikes_allowed', 'stop_name_x',
       'route_short_name', 'stop_id', 'arrival_time_min', 'arrival_time',
       'departure_time_y', 'departure_time_min_y', 'stop_name', 'stop_lat',
       'stop_lon'],
      dtype='object')

In [420]:
df_szomszedok2[["trip_id","route_short_name", "stop_id_x","stop_name_x","departure_time_x","arrival_time_x", "arrival_time_min_x", "stop_id", "stop_name", "arrival_time","arrival_time_min", "stop_lon","stop_lat"]]


Unnamed: 0,trip_id,route_short_name,stop_id_x,stop_name_x,departure_time_x,arrival_time_x,arrival_time_min_x,stop_id,stop_name,arrival_time,arrival_time_min,stop_lon,stop_lat
2,D0427910466,191,F00191,"Margit híd, budai hídfő H",09:02:00,09:02:00,542,F00191,"Margit híd, budai hídfő H",09:02:00,542,19.03665,47.51468
3,D0427910466,191,F00191,"Margit híd, budai hídfő H",09:02:00,09:02:00,542,F00233,Apostol utca,09:03:00,543,19.033212,47.514635
4,D0427910466,191,F00191,"Margit híd, budai hídfő H",09:02:00,09:02:00,542,049903,Mansfeld Péter park,09:04:00,544,19.033228,47.517064
7,D063983577,291,F00191,"Margit híd, budai hídfő H",09:05:00,09:05:00,545,F00191,"Margit híd, budai hídfő H",09:05:00,545,19.03665,47.51468
8,D063983577,291,F00191,"Margit híd, budai hídfő H",09:05:00,09:05:00,545,F00233,Apostol utca,09:07:00,547,19.033212,47.514635
9,D063983577,291,F00191,"Margit híd, budai hídfő H",09:05:00,09:05:00,545,F00230,Zivatar utca,09:08:00,548,19.031714,47.515607
10,D063983577,291,F00191,"Margit híd, budai hídfő H",09:05:00,09:05:00,545,F00228,Szemlőhegy utca,09:09:00,549,19.027798,47.516919
11,D063983577,291,F00191,"Margit híd, budai hídfő H",09:05:00,09:05:00,545,F00223,Mandula utca,09:10:00,550,19.028845,47.519816
12,D063983577,291,F00191,"Margit híd, budai hídfő H",09:05:00,09:05:00,545,F00221,Vérhalom tér,09:11:00,551,19.02514,47.520903
13,D063983577,291,F00191,"Margit híd, budai hídfő H",09:05:00,09:05:00,545,F00219,Cimbalom utca,09:13:00,553,19.023001,47.524347


In [417]:
# Remove the stops the trip reaches before it arrives at the boarding stop;
# rows with an equal arrival time (the boarding stop itself) are kept.
reached_before_boarding = df_szomszedok2["arrival_time_min_x"] > df_szomszedok2["arrival_time_min"]
indices_to_drop = df_szomszedok2.index[reached_before_boarding]
df_szomszedok2.drop(index=indices_to_drop, inplace=True)

Itt nézd

In [422]:
df_szomszedok2[["trip_id","route_short_name", "stop_id_x","stop_name_x","departure_time_x", "arrival_time_x","stop_id", "stop_name", "arrival_time","stop_lon","stop_lat"]]

Unnamed: 0,trip_id,route_short_name,stop_id_x,stop_name_x,departure_time_x,arrival_time_x,stop_id,stop_name,arrival_time,stop_lon,stop_lat
2,D0427910466,191,F00191,"Margit híd, budai hídfő H",09:02:00,09:02:00,F00191,"Margit híd, budai hídfő H",09:02:00,19.03665,47.51468
3,D0427910466,191,F00191,"Margit híd, budai hídfő H",09:02:00,09:02:00,F00233,Apostol utca,09:03:00,19.033212,47.514635
4,D0427910466,191,F00191,"Margit híd, budai hídfő H",09:02:00,09:02:00,049903,Mansfeld Péter park,09:04:00,19.033228,47.517064
7,D063983577,291,F00191,"Margit híd, budai hídfő H",09:05:00,09:05:00,F00191,"Margit híd, budai hídfő H",09:05:00,19.03665,47.51468
8,D063983577,291,F00191,"Margit híd, budai hídfő H",09:05:00,09:05:00,F00233,Apostol utca,09:07:00,19.033212,47.514635
9,D063983577,291,F00191,"Margit híd, budai hídfő H",09:05:00,09:05:00,F00230,Zivatar utca,09:08:00,19.031714,47.515607
10,D063983577,291,F00191,"Margit híd, budai hídfő H",09:05:00,09:05:00,F00228,Szemlőhegy utca,09:09:00,19.027798,47.516919
11,D063983577,291,F00191,"Margit híd, budai hídfő H",09:05:00,09:05:00,F00223,Mandula utca,09:10:00,19.028845,47.519816
12,D063983577,291,F00191,"Margit híd, budai hídfő H",09:05:00,09:05:00,F00221,Vérhalom tér,09:11:00,19.02514,47.520903
13,D063983577,291,F00191,"Margit híd, budai hídfő H",09:05:00,09:05:00,F00219,Cimbalom utca,09:13:00,19.023001,47.524347


In [425]:
df_szomszedok2[["stop_id", "trip_id", "route_short_name", "stop_name", "departure_time_x","arrival_time","stop_lon","stop_lat"]].values

array([['F00191', 'D0427910466', '191', 'Margit híd, budai hídfő H',
        '09:02:00', '09:02:00', 19.03665, 47.51468],
       ['F00233', 'D0427910466', '191', 'Apostol utca', '09:02:00',
        '09:03:00', 19.033212, 47.514635],
       ['049903', 'D0427910466', '191', 'Mansfeld Péter park',
        '09:02:00', '09:04:00', 19.033228, 47.517064],
       ['F00191', 'D063983577', '291', 'Margit híd, budai hídfő H',
        '09:05:00', '09:05:00', 19.03665, 47.51468],
       ['F00233', 'D063983577', '291', 'Apostol utca', '09:05:00',
        '09:07:00', 19.033212, 47.514635],
       ['F00230', 'D063983577', '291', 'Zivatar utca', '09:05:00',
        '09:08:00', 19.031714, 47.515607],
       ['F00228', 'D063983577', '291', 'Szemlőhegy utca', '09:05:00',
        '09:09:00', 19.027798, 47.516919],
       ['F00223', 'D063983577', '291', 'Mandula utca', '09:05:00',
        '09:10:00', 19.028845, 47.519816],
       ['F00221', 'D063983577', '291', 'Vérhalom tér', '09:05:00',
        '09:11:00'

In [426]:
# Export the reachable neighbours as a list of plain dicts, one per row.
record_cols = [
    "stop_id", "trip_id", "route_short_name", "stop_name",
    "departure_time_min_x", "arrival_time_min", "stop_lon", "stop_lat",
]
retdict = df_szomszedok2[record_cols].to_dict(orient="records")
retdict

[{'stop_id': 'F00191',
  'trip_id': 'D0427910466',
  'route_short_name': '191',
  'stop_name': 'Margit híd, budai hídfő H',
  'departure_time_min_x': 542,
  'arrival_time_min': 542,
  'stop_lon': 19.03665,
  'stop_lat': 47.51468},
 {'stop_id': 'F00233',
  'trip_id': 'D0427910466',
  'route_short_name': '191',
  'stop_name': 'Apostol utca',
  'departure_time_min_x': 542,
  'arrival_time_min': 543,
  'stop_lon': 19.033212,
  'stop_lat': 47.514635},
 {'stop_id': '049903',
  'trip_id': 'D0427910466',
  'route_short_name': '191',
  'stop_name': 'Mansfeld Péter park',
  'departure_time_min_x': 542,
  'arrival_time_min': 544,
  'stop_lon': 19.033228,
  'stop_lat': 47.517064},
 {'stop_id': 'F00191',
  'trip_id': 'D063983577',
  'route_short_name': '291',
  'stop_name': 'Margit híd, budai hídfő H',
  'departure_time_min_x': 545,
  'arrival_time_min': 545,
  'stop_lon': 19.03665,
  'stop_lat': 47.51468},
 {'stop_id': 'F00233',
  'trip_id': 'D063983577',
  'route_short_name': '291',
  'stop_name'

## Keressük meg egy útvonalon a megállók neveit

Megoldási terv: Keresek egy route-ot és benne az első tripet.

Ahhoz a triphez lekérdezzük a megállókat.
Utána majd mellétesszük a neveket is



In [365]:
df_trips.groupby(["route_id"]).agg({"route_id":"first","trip_id":"first", "trip_headsign":"first"})

Unnamed: 0_level_0,route_id,trip_id,trip_headsign
route_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0050,0050,D0166810,Pasaréti tér
0070,0070,D06142101,"Újpalota, Nyírpalota út"
0075,0075,D0172110,Blaha Lujza tér M
0078,0078,C755691,"Újpalota, Nyírpalota út"
0085,0085,D0166110,Kelenföld vasútállomás M
...,...,...,...
VP41,VP41,D056341087,Kamaraerdei Ifj. Park
VP50,VP50,D064891,"Pestszentlőrinc, Béke tér"
VP52,VP52,D048871,"P.erzsébet, Pacsirtatelep"
VP60,VP60,C937781,Széchenyi-hegy


### Melyik megállókban áll meg a 4-6-os ?

#### Mikor indul reggel a 4-6-os

A 3060-as járat első indulását keresem.
A groupby és agg funkciók miatt fontos

In [366]:
df_trips[ df_trips["route_id"].isin(["3060"]) ].groupby(["route_id"]).agg({"route_id":"first","trip_id":"first", "trip_headsign":"first"})["trip_id"]

route_id
3060    D015151403
Name: trip_id, dtype: object

Megvan a trip_id

In [367]:
# NOTE(review): the lookup in the previous cell printed trip D015151403, while
# this cell inspects the hard-coded trip D015151066 — confirm which is intended.
df_stop_times.loc[df_stop_times["trip_id"] == "D015151066"]

Unnamed: 0,trip_id,stop_id,arrival_time,departure_time,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,arrival_time_min,departure_time_min
1428865,D015151066,F01998,05:47:00,05:47:00,0,Széll Kálmán tér M,,,0.0,347,347
1428866,D015151066,F01992,05:49:00,05:49:00,1,Széll Kálmán tér M,,,544.0,349,349
1428867,D015151066,F02225,05:50:00,05:50:00,2,Széll Kálmán tér M,,,1087.0,350,350
1428868,D015151066,F01374,05:51:00,05:51:00,3,Széll Kálmán tér M,,,1750.0,351,351
1428869,D015151066,F01380,05:53:00,05:53:00,4,Széll Kálmán tér M,,,2073.0,353,353
1428870,D015151066,F01191,05:55:00,05:55:00,5,Széll Kálmán tér M,,,2395.0,355,355
1428871,D015151066,F01199,05:57:00,05:57:00,6,Széll Kálmán tér M,,,2948.0,357,357
1428872,D015151066,F01200,05:58:00,05:58:00,7,Széll Kálmán tér M,,,3233.0,358,358
1428873,D015151066,F01168,06:00:00,06:00:00,8,Széll Kálmán tér M,,,3677.0,360,360
1428874,D015151066,F01111,06:01:00,06:01:00,9,Széll Kálmán tér M,,,4109.0,361,361


### Most tegyük mellé a neveket is!

A hátsó stop_name oszlopban látszik

In [368]:
# Stop-time rows of the trip, enriched with the stop attributes (name, coordinates, ...).
trip_stop_times = df_stop_times.loc[df_stop_times["trip_id"] == "D015151066"]
trip_stop_times.merge(df_stops, on="stop_id")

Unnamed: 0,trip_id,stop_id,arrival_time,departure_time,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,arrival_time_min,departure_time_min,stop_name,stop_lat,stop_lon,stop_code,location_type,location_sub_type,parent_station,wheelchair_boarding
0,D015151066,F01998,05:47:00,05:47:00,0,Széll Kálmán tér M,,,0.0,347,347,Újbuda-központ M,47.474042,19.046862,F01998,,,,1.0
1,D015151066,F01992,05:49:00,05:49:00,1,Széll Kálmán tér M,,,544.0,349,349,Budafoki út / Szerémi sor,47.474114,19.053747,F01992,,,,1.0
2,D015151066,F02225,05:50:00,05:50:00,2,Széll Kálmán tér M,,,1087.0,350,350,"Petőfi híd, budai hídfő",47.476956,19.059572,F02225,,,,1.0
3,D015151066,F01374,05:51:00,05:51:00,3,Széll Kálmán tér M,,,1750.0,351,351,Boráros tér H,47.4804,19.066697,F01374,,,,2.0
4,D015151066,F01380,05:53:00,05:53:00,4,Széll Kálmán tér M,,,2073.0,353,353,Mester utca / Ferenc körút,47.482775,19.068848,F01380,,,,1.0
5,D015151066,F01191,05:55:00,05:55:00,5,Széll Kálmán tér M,,,2395.0,355,355,Corvin-negyed M,47.4855,19.069924,F01191,,,,1.0
6,D015151066,F01199,05:57:00,05:57:00,6,Széll Kálmán tér M,,,2948.0,357,357,Harminckettesek tere,47.490357,19.070841,F01199,,,,1.0
7,D015151066,F01200,05:58:00,05:58:00,7,Széll Kálmán tér M,,,3233.0,358,358,Rákóczi tér M,47.492866,19.071187,F01200,,,,1.0
8,D015151066,F01168,06:00:00,06:00:00,8,Széll Kálmán tér M,,,3677.0,360,360,Blaha Lujza tér M,47.49677,19.070725,F01168,,,,1.0
9,D015151066,F01111,06:01:00,06:01:00,9,Széll Kálmán tér M,,,4109.0,361,361,Wesselényi utca / Erzsébet körút,47.50035,19.068934,F01111,,,,1.0


# Kódrészlet ami egy vonal megállóit keresi meg
Most egy kis kódrészlet keresi nekem az adott vonal megállóit.
Ezt lehet majd beletenni egy függvénybe

In [369]:
#route_id = "3060"
#route_id="3040"
route_id="0090"

# First trip listed for the route. A plain selection replaces the former
# groupby/agg detour; dropna() keeps the "first non-null" semantics that
# agg("first") had.
route_trip_ids = df_trips.loc[df_trips["route_id"] == route_id, "trip_id"].dropna()
if route_trip_ids.empty:
    # Same exception type as the former .iloc[0] on an empty result, but with
    # a message that names the offending route.
    raise IndexError(f"no trips found for route_id {route_id!r}")
elso_trip = route_trip_ids.iloc[0]

# Stops served by that trip, with their names and coordinates.
df_route_stops = pd.merge(df_stop_times[df_stop_times["trip_id"] == elso_trip], df_stops, on="stop_id")
df_route_stops["route_id"] = route_id
df_route_stops[["route_id","trip_id","stop_id","stop_name","arrival_time","departure_time","stop_lat","stop_lon"]]

Unnamed: 0,route_id,trip_id,stop_id,stop_name,arrival_time,departure_time,stop_lat,stop_lon
0,90,D033981412,F01636,Kőbánya alsó vasútállomás,04:36:00,04:36:00,47.483652,19.127839
1,90,D033981412,F01653,Szent László tér,04:37:00,04:37:00,47.485925,19.131093
2,90,D033981412,F01641,Liget tér,04:39:00,04:39:00,47.482795,19.130358
3,90,D033981412,F01571,Kőbánya alsó vasútállomás (Mázsa tér),04:40:00,04:40:00,47.482879,19.125715
4,90,D033981412,F01563,Egészségház,04:42:00,04:42:00,47.484126,19.116668
5,90,D033981412,F01733,Eiffel Műhelyház,04:43:00,04:43:00,47.485136,19.112914
6,90,D033981412,F01267,Kőbányai út / Könyves Kálmán körút,04:45:00,04:45:00,47.48674,19.106971
7,90,D033981412,008036,Kőbányai út 31.,04:45:00,04:45:00,47.487461,19.104252
8,90,D033981412,F01187,Orczy tér,04:47:00,04:48:00,47.48947,19.090495
9,90,D033981412,F01223,Kálvária tér,04:49:00,04:49:00,47.48889,19.085146


## Melyik járatok állnak meg ebben a megállóban ?

In [370]:
df_stop_times[ df_stop_times["stop_id"] == "048506"]

Unnamed: 0,trip_id,stop_id,arrival_time,departure_time,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,arrival_time_min,departure_time_min
548787,C96610106,048506,07:27:00,07:27:00,9,Zugló vasútállomás,,,3713.0,447,447
548839,C96610111,048506,07:37:00,07:37:00,9,Zugló vasútállomás,,,3713.0,457,457
548889,C96610116,048506,07:44:00,07:44:00,9,Zugló vasútállomás,,,3713.0,464,464
548941,C96610121,048506,07:52:00,07:52:00,9,Zugló vasútállomás,,,3713.0,472,472
548966,C966101221,048506,16:35:00,16:35:00,9,Zugló vasútállomás,,,3713.0,995,995
...,...,...,...,...,...,...,...,...,...,...,...
5688598,D066221107,048506,03:09:00,03:09:00,42,"Rákospalota, Székely Elek út",,,20957.0,189,189
5688676,D066221115,048506,03:30:00,03:30:00,42,"Rákospalota, Kossuth utca",,,20957.0,210,210
5688727,D066221125,048506,03:50:00,03:50:00,26,,,,13366.0,230,230
5688784,D066221131,048506,04:09:00,04:09:00,42,Újpest-központ M,,,20957.0,249,249


Látszik, hogy jó sok indulás van ebből a megállóból

## Adatvizsgálat: hogyan lesznek meg a fő megállók?


In [371]:
stop_id = "048506" # Szent István Bazilika
#stop_id = "F01029" # Nyugati
# parent_station of the first stops.txt row matching the stop.
parent_stop_id = df_stops.loc[df_stops["stop_id"] == stop_id, "parent_station"].iloc[0]
parent_stop_id

nan

In [372]:
# Switch to the parent station only when one is actually given.
# pd.notna() is used instead of `is not np.nan`: the identity test is only
# true for the np.nan singleton itself, so another NaN object (for example a
# np.float64 NaN coming from a float column) would be mistaken for a real id.
if pd.notna(parent_stop_id):
    stop_id = parent_stop_id
stop_name = df_stops[ df_stops["stop_id"] == stop_id]["stop_name"].iloc[0]
stop_name

'Szent István Bazilika'

Itt látszik, hogy ez így nem lesz OK. Az adatok nem tartalmaznak információt arról, hogy melyik az al-, és melyik a fő megálló

## Melyik útvonalak állnak meg ebben a megállóban?


In [373]:
# One row per route that ever calls at the stop (first matching trip wins).
calls_at_stop = df_stop_times[df_stop_times["stop_id"] == stop_id]
df_megallo_jaratok = calls_at_stop.merge(df_trips, on="trip_id").drop_duplicates("route_id")
df_megallo_jaratok["stop_id"] = stop_id
df_megallo_jaratok["stop_name"] = stop_name

shown_cols = ["stop_id", "stop_name", "route_id", "stop_headsign", "stop_sequence"]
df_megallo_jaratok[shown_cols]


Unnamed: 0,stop_id,stop_name,route_id,stop_headsign,stop_sequence
0,48506,Szent István Bazilika,4720,Zugló vasútállomás,9
544,48506,Szent István Bazilika,9140,"Káposztásmegyer, Mogyoródi-patak",30
558,48506,Szent István Bazilika,9310,Nyugati pályaudvar M ► Zöldmál,2
581,48506,Szent István Bazilika,9141,Újpest-központ M,12
607,48506,Szent István Bazilika,9500,"Rákospalota, Székely Elek út",42
608,48506,Szent István Bazilika,9501,,42
635,48506,Szent István Bazilika,90,"Óbuda, Bogdáni út",17


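## Sajnos nem világos, miért nem lehet két mezőt könnyen selectezni logikai operátorokkal

Megjegyzés: a Python `and` operátora nem elemenként dolgozik a pandas Series-eken; két feltételt a `&` operátorral, a feltételeket zárójelezve lehet kombinálni (lásd lent: `a & b`).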

In [374]:
#df_megallo_jaratok = pd.merge( df_stop_times[ df_stop_times["stop_id"] == stop_id and df_stop_times["arrival_time_min"] > 60*8+15 ] , df_trips, left_on="trip_id", right_on="trip_id").drop_duplicates("route_id")
#df_megallo_jaratok["stop_id"]=stop_id
#df_megallo_jaratok["stop_name"]=stop_name

#df_megallo_jaratok[["stop_id","stop_name","route_id","stop_headsign","stop_sequence"]]

In [375]:
 # Boolean mask: rows of stop_times that belong to the selected stop.
 a = df_stop_times["stop_id"].eq(stop_id)
 a

0          False
1          False
2          False
3          False
4          False
           ...  
5752577    False
5752578    False
5752579    False
5752580    False
5752581    False
Name: stop_id, Length: 5752582, dtype: bool

In [376]:
 # Boolean mask: rows arriving later than 08:15 (expressed in minutes).
 b = df_stop_times["arrival_time_min"].gt(60*8+15)
 b

0          False
1          False
2           True
3           True
4           True
           ...  
5752577     True
5752578     True
5752579     True
5752580     True
5752581     True
Name: arrival_time_min, Length: 5752582, dtype: bool

In [377]:
a & b

0          False
1          False
2          False
3          False
4          False
           ...  
5752577    False
5752578    False
5752579    False
5752580    False
5752581    False
Length: 5752582, dtype: bool

## Melyik járatok állnak meg ebben a megállóban reggel 9:00 körül (1)

In [378]:
# Calls at the stop whose arrival string starts with "09", i.e. between 09:00 and 09:59.
calls_in_hour = df_stop_times.query(f'stop_id == "{stop_id}" and arrival_time.str.startswith("09").values')
df_megallo_jaratok = (
    calls_in_hour
    .merge(df_trips, on='trip_id')
    .sort_values(["arrival_time"])
    .drop_duplicates("route_id")
)
df_megallo_jaratok["stop_id"] = stop_id
df_megallo_jaratok["stop_name"] = stop_name

shown_cols = ["stop_id", "stop_name", "route_id", "arrival_time", "arrival_time_min", "stop_headsign", "stop_sequence"]
df_megallo_jaratok[shown_cols]

Unnamed: 0,stop_id,stop_name,route_id,arrival_time,arrival_time_min,stop_headsign,stop_sequence
54,48506,Szent István Bazilika,90,09:00:00,540,"Óbuda, Bogdáni út",17
18,48506,Szent István Bazilika,4720,09:00:00,540,Zugló vasútállomás,9


## Melyik járatok állnak meg ebben a megállóban reggel 10:00 körül (2)


In [379]:
# String-based variant: the comparison with "10:00:00" is lexicographic, so it
# relies on the arrival_time strings being zero-padded.
calls_after_ten = df_stop_times.query(f'stop_id == "{stop_id}" and arrival_time > "10:00:00"')
df_megallo_jaratok = pd.merge(calls_after_ten, df_trips, on="trip_id")
df_megallo_jaratok = df_megallo_jaratok.sort_values(["arrival_time"]).drop_duplicates("route_id")
df_megallo_jaratok["stop_id"] = stop_id
df_megallo_jaratok["stop_name"] = stop_name

shown_cols = ["stop_id", "stop_name", "route_id", "arrival_time", "arrival_time_min", "stop_headsign", "stop_sequence"]
df_megallo_jaratok[shown_cols]

Unnamed: 0,stop_id,stop_name,route_id,arrival_time,arrival_time_min,stop_headsign,stop_sequence
3,48506,Szent István Bazilika,4720,10:01:00,601,Zugló vasútállomás,9
1093,48506,Szent István Bazilika,90,10:06:00,606,"Óbuda, Bogdáni út",17
401,48506,Szent István Bazilika,9500,24:09:00,1449,"Rákospalota, Székely Elek út",12
486,48506,Szent István Bazilika,9140,24:19:00,1459,"Káposztásmegyer, Mogyoródi-patak",30


## Most inkább nem string-et keresünk, hanem számot
Kétszer olyan gyors lett a futtatás

In [380]:
# Numeric variant: filter AND sort on the integer minute column. The sort used
# to run on the arrival_time string, which orders lexicographically and would
# make drop_duplicates keep the wrong "earliest" trip for any time that is not
# zero-padded; the earlier cells of this notebook already sort on
# arrival_time_min.
calls_after_ten = df_stop_times.query(f'stop_id == "{stop_id}" and arrival_time_min > 60*10')
df_megallo_jaratok = (
    pd.merge(calls_after_ten, df_trips, on="trip_id")
    .sort_values(["arrival_time_min"])
    .drop_duplicates("route_id")
)
df_megallo_jaratok["stop_id"]=stop_id
df_megallo_jaratok["stop_name"]=stop_name

df_megallo_jaratok[["stop_id","stop_name","route_id","arrival_time", "arrival_time_min", "stop_headsign","stop_sequence"]]

Unnamed: 0,stop_id,stop_name,route_id,arrival_time,arrival_time_min,stop_headsign,stop_sequence
3,48506,Szent István Bazilika,4720,10:01:00,601,Zugló vasútállomás,9
1093,48506,Szent István Bazilika,90,10:06:00,606,"Óbuda, Bogdáni út",17
401,48506,Szent István Bazilika,9500,24:09:00,1449,"Rákospalota, Székely Elek út",12
486,48506,Szent István Bazilika,9140,24:19:00,1459,"Káposztásmegyer, Mogyoródi-patak",30


## Most kiegészítjük az útvonal azonosítóval is (járatszám)

Ez még egy join

In [381]:
# One more join: routes.txt supplies the public route number (route_short_name).
df_megallo_jaratok2 = df_megallo_jaratok.merge(df_routes, on="route_id")

shown_cols = ["stop_id", "stop_name", "route_id", "arrival_time", "route_short_name", "stop_headsign", "stop_sequence"]
df_megallo_jaratok2[shown_cols]

Unnamed: 0,stop_id,stop_name,route_id,arrival_time,route_short_name,stop_headsign,stop_sequence
0,48506,Szent István Bazilika,4720,10:01:00,72,Zugló vasútállomás,9
1,48506,Szent István Bazilika,90,10:06:00,9,"Óbuda, Bogdáni út",17
2,48506,Szent István Bazilika,9500,24:09:00,950,"Rákospalota, Székely Elek út",12
3,48506,Szent István Bazilika,9140,24:19:00,914,"Káposztásmegyer, Mogyoródi-patak",30


In [382]:
# "F00933" used to be assigned here and then immediately overwritten; it is
# kept only as a commented alternative.
#stop_id = "F00933"
stop_id = "F01029"

# parent_station of the first stops.txt row matching the stop (NaN when none is given).
parent_stop_id = df_stops.loc[df_stops["stop_id"] == stop_id, "parent_station"].iloc[0]
parent_stop_id

nan

Melyek azok az állomások amiknek van főmegállója?

# KÖZELI ÁTSZÁLLÁSI PONTOK

Úgy tűnik, ezeket földrajzi koordináták alapján kell megkeresni.

In [383]:

# Build the point geometries in one vectorised call instead of creating a
# Python Point object per row; x is the longitude, y is the latitude.
stop_points = gpd.points_from_xy(df_stops["stop_lon"], df_stops["stop_lat"])

# Stops as a GeoDataFrame in WGS84 (EPSG:4326), the CRS of the raw lat/lon values.
gdf_stops = gpd.GeoDataFrame(df_stops, geometry=stop_points, crs="EPSG:4326")
gdf_stops.head(5)

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,stop_code,location_type,location_sub_type,parent_station,wheelchair_boarding,geometry
0,2133,"Örs vezér tere M+H, déli tárolótér",47.500366,19.1357,2133,,,,,POINT (19.1357 47.50037)
1,2138,Kőbánya alsó vasútállomás,47.483139,19.127891,2138,,,,2.0,POINT (19.12789 47.48314)
2,3002,Puskás Ferenc Stadion M,47.500368,19.103406,3002,,,,,POINT (19.10341 47.50037)
3,4716,"ÉD metró járműtelep,porta",47.469651,19.12909,4716,,,,2.0,POINT (19.12909 47.46965)
4,4948,Metró ÉD járműtelep (kapu),47.465239,19.142612,4948,,,,,POINT (19.14261 47.46524)


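Az EPSG:4326 nem jó távolságmérésre, mert földrajzi (fokban megadott) koordinátákat használ, így a távolságok sem méterben jönnének ki.
Az EPSG:3035 (ETRS89 / LAEA Europe) viszont méter alapú, egész Európára használható vetület. Ezért átmegyek arra: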

In [384]:
# Reproject the stops to EPSG:3035 so that geometry distances are no longer in degrees.
gdf_stops = gdf_stops.to_crs("epsg:3035")
gdf_stops.head(5)

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,stop_code,location_type,location_sub_type,parent_station,wheelchair_boarding,geometry
0,2133,"Örs vezér tere M+H, déli tárolótér",47.500366,19.1357,2133,,,,,POINT (5007781.301 2752100.781)
1,2138,Kőbánya alsó vasútállomás,47.483139,19.127891,2138,,,,2.0,POINT (5007426.257 2750125.866)
2,3002,Puskás Ferenc Stadion M,47.500368,19.103406,3002,,,,,POINT (5005367.638 2751801.574)
3,4716,"ÉD metró járműtelep,porta",47.469651,19.12909,4716,,,,2.0,POINT (5007694.861 2748647.499)
4,4948,Metró ÉD járműtelep (kapu),47.465239,19.142612,4948,,,,,POINT (5008764.678 2748285.946)


Most a Nyugatihoz viszonyítva fogunk közeli megállókat keresni

In [385]:
# Reference stop for the proximity search.
stop_id = "F01029" # Nyugati
# Single-row GeoDataFrame holding only the reference stop.
gdf_nyugati = gdf_stops [ gdf_stops["stop_id"] == stop_id ]
# NOTE(review): gdf_stops was already converted to EPSG:3035 in the previous
# cell, so this conversion looks redundant — confirm before removing it.
gdf_megallo = gdf_nyugati.to_crs(epsg=3035)
gdf_megallo

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,stop_code,location_type,location_sub_type,parent_station,wheelchair_boarding,geometry
2579,F01029,Nyugati pályaudvar M,47.509806,19.057151,F01029,,,,1.0,POINT (5001786.02 2752416.904)


In [386]:
# Pair the reference stop (left) with the stops around it (right). The
# measured distance is stored in the "distance" column, in the units of the
# frames' CRS; max_distance is expressed in the same units.
gdf_nearest_stops = gdf_megallo.sjoin_nearest(
    gdf_stops,
    how="right",
    max_distance=500,
    distance_col="distance",
    exclusive=True,
)
gdf_nearest_stops.columns
#gdf_nearest_stops[["stop_id_left","stop_name_left","stop_lat_left","stop_lon_left","stop_id_right","stop_name_right","stop_lat_right","stop_lon_right","distance"]]


Index(['index_left', 'stop_id_left', 'stop_name_left', 'stop_lat_left',
       'stop_lon_left', 'stop_code_left', 'location_type_left',
       'location_sub_type_left', 'parent_station_left',
       'wheelchair_boarding_left', 'stop_id_right', 'stop_name_right',
       'stop_lat_right', 'stop_lon_right', 'stop_code_right',
       'location_type_right', 'location_sub_type_right',
       'parent_station_right', 'wheelchair_boarding_right', 'geometry',
       'distance'],
      dtype='object')

In [387]:
# Show up to 40 matched stops ordered by increasing distance from the
# reference stop, restricted to the identifying columns of both sides of the
# join plus the computed distance.
gdf_nearest_stops.nsmallest(n=40, columns="distance")[
    [
        "stop_id_left",
        "stop_name_left",
        "stop_lat_left",
        "stop_lon_left",
        "stop_id_right",
        "stop_name_right",
        "stop_lat_right",
        "stop_lon_right",
        "distance",
    ]
]

Unnamed: 0,stop_id_left,stop_name_left,stop_lat_left,stop_lon_left,stop_id_right,stop_name_right,stop_lat_right,stop_lon_right,distance
2503,F01029,Nyugati pályaudvar M,47.509806,19.057151,F00935,Nyugati pályaudvar M,47.509922,19.056658,39.253332
6099,F01029,Nyugati pályaudvar M,47.509806,19.057151,LM3NYI,Nyugati pályaudvar [I],47.50987,19.056404,56.669957
2502,F01029,Nyugati pályaudvar M,47.509806,19.057151,F00934,Nyugati pályaudvar M,47.509896,19.056394,57.835671
6095,F01029,Nyugati pályaudvar M,47.509806,19.057151,LM3NYE,Nyugati pályaudvar [E],47.510447,19.056426,89.626772
6098,F01029,Nyugati pályaudvar M,47.509806,19.057151,LM3NYH,Nyugati pályaudvar [H],47.510233,19.056103,91.944253
2501,F01029,Nyugati pályaudvar M,47.509806,19.057151,F00933,Nyugati pályaudvar M,47.5103,19.056167,92.071854
6097,F01029,Nyugati pályaudvar M,47.509806,19.057151,LM3NYG,Nyugati pályaudvar [G],47.510326,19.056196,92.107868
6090,F01029,Nyugati pályaudvar M,47.509806,19.057151,LM3NY3,Nyugati pályaudvar (lift » M3),47.51004,19.055784,106.075445
129,F01029,Nyugati pályaudvar M,47.509806,19.057151,008137,Nyugati pályaudvar,47.510571,19.056072,117.41862
6096,F01029,Nyugati pályaudvar M,47.509806,19.057151,LM3NYF,Nyugati pályaudvar [F],47.510703,19.056257,120.137919
