In [1]:
!pip install pyrosm



## GeoData usage

In [2]:
VERBOSE = False

In [65]:
import geopandas as gpd
import pandas as pd
import pyrosm

from tqdm.notebook import tqdm

tqdm.pandas()

In [None]:
# Initialize the OSM object 
osm = pyrosm.OSM('datasets/pbf/odessa_oblast-latest.osm.pbf')

# See the type
print("Type of 'osm' instance: ", type(osm))

Type of 'osm' instance:  <class 'pyrosm.pyrosm.OSM'>


In [None]:
nodes_net, drive_net = osm.get_network(nodes=True, network_type="driving")
print(nodes_net.shape)
print(drive_net.shape)

(515386, 8)
(552051, 38)


In [None]:
if VERBOSE:
    drive_net.plot()

In [None]:
if VERBOSE:
    nodes_net.plot()

In [78]:
def get_geolocation_df(path_to_df):
    nodes_df = pd.read_csv(path_to_df)
    nodes_df = nodes_df.merge(nodes_net, left_on=['node_start'], right_on=['id'], how='inner')
    nodes_df = nodes_df.merge(nodes_net, left_on=['node_finish'], right_on=['id'], how='inner', suffixes=('_start' ,'_end'))
    geolocation_df = nodes_df.drop(columns=['timestamp_start', 'timestamp_end', 'version_start', 'version_end', 'changeset_start', 'changeset_end', 'id_start', 'id_end', 'geometry_end', 'geometry_start'])
    
    geolocation_df = pd.concat([geolocation_df, geolocation_df['tags_start'].progress_apply(pd.Series)], axis=1)
    geolocation_df = pd.concat([geolocation_df, geolocation_df['tags_end'].progress_apply(pd.Series)], axis=1)
    
    return geolocation_df

In [77]:
geolocation_train_df = get_geolocation_df("datasets/csv/nodes_train.csv")
geolocation_train_df.to_csv('datasets/csv/geolocation_train.csv', index=False)

  0%|          | 0/398688 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [23]:
geolocation_train_df.isna().sum() 

Id                  0
node_start          0
node_finish         0
distance            0
speed            3658
lon_start           0
lat_start           0
tags_start     321021
lon_end             0
lat_end             0
tags_end       320925
dtype: int64

In [24]:
geolocation_train_df

Unnamed: 0,Id,node_start,node_finish,distance,speed,lon_start,lat_start,tags_start,lon_end,lat_end,tags_end
0,-2627062893189810184,10980432,2133368107,17.414917,32.0,30.739248,46.472665,{'highway': 'traffic_signals'},30.739022,46.472682,{'highway': 'crossing'}
1,-6374252502568484586,10980432,2133368107,17.414917,25.0,30.739248,46.472665,{'highway': 'traffic_signals'},30.739022,46.472682,{'highway': 'crossing'}
2,-8332603950975862627,10980432,2133368107,17.414917,30.0,30.739248,46.472665,{'highway': 'traffic_signals'},30.739022,46.472682,{'highway': 'crossing'}
3,6728710084682273587,10980432,2133368107,17.414917,27.0,30.739248,46.472665,{'highway': 'traffic_signals'},30.739022,46.472682,{'highway': 'crossing'}
4,8390209597485549084,10980432,2133368107,17.415147,38.0,30.739248,46.472665,{'highway': 'traffic_signals'},30.739022,46.472682,{'highway': 'crossing'}
...,...,...,...,...,...,...,...,...,...,...,...
398683,-2224898000409909488,7121991801,2395852314,7.103973,,30.660740,46.365771,"{'crossing': 'marked', 'highway': 'crossing'}",30.660648,46.365764,
398684,-2224898000409909488,7121991819,490942781,49.888677,30.0,30.654700,46.365367,"{'crossing': 'marked', 'highway': 'crossing'}",30.654052,46.365324,
398685,-2224898000409909488,7121991853,5224951056,7.375840,,30.649665,46.365012,"{'crossing': 'marked', 'highway': 'crossing'}",30.649569,46.365009,"{'bus': 'yes', 'name': 'Селище Таїрове', 'name..."
398686,-2224898000409909488,7121991854,418739615,1.206030,,30.648214,46.364845,"{'crossing': 'marked', 'highway': 'crossing'}",30.648125,46.364790,


In [79]:
geolocation_test_df = get_geolocation_df("datasets/csv/nodes_test.csv")
geolocation_test_df.to_csv('datasets/csv/geolocation_test.csv', index=False)

  0%|          | 0/81416 [00:00<?, ?it/s]

  0%|          | 0/81416 [00:00<?, ?it/s]

In [80]:
geolocation_test_df.isna().sum() 

Id                    0
node_start            0
node_finish           0
distance              0
speed                 0
                  ...  
crossing:light    81415
supervised        81415
locked            81412
lift_gate:type    81414
vehicle           81415
Length: 127, dtype: int64

In [81]:
geolocation_test_df

Unnamed: 0,Id,node_start,node_finish,distance,speed,lon_start,lat_start,tags_start,lon_end,lat_end,...,foot,traffic_calming,bollard,entrance,crossing:bell,crossing:light,supervised,locked,lift_gate:type,vehicle
0,6198,2059503754,4548172320,95.273001,33.0,30.724095,46.431127,,30.724951,46.431748,...,,,,,,,,,,
1,87982,2059503754,4548172320,43.679520,32.0,30.724095,46.431127,,30.724951,46.431748,...,,,,,,,,,,
2,203684,2059503754,4548172320,95.273001,23.0,30.724095,46.431127,,30.724951,46.431748,...,,,,,,,,,,
3,503657,2059503754,4548172320,95.273001,35.0,30.724095,46.431127,,30.724951,46.431748,...,,,,,,,,,,
4,192389,290801002,4548172320,24.765464,22.0,30.725185,46.431933,{'highway': 'traffic_signals'},30.724951,46.431748,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81411,523327,3127872048,4773233957,38.709740,31.0,30.719983,46.426385,,30.720316,46.426646,...,,,,,,,,,,
81412,525706,2034419504,2034419510,82.856410,15.0,30.723165,46.414473,,30.722073,46.415021,...,,,,,,,,,,
81413,524664,2034419499,2034419508,0.367236,21.0,30.721439,46.414164,,30.721980,46.415003,...,,,,,,,,,,
81414,525706,4775841528,4775841529,48.985825,14.0,30.710972,46.429601,,30.710445,46.428981,...,,,,,,,,,,
