In [4]:
import pandas as pd
import overpy

In [11]:
# Load the bike-station table. The CSV already contains a saved index column,
# which pandas exposes as 'Unnamed: 0'.
df = pd.read_csv("../bicikelj_postaje.csv")

In [12]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,number,contract_name,name,address,banking,bonus,bike_stands,available_bike_stands,available_bikes,status,last_update,position_lat,position_lon
0,59,ljubljana,LIDL BEŽIGRAD,Bežigrad 11,False,False,20,19,1,OPEN,1702663056000,46.063797,14.506854
1,54,ljubljana,ŠMARTINSKI PARK,Smartinska cesta 58,False,False,20,7,13,OPEN,1702663132000,46.065206,14.529911
2,45,ljubljana,SAVSKO NASELJE 1-ŠMARTINSKA CESTA,Savska cesta 1,False,False,20,20,0,OPEN,1702663157000,46.062475,14.524321
3,68,ljubljana,ČRNUČE,Dunajska cesta,False,False,20,17,3,OPEN,1702662921000,46.102446,14.530213
4,11,ljubljana,VILHARJEVA CESTA,Vilharjeva cesta 23,False,False,20,2,18,OPEN,1702663070000,46.06005,14.51302


In [13]:
def count_categories(tags, categories):
    """Count the tag mappings whose 'amenity' value is listed in `categories`.

    Mappings without an 'amenity' key are ignored. Returns 0 when `tags` is
    empty or nothing matches.
    """
    return sum(
        1
        for entry in tags
        if 'amenity' in entry and entry['amenity'] in categories
    )


In [14]:
def num_bus_stations(lat, lon, other_categories=False):
    """
    Count tagged OpenStreetMap nodes within 500 meters of a coordinate.

    A single Overpass query fetches every node inside the radius; nodes
    without any tags are discarded before counting.

    Parameters
    ----------
    lat, lon :
        Coordinates of the centre point; interpolated into the query as-is.
    other_categories : bool, default False
        Selects which result is returned (see Returns).

    Returns
    -------
    int
        When `other_categories` is False: the number of nodes tagged
        ``public_transport=platform``.
    tuple of int
        When `other_categories` is True: amenity counts in the order
        ``(cafes, school, restaurants, health, culture)``.

    Notes
    -----
    Each call performs a network request; any exception raised by
    ``overpy.Overpass().query`` propagates to the caller.
    """
    api = overpy.Overpass()
    # The f-string already interpolates lat/lon; no further formatting is needed.
    query = f"""
            (
            node(around:500,{lat},{lon});
            );out;
            """
    result = api.query(query)

    # Only nodes that carry at least one tag can match any category.
    tags = [node.tags for node in result.nodes if node.tags]

    if other_categories:
        cafes = ['cafe', 'bar', 'pub']
        school = ['university', 'library', 'school']
        # NOTE(review): 'research_institute' and 'student_accomodation' (sic) are
        # counted as restaurants here -- confirm this grouping and spelling.
        restaurants = ['restaurant', 'fast_food', 'research_institute', 'student_accomodation', 'food_court']
        health = ['hospital', 'dentist', 'pharmacy', 'doctors']
        # NOTE(review): verify 'culture_centre' against the OSM amenity values in use.
        culture = ['culture_centre', 'theatre', 'cinema']

        return (
            count_categories(tags, cafes),
            count_categories(tags, school),
            count_categories(tags, restaurants),
            count_categories(tags, health),
            count_categories(tags, culture),
        )

    return sum(1 for t in tags if t.get('public_transport') == 'platform')

In [15]:
# Test for FMF.
lat = 46.042372
lon = 14.490436
cnt = num_bus_stations(lat, lon)
print(cnt)

# Bavarski dvor. lat/lon stay bound to this point after the cell finishes.
lat, lon = 46.05682,14.50551
cnt = num_bus_stations(lat, lon)
print(cnt)

6
15


In [16]:
# Uses the lat/lon still bound from the previous cell, this time requesting
# the amenity counts instead of the platform count.
res = num_bus_stations(lat, lon, other_categories=True)

In [17]:
# Tuple order: (cafes, school, restaurants, health, culture).
res

(33, 0, 38, 2, 2)

In [18]:
# Query Overpass twice per station (once for each return mode of
# num_bus_stations) instead of six times, then fan the counts out into columns.
poi_columns = [
    'num_bus_stations',
    'num_cafes',
    'num_school_stuff',
    'num_restaurants',
    'num_health_stuff',
    'num_culture_stuff',
]
poi_counts = [
    (
        num_bus_stations(station_lat, station_lon),
        *num_bus_stations(station_lat, station_lon, other_categories=True),
    )
    for station_lat, station_lon in zip(df['position_lat'], df['position_lon'])
]
# Reuse df's index so the column assignment below aligns row by row.
poi_features = pd.DataFrame(poi_counts, columns=poi_columns, index=df.index)
for column in poi_columns:
    df[column] = poi_features[column]

In [15]:
# Preview the station table after adding the Overpass-derived counts.
df.head()

Unnamed: 0,number,contract_name,name,address,banking,bonus,bike_stands,available_bike_stands,available_bikes,status,last_update,position_lat,position_lon,num_bus_stations
0,59,ljubljana,LIDL BEŽIGRAD,Bežigrad 11,False,False,20,19,1,OPEN,1702663056000,46.063797,14.506854,13
1,54,ljubljana,ŠMARTINSKI PARK,Smartinska cesta 58,False,False,20,7,13,OPEN,1702663132000,46.065206,14.529911,16
2,45,ljubljana,SAVSKO NASELJE 1-ŠMARTINSKA CESTA,Savska cesta 1,False,False,20,20,0,OPEN,1702663157000,46.062475,14.524321,11
3,68,ljubljana,ČRNUČE,Dunajska cesta,False,False,20,17,3,OPEN,1702662921000,46.102446,14.530213,7
4,11,ljubljana,VILHARJEVA CESTA,Vilharjeva cesta 23,False,False,20,2,18,OPEN,1702663070000,46.06005,14.51302,13


In [26]:
# Saved before the rename, so the file keeps the 'name' column.
# index=False: the frame already carries a previously saved index as
# 'Unnamed: 0'; writing the index again adds another unnamed column on every
# save/load round trip.
df.to_csv("bicikelj_postaje.csv", index=False)
# 'station' is the key used to merge with the preprocessed dataset.
df = df.rename(columns={'name': 'station'})
df.head()

Unnamed: 0,number,contract_name,station,address,banking,bonus,bike_stands,available_bike_stands,available_bikes,status,last_update,position_lat,position_lon,num_bus_stations
0,59,ljubljana,LIDL BEŽIGRAD,Bežigrad 11,False,False,20,19,1,OPEN,1702663056000,46.063797,14.506854,13
1,54,ljubljana,ŠMARTINSKI PARK,Smartinska cesta 58,False,False,20,7,13,OPEN,1702663132000,46.065206,14.529911,16
2,45,ljubljana,SAVSKO NASELJE 1-ŠMARTINSKA CESTA,Savska cesta 1,False,False,20,20,0,OPEN,1702663157000,46.062475,14.524321,11
3,68,ljubljana,ČRNUČE,Dunajska cesta,False,False,20,17,3,OPEN,1702662921000,46.102446,14.530213,7
4,11,ljubljana,VILHARJEVA CESTA,Vilharjeva cesta 23,False,False,20,2,18,OPEN,1702663070000,46.06005,14.51302,13


In [21]:
# Load the preprocessed dataset; its 'station' column is the key for the
# merge with the station table.
preprocessed_data = pd.read_csv("../data/data/bicikelj_preprocessed.csv")

In [22]:
# Preview the first rows of the preprocessed dataset.
preprocessed_data.head()

Unnamed: 0.1,Unnamed: 0,timestamp,station,target,time_of_day_sin,time_of_day_cos,temperature,precipitation,snow_depth,cloud_cover,...,bonus,bike_stands,available_bike_stands,available_bikes,status,last_update,position_lat,position_lon,distance_to_center,is_weekend
0,0,2022-08-02 13:35:00,ALEJA - CELOVŠKA CESTA,1,-0.402747,-0.915311,14.9,0.0,0.0,31.0,...,False,20,9,11,OPEN,1702663252000,46.077302,14.482581,3.39777,False
1,1,2022-08-02 13:44:00,ALEJA - CELOVŠKA CESTA,0,-0.438371,-0.898794,14.9,0.0,0.0,31.0,...,False,20,9,11,OPEN,1702663252000,46.077302,14.482581,3.39777,False
2,2,2022-08-02 13:55:00,ALEJA - CELOVŠKA CESTA,0,-0.480989,-0.876727,14.9,0.0,0.0,31.0,...,False,20,9,11,OPEN,1702663252000,46.077302,14.482581,3.39777,False
3,3,2022-08-02 14:05:00,ALEJA - CELOVŠKA CESTA,1,-0.518773,-0.854912,14.9,0.0,0.0,31.0,...,False,20,9,11,OPEN,1702663252000,46.077302,14.482581,3.39777,False
4,4,2022-08-02 14:15:00,ALEJA - CELOVŠKA CESTA,2,-0.55557,-0.83147,14.9,0.0,0.0,31.0,...,False,20,9,11,OPEN,1702663252000,46.077302,14.482581,3.39777,False


In [29]:
# Left join: every preprocessed row is kept and the station-level columns are
# attached by station name. Columns present in both frames receive pandas'
# default _x/_y suffixes.
# validate='many_to_one' raises MergeError if a station name occurs more than
# once in df, instead of silently multiplying the preprocessed rows.
new_preprocessed_data = pd.merge(
    preprocessed_data, df, on='station', how='left', validate='many_to_one'
)

In [30]:
# Preview the merged frame to confirm the station features were attached.
new_preprocessed_data.head()

Unnamed: 0.1,Unnamed: 0,timestamp,station,target,time_of_day_sin,time_of_day_cos,temperature,precipitation,snow_depth,cloud_cover,...,banking_y,bonus_y,bike_stands_y,available_bike_stands_y,available_bikes_y,status_y,last_update_y,position_lat_y,position_lon_y,num_bus_stations
0,0,2022-08-02 13:35:00,ALEJA - CELOVŠKA CESTA,1,-0.402747,-0.915311,14.9,0.0,0.0,31.0,...,False,False,20,9,11,OPEN,1702663252000,46.077302,14.482581,10
1,1,2022-08-02 13:44:00,ALEJA - CELOVŠKA CESTA,0,-0.438371,-0.898794,14.9,0.0,0.0,31.0,...,False,False,20,9,11,OPEN,1702663252000,46.077302,14.482581,10
2,2,2022-08-02 13:55:00,ALEJA - CELOVŠKA CESTA,0,-0.480989,-0.876727,14.9,0.0,0.0,31.0,...,False,False,20,9,11,OPEN,1702663252000,46.077302,14.482581,10
3,3,2022-08-02 14:05:00,ALEJA - CELOVŠKA CESTA,1,-0.518773,-0.854912,14.9,0.0,0.0,31.0,...,False,False,20,9,11,OPEN,1702663252000,46.077302,14.482581,10
4,4,2022-08-02 14:15:00,ALEJA - CELOVŠKA CESTA,2,-0.55557,-0.83147,14.9,0.0,0.0,31.0,...,False,False,20,9,11,OPEN,1702663252000,46.077302,14.482581,10


In [31]:
# index=False: the frame already contains 'Unnamed: 0.1' and 'Unnamed: 0' left
# over from earlier saves; writing the RangeIndex again would add yet another
# unnamed column the next time this file is read.
new_preprocessed_data.to_csv("../data/data/bicikelj_preprocessed_new.csv", index=False)