In [5]:
from scipy.spatial import KDTree
from geopy.distance import geodesic
import pandas as pd
import requests
import json

### DATA SOURCE: https://data.ibb.gov.tr/en/dataset/all-isbike-stations-web-service

In [6]:
isbike_url = "https://api.ibb.gov.tr/ispark-bike/GetAllStationStatus"

r = requests.get(isbike_url)
print(r)

<Response [200]>


In [23]:
isbike_stn = (
    pd.DataFrame(json.loads(r.text)['dataList'])[['adi', 'lat', 'lon']]
    .assign(lat=lambda x: x['lat'].astype('float64'),
            lon=lambda x: x['lon'].astype('float64'))
    .rename(columns={'adi' : 'bike_stnd_name', 'lat' : 'latitude', 'lon' : 'longitude'})
)

isbike_stn

Unnamed: 0,bike_stnd_name,latitude,longitude
0,Bostancı İDO - 2,40.952389,29.090669
1,Caddebostan Migros,40.963240,29.066482
2,Bostancı İDO - 1,40.952973,29.090108
3,Maltepe Kültür Merkezi-1,40.946598,29.101653
4,Süreyya Paşa Plajı,40.924904,29.123932
...,...,...,...
187,Sarıyer Balıkçılar Çarşısı,41.159263,29.042270
188,Maltepe İETT Durağı - 2,40.917346,29.130391
189,İPA,40.974846,28.785638
190,Maltepe Dolgu Alanı Yalı Boyu,40.924996,29.117321


In [4]:
gh_df = (
    pd.read_csv("datasets/15_gh_info/geohash_traffic_density_pt_15clusters.csv", usecols=['GEOHASH', 'LATITUDE', 'LONGITUDE'])
)

gh_df.head()

Unnamed: 0,GEOHASH,LATITUDE,LONGITUDE
0,sxk3xw,41.080627,28.811646
1,sxk9nm,40.987244,29.108276
2,sxk9q0,41.003723,29.09729
3,sxk3hx,40.99823,28.67981
4,sx7cmx,41.042175,28.02063


In [25]:
site_dict = (
    isbike_stn.set_index('bike_stnd_name')
    .T.to_dict()
)

In [26]:
# Unique list of coordinates
coordinates = gh_df[['LATITUDE', 'LONGITUDE']].drop_duplicates().values
coordinates

array([[41.080627, 28.811646],
       [40.987244, 29.108276],
       [41.003723, 29.09729 ],
       ...,
       [40.976257, 29.229126],
       [41.009216, 27.98767 ],
       [41.020203, 27.998657]])

In [27]:
def find_nearest_coordinates(
        target_lat: float,
        target_lon: float,
        coordinates: list[list[float, float]]
        ) -> list[tuple[tuple[float, float], float]]:
    """
    Finds the nearest coordinates to a target latitude and longitude.

    Args:
        target_lat (float): The latitude of the target location.
        target_lon (float): The longitude of the target location.
        coordinates (List[Tuple[float, float]]): A list of tuples representing the coordinates to search from.

    Returns:
        List[Tuple[Tuple[float, float], float]]: A list of tuples containing the nearest coordinates and their distances in kilometers.
    """
    target_coords = (target_lat, target_lon)
    
    # Build a KDTree for efficient nearest neighbor search
    tree = KDTree(coordinates)

    # Calculate the search radius in degrees
    radius_deg = 1 / 111.0  # 1 km radius in degrees

    # Query the KDTree for indices of points within the search radius
    indices = tree.query_ball_point([(target_lat, target_lon)], radius_deg)

    # Calculate the distance between target coordinates and nearest coordinates
    nearest_coordinates = [(coordinates[i], geodesic(target_coords, coordinates[i]).km) for i in indices[0]]

    # Sort the nearest coordinates by distance
    nearest_coordinates.sort(key=lambda x: x[1])  # Sort by distance

    return nearest_coordinates

In [28]:
isbike_list = []
new_list = []

for site in site_dict:

    target_lat = site_dict[site]['latitude']
    target_lon = site_dict[site]['longitude']

    nearest_coords = find_nearest_coordinates(target_lat, target_lon, coordinates)

    for i, (coords, distance) in enumerate(nearest_coords):

        gh = gh_df.loc[gh_df['LATITUDE'].eq(coords[0]) & gh_df['LONGITUDE'].eq(coords[1]), 'GEOHASH'].values[0]

        # Creating two dataframe which will later be merged rows wise for visualization purpose.
        isbike_list.append({
            "type" : "isbike_stn",
            "name" : site,
            "latitude" : target_lat,
            "longitude" : target_lon,
            "nearest" : site
        })

        new_list.append({
            "type" : "geohash",
            "name" : gh,
            "latitude" : coords[0],
            "longitude" : coords[1],
            "nearest" : site
        })

In [30]:
df = (
    pd.concat([pd.DataFrame(new_list), pd.DataFrame(isbike_list)], ignore_index=True)
    .drop_duplicates(ignore_index=True)
)

df

Unnamed: 0,type,name,latitude,longitude,nearest
0,geohash,sxk8vz,40.954285,29.086304,Bostancı İDO - 2
1,geohash,sxk8yp,40.954285,29.097290,Bostancı İDO - 2
2,geohash,sxk8yn,40.948790,29.097290,Bostancı İDO - 2
3,geohash,sxk9jb,40.959778,29.086304,Bostancı İDO - 2
4,geohash,sxk9j3,40.965270,29.064331,Caddebostan Migros
...,...,...,...,...,...
627,isbike_stn,Futbol Sahası - 2,40.940609,29.108244,Futbol Sahası - 2
628,isbike_stn,Sarıyer Balıkçılar Çarşısı,41.159263,29.042270,Sarıyer Balıkçılar Çarşısı
629,isbike_stn,Maltepe İETT Durağı - 2,40.917346,29.130391,Maltepe İETT Durağı - 2
630,isbike_stn,Maltepe Dolgu Alanı Yalı Boyu,40.924996,29.117321,Maltepe Dolgu Alanı Yalı Boyu


In [31]:
# Some GEOHASH has up to 15 ISBIKE stands within its 1km radius
(
    df.query("type=='geohash'")
    .groupby('name')['nearest'].nunique()
    .nlargest(10)
)

name
sxk8yd    15
sxk8ye    11
sxk8yc    10
sxk8yb     9
sxk8yf     8
sxkb92     8
sxk9hm     7
sxkb93     7
sxk8ys     6
sxkb91     6
Name: nearest, dtype: int64

In [32]:
df.to_csv("datasets/13_gh_proximities/gh_nearby_isbike_stnds.csv", index=False)