In [1]:
from scipy.spatial import KDTree
from geopy.distance import geodesic
import pandas as pd

In [2]:
gh_df = (
    pd.read_csv("datasets/15_gh_info/geohash_traffic_density_pt_15clusters.csv", usecols=['GEOHASH', 'LATITUDE', 'LONGITUDE'])
)

# Reading Istanbul_landmarks dataset and converting to dictionary
# keys are landmark names and values are its coordinates
site_dict = (
    pd.read_csv("datasets/12_tr_ist_tourist_sites/ist_sites.csv")
    .drop('site_address', axis=1)
    .set_index('site_name')
    .T
    .to_dict()
)

site_dict

{'5 Temmuz Şehitler Köprüsü': {'latitude': 41.039928, 'longitude': 29.0402745},
 '500. Yıl Vakfı Türk Musevileri Müzesi': {'latitude': 41.0267866,
  'longitude': 28.9727283},
 'Atik Valide Mosque': {'latitude': 41.0197939,
  'longitude': 29.02915605209612},
 'Basilica Cistern': {'latitude': 41.00840235, 'longitude': 28.97792248031497},
 'Beylerbeyi Palace': {'latitude': 41.04268765,
  'longitude': 29.04000176704749},
 'Blue Mosque': {'latitude': 41.0054279, 'longitude': 28.97664968333884},
 'Deniz Müzesi': {'latitude': 41.0410591, 'longitude': 29.005691266268045},
 'Dolmabahçe Palace': {'latitude': 41.03896085,
  'longitude': 28.99973803473693},
 'Egyptian Spice Bazaar': {'latitude': 41.01641785,
  'longitude': 28.970530846069572},
 'Eyüp Sultan Mosque': {'latitude': 41.04801125,
  'longitude': 28.93374562334459},
 'Fatih Sultan Mehmet Köprüsü': {'latitude': 41.0913149,
  'longitude': 29.06197573589518},
 'Galata Bridge': {'latitude': 41.0182562, 'longitude': 28.9717712},
 'Galata Mevl

In [3]:
# Unique list of coordinates
coordinates = gh_df[['LATITUDE', 'LONGITUDE']].drop_duplicates().values
coordinates

array([[41.080627, 28.811646],
       [40.987244, 29.108276],
       [41.003723, 29.09729 ],
       ...,
       [40.976257, 29.229126],
       [41.009216, 27.98767 ],
       [41.020203, 27.998657]])

In [4]:
def find_nearest_coordinates(
        target_lat: float,
        target_lon: float,
        coordinates: list[list[float, float]]
        ) -> list[tuple[tuple[float, float], float]]:
    """
    Finds the nearest coordinates to a target latitude and longitude.

    Args:
        target_lat (float): The latitude of the target location.
        target_lon (float): The longitude of the target location.
        coordinates (List[Tuple[float, float]]): A list of tuples representing the coordinates to search from.

    Returns:
        List[Tuple[Tuple[float, float], float]]: A list of tuples containing the nearest coordinates and their distances in kilometers.
    """
    target_coords = (target_lat, target_lon)
    
    # Build a KDTree for efficient nearest neighbor search
    tree = KDTree(coordinates)

    # Calculate the search radius in degrees
    radius_deg = 1 / 111.0  # 1 km radius in degrees

    # Query the KDTree for indices of points within the search radius
    indices = tree.query_ball_point([(target_lat, target_lon)], radius_deg)

    # Calculate the distance between target coordinates and nearest coordinates
    nearest_coordinates = [(coordinates[i], geodesic(target_coords, coordinates[i]).km) for i in indices[0]]

    # Sort the nearest coordinates by distance
    nearest_coordinates.sort(key=lambda x: x[1])  # Sort by distance

    return nearest_coordinates

In [5]:
%%time

hist_list = []
new_list = []

for site in site_dict:

    target_lat = site_dict[site]['latitude']
    target_lon = site_dict[site]['longitude']

    nearest_coords = find_nearest_coordinates(target_lat, target_lon, coordinates)

    print(f"Nearest Coordinates to {site}")
    print("---------------------------------------------------------")

    for i, (coords, distance) in enumerate(nearest_coords):

        gh = gh_df.loc[gh_df['LATITUDE'].eq(coords[0]) & gh_df['LONGITUDE'].eq(coords[1]), 'GEOHASH'].values[0]
        print(f"Nearest Coordinate {i+1}: {coords[0]}, {coords[1]}, Distance={distance:.2f} km, GEOHASH: {gh}")

        # Creating two dataframe which will later be merged rows wise for visualization purpose.
        hist_list.append({
            "type" : "landmark",
            "name" : site,
            "latitude" : target_lat,
            "longitude" : target_lon,
            "nearest" : site
        })

        new_list.append({
            "type" : "geohash",
            "name" : gh,
            "latitude" : coords[0],
            "longitude" : coords[1],
            "nearest" : site
        })

    print()

Nearest Coordinates to 5 Temmuz Şehitler Köprüsü
---------------------------------------------------------
Nearest Coordinate 1: 41.042175, 29.042358, Distance=0.30 km, GEOHASH: sxk9kz
Nearest Coordinate 2: 41.036682, 29.042358, Distance=0.40 km, GEOHASH: sxk9ky
Nearest Coordinate 3: 41.03119, 29.042358, Distance=0.99 km, GEOHASH: sxk9kv

Nearest Coordinates to 500. Yıl Vakfı Türk Musevileri Müzesi
---------------------------------------------------------
Nearest Coordinate 1: 41.025696, 28.97644, Distance=0.33 km, GEOHASH: sxk97k
Nearest Coordinate 2: 41.03119, 28.97644, Distance=0.58 km, GEOHASH: sxk97m
Nearest Coordinate 3: 41.025696, 28.965454, Distance=0.62 km, GEOHASH: sxk97h
Nearest Coordinate 4: 41.03119, 28.965454, Distance=0.78 km, GEOHASH: sxk97j
Nearest Coordinate 5: 41.020203, 28.97644, Distance=0.79 km, GEOHASH: sxk977

Nearest Coordinates to Atik Valide Mosque
---------------------------------------------------------
Nearest Coordinate 1: 41.020203, 29.031372, Distance=0

In [6]:
df = (
    pd.concat([pd.DataFrame(new_list), pd.DataFrame(hist_list)], ignore_index=True)
    .drop_duplicates()
)

In [7]:
df

Unnamed: 0,type,name,latitude,longitude,nearest
0,geohash,sxk9kz,41.042175,29.042358,5 Temmuz Şehitler Köprüsü
1,geohash,sxk9ky,41.036682,29.042358,5 Temmuz Şehitler Köprüsü
2,geohash,sxk9kv,41.031190,29.042358,5 Temmuz Şehitler Köprüsü
3,geohash,sxk97k,41.025696,28.976440,500. Yıl Vakfı Türk Musevileri Müzesi
4,geohash,sxk97m,41.031190,28.976440,500. Yıl Vakfı Türk Musevileri Müzesi
...,...,...,...,...,...
262,landmark,İBB 15 Temmuz Şehitler Ormanı ve Anıtı,41.207560,29.095861,İBB 15 Temmuz Şehitler Ormanı ve Anıtı
264,landmark,İstanbul Arkeoloji Müzeleri,41.011699,28.981323,İstanbul Arkeoloji Müzeleri
267,landmark,İstanbul Atatürk Havalimanı,40.977497,28.825494,İstanbul Atatürk Havalimanı
269,landmark,İstanbul Modern Museum,41.025911,28.982812,İstanbul Modern Museum


In [8]:
# some GEOHASH has around 8 landmarks within its 1km radius
(
    df.query("type=='geohash'")
    .groupby('name')
    .size()
    .nlargest(3)
)

name
sxk976    8
sxk97k    8
sxk977    7
dtype: int64

In [9]:
df.query("name=='sxk976'")

Unnamed: 0,type,name,latitude,longitude,nearest
13,geohash,sxk976,41.01471,28.97644,Basilica Cistern
25,geohash,sxk976,41.01471,28.97644,Egyptian Spice Bazaar
35,geohash,sxk976,41.01471,28.97644,Galata Bridge
46,geohash,sxk976,41.01471,28.97644,Gülhane Parkı
52,geohash,sxk976,41.01471,28.97644,Hagia Sophia
79,geohash,sxk976,41.01471,28.97644,Museum of Turkish & Islamic Arts
106,geohash,sxk976,41.01471,28.97644,Topkapı Palace
125,geohash,sxk976,41.01471,28.97644,İstanbul Arkeoloji Müzeleri


In [10]:
df

Unnamed: 0,type,name,latitude,longitude,nearest
0,geohash,sxk9kz,41.042175,29.042358,5 Temmuz Şehitler Köprüsü
1,geohash,sxk9ky,41.036682,29.042358,5 Temmuz Şehitler Köprüsü
2,geohash,sxk9kv,41.031190,29.042358,5 Temmuz Şehitler Köprüsü
3,geohash,sxk97k,41.025696,28.976440,500. Yıl Vakfı Türk Musevileri Müzesi
4,geohash,sxk97m,41.031190,28.976440,500. Yıl Vakfı Türk Musevileri Müzesi
...,...,...,...,...,...
262,landmark,İBB 15 Temmuz Şehitler Ormanı ve Anıtı,41.207560,29.095861,İBB 15 Temmuz Şehitler Ormanı ve Anıtı
264,landmark,İstanbul Arkeoloji Müzeleri,41.011699,28.981323,İstanbul Arkeoloji Müzeleri
267,landmark,İstanbul Atatürk Havalimanı,40.977497,28.825494,İstanbul Atatürk Havalimanı
269,landmark,İstanbul Modern Museum,41.025911,28.982812,İstanbul Modern Museum


In [11]:
df.to_csv("datasets/13_gh_proximities/gh_nearby_landmarks.csv", index=False)