In [1]:
import math

import pandas as pd
from geopy.distance import geodesic
from scipy.spatial import KDTree

In [2]:
# Geohash table: only the identifier and its latitude/longitude columns are needed.
geohash_columns = ['GEOHASH', 'LATITUDE', 'LONGITUDE']
gh_df = pd.read_csv("geohash_traffic_density_pt_15clusters.csv", usecols=geohash_columns)

# Football match dataset reduced to its distinct (stadium, lat, long) rows and
# converted to a nested dict: {stadium: {'stad_lat': ..., 'stad_long': ...}}
stadium_columns = ['stadium', 'stad_lat', 'stad_long']
matches_df = pd.read_csv(
    "datasets/05_tr_football_matches/tr_ist_2019_2023_football_matches.csv",
    usecols=stadium_columns,
)
stadiums_df = matches_df.drop_duplicates(ignore_index=True).set_index('stadium')
site_dict = stadiums_df.T.to_dict()

In [3]:
# Distinct (LATITUDE, LONGITUDE) pairs from the geohash table, as an (n, 2) array
unique_lat_lon = gh_df.loc[:, ['LATITUDE', 'LONGITUDE']].drop_duplicates()
coordinates = unique_lat_lon.to_numpy()
coordinates

array([[41.080627, 28.811646],
       [40.987244, 29.108276],
       [41.003723, 29.09729 ],
       ...,
       [40.976257, 29.229126],
       [41.009216, 27.98767 ],
       [41.020203, 27.998657]])

In [4]:
def find_nearest_coordinates(
        target_lat: float,
        target_lon: float,
        coordinates: list[tuple[float, float]],
        radius_km: float = 1.0
        ) -> list[tuple[tuple[float, float], float]]:
    """
    Finds the coordinates lying within ``radius_km`` of a target location.

    A KDTree over the (latitude, longitude) pairs is used to pre-select
    candidates; the final decision and the reported distance both come from the
    geodesic distance, so the result is a true circle in kilometres rather than
    a circle in degree space (which is squashed east-west away from the equator).

    Args:
        target_lat (float): The latitude of the target location, in degrees.
        target_lon (float): The longitude of the target location, in degrees.
        coordinates (List[Tuple[float, float]]): (latitude, longitude) pairs to
            search from. Any indexable sequence of pairs works, including a
            2-column NumPy array.
        radius_km (float): Search radius in kilometres. Defaults to 1.0.
            The candidate pre-selection assumes a small radius (a few km) and
            does not handle wrap-around at the +/-180 degree meridian.

    Returns:
        List[Tuple[Tuple[float, float], float]]: (coordinate, distance_km)
        tuples for every coordinate within ``radius_km``, sorted by ascending
        distance. Each coordinate is the element taken from ``coordinates``.
        Empty when nothing is in range or ``coordinates`` is empty.

    Raises:
        ValueError: If ``radius_km`` is negative.
    """
    if radius_km < 0:
        raise ValueError("radius_km must be non-negative")

    # KDTree cannot be built from an empty point set.
    if len(coordinates) == 0:
        return []

    target_coords = (target_lat, target_lon)

    # One degree of longitude spans cos(latitude) times the distance of one
    # degree of latitude. Scaling longitudes makes Euclidean distance in the
    # tree roughly proportional to ground distance in every direction.
    lon_scale = max(math.cos(math.radians(target_lat)), 1e-9)
    scaled_points = [(lat, lon * lon_scale) for lat, lon in coordinates]

    # Build a KDTree for efficient nearest neighbor search
    tree = KDTree(scaled_points)

    # Deliberately generous candidate radius: 110 km is below the length of a
    # degree of arc anywhere on Earth, plus a 5% margin. Exact filtering follows.
    candidate_radius_deg = 1.05 * radius_km / 110.0

    candidate_indices = tree.query_ball_point(
        [target_lat, target_lon * lon_scale], candidate_radius_deg
    )

    # Keep only candidates whose geodesic distance is really within the radius
    nearest_coordinates = []
    for i in candidate_indices:
        distance_km = geodesic(target_coords, coordinates[i]).km
        if distance_km <= radius_km:
            nearest_coordinates.append((coordinates[i], distance_km))

    # Sort the nearest coordinates by distance
    nearest_coordinates.sort(key=lambda x: x[1])

    return nearest_coordinates

In [5]:
# Lookup from a (LATITUDE, LONGITUDE) pair to its GEOHASH, built once instead of
# scanning gh_df for every neighbour. When a pair occurs more than once, the
# first GEOHASH is kept.
geohash_by_coords = (
    gh_df
    .drop_duplicates(subset=['LATITUDE', 'LONGITUDE'])
    .set_index(['LATITUDE', 'LONGITUDE'])['GEOHASH']
    .to_dict()
)

# Two record lists that are later stacked row-wise for visualization purposes.
stad_list = []
new_list = []

for site in site_dict:

    target_lat = site_dict[site]['stad_lat']
    target_lon = site_dict[site]['stad_long']

    nearest_coords = find_nearest_coordinates(target_lat, target_lon, coordinates)

    print(f"Nearest Coordinates to {site}")
    print("---------------------------------------------------------")

    # A stadium is recorded once, and only when at least one geohash is nearby
    # (stadiums without neighbours are left out of the stadium list).
    if len(nearest_coords) > 0:
        stad_list.append({
            "type" : "stadium",
            "name" : site,
            "latitude" : target_lat,
            "longitude" : target_lon,
            "nearest" : site
        })

    for i, (coords, distance) in enumerate(nearest_coords):

        gh = geohash_by_coords[(coords[0], coords[1])]
        print(f"Nearest Coordinate {i+1}: {coords[0]}, {coords[1]}, Distance={distance:.2f} km, GEOHASH: {gh}")

        new_list.append({
            "type" : "geohash",
            "name" : gh,
            "latitude" : coords[0],
            "longitude" : coords[1],
            "nearest" : site
        })

    print()

Nearest Coordinates to BAŞAKŞEHİR FATİH TERİM
---------------------------------------------------------
Nearest Coordinate 1: 41.124573, 28.811646, Distance=0.27 km, GEOHASH: sxk3zw
Nearest Coordinate 2: 41.11908, 28.811646, Distance=0.46 km, GEOHASH: sxk3zt
Nearest Coordinate 3: 41.130066, 28.811646, Distance=0.83 km, GEOHASH: sxk3zx

Nearest Coordinates to RECEP TAYYİP ERDOĞAN STADYUMU
---------------------------------------------------------
Nearest Coordinate 1: 41.03119, 28.97644, Distance=0.38 km, GEOHASH: sxk97m
Nearest Coordinate 2: 41.036682, 28.97644, Distance=0.56 km, GEOHASH: sxk97q
Nearest Coordinate 3: 41.03119, 28.965454, Distance=0.61 km, GEOHASH: sxk97j
Nearest Coordinate 4: 41.036682, 28.965454, Distance=0.73 km, GEOHASH: sxk97n
Nearest Coordinate 5: 41.025696, 28.97644, Distance=0.85 km, GEOHASH: sxk97k

Nearest Coordinates to ÜLKER STADYUMU
---------------------------------------------------------
Nearest Coordinate 1: 40.987244, 29.042358, Distance=0.46 km, GEOHASH

In [6]:
# Stack the geohash records on top of the stadium records, renumber the rows,
# then discard rows that are exact repeats.
geohash_rows = pd.DataFrame(new_list)
stadium_rows = pd.DataFrame(stad_list)
stacked_rows = pd.concat([geohash_rows, stadium_rows], ignore_index=True)
df = stacked_rows.drop_duplicates()

df

Unnamed: 0,type,name,latitude,longitude,nearest
0,geohash,sxk3zw,41.124573,28.811646,BAŞAKŞEHİR FATİH TERİM
1,geohash,sxk3zt,41.11908,28.811646,BAŞAKŞEHİR FATİH TERİM
2,geohash,sxk3zx,41.130066,28.811646,BAŞAKŞEHİR FATİH TERİM
3,geohash,sxk97m,41.03119,28.97644,RECEP TAYYİP ERDOĞAN STADYUMU
4,geohash,sxk97q,41.036682,28.97644,RECEP TAYYİP ERDOĞAN STADYUMU
5,geohash,sxk97j,41.03119,28.965454,RECEP TAYYİP ERDOĞAN STADYUMU
6,geohash,sxk97n,41.036682,28.965454,RECEP TAYYİP ERDOĞAN STADYUMU
7,geohash,sxk97k,41.025696,28.97644,RECEP TAYYİP ERDOĞAN STADYUMU
8,geohash,sxk9hv,40.987244,29.042358,ÜLKER STADYUMU
9,geohash,sxk9ht,40.987244,29.031372,ÜLKER STADYUMU


In [7]:
# Sanity check: count the distinct stadiums linked to each geohash and show the
# five largest counts. A maximum of 1 means no geohash is shared by two stadiums.
geohash_rows_only = df[df['type'] == 'geohash']
stadiums_per_geohash = geohash_rows_only.groupby('name')['nearest'].nunique()
stadiums_per_geohash.nlargest(5)

name
sxk3kg    1
sxk3ku    1
sxk3ws    1
sxk3wt    1
sxk3ww    1
Name: nearest, dtype: int64

In [8]:
# Persist the combined stadium/geohash table; the row index is not written.
output_path = "datasets/13_gh_proximities/gh_nearby_stadiums.csv"
df.to_csv(output_path, index=False)