In [2]:
import requests
import pandas as pd
import numpy as np

# Define the Overpass API URL
overpass_url = "https://www.overpass-api.de/api/interpreter"

# Read the CSV file into a DataFrame
csv_file = "Data/train - Copy.csv"
data = pd.read_csv(csv_file, sep=';', decimal=',')

data.head()

Unnamed: 0,id_titik_mulai,id_titik_akhir
0,21390008,1425033102
1,1677092762,579493410
2,26486694,1930267566
3,1111592522,3775231113
4,5940503398,5940503394


In [12]:
# Create new columns for latitude and longitude
data["start_latitude"] = None
data["start_longitude"] = None

empty_start_elements = []
# Iterate through each row in the DataFrame
for index, row in data.iterrows():
    start_node_id = row["id_titik_mulai"]
    # end_node_id = row["id_titik_akhir"]
    
    # Define the Overpass API queries to retrieve latitude and longitude for the nodes
    start_node_query = f"""
        [out:json];
        node({start_node_id});
        out;
    """
    # end_node_query = f"""
    #     [out:json];
    #     node({end_node_id});
    #     out;
    # """
    
    # Send the requests to the Overpass API
    start_response = requests.get(overpass_url, params={"data": start_node_query})
    # end_response = requests.get(overpass_url, params={"data": end_node_query})
    
    start_data = start_response.json()
    # end_data = end_response.json()
    
    if "elements" in start_data and len(start_data["elements"]) > 0:
        start_node = start_data["elements"][0]
        start_latitude = start_node.get("lat", None)
        start_longitude = start_node.get("lon", None)
        data.at[index, "start_latitude"] = start_latitude
        data.at[index, "start_longitude"] = start_longitude
        
    if len(start_data["elements"]) == 0:
        empty_start_elements.append(start_node_id)
        
print(len(empty_start_elements))

14


In [14]:
empty_start_elements

[5669475346,
 227758,
 227771,
 1663461749,
 1701556095,
 5669475346,
 2375143,
 227771,
 227758,
 6205314505,
 1663461749,
 6205314505,
 1701556095,
 2375143]

In [16]:
data["end_latitude"] = None
data["end_longitude"] = None

empty_end_elements = []
# Iterate through each row in the DataFrame
for index, row in data.iterrows():
    end_node_id = row["id_titik_akhir"]

    end_node_query = f"""
        [out:json];
        node({end_node_id});
        out;
    """
    
    end_response = requests.get(overpass_url, params={"data": end_node_query})
    end_data = end_response.json()
    
    if "elements" in end_data and len(end_data["elements"]) > 0:
        end_node = end_data["elements"][0]
        end_latitude = end_node.get("lat", None)
        end_longitude = end_node.get("lon", None)
        data.at[index, "end_latitude"] = end_latitude
        data.at[index, "end_longitude"] = end_longitude
        
    if len(end_data["elements"]) == 0:
        empty_end_elements.append(end_node_id)
        
print(len(empty_end_elements))

14


In [17]:
empty_end_elements

[5669475346,
 1701556095,
 2375143,
 1701556095,
 6205314505,
 6205314505,
 227758,
 227758,
 5669475346,
 227771,
 1663461749,
 1663461749,
 227771,
 2375143]

In [18]:
def haversine(lat1, lon1, lat2, lon2):
    R = 6371.0  # Radius of the Earth in kilometers
    dlat = np.radians(lat2 - lat1)
    dlon = np.radians(lon2 - lon1)
    a = np.sin(dlat / 2) ** 2 + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(dlon / 2) ** 2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    distance = R * c
    return distance

In [35]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 934 entries, 0 to 933
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   id_titik_mulai   934 non-null    int64 
 1   id_titik_akhir   934 non-null    int64 
 2   start_latitude   920 non-null    object
 3   start_longitude  920 non-null    object
 4   end_latitude     920 non-null    object
 5   end_longitude    920 non-null    object
dtypes: int64(2), object(4)
memory usage: 43.9+ KB


In [45]:
data.to_csv('lat_lon_data.csv', index=False, sep=';', decimal=',')

In [29]:
x = haversine(51.434928, -0.161176, 51.434975, -0.16109)
x*1000

7.927905607025733

In [30]:
y = haversine(51.530289, -0.228343	,51.530197	, -0.227949)
y*1000

29.111400752964496

In [40]:
# Convert latitude and longitude columns to numeric values
data['start_latitude'] = pd.to_numeric(data['start_latitude'])
data['start_longitude'] = pd.to_numeric(data['start_longitude'])
data['end_latitude'] = pd.to_numeric(data['end_latitude'])
data['end_longitude'] = pd.to_numeric(data['end_longitude'])

# Calculate distance for each row using the haversine function
data['distance_km'] = data.apply(lambda row: haversine(row['start_latitude'], row['start_longitude'], row['end_latitude'], row['end_longitude']), axis=1)

In [43]:
data['distance_meter'] = data['distance_km'] * 1000