# Geocoding
This script looks up the latitude and longitude of each record in our data based on its address.

In [None]:
import time
from urllib.parse import quote

import pandas as pd
import requests
from geopy.exc import GeocoderTimedOut
from geopy.geocoders import Nominatim

In [None]:
# Function to get coordinates using Nominatim (from OpenStreetMap)
def get_coordinates_nominatim(address, geolocator, retries=3):
    """Geocode *address* with a geopy geolocator, retrying only on timeouts.

    Args:
        address: Address text to look up.
        geolocator: Object exposing ``geocode(address, timeout=...)``.
        retries: Maximum number of attempts made when a request times out.

    Returns:
        ``(latitude, longitude)`` of the match, or ``(None, None)`` when the
        service reports no match or every attempt timed out.
    """
    for attempt in range(retries):
        try:
            location = geolocator.geocode(address, timeout=10)
        except GeocoderTimedOut:
            # Pause before the next attempt; no point sleeping after the last.
            if attempt < retries - 1:
                time.sleep(1)
            continue
        if location:
            return location.latitude, location.longitude
        # A completed request with no match is a definitive answer: asking
        # again would only send the same query back-to-back to the service.
        return None, None
    return None, None  # Every attempt timed out

# Function to get coordinates using LocationIQ API
def get_coordinates_locationiq(address, api_key="public", timeout=10):
    """Look up *address* through the LocationIQ search endpoint.

    Args:
        address: Address text to look up.
        api_key: Value sent as the ``key`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``(latitude, longitude)`` as floats taken from the first result, or
        ``(None, None)`` if the request fails or the response is empty or
        not in the expected shape.
    """
    try:
        # Passing the query via ``params`` lets requests percent-encode the
        # address, so characters such as '&' or '#' cannot corrupt the URL.
        results = requests.get(
            "https://locationiq.org/v1/search.php",
            params={"key": api_key, "format": "json", "q": address},
            timeout=timeout,
        ).json()
        if results:
            first = results[0]
            return float(first["lat"]), float(first["lon"])
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Network failure, non-JSON body, or an unexpected payload shape.
        return None, None
    return None, None  # Empty response

# Function to get coordinates using Geocode.xyz API
def get_coordinates_geocode_xyz(address, timeout=10):
    """Look up *address* through the Geocode.xyz JSON endpoint.

    Args:
        address: Address text to look up; it becomes the URL path segment.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``(latitude, longitude)`` as floats parsed from the ``latt`` and
        ``longt`` fields, or ``(None, None)`` if the request fails, the
        fields are absent, or their values are not numeric.
    """
    # The address is placed in the URL path, so every reserved character
    # (including '/') must be escaped or it would change the path itself.
    url = f"https://geocode.xyz/{quote(str(address), safe='')}"
    try:
        payload = requests.get(url, params={"json": 1}, timeout=timeout).json()
        if "latt" in payload and "longt" in payload:
            return float(payload["latt"]), float(payload["longt"])
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network failure, non-JSON body, or non-numeric coordinate fields.
        return None, None
    return None, None  # Response lacks coordinate fields

# Function to get coordinates using OpenCageData API
def get_coordinates_opencage(address, api_key="public", timeout=10):
    """Look up *address* through the OpenCageData geocoding endpoint.

    Args:
        address: Address text to look up.
        api_key: Value sent as the ``key`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``(lat, lng)`` from the ``geometry`` of the first result, or
        ``(None, None)`` if the request fails or there are no results.
    """
    try:
        # ``params`` percent-encodes the address so it cannot break the query.
        payload = requests.get(
            "https://api.opencagedata.com/geocode/v1/json",
            params={"q": address, "key": api_key},
            timeout=timeout,
        ).json()
        matches = payload["results"]
        if matches:
            geometry = matches[0]["geometry"]
            return geometry["lat"], geometry["lng"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Network failure, non-JSON body, or an unexpected payload shape.
        return None, None
    return None, None  # No results found

# Main function to get coordinates using multiple APIs
def get_coordinates(address, geolocator):
    """Resolve *address* to coordinates, trying each provider in turn.

    Nominatim is queried first; LocationIQ, Geocode.xyz and OpenCageData are
    consulted, in that order, only while no latitude has been obtained.

    Args:
        address: Address text to look up.
        geolocator: Geolocator passed through to the Nominatim lookup.

    Returns:
        ``(latitude, longitude)`` from the first provider that yields a
        latitude, or ``(None, None)`` if none does or the address is
        missing or blank.
    """
    # Missing values (None or NaN; NaN is the only value unequal to itself)
    # would otherwise be sent to every service as the text "None"/"nan".
    if address is None or (isinstance(address, float) and address != address):
        return None, None
    if isinstance(address, str) and not address.strip():
        return None, None

    lat, lon = get_coordinates_nominatim(address, geolocator)
    fallbacks = (
        get_coordinates_locationiq,
        get_coordinates_geocode_xyz,
        get_coordinates_opencage,
    )
    for provider in fallbacks:
        if lat is not None:
            break
        lat, lon = provider(address)
    return lat, lon

# Function to process multiple CSV files and add coordinates
def process_csv(files, output_path, address_column="Adres"):
    """Combine CSV files, geocode their addresses and save the result.

    The files are read with pandas and concatenated into one frame, then
    ``Latitude`` and ``Longitude`` columns are added from the values in
    *address_column* and the frame is written to *output_path* without the
    index column.

    Args:
        files: Iterable of CSV paths to read.
        output_path: Destination path of the combined CSV file.
        address_column: Name of the column holding the addresses.
    """
    frames = [pd.read_csv(file) for file in files]
    df = pd.concat(frames, ignore_index=True)

    geolocator = Nominatim(user_agent="geo_locator")  # Create a geolocator instance

    addresses = df[address_column]
    # Geocode each distinct address once; repeated rows reuse the result
    # instead of triggering another round of network requests.
    coords_by_address = {
        address: get_coordinates(address, geolocator)
        for address in addresses.dropna().unique()
    }
    # Rows with a missing address are not in the mapping and get no coordinates.
    coords = [coords_by_address.get(address, (None, None)) for address in addresses]

    # Building the columns from lists also works for an empty frame, where
    # unpacking ``zip(*...)`` into two names would raise ValueError.
    df["Latitude"] = [lat for lat, _ in coords]
    df["Longitude"] = [lon for _, lon in coords]

    # Save the updated DataFrame to a new CSV file without the index column
    df.to_csv(output_path, index=False)
    print(f"File saved as {output_path}")


In [None]:
# Otodom offer exports to geocode, one CSV file per property type
input_files = [
    "otodom_apartment_offers.csv",  # apartments
    "otodom_house_offers.csv",  # houses
    "otodom_studio_offers.csv",  # studios
]

# Destination CSV for the combined offers
output_file = "otodom_offers_coordinates.csv"

In [None]:
# Run the pipeline: read the input files and write the result to the output file
process_csv(files=input_files, output_path=output_file)