# Convert home addresses to weather zones (H3 cells of resolution 4)

In [None]:
import pandas as pd
import numpy as np
import requests

from pyproj import Transformer

import h3
import folium
from folium.plugins import MarkerCluster

import sys
sys.path.append('../data/')
sys.path.append('../view/')
from plotter import Plot

# Input and output file names used by this notebook
excel_file = "nfh_addresses.xlsx"
extended_excel_file = 'nfh_addresses_extended.xlsx'
home_weather_locations_file_path = "home_weather_locations.parquet"
home_weather_locations_excel_path = "home_weather_locations.xlsx"
# student_file_output_path = "students_data_science_weather_interpolation_locations.parquet"
# student_file_output_excel = "students_data_science_weather_interpolation_locations.xlsx"

# BAG API endpoint
BAG_API_URL = "https://api.bag.kadaster.nl/lvbag/individuelebevragingen/v2/adressen"

# NB: You also need a production API key for BAG. Put it in the file named below,
# on a single line of the form: KADASTER_API_KEY=your_BAG_API_key
api_keys_file='bag_api_key.txt'
# If your organisation does not have one yet, request one here: https://www.kadaster.nl/zakelijk/producten/adressen-en-gebouwen/bag-api-individuele-bevragingen

# H3 resolution of the weather zones: strikes a balance between the precision of
# the geospatial weather interpolation and privacy
resolution = 4 

In [None]:
# Read API keys from a text file
def read_api_keys(file_path):
    """Read ``KEY=value`` pairs from a text file into a dictionary.

    Blank lines and lines starting with ``#`` are ignored. Only the first
    ``=`` on a line separates key from value, so values may themselves
    contain ``=``. Whitespace around keys and values is stripped.

    Args:
        file_path: Path of the text file holding one ``KEY=value`` per line.

    Returns:
        dict mapping each key to its value (both ``str``).

    Raises:
        ValueError: If a non-empty, non-comment line contains no ``=``.
        OSError: If the file cannot be opened.
    """
    api_keys = {}
    with open(file_path, 'r') as file:
        for line_number, raw_line in enumerate(file, start=1):
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue  # tolerate trailing newlines and comment lines
            key, separator, value = line.partition('=')
            if not separator:
                raise ValueError(
                    f"Line {line_number} of {file_path} is not of the form KEY=value"
                )
            api_keys[key.strip()] = value.strip()
    return api_keys

In [None]:
# Read the API keys from the text file
api_keys = read_api_keys(api_keys_file)
# API_KEY is None when the file has no KADASTER_API_KEY entry
API_KEY = api_keys.get('KADASTER_API_KEY')

In [None]:
# Function to convert RD coordinates to GPS coordinates
_rd_to_wgs84_transformer = None  # created on first use, then reused for every call


def rd_to_gps(rd_x, rd_y):
    """Convert Dutch RD coordinates (EPSG:28992) to WGS84 (EPSG:4326).

    The pyproj transformer is built once and cached at module level, because
    constructing it is far more expensive than a single transformation and
    this function is called once per address.

    Args:
        rd_x: RD x coordinate.
        rd_y: RD y coordinate.

    Returns:
        Tuple ``(lat, lon)`` as produced by the transformer.
    """
    global _rd_to_wgs84_transformer
    if _rd_to_wgs84_transformer is None:
        _rd_to_wgs84_transformer = Transformer.from_crs("epsg:28992", "epsg:4326")  # RD to WGS84
    lat, lon = _rd_to_wgs84_transformer.transform(rd_x, rd_y)
    return lat, lon

In [None]:
# Function to fetch address details from the BAG API
def get_address_details(postcode, huisnummer, huisletter=None, timeout=30):
    """Look up an address in the BAG API and return its location details.

    Two requests are made: one to find the address by postcode and house
    number, and (when the address links to an 'adresseerbaarObject') a second
    one to fetch the object's point geometry in RD coordinates.

    Args:
        postcode: Postcode string; spaces are removed before the request.
        huisnummer: House number.
        huisletter: Optional house letter; omitted from the query when falsy.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        Tuple ``(rd_x, rd_y, street, city)``. ``rd_x`` and ``rd_y`` are None
        when no point geometry is available for the address.

    Raises:
        requests.HTTPError: If either request returns an error status.
        requests.Timeout: If a request exceeds ``timeout``.
        KeyError: If the response has no 'adressen' entry.
        IndexError: If the 'adressen' list in the response is empty.
    """
    headers = {
        'X-Api-Key': API_KEY
    }
    params = {
        'postcode': postcode.replace(' ', ''),
        'huisnummer': huisnummer,
    }
    if huisletter:
        params['huisletter'] = huisletter

    # Without a timeout a stalled connection would block the notebook forever
    response = requests.get(BAG_API_URL, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    # Check whether key 'adressen' is present
    if '_embedded' not in data or 'adressen' not in data['_embedded']:
        raise KeyError("De sleutel 'adressen' ontbreekt in de respons")

    addresses = data['_embedded']['adressen']
    if not addresses:
        raise IndexError(
            f"BAG returned no addresses for postcode {postcode!r}, huisnummer {huisnummer!r}"
        )

    # Only the first match is used
    address = addresses[0]
    rd_x = None
    rd_y = None
    street = address['openbareRuimteNaam']
    city = address['woonplaatsNaam']

    # Get RD-coordinates via the link adresseerbaarObject
    if 'adresseerbaarObject' in address['_links']:
        adresseerbaarObject_url = address['_links']['adresseerbaarObject']['href']
        # Ask for RD coordinates; use a copy so the search headers stay untouched
        object_headers = {**headers, 'Accept-Crs': 'epsg:28992'}
        adresseerbaarObject_response = requests.get(
            adresseerbaarObject_url, headers=object_headers, timeout=timeout
        )
        adresseerbaarObject_response.raise_for_status()
        adresseerbaarObject_data = adresseerbaarObject_response.json()
        # NOTE(review): presumably non-'verblijfsobject' results (e.g. berths or
        # pitches) lack this key; they are treated as "no point geometry".
        verblijfsobject = adresseerbaarObject_data.get('verblijfsobject', {})
        if 'geometrie' in verblijfsobject:
            coordinates = verblijfsobject['geometrie']['punt']['coordinates']
            rd_x, rd_y = coordinates[0], coordinates[1]

    return rd_x, rd_y, street, city

In [None]:
# Function to add location information to an address consisting of only PC6, home number and home letter
def add_addres_location_to_pc6homenr(row):
    """Look up street, city and coordinates for one address row.

    Intended for ``DataFrame.apply(..., axis=1)``.

    Args:
        row: Mapping/Series with keys 'pc6', 'home_nr' and 'home_nr_add_on'.

    Returns:
        pandas Series with six values, in order: street, city, rd_x, rd_y,
        lat, lon. If the lookup fails for any reason, the error is printed
        and all six values are None, so the result always fits six columns.
    """
    try:
        huisletter = row['home_nr_add_on']
        if pd.isna(huisletter):  # Check whether value is NaN
            huisletter = None  # If NaN convert to None
        rd_x, rd_y, street, city = get_address_details(row['pc6'], row['home_nr'], huisletter)
        lat, lon = rd_to_gps(rd_x, rd_y)
        return pd.Series([street, city, rd_x, rd_y, lat, lon])
    except Exception as e:
        # Best effort: one bad address must not abort the whole batch
        print(f"Error processing row: {e}")
        # Same length as the success path (six values)
        return pd.Series([None] * 6)

## Read addresses

In [None]:
# Read the address list from the Excel file configured in `excel_file`
df = pd.read_excel(excel_file)

In [None]:
# Use the 'id' column as the row index, so it is kept as key in the exported files
df = df.set_index('id')

In [None]:
# Convert NaN values in 'home_nr_add_on' to empty strings.
# An empty string is falsy, so get_address_details leaves out the 'huisletter'
# query parameter, and the map popup text does not show 'nan'.
df['home_nr_add_on'] = df['home_nr_add_on'].replace(np.nan, '')


In [None]:
df

## Add location

In [None]:
# Apply add_addres_location_to_pc6homenr function to each row.
# Every row triggers BAG API requests; a row whose lookup fails is reported
# with a printed error message and gets empty location values.
df[['street', 'city', 'rd_x', 'rd_y', 'gps_lat__degN', 'gps_lon__degE']] = df.apply(add_addres_location_to_pc6homenr, axis=1)

In [None]:
df

In [None]:
# Function to convert latitude and longitude to H3 index
def lat_lon_to_h3(lat, lon, resolution):
    """Return the H3 cell index containing the given point.

    Works with both h3-py 4.x (``latlng_to_cell``) and h3-py 3.x
    (``geo_to_h3``), whichever the installed package provides.

    Args:
        lat: Latitude in degrees.
        lon: Longitude in degrees.
        resolution: H3 resolution of the cell.

    Returns:
        The H3 cell index as returned by the h3 library.
    """
    # h3-py 4.x renamed geo_to_h3 to latlng_to_cell
    to_cell = getattr(h3, 'latlng_to_cell', None) or h3.geo_to_h3
    return to_cell(lat, lon, resolution)

# Function to get the center of H3 cell
def h3_to_lat_lon(h3_index):
    """Return the centre of an H3 cell as ``(lat, lon)``.

    Works with both h3-py 4.x (``cell_to_latlng``) and h3-py 3.x
    (``h3_to_geo``), whichever the installed package provides.

    Args:
        h3_index: H3 cell index.

    Returns:
        Tuple ``(lat, lon)`` of the cell centre.
    """
    # h3-py 4.x renamed h3_to_geo to cell_to_latlng
    to_latlng = getattr(h3, 'cell_to_latlng', None) or h3.h3_to_geo
    lat, lon = to_latlng(h3_index)
    return lat, lon

In [None]:
# Main function to convert already geocoded addresses to H3
def adresses_lat_lon_to_h3(addresses, resolution=4):
    """Map each row's GPS position to its H3 cell and the cell's centre.

    Args:
        addresses: DataFrame with columns 'gps_lat__degN' and 'gps_lon__degE'.
        resolution: H3 resolution of the cells.

    Returns:
        List with one dict per row, in row order, holding 'H3_cell_id',
        'H3_cell_gps_lat__degN' and 'H3_cell_gps_lon__degE'. Rows without a
        latitude or longitude (e.g. after a failed address lookup) get a dict
        with all three values None, so the list stays aligned with the rows.
    """
    results = []
    for _, row in addresses.iterrows():
        lat = row['gps_lat__degN']
        lon = row['gps_lon__degE']
        if pd.isna(lat) or pd.isna(lon):
            # No position known: keep a placeholder instead of crashing in h3
            results.append({
                'H3_cell_id': None,
                'H3_cell_gps_lat__degN': None,
                'H3_cell_gps_lon__degE': None
            })
            continue
        h3_index = lat_lon_to_h3(lat, lon, resolution)
        h3_center = h3_to_lat_lon(h3_index)
        results.append({
            'H3_cell_id': h3_index,
            'H3_cell_gps_lat__degN': h3_center[0],
            'H3_cell_gps_lon__degE': h3_center[1]
        })
    return results

In [None]:
# Alternative entry point: geocode raw addresses via the BAG API and convert them to H3
def addresses_to_h3(addresses, resolution=4):
    """Geocode each address and map it to its H3 cell.

    Unlike adresses_lat_lon_to_h3, this works on rows that have not been
    geocoded yet. Geocoding uses get_address_details and rd_to_gps from this
    file. Addresses that cannot be geocoded are skipped, so the result may be
    shorter than the input.

    Args:
        addresses: DataFrame with columns 'pc6' (or 'PC6'), 'home_nr' and
            'home_nr_add_on'.
        resolution: H3 resolution of the cells.

    Returns:
        List of dicts with keys 'address', 'gps_lat__degN', 'gps_lon__degE',
        'H3_cell_id', 'H3_cell_gps_lat__degN' and 'H3_cell_gps_lon__degE'.
    """
    results = []
    for _, row in addresses.iterrows():
        # The rest of this file uses the lower-case column name; accept both
        pc6 = row['pc6'] if 'pc6' in row else row['PC6']
        add_on = row['home_nr_add_on']
        if pd.isna(add_on):
            add_on = ''  # avoid 'nan' in the address text
        address = f'{pc6}, {row["home_nr"]}{add_on}, Netherlands'
        try:
            rd_x, rd_y, _street, _city = get_address_details(pc6, row['home_nr'], add_on or None)
        except (requests.RequestException, LookupError) as e:
            print(f"Error geocoding {address}: {e}")
            continue
        if rd_x is None or rd_y is None:
            continue  # address found, but without point geometry
        lat, lon = rd_to_gps(rd_x, rd_y)
        h3_index = lat_lon_to_h3(lat, lon, resolution)
        h3_center = h3_to_lat_lon(h3_index)
        results.append({
            'address': address,
            'gps_lat__degN': lat,
            'gps_lon__degE': lon,
            'H3_cell_id': h3_index,
            'H3_cell_gps_lat__degN': h3_center[0],
            'H3_cell_gps_lon__degE': h3_center[1]
        })
    return results

In [None]:
# Function to add the weather-zone columns (H3 cell centre and id) to a DataFrame
def add_weather_coordinates(addresses_df, h3_coordinates):
    """Attach the H3 weather-zone columns to ``addresses_df`` in place.

    Args:
        addresses_df: DataFrame to extend; it is modified and also returned.
        h3_coordinates: Sequence of dicts, one per row and in row order, with
            keys 'H3_cell_gps_lat__degN', 'H3_cell_gps_lon__degE' and
            'H3_cell_id'.

    Returns:
        The same DataFrame, with columns 'weather_lat__degN',
        'weather_lon__degE' and 'weather_H3_cell_id' added.
    """
    column_sources = (
        ('weather_lat__degN', 'H3_cell_gps_lat__degN'),
        ('weather_lon__degE', 'H3_cell_gps_lon__degE'),
        ('weather_H3_cell_id', 'H3_cell_id'),
    )
    # Collect all values first, so a malformed entry fails before any column is written
    extracted = {
        column: [cell[source_key] for cell in h3_coordinates]
        for column, source_key in column_sources
    }
    for column, values in extracted.items():
        addresses_df[column] = values
    return addresses_df

## Add H3 weather zone

In [None]:
# Convert addresses to H3 coordinates
h3_coordinates  = adresses_lat_lon_to_h3(df, resolution)


In [None]:
# Add weather coordinates to the DataFrame
df = add_weather_coordinates(df, h3_coordinates)

In [None]:
df

In [None]:
# Save file with extended data as Excel
df.to_excel(extended_excel_file, index=True)

## Draw weather zones on map 

In [None]:
# H3 cell id of every home (one entry per row)
h3_cell_ids = df['weather_H3_cell_id'].tolist()

# Marker information: the real home position plus a popup text with the address
marker_data = {
    'lat__degN': df['gps_lat__degN'],
    'lon__degE': df['gps_lon__degE'],
    'popup_text': df.apply(
        lambda home: " ".join(
            str(home[column])
            for column in ('street', 'home_nr', 'home_nr_add_on', 'pc6', 'city')
        ),
        axis=1,
    ),
}
marker_df = pd.DataFrame(marker_data)

# Draw the H3 cells and the home markers, and write the map to an HTML file
Plot.plot_h3_cells_and_markers(h3_cell_ids, marker_df, output_file="map_with_real_home_locations_and_h3_cells.html")

## Convert to pseudonymous mapping

In [None]:
# Keep only the weather-zone columns (with the 'id' index); the address and
# exact GPS columns are left out of the pseudonymous mapping
df_pseudonymous = df[['weather_lat__degN', 'weather_lon__degE', 'weather_H3_cell_id']]

In [None]:
df_pseudonymous

In [None]:
%%time 
# Save the pseudonymous mapping as a Parquet file, including the index
df_pseudonymous.to_parquet(home_weather_locations_file_path, index=True, engine='pyarrow')

In [None]:
# Save the pseudonymous mapping as an Excel file as well, including the index
df_pseudonymous.to_excel(home_weather_locations_excel_path, index=True)

## Convert to student mapping

In [None]:
# # Read the mapping DataFrame from the Excel file
# mapping_df = pd.read_excel('pseudonym_id_student_mapping.xlsx')

In [None]:
# mapping_df

In [None]:
# df_pseudonymous.index.get_level_values(0).unique()

In [None]:
# # Convert the mapping DataFrame to a dictionary with 'id' as key and 'random_id' as value
# id_mapping_dict = dict(zip(mapping_df['id'], mapping_df['random_id']))

# # Replace 'id' values in the DataFrame using the dictionary
# df_pseudonymous.index = df_pseudonymous.index.map(id_mapping_dict)

In [None]:
# df_pseudonymous.index

In [None]:
# df_pseudonymous.index = df_pseudonymous.index.astype('Int64')

In [None]:
# df_pseudonymous

In [None]:
# df_pseudonymous.index.unique()

In [None]:
# df_pseudonymous = df_pseudonymous.reset_index().dropna(axis=0, subset=['id']).set_index('id')

In [None]:
# df_pseudonymous

In [None]:
# %%time 
# df_pseudonymous.to_parquet(student_file_output_path, index=True, engine='pyarrow')

In [None]:
# Save the DataFrame back to Excel
# df_pseudonymous.to_excel(student_file_output_excel, index=True)