In [1]:
import h3

In [2]:
# IPython-only introspection syntax: shows the signature and docstring of
# h3.h3_to_geo (center point of an H3 cell as a lat/lng pair). Development
# aid only; nothing below depends on this cell.
h3.h3_to_geo?

Signature: h3.h3_to_geo(h)
Docstring:
Return the center point of an H3 cell as a lat/lng pair.

Parameters
----------
h : H3Cell

Returns
-------
lat : float
    Latitude
lng : float
    Longitude
File:      ~/.local/lib/python3.10/site-packages/h3/api/_api_template.py
Type:      method

In [10]:
# Required imports
from datetime import date
import dateutil.relativedelta

# Notebook configuration
# - project: database name interpolated into the generated DDL
# - max_h3_resolution: finest H3 resolution that gets its own column
# - today: human-readable date stamped into the table comment
project, max_h3_resolution = "project_opdi", 12
today = f"{date.today():%d %B %Y}"

# Helper function to prepare H3 query
def h3_query_prep(project, max_h3_resolution, last_updated=None):
    """Print the CREATE TABLE statement for `<project>`.`osn_tracks_iceberg`.

    The DDL is only written to stdout; nothing is executed here.

    Parameters
    ----------
    project : str
        Database name interpolated into the table identifier and comments.
    max_h3_resolution : int
        Finest H3 resolution to include. One ``h3_res_<n>`` STRING column is
        emitted for every ``n`` from this value down to 0.
    last_updated : str, optional
        Text for the "Last updated" stamp in the table comment. Defaults to
        today's date formatted as ``'%d %B %Y'``.

    Returns
    -------
    None
    """
    if last_updated is None:
        # Local import keeps the helper usable on its own instead of relying
        # on a notebook-level `today` variable being defined beforehand.
        from datetime import date
        last_updated = date.today().strftime('%d %B %Y')

    # One column definition per resolution, finest resolution first.
    h3_columns_sql = "\n        ".join(
        f"h3_res_{resolution} STRING COMMENT 'H3 cell identifier for lat and lon with H3 resolution {resolution}.',"
        for resolution in range(max_h3_resolution, -1, -1)
    )

    # The sort order targets resolutions 7 and 12; clamp them to the finest
    # resolution actually generated so it never names a missing column.
    if max_h3_resolution >= 0:
        h3_sort_resolutions = sorted({min(r, max_h3_resolution) for r in (7, 12)})
    else:
        h3_sort_resolutions = []
    sort_columns = ["event_time", "baro_altitude", "track_id"]
    sort_columns += [f"h3_res_{r}" for r in h3_sort_resolutions]
    sort_order = ", ".join(f"{column} ASC" for column in sort_columns)

    # NOTE(review): DAY() is applied to a BIGINT column below; Iceberg documents
    # the day transform for date/timestamp sources - confirm the engine accepts it.
    create_osn_tracks_partitioned_sql = f"""
    CREATE TABLE IF NOT EXISTS `{project}`.`osn_tracks_iceberg` (
        event_time BIGINT COMMENT 'This column contains the unix (aka POSIX or epoch) timestamp for which the state vector was valid.',
        icao24 STRING COMMENT 'This column contains the 24-bit ICAO transponder ID which can be used to track specific airframes over different flights.',
        lat DOUBLE COMMENT 'This column contains the last known latitude of the aircraft.',
        lon DOUBLE COMMENT 'This column contains the last known longitude of the aircraft.',
        velocity DOUBLE COMMENT 'This column contains the speed over ground of the aircraft in meters per second.',
        heading DOUBLE COMMENT 'This column represents the direction of movement (track angle in degrees) as the clockwise angle from the geographic north.',
        vert_rate DOUBLE COMMENT 'This column contains the vertical speed of the aircraft in meters per second.',
        callsign STRING COMMENT 'This column contains the callsign that was broadcast by the aircraft.',
        on_ground BOOLEAN COMMENT 'This flag indicates whether the aircraft is broadcasting surface positions (true) or airborne positions (false).',
        alert BOOLEAN COMMENT 'This flag is a special indicator used in ATC.',
        spi BOOLEAN COMMENT 'This flag is a special indicator used in ATC.',
        squawk STRING COMMENT 'This 4-digit octal number is another transponder code which is used by ATC and pilots for identification purposes and indication of emergencies.',
        baro_altitude DOUBLE COMMENT 'This column indicates the aircraft altitude. Barometric altitude measured in meters.',
        baro_altitude_c DOUBLE COMMENT 'This column indicates the cleaned barometric altitude in meters.',
        geo_altitude DOUBLE COMMENT 'This column indicates the aircraft altitude based on GNSS (GPS) sensors in meters.',
        geo_altitude_c DOUBLE COMMENT 'This column indicates the cleaned GNSS-measured altitude in meters.',
        last_pos_update DOUBLE COMMENT 'This Unix timestamp indicates the age of the position.',
        last_contact DOUBLE COMMENT 'This Unix timestamp indicates the time at which OpenSky received the last signal of the aircraft.',
        serials ARRAY<INT> COMMENT 'The serials column is a list of serials of the ADS-B receivers which received the message.',
        track_id STRING COMMENT 'Unique identifier for the associated flight tracks in `{project}`.`opdi_flight_table`.',
        {h3_columns_sql}
        segment_distance_nm DOUBLE COMMENT 'The distance from the previous state vector in nautical miles.',
        cumulative_distance_nm DOUBLE COMMENT 'The cumulative distance from the start in nautical miles.'
    )
    COMMENT '`{project}`.`osn_statevectors_iceberg` with added track_ids (generated based on callsign, icao24 grouping with 30 min signal gap intolerance) and H3 tags. Last updated: {last_updated}.'
    PARTITIONED BY SPEC(DAY(event_time))
    TBLPROPERTIES (
        'write.sort.order' = '{sort_order}',
        'write.distribution-mode' = 'hash',
        'parquet.compression' = 'SNAPPY',
        'bucketing_version'='2',
        'engine.hive.enabled'='true',
        'format-version'='2',
        'numFiles'='0',
        'numFilesErasureCoded'='0',
        'numRows'='0',
        'rawDataSize'='0',
        'serialization.format'='1',
        'table_type'='ICEBERG',
        'totalSize'='0',
        'write.delete.mode'='merge-on-read',
        'write.distribution.mode'='hash',
        'write.merge.mode'='merge-on-read',
        'write.parquet.compression-codec'='SNAPPY',
        'write.update.mode'='merge-on-read'
    );
    """
    # Emit the statement so it can be copied into a SQL editor.
    print(create_osn_tracks_partitioned_sql)

# Print the CREATE TABLE statement for the project and maximum H3 resolution
# configured in the settings above (the helper prints; it returns nothing).
h3_query_prep(project=project, max_h3_resolution=max_h3_resolution)



    CREATE TABLE IF NOT EXISTS `project_opdi`.`osn_tracks_iceberg` (
        event_time BIGINT COMMENT 'This column contains the unix (aka POSIX or epoch) timestamp for which the state vector was valid.',
        icao24 STRING COMMENT 'This column contains the 24-bit ICAO transponder ID which can be used to track specific airframes over different flights.',
        lat DOUBLE COMMENT 'This column contains the last known latitude of the aircraft.',
        lon DOUBLE COMMENT 'This column contains the last known longitude of the aircraft.',
        velocity DOUBLE COMMENT 'This column contains the speed over ground of the aircraft in meters per second.',
        heading DOUBLE COMMENT 'This column represents the direction of movement (track angle in degrees) as the clockwise angle from the geographic north.',
        vert_rate DOUBLE COMMENT 'This column contains the vertical speed of the aircraft in meters per second.',
        callsign STRING COMMENT 'This column contains the callsign

In [3]:
#!pip install h3
#!pip install h3_pyspark
#!pip install shapely

#from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as F
from pyspark.sql.functions import lit
from pyspark.sql.window import Window
from pyspark.sql import DataFrame
import shutil
from datetime import datetime, date
import dateutil.relativedelta
import calendar
import h3_pyspark
import os
import pandas as pd
from datetime import datetime, date
import dateutil.relativedelta
import calendar

# --- Configuration ---------------------------------------------------------
# Target database, finest H3 resolution to tag, and first month to process.
project = "project_opdi"
max_h3_resolution = 12
start_month = date(2022, 1, 1)

# --- Processing window -----------------------------------------------------
# The window ends one month before the current date.
today = date.today()
end_month = today + dateutil.relativedelta.relativedelta(months=-1)

# From here on `today` is the human-readable date string used in table comments.
today = format(today, '%d %B %Y')


# Helperfunctions

def h3_query_prep(project, max_h3_resolution, last_updated=None):
    """Print the CREATE TABLE statement for `<project>`.`osn_tracks_clustered`.

    The DDL is only written to stdout; nothing is executed here.

    Parameters
    ----------
    project : str
        Database name interpolated into the table identifier and comments.
    max_h3_resolution : int
        Finest H3 resolution to include. One ``h3_res_<n>`` STRING column is
        emitted for every ``n`` from this value down to 0.
    last_updated : str, optional
        Text for the "Last updated" stamp in the table comment. Defaults to
        today's date formatted as ``'%d %B %Y'``.

    Returns
    -------
    None
    """
    if last_updated is None:
        # Local import keeps the helper usable on its own instead of relying
        # on a notebook-level `today` variable being defined beforehand.
        from datetime import date
        last_updated = date.today().strftime('%d %B %Y')

    # One column definition per resolution, finest resolution first.
    h3_columns_sql = "\n        ".join(
        f"h3_res_{resolution} STRING COMMENT 'H3 cell identifier for lat and lon with H3 resolution {resolution}.',"
        for resolution in range(max_h3_resolution, -1, -1)
    )

    # The partition clause targets resolutions 7 and 12; clamp them to the
    # finest resolution actually generated so it never names a missing column.
    if max_h3_resolution >= 0:
        h3_partition_resolutions = sorted({min(r, max_h3_resolution) for r in (7, 12)})
    else:
        h3_partition_resolutions = []

    # NOTE(review): this clause mixes an expression (date_trunc) with Iceberg
    # bucket transforms and buckets a DOUBLE column - confirm the target engine
    # accepts it before relying on this DDL.
    partition_fields = [
        "bucket(1024, track_id)",
        "date_trunc('day', event_time)",
        "bucket(1024, baro_altitude)",
    ]
    partition_fields += [f"bucket(512, h3_res_{r})" for r in h3_partition_resolutions]
    partition_sql = ",\n    ".join(partition_fields)

    create_osn_tracks_partitioned_sql = f"""
    CREATE TABLE IF NOT EXISTS `{project}`.`osn_tracks_clustered` (
        event_time BIGINT COMMENT 'This column contains the unix (aka POSIX or epoch) timestamp for which the state vector was valid.',
        icao24 STRING COMMENT 'This column contains the 24-bit ICAO transponder ID which can be used to track specific airframes over different flights.',
        lat DOUBLE COMMENT 'This column contains the last known latitude of the aircraft.',
        lon DOUBLE COMMENT 'This column contains the last known longitude of the aircraft.',
        velocity DOUBLE COMMENT 'This column contains the speed over ground of the aircraft in meters per second.',
        heading DOUBLE COMMENT 'This column represents the direction of movement (track angle in degrees) as the clockwise angle from the geographic north.',
        vert_rate DOUBLE COMMENT 'This column contains the vertical speed of the aircraft in meters per second.',
        callsign STRING COMMENT 'This column contains the callsign that was broadcast by the aircraft.',
        on_ground BOOLEAN COMMENT 'This flag indicates whether the aircraft is broadcasting surface positions (true) or airborne positions (false).',
        alert BOOLEAN COMMENT 'This flag is a special indicator used in ATC.',
        spi BOOLEAN COMMENT 'This flag is a special indicator used in ATC.',
        squawk STRING COMMENT 'This 4-digit octal number is another transponder code which is used by ATC and pilots for identification purposes and indication of emergencies.',
        baro_altitude DOUBLE COMMENT 'This column indicates the aircrafts altitude. As the names suggest, baroaltitude is the altitude measured by the barometer (in meter).',
        baro_altitude_c DOUBLE COMMENT 'This column indicates the cleaned aircrafts altitude. As the names suggest, baroaltitude is the altitude measured by the barometer (in meter).',
        geo_altitude DOUBLE COMMENT 'This column indicates the aircrafts altitude. As the names suggest, geoaltitude is determined using the GNSS (GPS) sensor (in meter).',
        geo_altitude_c DOUBLE COMMENT 'This column indicates the cleaned aircrafts altitude. As the names suggest, geoaltitude is determined using the GNSS (GPS) sensor (in meter).',
        last_pos_update DOUBLE COMMENT 'This unix timestamp indicates the age of the position.',
        last_contact DOUBLE COMMENT 'This unix timestamp indicates the time at which OpenSky received the last signal of the aircraft.',
        serials ARRAY<INT> COMMENT 'The serials column is a list of serials of the ADS-B receivers which received the message.',
        track_id STRING COMMENT 'Unique identifier for the associated flight tracks in `{project}`.`opdi_flight_table`.',
        {h3_columns_sql}
        segment_distance_nm DOUBLE COMMENT 'The distance from the previous statevector in nautical miles.',
        cumulative_distance_nm DOUBLE COMMENT 'The cumulative distance from the start in nautical miles.'
    )
    COMMENT '`{project}`.`osn_statevectors` with added track_ids (generated based on callsign, icao24 grouping with 30 min signal gap intolerance) and H3 tags. Last updated: {last_updated}.'
    PARTITIONED BY (
    {partition_sql}
    )
    STORED BY ICEBERG
    TBLPROPERTIES (
      'bucketing_version'='2',
      'engine.hive.enabled'='true',
      'format-version'='2',
      'numFiles'='0',
      'numFilesErasureCoded'='0',
      'numRows'='0',
      'parquet.compression'='SNAPPY',
      'rawDataSize'='0',
      'serialization.format'='1',
      'table_type'='ICEBERG',
      'totalSize'='0',
      'write.delete.mode'='merge-on-read',
      'write.distribution.mode'='hash',
      'write.merge.mode'='merge-on-read',
      'write.parquet.compression-codec'='SNAPPY',
      'write.update.mode'='merge-on-read'
    );"""

    # The statement is printed so it can be pasted into HUE; running it through
    # Spark from CML did not work when this was written.
    print(create_osn_tracks_partitioned_sql)

h3_query_prep(project = 'project_opdi', max_h3_resolution = 12)



    CREATE TABLE IF NOT EXISTS `project_opdi`.`osn_tracks_clustered` (
        event_time BIGINT COMMENT 'This column contains the unix (aka POSIX or epoch) timestamp for which the state vector was valid.',
        icao24 STRING COMMENT 'This column contains the 24-bit ICAO transponder ID which can be used to track specific airframes over different flights.',
        lat DOUBLE COMMENT 'This column contains the last known latitude of the aircraft.',
        lon DOUBLE COMMENT 'This column contains the last known longitude of the aircraft.',
        velocity DOUBLE COMMENT 'This column contains the speed over ground of the aircraft in meters per second.',
        heading DOUBLE COMMENT 'This column represents the direction of movement (track angle in degrees) as the clockwise angle from the geographic north.',
        vert_rate DOUBLE COMMENT 'This column contains the vertical speed of the aircraft in meters per second.',
        callsign STRING COMMENT 'This column contains the callsi