In [3]:
import pandas as pd
import h2o
import utm
import numpy as np
from utils.to_latlon import to_latlon
from utils.preprocessing import preprocess
from utils.postprocessing import postprocess_data

In [1]:
# Filepaths to input data.
# Raw strings are required here: these are Windows-style paths, and in a normal
# string literal sequences such as "\I", "\L", "\s" and "\R" are invalid escape
# sequences (deprecated, and warned about by recent Python versions).
pred_data_path = r'Example_data\Input\Lotek_combined_csv\sampled_data.csv'
radio_tower_xy_path = r'Example_data\Input\Radio_tower_locations\RTEastNorth.xlsx'

# Filepaths to trained models (one saved H2O model per predicted dimension)
xOffset_model_path = r'Example_data\Output\Trained_models\xOffset\DeepLearning_grid_3_AutoML_1_20230528_210520_model_1'
yOffset_model_path = r'Example_data\Output\Trained_models\yOffset\StackedEnsemble_AllModels_1_AutoML_2_20230528_210956'

# Variable parameters
freq = '5min' # Frequency of data; passed to preprocess()
data_type = None  # NOTE(review): not referenced by any cell visible in this notebook

# Passed to preprocess() to select the prediction workflow
routine = 'prediction'
# Dimensions to predict; each one needs a matching *_model_path above
dimensions = ['xOffset', 'yOffset']
# NOTE(review): `predictors` is not referenced by any cell visible in this
# notebook - presumably these are the feature columns the models were trained on.
predictors = ['ant1_mean', 'ant2_mean', 'ant3_mean', 'ant4_mean', 'ant1_count', 'ant2_count', 'ant3_count', 'ant4_count', 'ant1_std', 'ant2_std', 'ant3_std', 'ant4_std', 'mean_std', 'total_count']

In [4]:
# Load the records to predict on, converting the 'DateAndTime' column from
# text to pandas datetimes as part of the same expression.
pred_data = pd.read_csv(pred_data_path).assign(
    DateAndTime=lambda frame: pd.to_datetime(frame['DateAndTime'])
)

# Load the radio tower location table from the Excel workbook
tower_locs = pd.read_excel(radio_tower_xy_path)

In [5]:
# Preprocess the unlabelled data.
# `freq` and `routine` are set in the configuration cell ('5min' and 'prediction').
# NOTE(review): preprocess() lives in utils.preprocessing and is not visible here;
# presumably it aggregates the detections to `freq` intervals - confirm there.
pred_data_preproc = preprocess(pred_data, freq, routine)

In [7]:
# Initialise h2o (starts a local H2O server, or connects to one already running)
h2o.init()

# Saved-model location for every dimension this notebook knows how to predict.
model_paths = {
    'xOffset': xOffset_model_path,
    'yOffset': yOffset_model_path,
}

try:
    # Make predictions for each dimension
    for dimension in dimensions:
        if dimension not in model_paths:
            raise ValueError(f"Unknown dimension: {dimension}")

        # Load the trained model. A failure must stop the run: carrying on would
        # either hit an undefined `model` or, worse, silently score this
        # dimension with the model loaded for the previous one.
        try:
            model = h2o.load_model(model_paths[dimension])
        except Exception as e:
            print(f"Error loading model for dimension '{dimension}': {str(e)}")
            raise

        # Make predictions on the unlabelled data.
        # The frame is rebuilt on every iteration, so on later iterations it also
        # contains the prediction columns added by earlier iterations.
        unlabelled_data = h2o.H2OFrame(pred_data_preproc)
        preds = model.predict(unlabelled_data)

        # Save predictions to a new "<dimension>_pred" column in the dataframe
        pred_column_name = f"{dimension}_pred"
        pred_data_preproc[pred_column_name] = preds.as_data_frame()
finally:
    # Stop h2o, even when loading or prediction failed, so the local server
    # is not left running after an error.
    h2o.cluster().shutdown()

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
; OpenJDK 64-Bit Server VM Zulu17.36+17-CA (build 17.0.4.1+1-LTS, mixed mode, sharing)
  Starting server from C:\Users\s5236256\Documents\GitHub\ml4rt\.venv\Lib\site-packages\h2o\backend\bin\h2o.jar
  Ice root: C:\Users\s5236256\AppData\Local\Temp\tmpys8f61oe
  JVM stdout: C:\Users\s5236256\AppData\Local\Temp\tmpys8f61oe\h2o_s5236256_started_from_python.out
  JVM stderr: C:\Users\s5236256\AppData\Local\Temp\tmpys8f61oe\h2o_s5236256_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,10 secs
H2O_cluster_timezone:,Australia/Brisbane
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.40.0.4
H2O_cluster_version_age:,1 month and 6 days
H2O_cluster_name:,H2O_from_python_s5236256_605qim
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.922 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
deeplearning prediction progress: |██████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%
H2O session _sid_9aaa closed.


In [9]:
# Post-process the predicted offsets together with the radio tower locations.
# NOTE(review): postprocess_data() lives in utils.postprocessing and is not visible
# here. The following cell reads 'easting_pred', 'northing_pred', 'zone_number' and
# 'zone_letter' from the result, so it is expected to provide those columns - confirm.
predictions = postprocess_data(pred_data_preproc, tower_locs)

In [11]:
def _row_to_latlon(row):
    """Convert one row's predicted easting/northing (plus zone info) via to_latlon.

    The values returned by to_latlon are wrapped in a Series so that
    DataFrame.apply expands them into separate columns.
    """
    converted = to_latlon(
        row['easting_pred'],
        row['northing_pred'],
        row['zone_number'],
        row['zone_letter'],
    )
    return pd.Series(converted)


# Convert every prediction row, then store the two resulting columns as
# 'latitude_pred' and 'longitude_pred'.
latlon_pred = predictions.apply(_row_to_latlon, axis=1)
predictions[['latitude_pred', 'longitude_pred']] = latlon_pred


In [12]:
# Write the final predictions to "predictions.xlsx". The path is relative, so the
# file lands in the current working directory; the DataFrame index is not written.
predictions.to_excel("predictions.xlsx", index=False)