In [1]:
import pandas as pd
import h2o
import os
import utm
import numpy as np
from utils.to_latlon import to_latlon
from utils.preprocessing import preprocess
from utils.postprocessing import postprocess_data, location_averaging_inference
import tkinter as tk
from tkinter import filedialog
from tkinter.simpledialog import askstring

In [2]:
# User input of data paths and temporal resolution

# Initialize Tkinter
root = tk.Tk()
root.attributes('-topmost', True)
root.withdraw()

# Ask the user to select the train data file
receiver_data = filedialog.askopenfilename(
    title="Select receiver data",
    filetypes=[("Excel files", "*.xlsx")]
)

# Ask the user to select the radio tower XY data file
radio_tower_xy_path = filedialog.askopenfilename(
    title="Select radio tower location data",
    filetypes=[("Excel files", "*.xlsx")]
)

# Ask the user to select the model save path
model_save_path = filedialog.askdirectory(
    title="Select folder that contains trained models from the 'train_model.ipynb' notebook"
)

# Ask the user to select the model save path
predictions_save_path = filedialog.askdirectory(
    title="Select save path for output file with location predictions"
)

# Function to get minutes from user
def get_minutes():
    while True:
        minutes = askstring("Time (in minutes) to compile location data (t)", "Enter time period (t) in minutes (must be an integer):")
        if minutes and minutes.isdigit():
            return minutes
        messagebox.showerror("Error", "Invalid input. Please enter a number.")

# Prompt the user and get the validated input
minutes = get_minutes()

# Append the input number to 'min'
freq = minutes + 'min'

# Print freq to verify (optional)
print("Frequency:", freq)

Frequency: 3min


In [3]:
# Get training data
pred_data = pd.read_excel(receiver_data)
pred_data['DateAndTime'] = pd.to_datetime(pred_data['DateAndTime'])

# Get tower locations
tower_locs = pd.read_excel(radio_tower_xy_path)

In [4]:
routine = 'prediction'
dimensions = ['xOffset', 'yOffset']

In [5]:
# Preprocess the unlabelled data
pred_data_preproc, predictors_predict = preprocess(pred_data, freq, routine)

# Create a dictionary of the coordinates of the towers
offset_dict = tower_locs.set_index('TowerID').to_dict()
tower_g = offset_dict['tower_group']

# Add the model group
pred_data_preproc['tower_group'] = pred_data_preproc['TowerID'].map(tower_g).fillna(0)

  .agg(['mean', 'count', np.std])


In [6]:
tower_groups = tower_locs['tower_group'].unique()

# Initialise h2o
h2o.init(nthreads = 2)

# Make predictions for each tower group and dimension
for tower_group in tower_groups:
    for dimension in dimensions:
        # Load the trained model
        try:
            if dimension == 'xOffset':
                model = h2o.load_model(f"{model_save_path}\{dimension}_group_{tower_group}_model")
                
            elif dimension == 'yOffset':
                model = h2o.load_model(f"{model_save_path}\{dimension}_group_{tower_group}_model")
            else:
                raise ValueError(f"Error loading the model: {dimension}")
        except Exception as e:
            print(f"Error loading model for dimension '{dimension}': {str(e)}")
    
        # Make predictions on the test data
        data_input = pred_data_preproc[pred_data_preproc['tower_group'] == tower_group]
        data_input['unique_index'] = data_input.index ## Delete once bug tested
        unlabelled_data = h2o.H2OFrame(data_input, header=1)
        preds = model.predict(unlabelled_data)

        # Save predictions to a new column in the test dataframe
        pred_column_name = f"{dimension}_pred"
        pred_data_preproc.loc[pred_data_preproc['tower_group'] == tower_group, pred_column_name] = preds.as_data_frame().values
        

# Stop h2o
h2o.cluster().shutdown()

Checking whether there is an H2O instance running at http://localhost:54321.

  model = h2o.load_model(f"{model_save_path}\{dimension}_group_{tower_group}_model")
  model = h2o.load_model(f"{model_save_path}\{dimension}_group_{tower_group}_model")


.... not found.
Attempting to start a local H2O server...
; Java HotSpot(TM) 64-Bit Server VM (build 17.0.10+11-LTS-240, mixed mode, sharing)
  Starting server from C:\Users\JohnvanOsta\Documents\GitHub\ml4rt\.venv\Lib\site-packages\h2o\backend\bin\h2o.jar
  Ice root: C:\Users\JOHNVA~1\AppData\Local\Temp\tmp2nb53u2a
  JVM stdout: C:\Users\JOHNVA~1\AppData\Local\Temp\tmp2nb53u2a\h2o_JohnvanOsta_started_from_python.out
  JVM stderr: C:\Users\JOHNVA~1\AppData\Local\Temp\tmp2nb53u2a\h2o_JohnvanOsta_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,02 secs
H2O_cluster_timezone:,Australia/Brisbane
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.3
H2O_cluster_version_age:,4 months and 28 days
H2O_cluster_name:,H2O_from_python_JohnvanOsta_wbv1ye
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.963 Gb
H2O_cluster_total_cores:,0
H2O_cluster_allowed_cores:,0


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
gbm prediction progress: |███████████████████████████████████████████████████████| (done) 100%




Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%
H2O session _sid_b2e3 closed.




In [7]:
# Post process the data and save the predictions
predictions = postprocess_data(pred_data_preproc, tower_locs)
location_estimates = location_averaging_inference(predictions)
location_estimates[['latitude_pred', 'longitude_pred']] = location_estimates.apply(lambda row: pd.Series(to_latlon(row['easting_pred'], row['northing_pred'], row['zone_number'], row['zone_letter'])), axis=1)
location_estimates.to_excel(os.path.join(predictions_save_path, "predictions.xlsx"), index=False)