In [51]:
import pandas as pd
import h2o
import utm
import numpy as np
from utils.to_latlon import to_latlon

In [41]:
# Filepaths to input data.
# Raw strings keep the Windows backslashes literal: without the r-prefix, sequences
# such as "\I" or "\M" are invalid escapes (SyntaxWarning on Python 3.12+), and a
# folder starting with "t" or "n" would silently turn into a tab/newline.
pred_data_path = r'Example_data\Input\Lotek_combined_csv\sampled_data.csv'
# NOTE(review): machine-specific absolute path; point this at your own copy of the workbook.
radio_tower_xy_path = r'H:\My Drive\Colab Notebooks\RadioTelemetry\Tower_data\RTEastNorth.xlsx'

# Filepaths to trained models
xOffset_model_path = r'Example_data\Output\Trained_models\xOffset\DeepLearning_grid_3_AutoML_1_20230528_210520_model_1'
yOffset_model_path = r'Example_data\Output\Trained_models\yOffset\StackedEnsemble_AllModels_1_AutoML_2_20230528_210956'

# Variable parameters
freq = '5min'  # Width of the time bins the detections are aggregated into
data_type = None

# Output dimensions to predict; each one has its own trained model path above
dimensions = ['xOffset', 'yOffset']
# Feature column names produced by the preprocessing step
predictors = [
    'ant1_mean', 'ant2_mean', 'ant3_mean', 'ant4_mean',
    'ant1_count', 'ant2_count', 'ant3_count', 'ant4_count',
    'ant1_std', 'ant2_std', 'ant3_std', 'ant4_std',
    'mean_std', 'total_count',
]

In [17]:
# Read the prediction data and parse its 'DateAndTime' column into datetimes
pred_data = pd.read_csv(pred_data_path).assign(
    DateAndTime=lambda raw: pd.to_datetime(raw['DateAndTime'])
)

# Read the radio tower coordinates from the Excel workbook
tower_locs = pd.read_excel(radio_tower_xy_path)

In [27]:
def preprocess_predict_data(pred_data, freq):
    """Aggregate raw detections into one row of per-antenna features per time bin.

    Detections are floored to ``freq``-wide bins and grouped by bin, tower, tag and
    antenna. The mean, count and sample standard deviation of ``Power`` are computed
    per group and then pivoted so each antenna gets its own columns.

    Parameters
    ----------
    pred_data : pandas.DataFrame
        Must contain a datetime column 'DateAndTime' plus 'TowerID', 'TagID',
        'Antenna' and 'Power'. The frame is not modified.
    freq : str
        Pandas frequency string used to floor the timestamps (e.g. '5min').

    Returns
    -------
    pandas.DataFrame
        One row per (DateTime, TowerID, TagID) with columns ``ant{1..4}_count``,
        ``ant{1..4}_mean``, ``ant{1..4}_std``, ``mean_std`` and ``total_count``.
        All twelve per-antenna columns are always present; missing values are 0.
    """
    antennas = (1, 2, 3, 4)
    stats = ('count', 'mean', 'std')
    id_cols = ['DateTime', 'TowerID', 'TagID']

    # make column with the datetime floored to the 'freq' bin (e.g. 5min)
    binned = pred_data.assign(DateTime=pred_data['DateAndTime'].dt.floor(freq=freq))

    # group by datetime, tag, tower and antenna, compute mean/count/std power, pivot to antennas.
    # 'std' (string) rather than np.std: pandas maps the callable to the same
    # sample std today but has deprecated that aliasing.
    per_antenna = (
        binned.groupby(id_cols + ['Antenna'])['Power']
        .agg(['mean', 'count', 'std'])
        .reset_index()
        .pivot_table(index=id_cols, columns='Antenna', values=['mean', 'count', 'std'])
        .reset_index()
    )

    # Flatten the (stat, antenna) column MultiIndex, e.g. ('mean', 1) -> 'mean1'
    per_antenna.columns = [
        f"{stat}{antenna}" if antenna != "" else stat
        for stat, antenna in per_antenna.columns.values
    ]
    # ... and rename to the feature names the models expect, e.g. 'mean1' -> 'ant1_mean'
    per_antenna = per_antenna.rename(
        columns={f"{stat}{ant}": f"ant{ant}_{stat}" for stat in stats for ant in antennas}
    )

    # pivot_table drops columns that are entirely NaN, so an antenna that never
    # fired (or whose bins all hold a single detection, giving NaN std) would be
    # missing here. Re-add any absent feature column as NaN so the steps below
    # and the models always see the full feature set.
    feature_cols = [f"ant{ant}_{stat}" for stat in stats for ant in antennas]
    extra_cols = [
        col for col in per_antenna.columns
        if col not in id_cols and col not in feature_cols
    ]
    per_antenna = per_antenna.reindex(columns=id_cols + feature_cols + extra_cols)

    # Calculate the mean std and total count across the antennas (NaNs are skipped)
    per_antenna['mean_std'] = per_antenna[[f"ant{ant}_std" for ant in antennas]].mean(axis=1)
    per_antenna['total_count'] = per_antenna[[f"ant{ant}_count" for ant in antennas]].sum(axis=1)

    # Fill missing values with 0
    return per_antenna.fillna(value=0)

In [46]:
# Convert predicted location offsets back to eastings/northings

def postprocess_data(prediction_data, tower_locs):
    """Convert predicted tower-relative offsets into absolute easting/northing.

    Parameters
    ----------
    prediction_data : pandas.DataFrame
        Must contain 'TowerID', 'xOffset_pred' and 'yOffset_pred'. The frame is
        not modified; a new frame is returned.
    tower_locs : pandas.DataFrame
        Tower coordinates with columns 'TowerID', 'POINT_X' and 'POINT_Y'.

    Returns
    -------
    pandas.DataFrame
        Copy of ``prediction_data`` with 'easting_pred' (xOffset_pred + POINT_X)
        and 'northing_pred' (yOffset_pred + POINT_Y) added. Rows whose TowerID
        has no coordinates in ``tower_locs`` get NaN in both columns, because an
        offset without its tower origin is not a valid absolute position.

    Warns
    -----
    UserWarning
        If any TowerID in ``prediction_data`` has no coordinates in ``tower_locs``.
    """
    import warnings

    # Per-tower coordinate lookups (a plain dict keeps the last entry if a TowerID repeats)
    towers = tower_locs.set_index('TowerID')
    tower_x = prediction_data['TowerID'].map(towers['POINT_X'].to_dict())
    tower_y = prediction_data['TowerID'].map(towers['POINT_Y'].to_dict())

    # Surface towers we cannot place instead of silently treating their origin as (0, 0)
    unresolved = tower_x.isna() | tower_y.isna()
    if unresolved.any():
        missing_ids = sorted(prediction_data.loc[unresolved, 'TowerID'].astype(str).unique())
        warnings.warn(
            f"No coordinates found for TowerID(s) {missing_ids}; "
            "easting_pred/northing_pred set to NaN for those rows.",
            stacklevel=2,
        )

    # Shift the predicted x/y offsets by the location of the detecting tower
    return prediction_data.assign(
        easting_pred=prediction_data['xOffset_pred'] + tower_x,
        northing_pred=prediction_data['yOffset_pred'] + tower_y,
    )

In [43]:
# Aggregate the prediction data into per-antenna features for each time bin
pred_data_preproc = preprocess_predict_data(pred_data=pred_data, freq=freq)

In [44]:
# Initialise h2o
h2o.init()

# Trained model to use for each predicted dimension
model_paths = {
    'xOffset': xOffset_model_path,
    'yOffset': yOffset_model_path,
}

try:
    # Upload the feature table once. Columns written by an earlier run of this
    # cell are excluded, so every model sees the same inputs and re-running the
    # cell does not feed old predictions back in.
    previous_pred_cols = [f"{dimension}_pred" for dimension in dimensions]
    unlabelled_data = h2o.H2OFrame(
        pred_data_preproc.drop(columns=previous_pred_cols, errors='ignore')
    )

    # Make predictions for each dimension
    for dimension in dimensions:
        if dimension not in model_paths:
            raise ValueError(f"Unknown dimension: {dimension}")

        # Load the trained model. A failure propagates rather than being printed
        # and ignored, which previously let the loop carry on with a stale or
        # undefined `model`.
        model = h2o.load_model(model_paths[dimension])

        # Make predictions on the unlabelled data
        preds = model.predict(unlabelled_data)

        # Save predictions to a new column; assign positionally from the first
        # column of the prediction frame so pandas index alignment cannot interfere
        pred_data_preproc[f"{dimension}_pred"] = preds.as_data_frame().iloc[:, 0].to_numpy()
finally:
    # Stop h2o even if loading or prediction failed, so no cluster is left running
    h2o.cluster().shutdown()

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
; Java HotSpot(TM) 64-Bit Server VM (build 25.321-b07, mixed mode)
  Starting server from C:\Users\John\Documents\GitHub\ml4rt\.venv\Lib\site-packages\h2o\backend\bin\h2o.jar
  Ice root: C:\Users\John\AppData\Local\Temp\tmp02_xmtya
  JVM stdout: C:\Users\John\AppData\Local\Temp\tmp02_xmtya\h2o_John_started_from_python.out
  JVM stderr: C:\Users\John\AppData\Local\Temp\tmp02_xmtya\h2o_John_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,03 secs
H2O_cluster_timezone:,Australia/Brisbane
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.40.0.4
H2O_cluster_version_age:,1 month and 3 days
H2O_cluster_name:,H2O_from_python_John_93incj
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.467 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
deeplearning prediction progress: |██████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%
H2O session _sid_a20e closed.


In [45]:
# Display the aggregated features together with the predicted x/y offset columns
pred_data_preproc

Unnamed: 0,DateTime,TowerID,TagID,ant1_count,ant2_count,ant3_count,ant4_count,ant1_mean,ant2_mean,ant3_mean,ant4_mean,ant1_std,ant2_std,ant3_std,ant4_std,mean_std,total_count,xOffset_pred,yOffset_pred
0,2020-01-09 10:35:00,RT24,9,0.0,0.0,1.0,0.0,0.0,0.0,121.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-76.605361,-183.367438
1,2020-01-09 10:50:00,RT24,18,0.0,1.0,0.0,0.0,0.0,47.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,124.280808,-61.458774
2,2020-01-09 10:55:00,RT24,9,0.0,1.0,0.0,0.0,0.0,105.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,99.661455,-37.256226
3,2020-01-12 02:35:00,RT24,18,0.0,0.0,1.0,0.0,0.0,0.0,58.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-56.984975,-360.161024
4,2020-02-09 06:30:00,RT01,23,0.0,0.0,1.0,0.0,0.0,0.0,112.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-79.138165,-196.453707
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39958,2023-10-01 17:15:00,RT08,67,0.0,0.0,1.0,0.0,0.0,0.0,66.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-65.535752,-360.966553
39959,2023-10-01 17:30:00,RT27,8,0.0,1.0,0.0,0.0,0.0,74.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,174.911186,-46.429969
39960,2023-10-01 17:50:00,RT25,71,0.0,0.0,1.0,0.0,0.0,0.0,56.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-54.252983,-359.455599
39961,2023-10-01 18:35:00,RT02,44,0.0,1.0,0.0,0.0,0.0,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,110.371057,-96.843200


In [48]:
# Turn the predicted tower-relative offsets into absolute easting/northing values
predictions = postprocess_data(prediction_data=pred_data_preproc, tower_locs=tower_locs)

In [55]:
# Display the predictions, now including the easting_pred / northing_pred columns
predictions

Unnamed: 0,DateTime,TowerID,TagID,ant1_count,ant2_count,ant3_count,ant4_count,ant1_mean,ant2_mean,ant3_mean,...,ant1_std,ant2_std,ant3_std,ant4_std,mean_std,total_count,xOffset_pred,yOffset_pred,easting_pred,northing_pred
0,2020-01-09 10:35:00,RT24,9,0.0,0.0,1.0,0.0,0.0,0.0,121.0,...,0.0,0.0,0.0,0.0,0.0,1.0,-76.605361,-183.367438,436398.663790,7.551671e+06
1,2020-01-09 10:50:00,RT24,18,0.0,1.0,0.0,0.0,0.0,47.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,124.280808,-61.458774,436599.549959,7.551792e+06
2,2020-01-09 10:55:00,RT24,9,0.0,1.0,0.0,0.0,0.0,105.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,99.661455,-37.256226,436574.930606,7.551817e+06
3,2020-01-12 02:35:00,RT24,18,0.0,0.0,1.0,0.0,0.0,0.0,58.0,...,0.0,0.0,0.0,0.0,0.0,1.0,-56.984975,-360.161024,436418.284176,7.551494e+06
4,2020-02-09 06:30:00,RT01,23,0.0,0.0,1.0,0.0,0.0,0.0,112.0,...,0.0,0.0,0.0,0.0,0.0,1.0,-79.138165,-196.453707,423259.593019,7.575722e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39958,2023-10-01 17:15:00,RT08,67,0.0,0.0,1.0,0.0,0.0,0.0,66.0,...,0.0,0.0,0.0,0.0,0.0,1.0,-65.535752,-360.966553,425309.171726,7.572790e+06
39959,2023-10-01 17:30:00,RT27,8,0.0,1.0,0.0,0.0,0.0,74.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,174.911186,-46.429969,437151.379818,7.551307e+06
39960,2023-10-01 17:50:00,RT25,71,0.0,0.0,1.0,0.0,0.0,0.0,56.0,...,0.0,0.0,0.0,0.0,0.0,1.0,-54.252983,-359.455599,436911.612566,7.551481e+06
39961,2023-10-01 18:35:00,RT02,44,0.0,1.0,0.0,0.0,0.0,39.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,110.371057,-96.843200,423962.833846,7.575511e+06


In [53]:
# Zone number and letter handed to to_latlon for the easting/northing conversion
# NOTE(review): presumably the UTM grid zone of the study area; confirm before reuse elsewhere
zone_number, letter = 55, 'k'

In [59]:
# Convert the predicted easting/northing arrays to latitude/longitude using the zone
# number and letter above; to_latlon's two return values are stored as new columns.
predictions['latitude_pred'], predictions['longitude_pred'] = to_latlon(predictions['easting_pred'].values, predictions['northing_pred'].values, zone_number, letter)

In [60]:
# Write the predictions to an Excel workbook in the current working directory,
# omitting the DataFrame's row index
predictions.to_excel("predictions.xlsx", index=False)