In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn import preprocessing
import utm

In [9]:
data_path = 'H:\My Drive\Colab Notebooks\RadioTelemetry\Simul_data\RTdat_sim_only_20230202.xlsx'

RTdat = pd.read_excel(data_path)
RTdat['DateAndTime'] = pd.to_datetime(RTdat['DateAndTime'])
# RTdat = RTdat.loc[RTdat['TagID']==70] # for testing, to be commented out
RTdat

Unnamed: 0,TowerID,Date_QA,Time,DateAndTime,Channel,TagID,Antenna,Power,Status,Data_type,POINT_X,POINT_Y
0,RT01,2021-02-02,07:50:13 AM,2021-02-02 07:50:00,0,60,2,124,Deployed,Live BTF,146.256751,-21.919946
1,RT01,2021-02-02,07:50:39 AM,2021-02-02 07:50:00,0,60,4,135,Deployed,Live BTF,146.256751,-21.919946
2,RT01,2021-02-02,07:51:18 AM,2021-02-02 07:51:00,0,60,2,122,Deployed,Live BTF,146.256751,-21.919946
3,RT01,2021-02-02,07:51:44 AM,2021-02-02 07:51:00,0,60,4,133,Deployed,Live BTF,146.256751,-21.919946
4,RT01,2021-02-02,07:52:36 AM,2021-02-02 07:52:00,0,60,4,123,Deployed,Live BTF,146.256751,-21.919946
...,...,...,...,...,...,...,...,...,...,...,...,...
5217,RT27,2021-09-18,06:22:41 PM,2021-09-18 18:22:00,0,92,1,65,Deployed,Live BTF,146.389023,-22.144377
5218,RT27,2021-09-18,06:22:54 PM,2021-09-18 18:22:00,0,92,1,64,Deployed,Live BTF,146.389023,-22.144377
5219,RT27,2021-09-18,06:23:45 PM,2021-09-18 18:23:00,0,92,1,62,Deployed,Live BTF,146.389023,-22.144377
5220,RT27,2021-09-18,06:24:49 PM,2021-09-18 18:24:00,0,92,1,61,Deployed,Live BTF,146.389023,-22.144377


In [10]:
RTdat.dtypes

TowerID                object
Date_QA        datetime64[ns]
Time                   object
DateAndTime    datetime64[ns]
Channel                 int64
TagID                   int64
Antenna                 int64
Power                   int64
Status                 object
Data_type              object
POINT_X               float64
POINT_Y               float64
dtype: object

In [11]:
status = 'Deployed'
freq = '5min' # in Pandas frequency aliases format

# Get data
RDdat_filt = RTdat[RTdat['Status'] == status]

# make column with the datetime to nearest minute
RDdat_filt = RDdat_filt.assign(DateTime = RDdat_filt['DateAndTime'].dt.floor(freq=freq))

# group by datetime, tag, tower and antenna, compute mean power, pivot to antennas
grouped = (
    RDdat_filt.groupby(['DateTime', 'TowerID', 'TagID', 'Antenna', 'POINT_X', 'POINT_Y'])['Power']
    .mean()
    .reset_index()
    .pivot_table(index=['DateTime', 'TowerID', 'TagID', 'POINT_X', 'POINT_Y'], columns='Antenna', values='Power')
    .reset_index()
    .rename(columns={1: 'ant1', 2: 'ant2', 3: 'ant3', 4: 'ant4'})
    .fillna(value=0) # Need to check the impact of this
)
grouped

Antenna,DateTime,TowerID,TagID,POINT_X,POINT_Y,ant1,ant2,ant3,ant4
0,2021-02-02 07:50:00,RT01,60,146.256427,-21.919968,0.0,123.000000,0.0,133.0
1,2021-02-02 07:50:00,RT01,60,146.256751,-21.919946,0.0,122.000000,0.0,127.5
2,2021-02-02 07:50:00,RT04,60,146.256427,-21.919968,116.0,0.000000,127.5,0.0
3,2021-02-02 07:50:00,RT04,60,146.256751,-21.919946,114.5,0.000000,0.0,0.0
4,2021-02-02 07:55:00,RT01,60,146.256427,-21.919968,0.0,118.000000,0.0,126.5
...,...,...,...,...,...,...,...,...,...
154,2022-02-23 07:50:00,RT18,125,146.241108,-21.932803,47.0,57.666667,0.0,0.0
155,2022-02-23 07:55:00,RT18,125,146.241108,-21.932803,63.0,65.000000,0.0,0.0
156,2022-02-23 09:30:00,RT18,126,146.240347,-21.934927,0.0,59.000000,0.0,0.0
157,2022-02-23 09:35:00,RT18,126,146.240347,-21.934927,24.0,79.200000,0.0,0.0


Need to convert POINT_X AND POINT_Y to a grid reference from the tower

In [13]:
def from_latlon(lat, lon):
    easting, northing, zone_num, zone_letter = utm.from_latlon(lat, lon)
    return easting, northing, zone_num, zone_letter

In [14]:
grouped['easting'], grouped['northing'], grouped['zone_num'], grouped['zone_letter'] = from_latlon(grouped['POINT_Y'].values, grouped['POINT_X'].values)

In [15]:
grouped

Antenna,DateTime,TowerID,TagID,POINT_X,POINT_Y,ant1,ant2,ant3,ant4,easting,northing,zone_num,zone_letter
0,2021-02-02 07:50:00,RT01,60,146.256427,-21.919968,0.0,123.000000,0.0,133.0,423203.210195,7.575845e+06,55,K
1,2021-02-02 07:50:00,RT01,60,146.256751,-21.919946,0.0,122.000000,0.0,127.5,423236.662733,7.575848e+06,55,K
2,2021-02-02 07:50:00,RT04,60,146.256427,-21.919968,116.0,0.000000,127.5,0.0,423203.210195,7.575845e+06,55,K
3,2021-02-02 07:50:00,RT04,60,146.256751,-21.919946,114.5,0.000000,0.0,0.0,423236.662733,7.575848e+06,55,K
4,2021-02-02 07:55:00,RT01,60,146.256427,-21.919968,0.0,118.000000,0.0,126.5,423203.210195,7.575845e+06,55,K
...,...,...,...,...,...,...,...,...,...,...,...,...,...
154,2022-02-23 07:50:00,RT18,125,146.241108,-21.932803,47.0,57.666667,0.0,0.0,421628.012884,7.574417e+06,55,K
155,2022-02-23 07:55:00,RT18,125,146.241108,-21.932803,63.0,65.000000,0.0,0.0,421628.012884,7.574417e+06,55,K
156,2022-02-23 09:30:00,RT18,126,146.240347,-21.934927,0.0,59.000000,0.0,0.0,421550.584313,7.574181e+06,55,K
157,2022-02-23 09:35:00,RT18,126,146.240347,-21.934927,24.0,79.200000,0.0,0.0,421550.584313,7.574181e+06,55,K


In [None]:
# Code to convert utm back to lat long. Zone number = 55, zone letter = k
# lon = 146.236472
# lat = -21.937491

# easting, northing, zone_num, zone_letter = utm.from_latlon(lat, lon)
# utm.to_latlon(east, north, zone_num, zone_letter)

Test multiple linear regression

In [None]:
x = grouped.drop(['DateTime','TowerID','TagID','POINT_X','POINT_Y'],axis= 1)
y = grouped['POINT_Y']

In [None]:
seed = 38

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=seed)

NameError: ignored

In [None]:
model = LinearRegression()
model.fit(X_train,y_train)

In [None]:
predictions = model.predict(X_test)
print('mean_squared_error : ', mean_squared_error(y_test, predictions))
print('mean_absolute_error : ', mean_absolute_error(y_test, predictions))

In [None]:
predictions

In [None]:
plt.scatter(y_test, predictions)