In [126]:
# Dependencies
from influxdb import InfluxDBClient
import datetime
import json
import pandas as pd
import numpy as np

In [94]:
# Connect to DB
client = InfluxDBClient(
    'influxdb',
    8086,
    'admin',
    'password',
    'db1')
client.switch_database('starklink_db')

# Retrieve objective satellite ids
starklink_datafile = 'starlink_historical_data.json'
with open(starklink_datafile, 'r') as data_file:
    json_data = data_file.read()
starklink_data = json.loads(json_data)
satellite_ids = {data['id'] for data in starklink_data}


Now, let us consider this time

In [85]:
datime_time = '2021-01-01 13:30:00'

Now we can get the closest satellite positions on time

In [114]:
position_data = [['time', 'latitude', 'longitude', 'satellite_id']]
for satellite_id in satellite_ids:
    query_under = (
        f"SELECT time, latitude, longitude FROM sat_pos_{satellite_id} "
        f"WHERE time <= '{datime_time}' "
        "ORDER BY DESC LIMIT 1")
    response = client.query(query_under)
    if response:
        response_data = response.raw['series'][0]['values'][0]
        response_data.append(satellite_id)
        position_data.append(response_data)
        
    query_above = (
        f"SELECT time, latitude, longitude FROM sat_pos_{satellite_id} "
        f"WHERE time <= '{datime_time}' "
        "ORDER BY DESC LIMIT 1")
    response = client.query(query_above)
    if response:
        response_data = response.raw['series'][0]['values'][0]
        response_data.append(satellite_id)
        position_data.append(response_data)
        
position_data = pd.DataFrame(
    position_data[1:], 
    columns = position_data[0])

position_data = position_data.dropna()
display(position_data)


Unnamed: 0,time,latitude,longitude,satellite_id
2,2020-12-19T06:26:10Z,0.69936,91,60106f20e900d60006e32cbd
3,2020-12-19T06:26:10Z,0.69936,91,60106f20e900d60006e32cbd
4,2020-12-19T06:26:10Z,-0.606328,160,60106f1fe900d60006e32c82
5,2020-12-19T06:26:10Z,-0.606328,160,60106f1fe900d60006e32c82
22,2020-12-19T06:26:10Z,0.108333,74,60106f200c72a20006004c18
23,2020-12-19T06:26:10Z,0.108333,74,60106f200c72a20006004c18
24,2020-12-19T06:26:10Z,1.203297,78,60106f1fe900d60006e32caf
25,2020-12-19T06:26:10Z,1.203297,78,60106f1fe900d60006e32caf
26,2020-12-19T06:26:10Z,10.634416,86,60106f1fe900d60006e32c8d
27,2020-12-19T06:26:10Z,10.634416,86,60106f1fe900d60006e32c8d


The previous DataFrame contains the latitud longitud position of the time nearest points. To the **datime_time** variable. Now the next definition allows us to compute the latitud-longitude distance and it's minimum

In [124]:
def haversine_vectorize(lon1, lat1, lon2, lat2):

    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    newlon = lon2 - lon1
    newlat = lat2 - lat1

    haver_formula = np.sin(newlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(newlon/2.0)**2

    dist = 2 * np.arcsin(np.sqrt(haver_formula ))
    km = 6367 * dist #6367 for distance in KM for miles use 3958
    return km

An now let's consider this latitud longitude position

In [118]:
position = {
    'latitude': 1.77,
    'longitude': 37.13}

So now, we can compute the closest

In [128]:
distances = haversine_vectorize(
    position_data['latitude'],
    position_data['longitude'],
    position['latitude'],
    position['longitude'])
minimum_index = distances.idxmin()
print(f"The closest satellite is)

110

In [122]:
position_data.dtypes

time             object
latitude        float64
longitude         int64
satellite_id     object
dtype: object