# Challenge
The goal: given a point in time and a position on Earth, find the satellite that is closest to that position.

In [1]:
# Dependencies
from datetime import datetime
import json
import os

import numpy as np
import pandas as pd

from influxdb_client import InfluxDBClient

In [2]:
# Connection settings. Each value can be overridden through an environment
# variable so that real credentials never need to be written into the notebook;
# the fallbacks are the values that were previously hard-coded here.
# The URL/TOKEN/ORG names match those read by InfluxDBClient.from_env_properties();
# INFLUXDB_BUCKET is a name chosen for this notebook.
url = os.environ.get("INFLUXDB_V2_URL", "http://influxdb:8086")
token = os.environ.get("INFLUXDB_V2_TOKEN", "my-admin-token")
org = os.environ.get("INFLUXDB_V2_ORG", "my-org")
bucket = os.environ.get("INFLUXDB_BUCKET", "my-bucket")

client = InfluxDBClient(url=url, token=token, org=org)

# Retrieve objective satellite ids: every record of the historical data file
# carries an 'id' key, and the set of those ids is what we query for below.
starklink_datafile = 'starlink_historical_data.json'
# Explicit UTF-8 so decoding does not depend on the platform's default encoding.
with open(starklink_datafile, 'r', encoding='utf-8') as data_file:
    starklink_data = json.load(data_file)
satellite_ids = {data['id'] for data in starklink_data}

Now, let us consider this time

In [3]:
# Reference instant (RFC 3339 timestamp, 'Z' = UTC) used as the start of the query range.
datime_time = "2021-01-01T13:30:00Z"

Now we can fetch, for every satellite, the first position recorded at or after that time.

In [4]:
# Columns of the resulting frame, in display order.
position_columns = ['satellite_id', 'latitude', 'longitude']
position_rows = []

# One query API handle serves every request; no need to rebuild it per satellite.
query_api = client.query_api()

for satellite_id in satellite_ids:
    # NOTE: range(start: ...) |> first() yields, per field, the first sample
    # recorded at or after `datime_time` -- not the sample nearest in absolute time.
    query = f"""
        from(bucket: "{bucket}")
          |> range(start: {datime_time})
          |> filter(fn: (r) => r["_measurement"] == "sat_pos_{satellite_id}")
          |> first()
        """
    df_result = query_api.query_data_frame(query)

    # query_data_frame may hand back a list of frames (one per distinct table
    # schema) instead of a single frame; merge them so the lookups below work.
    if isinstance(df_result, list):
        if not df_result:
            continue
        df_result = pd.concat(df_result, ignore_index=True)

    if df_result.empty:
        continue

    lat_values = df_result.loc[df_result['_field'] == 'lat', '_value']
    lon_values = df_result.loc[df_result['_field'] == 'lon', '_value']

    # Skip satellites for which only one of the two coordinates was returned,
    # rather than failing on an out-of-range index.
    if lat_values.empty or lon_values.empty:
        continue

    position_rows.append([satellite_id, lat_values.iloc[0], lon_values.iloc[0]])

# Build the frame once from the collected rows.
position_data = pd.DataFrame(position_rows, columns=position_columns)

display(position_data)

Unnamed: 0,satellite_id,latitude,longitude
0,5eed7715096e590006985759,33.038027,5.0
1,5eed7714096e5900069856d1,-47.799738,21.0
2,5f5a9c1e2fd30c00065e5e8e,39.720494,177.0
3,5eed7714096e590006985658,-21.625599,102.0
4,5eed7716096e59000698580f,-15.072719,53.0
...,...,...,...
946,5fc7ec8ce4130000069e2c6c,31.265767,120.0
947,60106f1e0c72a20006004c0b,-0.433577,18.0
948,5fa31092c4fa370006feed0c,8.962893,89.0
949,5eed7716096e5900069857b7,17.192366,108.0


The previous DataFrame contains, for each satellite, the latitude/longitude position of its sample closest in time to the **datime_time** variable (the first one recorded at or after it). The next definition lets us compute the great-circle distance between latitude/longitude points, so that we can then find its minimum.

In [5]:
def haversine_vectorize(lon1, lat1, lon2, lat2, earth_radius=6367):
    """Great-circle distance between two (sets of) points via the haversine formula.

    Note the argument order: for both points the longitude comes BEFORE the
    latitude.

    Parameters
    ----------
    lon1, lat1 : float or array-like
        Longitude and latitude of the first point(s), in degrees.
    lon2, lat2 : float or array-like
        Longitude and latitude of the second point(s), in degrees.
    earth_radius : float, default 6367
        Radius of the sphere; it fixes the unit of the result
        (6367 gives kilometres, 3958 gives miles).

    Returns
    -------
    float or array-like
        Distance(s) in the unit of ``earth_radius``. Array-like inputs are
        combined element-wise following NumPy broadcasting.
    """
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    delta_lon = lon2 - lon1
    delta_lat = lat2 - lat1

    haver_formula = (
        np.sin(delta_lat / 2.0) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin(delta_lon / 2.0) ** 2
    )
    # Floating-point rounding can push the term marginally outside [0, 1] for
    # (near-)antipodal points, which would make arcsin return NaN.
    haver_formula = np.clip(haver_formula, 0.0, 1.0)

    central_angle = 2 * np.arcsin(np.sqrt(haver_formula))
    return earth_radius * central_angle

And now let's consider this latitude/longitude position

In [6]:
# Reference point, in degrees, for which the nearest satellite is searched.
position = dict(latitude=1.77, longitude=37.13)

So now we can compute which satellite is the closest to that position

In [7]:
# haversine_vectorize expects (lon1, lat1, lon2, lat2): longitude comes FIRST.
# Passing the coordinates by keyword keeps latitude and longitude from being
# silently swapped, which would yield wrong distances.
distances = haversine_vectorize(
    lon1=position_data['longitude'],
    lat1=position_data['latitude'],
    lon2=position['longitude'],
    lat2=position['latitude'])

# Row label of the smallest distance, i.e. of the closest satellite.
minimum_index = distances.idxmin()
print(f"The closest satellite is {position_data.loc[minimum_index, 'satellite_id']}")
print(f"It's position is {position_data.loc[minimum_index, :]}")

The closest satellite is 5f889669c86e27000615b235
It's position is satellite_id    5f889669c86e27000615b235
latitude                        1.909076
longitude                           39.0
Name: 567, dtype: object
