In [None]:
# default_exp hit_test
%load_ext autoreload
%autoreload 2

## HitTest
Test a more sensible approach to satellite visibility by using standard astronomy libraries to ask whether a satellite is in view of a ship at a given time.  

In [None]:
# export hit_test
# Supposed to be "hide" but that keeps the module from having imports.

from datetime import datetime
from dateutil import tz
from skyfield.api import EarthSatellite
from skyfield.api import Topos, load
import math
import pandas as pd
import plotly.express as px
import numpy as np

In [None]:
#export
# Root folder of the daily TLE files; `load_day_file` looks for
# <root>/YYYY/MM/DD.tab.gz beneath it.
DAY_FILE_PATH="data/VAULT_Data/TLE_daily"  
# Alternative root (disabled) -- presumably a shared NAS mount; confirm before enabling.
#DAY_FILE_PATH="/share/nas2/data/vault/TLE_daily"


In [None]:
#export
# Column names, in file order; passed as `names=` to `pd.read_csv` in `load_day_file`.
COLUMNS = ["satellite", "day_dt", "day", "tle_dt", "tle_ts", "line1", "line2"]
# Earlier positional form of the dtypes, kept for reference:
# DTYPES = [str, str, int, str, int, str, str]
# Per-column dtypes, chosen to keep the loaded frame small.
DTYPES = {'satellite': 'uint16', # observed values are ints in 5..41678, so 0..65535 is good
          'day_dt': 'str',       # here a single date, but generally datetime: parsed via `dates` below
          'day': 'uint16',       # here a single value 6026, too big for uint8, but 16 is good
          'tle_dt': 'str',       # again, parsed as datetime via `dates` below
          'tle_ts': 'uint32',    # large ints, but < 4294967295. We could compress more, but... meh
          'line1': 'string',     # 12K unique 80-char TLE strings. Category would give tiny compression.
          'line2': 'string'}     # In theory "string" is better than "object". Not seeing it here.

# Columns handed to `pd.read_csv(parse_dates=...)` so they load as datetimes.
dates = ['day_dt', 'tle_dt']


### Read test

Create a function to load a single day's file and return parsed datatypes. Then test it on a single day.

In [None]:
#export
def load_day_file(dt, folder=DAY_FILE_PATH):
    """Load one day's TLE file into a typed DataFrame.

    Parameters
    ----------
    dt : datetime-like
        Any object with `year`, `month` and `day` attributes; selects the file
        `<folder>/YYYY/MM/DD.tab.gz`.
    folder : str
        Root folder of the daily files. Defaults to `DAY_FILE_PATH`.

    Returns
    -------
    pandas.DataFrame
        Columns named by `COLUMNS`, typed per `DTYPES`, with the columns listed
        in `dates` parsed as datetimes.

    Notes
    -----
    The resolved path is printed so the notebook shows which file was read.
    `infer_datetime_format` is intentionally not passed: pandas 2.0 deprecated
    it (format inference is now the default) and passing it only emits a warning.
    """
    df_path = f"{folder}/{dt.year:4d}/{dt.month:02d}/{dt.day:02d}.tab.gz"
    print(df_path)
    return pd.read_csv(df_path,
                       names=COLUMNS, sep='\t', compression='gzip',
                       dtype=DTYPES,
                       parse_dates=dates)


In [None]:
#export
# Load one sample day to exercise the loader.
df = load_day_file(datetime(2016, 6, 30))
df.count()  # NOTE: result is discarded -- only the cell's last expression is displayed
df.head()


data/VAULT_Data/TLE_daily/2016/06/30.tab.gz


Unnamed: 0,satellite,day_dt,day,tle_dt,tle_ts,line1,line2
0,1000,2016-06-30,6026,2016-06-27 11:15:21,1467040521,1 01000U 65008B 16179.46899882 .00000021 0...,2 01000 32.1467 333.7511 0009366 165.3909 194...
1,1000,2016-06-30,6026,2016-06-27 11:15:21,1467040521,1 01000U 65008B 16179.46899882 .00000021 0...,2 01000 32.1467 333.7511 0009366 165.3909 194...
2,1000,2016-06-30,6026,2016-06-27 11:15:21,1467040521,1 01000U 65008B 16179.46899882 .00000021 0...,2 01000 32.1467 333.7511 0009366 165.3909 194...
3,10000,2016-06-30,6026,2016-06-30 10:49:53,1467298193,1 10000U 77034A 16182.45131225 -.00000171 0...,2 10000 15.5820 331.7785 0019081 259.0540 28...
4,10002,2016-06-30,6026,2016-06-28 23:10:32,1467169832,1 10002U 77034C 16180.96565494 -.00000126 0...,2 10002 16.1681 333.0471 0296361 5.9346 0...


### Memory check
Inspect the resulting dtypes and memory usage.  

The parsing was successful. As expected, `line1` and `line2` are large. Using `category` doesn't save much because so many rows are unique.  The `datetime` columns are surprisingly large.

In [None]:
#export
# Per-column dtype next to deep memory usage in bytes (index excluded);
# transposed so each data column becomes a row.
pd.DataFrame([df.dtypes, df.memory_usage(index=False, deep=True)], index=['Dtype', 'Mem']).T

Unnamed: 0,Dtype,Mem
satellite,uint16,30358
day_dt,datetime64[ns],121432
day,uint16,30358
tle_dt,datetime64[ns],121432
tle_ts,uint32,60716
line1,string,1912554
line2,string,1912554


In [None]:
#export
# Column dtypes, non-null counts, and the frame's reported memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15179 entries, 0 to 15178
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   satellite  15179 non-null  uint16        
 1   day_dt     15179 non-null  datetime64[ns]
 2   day        15179 non-null  uint16        
 3   tle_dt     15179 non-null  datetime64[ns]
 4   tle_ts     15179 non-null  uint32        
 5   line1      15179 non-null  string        
 6   line2      15179 non-null  string        
dtypes: datetime64[ns](2), string(2), uint16(2), uint32(1)
memory usage: 593.1 KB


In [None]:
#export
# Print one complete TLE pair (row 3); `head()` truncates these long strings.
print(df.iloc[3]["line1"])
print(df.iloc[3]["line2"])


1 10000U 77034A   16182.45131225 -.00000171  00000-0  00000+0 0  1275
2 10000  15.5820 331.7785 0019081 259.0540  28.2803  0.96674507130362


### Skyfield

First, test the basic operation works as expected.

In [None]:
#export
def test_skyfield():
    """Smoke-test Skyfield on a single hard-coded TLE.

    Builds an observer at (45.0, -176.0) and a satellite from one TLE pair,
    evaluates the satellite's position relative to the observer at
    2016-06-30 00:00 UTC, and prints altitude, azimuth and distance.
    Nothing is returned or asserted; the printed line is the result.
    """
    # The TLE pair printed from row 3 of the sample day file.
    tle_line1 = "1 10000U 77034A   16182.45131225 -.00000171  00000-0  00000+0 0  1275"
    tle_line2 = "2 10000  15.5820 331.7785 0019081 259.0540  28.2803  0.96674507130362"

    timescale = load.timescale()
    when = timescale.utc(datetime(2016, 6, 30).replace(tzinfo=tz.tzutc()))

    observer = Topos(45.0, -176.0)
    sat = EarthSatellite(tle_line1, tle_line2, '77034', timescale)

    # Observer-relative position, then its horizontal coordinates.
    alt, az, distance = (sat - observer).at(when).altaz()

    print(f'{alt.degrees:.1f}º, {az.degrees:.1f}º, {distance.km:.1f}km')
#
test_skyfield()

51.6º, 179.2º, 38068.6km


In [None]:
# https://rhodesmill.org/skyfield/earth-satellites.html

In [None]:
# Sanity check on datetime arithmetic: 1970-06-01 -> 1971-06-01 contains no leap day, so 365 days.
assert (datetime(1971, 6, 1) - datetime(1970, 6, 1)).days == 365

In [None]:
#export 
# Hand-entered (alt, az, days) example -- presumably a sketch of the per-row
# result computed further down; the values are not derived from data here.
pd.DataFrame([51.5, 189, 2.3], index=['alt','az','days'])

Unnamed: 0,0
alt,51.5
az,189.0
days,2.3


### Can they see me?

Given a Lat/Lon/Time, which satellites can see me? 

We pre-partition the TLE data by Year/Month/Day, so we can quickly load only _today's_ TLE data, and check whether Lat/Lon can see it. 

#### First step: get Alt/Az/dt for each row.
Here we `apply` the `Skyfield.EarthSatellite` function to all TLE rows in the dataframe for today.

**Benchmark**: this takes about 6s on a laptop. @TODO: speed this up by 10x. 



**Minor glitch: cannot use faster `raw=True`**

In theory `apply(..., raw=True)` should be faster than default `apply`.  However, it's not working due to:
> AssertionError: Number of manager items must equal union of block items
>                # manager items: 7, # tot_items: 

* Possible solution: `https://www.nuomiphp.com/eplan/en/254300.html`
* On the other hand, it's an open pandas ticket: `https://github.com/pandas-dev/pandas/issues/34822`

So I `try` the default way first. But the `except` won't work until we track down the block manager issue.

In [None]:
#export
def satellite_alt_az_days(dt: datetime, lat: float, lon: float, folder=None):
    '''Load the TLE rows for the day of {dt} and locate each satellite in the observer's sky.

    Every row's TLE is evaluated at {dt} with Skyfield, as seen from ({lat}, {lon}).

    Parameters
    ----------
    dt : datetime
        Observation time. A naive value is treated as UTC (as before); a
        timezone-aware value is converted to UTC first instead of being relabelled.
    lat, lon : float
        Observer position, passed unchanged to `Topos`
        (presumably decimal degrees -- confirm against the Skyfield docs).
    folder : str, optional
        Root folder of the daily TLE files. When omitted, `load_day_file`
        uses its own default.

    Returns
    -------
    pandas.DataFrame
        One row per loaded TLE row (same index), with columns
        "altitude" and "azimuth" in degrees, and "days": the absolute time
        between {dt} and the row's `tle_dt`. Despite the name, "days" holds
        Timedelta values, not a number of days.
    '''
    # The day file's `tle_dt` values are timezone-naive, so work with a naive
    # UTC datetime; subtracting an aware datetime from them would raise.
    if dt.tzinfo is not None:
        dt = dt.astimezone(tz.tzutc()).replace(tzinfo=None)

    earth_position = Topos(lat, lon)

    ts = load.timescale()
    t = ts.utc(dt.replace(tzinfo=tz.tzutc()))

    def eval_tle(row):
        '''Evaluate one TLE row: returns alt (deg), az (deg) and the TLE's age relative to dt.

        TODO: Currently only exercised with `apply(raw=False)`; the `except`
        branch is the positional fallback for `apply(raw=True)`.
        '''
        try:
            satellite = EarthSatellite(row['line1'], row['line2'], 'x', ts)
            tle_age = abs(dt - row['tle_dt'])
        except IndexError:
            # `apply(raw=True)` sends arrays instead of Series, so index by
            # position in COLUMNS order: 3 = tle_dt, 5 = line1, 6 = line2.
            satellite = EarthSatellite(row[5], row[6], 'x', ts)
            tle_age = abs(dt - row[3])
        topocentric = (satellite - earth_position).at(t)
        alt, az, distance = topocentric.altaz()
        return pd.Series([alt.degrees, az.degrees, tle_age])

    df = load_day_file(dt) if folder is None else load_day_file(dt, folder)

    if df.empty:
        # `apply` on an empty frame hands back a copy of the 7 input columns,
        # which cannot be renamed to 3; return a correctly-typed empty result.
        return pd.DataFrame({"altitude": pd.Series(dtype="float64"),
                             "azimuth": pd.Series(dtype="float64"),
                             "days": pd.Series(dtype="timedelta64[ns]")},
                            index=df.index)

    df_alt_az_days = df.apply(eval_tle, axis=1, raw=False)
    df_alt_az_days.columns = ["altitude", "azimuth", "days"]
    return df_alt_az_days
#

# Evaluate every TLE in the 2016-06-30 day file for an observer at lat 45.0, lon -176.0.
df_alt_az_days = satellite_alt_az_days(datetime(2016, 6, 30), 45.0, -176.0)

data/VAULT_Data/TLE_daily/2016/06/30.tab.gz


In [None]:
#export
# Non-null count per result column (one result row per TLE row loaded).
df_alt_az_days.count()

altitude    15179
azimuth     15179
days        15179
dtype: int64

In [None]:
#export
# Preview the first rows; note that "days" holds Timedelta values.
df_alt_az_days.head()

Unnamed: 0,altitude,azimuth,days
0,-42.413819,297.656662,2 days 12:44:39
1,-42.413819,297.656662,2 days 12:44:39
2,-42.413819,297.656662,2 days 12:44:39
3,51.6463,179.205373,0 days 10:49:53
4,-32.583116,299.09601,1 days 00:49:28


#### Second step: Calculate the hit quality

First approximation: 
* It's a **hit** if the alt > 0 (above the horizon).
* Smaller time difference -> better quality.

**TODO:** Kevin, did I capture that logic correctly? I'm confused how a 2-day lag can be "excellent".  These aren't _days_ are they?

In [None]:
#export
def hit_quality(df_alt_az_days):
    """Return hit/miss and quality as time proximity.

    A row is a *hit* when the satellite is above the horizon (altitude > 0)
    and its TLE is at most 56 days old; everything else is a *miss*. The
    quality label depends only on the TLE age:

    * ``"excellent"``: age <= 2 days
    * ``"good"``: age <= 14 days
    * ``"poor"``: age <= 56 days
    * ``"stale"``: older (always a miss, whatever the altitude)

    Only the whole-day component of the age (`Timedelta.days`) is compared,
    so e.g. "2 days 12:44:39" counts as 2 days and is still "excellent".

    Parameters
    ----------
    `df_alt_az_days`: Dataframe returned by `satellite_alt_az_days`, i.e. with
        an "altitude" column (degrees) and a "days" column (Timedelta). If
        those names are missing, the first and third columns are used instead.

    Returns
    --------
    Dataframe with columns ["hit", "miss"] and the same index as the input.
    Each row has exactly one of the two filled with its quality label; the
    other is NaN. An empty input yields an empty frame with both columns.

    """
    def _column(name, position):
        # Prefer the named column; fall back to the historical positional layout.
        if name in df_alt_az_days.columns:
            return df_alt_az_days[name]
        return df_alt_az_days.iloc[:, position]

    altitude = _column("altitude", 0)
    # Whole days only, matching `Timedelta.days` on a single value.
    age_days = pd.to_timedelta(_column("days", 2)).dt.days

    # Start from the catch-all label and overwrite with progressively fresher
    # tiers, so the tightest matching threshold wins.
    quality = pd.Series("stale", index=df_alt_az_days.index, dtype=object)
    for max_days, label in ((56.0, "poor"), (14.0, "good"), (2.0, "excellent")):
        quality = quality.mask(age_days <= max_days, label)

    # Stale rows (and rows whose age is missing) are misses regardless of altitude.
    is_hit = (altitude > 0.0) & (age_days <= 56.0)

    df_hit_quality = pd.DataFrame({"hit": quality.where(is_hit),
                                   "miss": quality.where(~is_hit)})
    return df_hit_quality
#

In [None]:
#export
# Classify every row as a hit or a miss, labelled by TLE freshness.
df_hit_quality = hit_quality(df_alt_az_days)

In [None]:
#export
# Tally of quality labels among hits (value_counts skips the NaN rows, i.e. the misses).
df_hit_quality["hit"].value_counts()

excellent    1713
good          165
poor           23
Name: hit, dtype: int64

In [None]:
#export
# Tally of quality labels among misses (value_counts skips the NaN rows, i.e. the hits).
df_hit_quality["miss"].value_counts()

excellent    11600
good          1001
stale          469
poor           208
Name: miss, dtype: int64

In [None]:
#export
# Hit and miss tallies side by side; a label absent from one column shows as NaN.
pd.concat([df_hit_quality["hit"].value_counts(), df_hit_quality["miss"].value_counts()], axis=1, sort=False)

Unnamed: 0,hit,miss
excellent,1713.0,11600
good,165.0,1001
poor,23.0,208
stale,,469


In [None]:
#export
# Keep only rows above the horizon (altitude > 0). `.copy()` gives an independent
# frame, so the plotting columns added later do not touch `df_alt_az_days`.
df_alt_az_days_visible = df_alt_az_days[df_alt_az_days["altitude"]>0].copy()

In [None]:
#export
# Number of above-horizon rows.
df_alt_az_days_visible.count()

altitude    1901
azimuth     1901
days        1901
dtype: int64

In [None]:
#export
# Preview the visible rows; the index keeps the row positions from the full frame.
df_alt_az_days_visible.head(5)

Unnamed: 0,altitude,azimuth,days
3,51.6463,179.205373,0 days 10:49:53
17,48.571486,219.021839,0 days 17:13:44
21,35.295619,324.390831,0 days 12:34:30
22,67.176418,358.140393,1 days 15:57:40
23,67.176418,358.140393,1 days 15:57:40


## Visualize the results

Generate a polar alt/az plot of the qualifying satellites
* Excellent = blue
* Good = red
* Else = yellow


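**TODO:** Is "0" here 0 altitude? That would be on the horizon, which is counter-intuitive. (The plot below uses the radius `R = 90 - altitude`, so `R = 0` is the zenith and `R = 90` is the horizon.)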

Note the band of satellites at southern bearings -- this ship was in the Northern hemisphere. 

In [None]:
#export
# Colour code by TLE age in whole days: 0 for <= 2 days, 1 for <= 14 days, 2 otherwise.
# The wider threshold is applied first so the tighter one overrides it.
df_alt_az_days_visible["color"] = (
    pd.Series(2, index=df_alt_az_days_visible.index)
    .mask(df_alt_az_days_visible["days"].dt.days <= 14.0, 1)
    .mask(df_alt_az_days_visible["days"].dt.days <= 2.0, 0)
)
# Radial coordinate: angular distance from the zenith, so R = 0 is straight
# overhead and R = 90 is the horizon.
df_alt_az_days_visible["R"] = 90.0 - df_alt_az_days_visible["altitude"]
fig = px.scatter_polar(
    df_alt_az_days_visible,
    r="R",
    theta="azimuth",
    color="color",
)
fig.show()

