In [2]:
import spacetrack.operators as op
from spacetrack import SpaceTrackClient
import pickle
import pandas as pd
import requests, base64
import numpy as np
import os

In [3]:
### Read the Space-Track and UDL credentials from a text file.
### The file is a single line with four comma-separated fields, in this order:
##
#EXAMPLE####################################################
# spacetrack_username, spacetrack_password, udl_username, udl_password
############################################################

with open(r'C:\Users\dk412\Desktop\spacetrackcreds.txt', 'r') as f:
    content = f.read()

# Split once and validate, so a malformed file fails with a clear message instead of a
# bare IndexError on one of the fields.
cred_fields = [field.strip() for field in content.split(",")]
if len(cred_fields) < 4:
    raise ValueError(
        f"Credentials file must contain 4 comma-separated fields, found {len(cred_fields)}"
    )
st_un, st_pw, udl_un, udl_pw = cred_fields[:4]

# Directory that receives every file written by this notebook.
output_dir = r'C:\Users\dk412\Desktop\David\Python Projects\RusSat\output'


In [4]:
# Confirm that all four credentials were loaded WITHOUT echoing the secrets into the
# saved notebook output (cell outputs are stored in the .ipynb and shared with it).
tuple('[redacted]' if value else '[missing]' for value in (udl_un, udl_pw, st_un, st_pw))

('[redacted]', '[redacted]', '[redacted]', '[redacted]')

List of all Russian NORAD IDs

In [5]:
# Authenticate against Space-Track with the credentials loaded above.
st = SpaceTrackClient(identity=st_un, password=st_pw)

## Test Query
# Latest element set per object (ordinal=1) with an epoch newer than now-30, a mean
# motion in the inclusive range 0.99-1.01 and an eccentricity below 0.01, streamed
# line by line in TLE format.
data = st.tle_latest(
    iter_lines=True,
    ordinal=1,
    epoch='>now-30',
    mean_motion=op.inclusive_range(0.99, 1.01),
    eccentricity=op.less_than(0.01),
    format='tle',
)

# Persist the streamed lines, one per row, in the output directory.
tle_latest_path = os.path.join(output_dir, 'tle_latest.txt')
with open(tle_latest_path, 'w') as fp:
    fp.writelines(line + '\n' for line in data)

In [6]:
# Pull the current satellite catalogue for country code 'CIS'.
sat_cat = st.satcat(country='CIS', current='Y')
# Show only the first two records; displaying the whole list floods the notebook output.
sat_cat[:2]

[{'INTLDES': '1957-001A',
  'NORAD_CAT_ID': '1',
  'OBJECT_TYPE': 'ROCKET BODY',
  'SATNAME': 'SL-1 R/B',
  'COUNTRY': 'CIS',
  'LAUNCH': '1957-10-04',
  'SITE': 'TTMTR',
  'DECAY': '1957-12-01',
  'PERIOD': '96.19',
  'INCLINATION': '65.10',
  'APOGEE': '938',
  'PERIGEE': '214',
  'COMMENT': None,
  'COMMENTCODE': '4',
  'RCSVALUE': '0',
  'RCS_SIZE': 'LARGE',
  'FILE': '1',
  'LAUNCH_YEAR': '1957',
  'LAUNCH_NUM': '1',
  'LAUNCH_PIECE': 'A',
  'CURRENT': 'Y',
  'OBJECT_NAME': 'SL-1 R/B',
  'OBJECT_ID': '1957-001A',
  'OBJECT_NUMBER': '1'},
 {'INTLDES': '1958-004A',
  'NORAD_CAT_ID': '7',
  'OBJECT_TYPE': 'ROCKET BODY',
  'SATNAME': 'SL-1 R/B',
  'COUNTRY': 'CIS',
  'LAUNCH': '1958-05-15',
  'SITE': 'TTMTR',
  'DECAY': '1958-12-03',
  'PERIOD': '102.74',
  'INCLINATION': '65.14',
  'APOGEE': '1571',
  'PERIGEE': '206',
  'COMMENT': None,
  'COMMENTCODE': None,
  'RCSVALUE': '0',
  'RCS_SIZE': None,
  'FILE': '1',
  'LAUNCH_YEAR': '1958',
  'LAUNCH_NUM': '4',
  'LAUNCH_PIECE': 'A',
  

In [8]:
from datetime import datetime

def filter_satellites_by_date(satellites, cutoff_date='2002-08-21'):
    """Return the catalogue records launched strictly after ``cutoff_date``.

    satellites: iterable of dict records; the launch date is read from the 'LAUNCH'
        key as a 'YYYY-MM-DD' string. Records with a missing or empty 'LAUNCH' are dropped.
    cutoff_date: 'YYYY-MM-DD' string; a launch on exactly this date is excluded.

    Returns a new list holding the matching records in their original order.
    """
    date_format = '%Y-%m-%d'
    threshold = datetime.strptime(cutoff_date, date_format)

    kept = []
    for record in satellites:
        launch_text = record.get('LAUNCH')
        if not launch_text:
            continue
        if datetime.strptime(launch_text, date_format) > threshold:
            kept.append(record)

    return kept

filtered_sat_cat = filter_satellites_by_date(sat_cat)

In [9]:
len(filtered_sat_cat)

3117

In [10]:
# Catalogue numbers of the date-filtered objects, converted to int and sorted ascending.
cis_norad_ids = sorted(int(entry.get('NORAD_CAT_ID')) for entry in filtered_sat_cat)

Pull TLEs Spacetrack

In [10]:
# Request every TLE on record (limit=None) for NORAD ID 48551, ordered by epoch, as raw
# TLE text, then split the response into one string per line.
# NOTE(review): the odd list length shown below suggests the response ends with a
# newline, leaving one trailing empty string after the split — confirm.
single_query = st.tle(norad_cat_id= 48551, orderby='epoch', limit=None, format='tle').split('\n')

In [11]:
type(single_query), len(single_query)

(list, 4581)

In [12]:
test_lst = single_query[:20]
test_lst

['1 48551U 18084DF  21132.21928323  .00000181  00000-0  48110-4 0  9991',
 '2 48551  98.1899  20.3989 0034350  83.9558 276.5531 14.59012861 10033',
 '1 48551U 18084DF  21134.20810663  .00000215  00000-0  55495-4 0  9991',
 '2 48551  98.1892  22.3598 0034531  78.2300 282.2770 14.59014108 10324',
 '1 48551U 18084DF  21134.61958681  .00000207  00000-0  53649-4 0  9999',
 '2 48551  98.1891  22.7655 0034522  76.9934 283.5117 14.59014263 10386',
 '1 48551U 18084DF  21134.89390684  .00000206  00000-0  53588-4 0  9993',
 '2 48551  98.1890  23.0360 0034519  76.1756 284.3281 14.59014414 10426',
 '1 48551U 18084DF  21135.16822679  .00000225  00000-0  57547-4 0  9998',
 '2 48551  98.1890  23.3065 0034519  75.3572 285.1450 14.59014697 10462',
 '1 48551U 18084DF  21136.47124623  .00000198  00000-0  51706-4 0  9997',
 '2 48551  98.1888  24.5913 0034497  71.4543 289.0399 14.59015258 10654',
 '1 48551U 18084DF  21136.67698604  .00000200  00000-0  52302-4 0  9991',
 '2 48551  98.1888  24.7942 0034491  7

Parse Spacetrack to DF

In [13]:
def parse_line1(line1):
    """Slice the fixed-width fields out of TLE line 1.

    line1: the first line of a TLE as a string.

    Returns a dict of field name to whitespace-stripped text; nothing is converted to a
    number. Raises IndexError when the line is too short to hold the single-character
    fields at offsets 62 and 68.
    """
    # (field name, 0-based slice or single index) in output order.
    layout = (
        ('satellite_num', slice(2, 7)),
        ('intl_des', slice(9, 17)),
        ('epoch_year', slice(18, 20)),
        ('epoch_day', slice(20, 32)),
        ('first_derivative_mean_motion', slice(33, 43)),
        ('second_derivative_mean_motion', slice(44, 52)),
        ('bstar_drag', slice(53, 61)),
        ('ephemeris_type', 62),
        ('element_set_number', slice(64, 68)),
        ('checksum_1', 68),
    )
    return {field: line1[position].strip() for field, position in layout}

def parse_line2(line2):
    """Slice the fixed-width fields out of TLE line 2.

    line2: the second line of a TLE as a string.

    Returns a dict of field name to whitespace-stripped text; nothing is converted to a
    number. Raises IndexError when the line is too short to hold the checksum character
    at offset 68.
    """
    # (field name, 0-based slice or single index) in output order.
    layout = (
        ('satellite_num', slice(2, 7)),
        ('inclination', slice(8, 16)),
        ('right_ascension', slice(17, 25)),
        ('eccentricity', slice(26, 33)),
        ('argument_of_perigee', slice(34, 42)),
        ('mean_anomaly', slice(43, 51)),
        ('mean_motion', slice(52, 63)),
        ('revolution_number', slice(63, 68)),
        ('checksum_2', 68),
    )
    return {field: line2[position].strip() for field, position in layout}

# Pair consecutive entries as (line 1, line 2); zip stops at the shorter slice, so an
# unpaired final entry is dropped.
tle_pairs = list(zip(single_query[0::2], single_query[1::2]))

# One flat record per TLE. Both parsers emit 'satellite_num'; the line-2 value is the
# one that ends up in the record.
tle_data = [
    {**parse_line1(first_line), **parse_line2(second_line)}
    for first_line, second_line in tle_pairs
]

df = pd.DataFrame(tle_data)
df.head()

Unnamed: 0,satellite_num,intl_des,epoch_year,epoch_day,first_derivative_mean_motion,second_derivative_mean_motion,bstar_drag,ephemeris_type,element_set_number,checksum_1,inclination,right_ascension,eccentricity,argument_of_perigee,mean_anomaly,mean_motion,revolution_number,checksum_2
0,48551,18084DF,21,132.21928323,1.81e-06,00000-0,48110-4,0,999,1,98.1899,20.3989,34350,83.9558,276.5531,14.59012861,1003,3
1,48551,18084DF,21,134.20810663,2.15e-06,00000-0,55495-4,0,999,1,98.1892,22.3598,34531,78.23,282.277,14.59014108,1032,4
2,48551,18084DF,21,134.61958681,2.07e-06,00000-0,53649-4,0,999,9,98.1891,22.7655,34522,76.9934,283.5117,14.59014263,1038,6
3,48551,18084DF,21,134.89390684,2.06e-06,00000-0,53588-4,0,999,3,98.189,23.036,34519,76.1756,284.3281,14.59014414,1042,6
4,48551,18084DF,21,135.16822679,2.25e-06,00000-0,57547-4,0,999,8,98.189,23.3065,34519,75.3572,285.145,14.59014697,1046,2


In [14]:
df.shape

(2290, 18)

Save to Parquet

In [None]:
df.to_parquet(os.path.join(output_dir,'test_tle_data.parquet'), index=False)

In [None]:
df_loaded = pd.read_parquet(os.path.join(output_dir,'test_tle_data.parquet'))
df_loaded.head()

UDL Testing

In [162]:
# Break the NORAD ID list into at most NUM_SPLITS contiguous batches of near-equal size.
NUM_SPLITS = 10
n = len(cis_norad_ids)
# Ceiling division (floored at 1). Plain floor division left an extra short batch
# whenever n was not a multiple of NUM_SPLITS, and gave a step of 0 (ValueError in
# range) for lists shorter than NUM_SPLITS.
split_size = max(1, -(-n // NUM_SPLITS))
split_lists = [cis_norad_ids[i:i + split_size] for i in range(0, n, split_size)]

# Print results
for i, sublist in enumerate(split_lists):
    print(f"Split {i+1}: {sublist}")

Split 1: [1, 2, 3, 7, 8, 21, 34, 35, 36, 37, 38, 39, 40, 41, 42, 55, 56, 65, 66, 71, 72, 73, 76, 77, 78, 80, 91, 92, 93, 94, 95, 96, 97, 103, 104, 112, 114, 168, 169, 266, 267, 269, 270, 281, 283, 284, 287, 289, 297, 298, 338, 339, 346, 347, 348, 349, 363, 364, 365, 366, 367, 368, 371, 372, 373, 376, 379, 380, 381, 382, 383, 384, 386, 387, 389, 390, 391, 392, 393, 394, 395, 422, 423, 425, 428, 429, 430, 431, 435, 437, 438, 441, 442, 443, 448, 449, 450, 451, 452, 454, 456, 457, 458, 459, 460, 482, 483, 484, 485, 486, 487, 488, 489, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 512, 517, 518, 521, 522, 524, 554, 555, 563, 566, 567, 568, 569, 570, 571, 572, 580, 581, 582, 583, 584, 585, 586, 587, 588, 591, 592, 595, 596, 632, 633, 673, 676, 683, 684, 685, 686, 687, 688, 689, 690, 707, 708, 709, 710, 712, 713, 746, 748, 750, 751, 757, 758, 762, 763, 766, 767, 768, 769, 770, 772, 773, 774, 776, 777, 778, 779, 780, 781, 783, 784, 785, 791, 792, 793, 794, 795, 797, 798, 803, 804, 807

In [11]:
len(cis_norad_ids)

3117

In [None]:
# Two-ID query string; not referenced by any other visible cell.
shrt_query = '51511,41854'
# Comma-separated list of every filtered NORAD ID, interpolated into the request URL below.
query = ",".join(str(norad_id) for norad_id in cis_norad_ids)
print(len(query))
query

18701


'27504,27531,27532,27534,27535,27541,27542,27543,27544,27547,27548,27549,27552,27553,27558,27560,27610,27611,27613,27614,27615,27616,27617,27618,27619,27620,27621,27622,27633,27646,27681,27682,27707,27708,27709,27710,27768,27769,27774,27775,27776,27777,27778,27779,27780,27781,27782,27817,27818,27819,27821,27823,27824,27834,27835,27836,27837,27840,27856,27857,27868,27869,27870,27873,27874,27939,27944,28052,28053,28061,28084,28085,28086,28087,28088,28089,28090,28091,28092,28093,28094,28098,28099,28100,28101,28102,28103,28104,28105,28106,28107,28108,28109,28110,28111,28112,28113,28114,28115,28116,28119,28120,28121,28122,28123,28124,28125,28126,28127,28128,28133,28135,28136,28139,28142,28143,28163,28164,28165,28166,28167,28168,28186,28188,28194,28195,28196,28197,28198,28199,28228,28229,28234,28235,28236,28240,28241,28242,28256,28261,28262,28350,28351,28352,28353,28354,28355,28356,28357,28359,28360,28367,28374,28380,28381,28394,28395,28396,28397,28399,28400,28419,28420,28421,28444,28445,284

In [None]:
# HTTP Basic auth header built from the UDL username and password.
basicAuth = "Basic " + base64.b64encode((f"{udl_un}:{udl_pw}").encode('utf-8')).decode("ascii")


# Single request against the elset/history/aodr endpoint for every ID in `query`,
# restricted to epochs after 2016-08-21 ('%3E' is a URL-encoded '>').
bulk_url = f"https://unifieddatalibrary.com/udl/elset/history/aodr?epoch=%3E2016-08-21T00:00:00.000000Z&satNo={query}&outputFormat=JSON"
#his_url = f"https://unifieddatalibrary.com/udl/elset/history?epoch=2016-08-21T00:00:00.000000Z..2021-08-21T00:00:00.000000Z&satNo={query}"
#test_url = f"https://unifieddatalibrary.com/udl/elset/history?epoch=%3E2016-08-21T00:00:00.000000Z&satNo={query}"

# NOTE(review): verify=False disables TLS certificate checking while credentials are
# being sent; prefer pointing `verify` at the proper CA bundle.
result = requests.get(bulk_url, headers={'Authorization':basicAuth}, verify=False)

print(f"Status code: {result.status_code}")
print(f"Response content: {result.text[:200]}")  # Print first 200 chars to see what we got

if result.status_code == 200 and result.text:
    try:
        payload = result.json()
    except ValueError as e:
        # Every JSON decode error raised by Response.json() is a ValueError subclass, so
        # this needs no `json` import (the notebook never imports it).
        print(f"Error decoding JSON: {e}")
    else:
        # Built outside the try so a DataFrame construction error is not misreported as
        # a JSON decoding problem.
        tle_df = pd.DataFrame(payload)
        print("DataFrame created successfully")
else:
    print("Request failed or empty response")



Status code: 429
Response content: User already has a Bulk Data job for this collection, please try again after those job(s) have completed.
Request failed or empty response


In [199]:
tle_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 27 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   argOfPerigee           10000 non-null  float64
 1   apogee                 10000 non-null  float64
 2   idOnOrbit              10000 non-null  object 
 3   meanMotionDDot         10000 non-null  float64
 4   epoch                  10000 non-null  object 
 5   source                 10000 non-null  object 
 6   origNetwork            10000 non-null  object 
 7   perigee                10000 non-null  float64
 8   semiMajorAxis          10000 non-null  float64
 9   createdAt              10000 non-null  object 
 10  idElset                10000 non-null  object 
 11  bStar                  10000 non-null  float64
 12  classificationMarking  10000 non-null  object 
 13  algorithm              10000 non-null  object 
 14  inclination            10000 non-null  float64
 15  per

In [200]:
tle_df['epoch'].max(),tle_df['epoch'].min()

('2022-01-02T06:42:02.808288Z', '2021-11-06T13:56:39.460128Z')

In [None]:
tle_df['epoch'].max(),tle_df['epoch'].min()

('2024-07-31T22:10:46.122816Z', '2022-05-01T08:02:11.025312Z')

In [None]:
# HTTP Basic auth header built from the UDL username and password.
basicAuth = "Basic " + base64.b64encode((f"{udl_un}:{udl_pw}").encode('utf-8')).decode("ascii")

# One frame per satellite that returned data. They are concatenated once after the
# loop, because growing a DataFrame with concat on every iteration copies all earlier
# rows each time.
sat_frames = []

for e in cis_norad_ids:
    # Element-set history for one NORAD ID, epochs after 2016-08-21 ('%3E' is '>').
    test_url = f"https://unifieddatalibrary.com/udl/elset/history?epoch=%3E2016-08-21T00:00:00.000000Z&satNo={e}"

    # NOTE(review): verify=False disables TLS certificate checking while credentials
    # are being sent; prefer pointing `verify` at the proper CA bundle.
    result = requests.get(test_url, headers={'Authorization':basicAuth}, verify=False)

    # An error response (e.g. 401/429) must not be parsed as data: its body is either
    # not JSON or an error object that would be stored as a bogus one-row frame.
    if not result.ok:
        print(f'{e} request failed with status {result.status_code}')
        continue
    try:
        json_data = result.json()
    except ValueError:
        print(f'{e} returned a body that is not valid JSON')
        continue

    if json_data:
        if not isinstance(json_data,list):
            json_data= [json_data]
        # Named sat_df so the notebook-level `df` from the TLE parsing cell is not overwritten.
        sat_df = pd.DataFrame(json_data)
        sat_df['satNo'] = e
        print(f"Sat Num: {e} with shape {sat_df.shape}")
        sat_frames.append(sat_df)
    else:
        print(f'{e} has nothing')

# pd.concat raises on an empty list, so fall back to an empty frame when nothing came back.
tle_df = pd.concat(sat_frames, ignore_index=True) if sat_frames else pd.DataFrame()




Sat Num: 27504 with shape (1123, 28)




27531 has nothing




27532 has nothing




Sat Num: 27534 with shape (2275, 28)




Sat Num: 27535 with shape (2032, 28)




27541 has nothing




27542 has nothing




27543 has nothing




27544 has nothing




In [None]:
tle_df.to_parquet(os.path.join(output_dir,'test_tle_data.parquet'), index=False)

In [None]:
tle_df['satNo'].unique()

In [None]:
tle_df['epoch'].sort_values(ascending=True)

In [None]:
################################################################
#           PROD CODE - UDL
################################################################

import spacetrack.operators as op
from spacetrack import SpaceTrackClient
import pickle
import pandas as pd
import requests, base64
import numpy as np
import os
from datetime import datetime


def udl_tle_to_par(cntry_nm, output_dir, st_un, st_pw, udl_un, udl_pw,
                   launch_cutoff='2002-08-21',
                   epoch_start='2016-08-21T00:00:00.000000Z'):
    """Download UDL element-set history for one country's satellites and save it as Parquet.

    Steps: pull the current Space-Track satellite catalogue for ``cntry_nm``, pickle it,
    keep the objects launched after ``launch_cutoff``, query the UDL ``elset/history``
    endpoint once per NORAD ID for element sets with an epoch after ``epoch_start``, and
    write all returned rows to one Parquet file.

    Inputs:
        cntry_nm: country code passed to the Space-Track satcat query, e.g. 'CIS'.
        output_dir: directory for the output files; created if it does not exist.
        st_un, st_pw: Space-Track username and password.
        udl_un, udl_pw: UDL username and password (sent as HTTP Basic auth).
        launch_cutoff: 'YYYY-MM-DD'; only objects launched strictly after this date
            are queried.
        epoch_start: timestamp placed after '>' in the UDL epoch filter.

    Outputs (written to output_dir):
        {cntry_nm}_satcat.pkl: pickled, unfiltered satellite catalogue records.
        {cntry_nm}_tle_data.parquet: all returned element sets plus a 'satNo' column.

    Returns None. Satellites whose request fails or returns a non-JSON body are skipped
    and listed in a printed summary instead of aborting the run.

    NOTE(review): the previous header comment said the query window started in
    February 2012, while the request URL used 2016-08-21. The default ``epoch_start``
    keeps the URL's value; confirm which start date is intended.
    """
    st = SpaceTrackClient(identity=st_un, password=st_pw)
    sat_cat = st.satcat(country=cntry_nm, current='Y')

    os.makedirs(output_dir, exist_ok=True)

    print("************* Generating Norad ID list ********************")
    with open(os.path.join(output_dir, f'{cntry_nm}_satcat.pkl'), 'wb') as file:
        pickle.dump(sat_cat, file)

    # Keep objects with a launch date strictly after the cutoff; records with no
    # 'LAUNCH' value are dropped.
    cutoff = datetime.strptime(launch_cutoff, '%Y-%m-%d')
    norad_ids = sorted(
        int(sat.get('NORAD_CAT_ID'))
        for sat in sat_cat
        if sat.get('LAUNCH') and datetime.strptime(sat['LAUNCH'], '%Y-%m-%d') > cutoff
    )

    basicAuth = "Basic " + base64.b64encode((f"{udl_un}:{udl_pw}").encode('utf-8')).decode("ascii")
    headers = {'Authorization': basicAuth}

    # One frame per satellite, concatenated once after the loop; concat inside the loop
    # re-copies every earlier row on each iteration.
    sat_frames = []
    failed_ids = []

    for norad_id in norad_ids:
        # '%3E' is a URL-encoded '>': request element sets newer than epoch_start.
        url = (
            "https://unifieddatalibrary.com/udl/elset/history"
            f"?epoch=%3E{epoch_start}&satNo={norad_id}"
        )

        # NOTE(review): verify=False disables TLS certificate checking while
        # credentials are being sent; prefer pointing `verify` at the proper CA bundle.
        result = requests.get(url, headers=headers, verify=False)

        # An error response must not be parsed as data: its body is either not JSON or
        # an error object that would be stored as a bogus one-row frame.
        if not result.ok:
            print(f'{norad_id} request failed with status {result.status_code}')
            failed_ids.append(norad_id)
            continue
        try:
            json_data = result.json()
        except ValueError:
            print(f'{norad_id} returned a body that is not valid JSON')
            failed_ids.append(norad_id)
            continue

        if json_data:
            if not isinstance(json_data, list):
                json_data = [json_data]
            sat_df = pd.DataFrame(json_data)
            sat_df['satNo'] = norad_id
            print(f"Sat Num: {norad_id} with shape {sat_df.shape}")
            sat_frames.append(sat_df)
        else:
            print(f'{norad_id} has nothing')

    if failed_ids:
        print(f"{len(failed_ids)} satellites could not be retrieved: {failed_ids}")

    # pd.concat raises on an empty list, so fall back to an empty frame.
    tle_df = pd.concat(sat_frames, ignore_index=True) if sat_frames else pd.DataFrame()
    tle_df.to_parquet(os.path.join(output_dir, f'{cntry_nm}_tle_data.parquet'), index=False)

#with open(r'C:\Users\dk412\Desktop\spacetrackcreds.txt', 'r') as f:
with open('/homes/dkurtenb/projects/russat/spacetrackcreds.txt','r') as f:
    content = f.read()

# The file is one line of four comma-separated fields:
#   spacetrack_username, spacetrack_password, udl_username, udl_password
# Split once and validate, so a malformed file fails with a clear message instead of a
# bare IndexError on one of the fields.
cred_fields = [field.strip() for field in content.split(",")]
if len(cred_fields) < 4:
    raise ValueError(
        f"Credentials file must contain 4 comma-separated fields, found {len(cred_fields)}"
    )
st_un, st_pw, udl_un, udl_pw = cred_fields[:4]

#output_dir = r'C:\Users\dk412\Desktop\David\Python Projects\RusSat\output'
output_dir = '/homes/dkurtenb/projects/russat/output'
udl_tle_to_par('CIS', output_dir, st_un, st_pw, udl_un, udl_pw)

In [None]:
with open(r'C:\Users\dk412\Desktop\David\Python Projects\RusSat\output\CIS_satcat.pkl', 'rb') as file:
    data = pickle.load(file)
len(data)

In [None]:
data

Test HPC Output Parquet

In [None]:
df_loaded.to_parquet(r'C:\Users\dk412\Desktop\David\Python Projects\RusSat\dataout_HPC\COMPRESSED_CIS_tle_data.parquet', compression='gzip')
df_loaded.shape

In [None]:
df_loaded.info()

In [None]:
df_loaded['satNo'].unique()