In [1]:
import requests
from requests.exceptions import ReadTimeout, RequestException
from typing import Any, Dict, Optional
import logging
from datetime import datetime, timedelta
import pandas as pd
import glob
import os
from tqdm import tqdm
import pickle
import time
import spacetrack.operators as op
from spacetrack import SpaceTrackClient


In [2]:
def load_all_jsons(directory_path):
    """Load every ``*.json`` file in a directory into one combined DataFrame.

    Each file is read with ``pd.read_json`` and tagged with a ``source_file``
    column holding the file's base name. Files that fail to load are reported
    on stdout and skipped (best effort).

    Args:
        directory_path: Directory searched (non-recursively) for ``*.json`` files.

    Returns:
        The row-wise concatenation of all successfully loaded files, with a
        fresh 0..n-1 index.

    Raises:
        ValueError: If no JSON file could be loaded.
    """
    # glob gives no ordering guarantee; sort so the row order is reproducible.
    json_files = sorted(glob.glob(os.path.join(directory_path, '*.json')))

    dfs = []

    for file in tqdm(json_files, desc="Loading JSON files"):
        try:
            df = pd.read_json(file)
            df['source_file'] = os.path.basename(file)
            dfs.append(df)
        except Exception as e:
            print(f"Error loading {file}: {e}")

    if not dfs:
        raise ValueError("No JSON files were successfully loaded")

    combined_df = pd.concat(dfs, ignore_index=True)
    # Report how many files were actually loaded, not merely how many were found.
    print(f"\nLoaded {len(dfs)} of {len(json_files)} files into DataFrame with shape: {combined_df.shape}")
    return combined_df

In [3]:
#df['datetime'] = pd.to_datetime(df['epoch'])
#df['year'] = df['datetime'].dt.year
#df['month'] = df['datetime'].dt.month

#df.to_parquet(r"C:\Users\dk412\Desktop\David\Python Projects\RusSat\dataout_HPC\udl_CIS_data.parquet",engine='pyarrow', compression = 'gzip', index =True)

In [4]:
"""
monthly_counts = df.groupby(['year', 'month']).size().unstack(fill_value=0)

# Rename columns to month names for clarity
month_names = {
    1: 'January', 2: 'February', 3: 'March', 4: 'April',
    5: 'May', 6: 'June', 7: 'July', 8: 'August',
    9: 'September', 10: 'October', 11: 'November', 12: 'December'
}
monthly_counts = monthly_counts.rename(columns=month_names)

# Add row totals
monthly_counts['Total'] = monthly_counts.sum(axis=1)

# Add column totals
monthly_counts.loc['Total'] = monthly_counts.sum()

monthly_counts
"""

"\nmonthly_counts = df.groupby(['year', 'month']).size().unstack(fill_value=0)\n\n# Rename columns to month names for clarity\nmonth_names = {\n    1: 'January', 2: 'February', 3: 'March', 4: 'April',\n    5: 'May', 6: 'June', 7: 'July', 8: 'August',\n    9: 'September', 10: 'October', 11: 'November', 12: 'December'\n}\nmonthly_counts = monthly_counts.rename(columns=month_names)\n\n# Add row totals\nmonthly_counts['Total'] = monthly_counts.sum(axis=1)\n\n# Add column totals\nmonthly_counts.loc['Total'] = monthly_counts.sum()\n\nmonthly_counts\n"

In [5]:
# Load the combined dataset from its local parquet copy.
# Later cells filter this frame on its 'year', 'month' and 'satNo' columns.
parquet_file = r"D:\Russat\pull\udl_CIS_data.parquet"
df = pd.read_parquet(parquet_file)

In [6]:
df.shape

(9814048, 37)

In [7]:
# Rows from February, March or April 2022, then the distinct satNo values in ascending order.
filtered_df = df[(df['year'] == 2022) & df['month'].isin([2, 3, 4])]
sat_lst = filtered_df['satNo'].tolist()
norad_id_date_filter_lst = sorted(set(sat_lst))

In [8]:
# Load the pickled satellite catalogue; later cells iterate it as a sequence of
# dict-like records with 'NORAD_CAT_ID' and 'OBJECT_TYPE' keys.
# NOTE(review): pickle.load executes arbitrary code from the file - only use with a trusted file.
with open(r'C:\Users\dk412\Desktop\David\Python Projects\RusSat\dataout_HPC\CIS_satcat.pkl', 'rb') as f:  # binary read mode, as pickle requires
    data = pickle.load(f)

In [9]:
# Catalogue IDs whose object type is rocket body, payload or unknown.
norad_list = []
for item in data:
    if item['OBJECT_TYPE'] in ('ROCKET BODY', 'PAYLOAD', 'UNKNOWN'):
        norad_list.append(item['NORAD_CAT_ID'])

# De-duplicate and sort the raw IDs first, then convert each one to int.
norad_id_typefilter_lst = [int(x) for x in sorted(set(norad_list))]

In [10]:
# IDs that pass both the date filter and the object-type filter, ascending.
nord_id_typeDate_filter = sorted(
    set(norad_id_date_filter_lst).intersection(set(norad_id_typefilter_lst))
)
len(nord_id_typeDate_filter)

2562

In [None]:
"""
with open(r'C:\Users\dk412\Desktop\spacetrackcreds.txt', 'r') as f:
    content = f.read()
st_un = content.split(",")[0].strip()
st_pw = content.split(",")[1].strip()
udl_un = content.split(",")[2].strip()
udl_pw = content.split(",")[3].strip()

st = SpaceTrackClient(identity=f'{st_un}', password=f'{st_pw}')

full_lst = []
try:
    norad_ids = ','.join(str(i) for i in nord_id_typeDate_filter)
    query = st.tle(norad_cat_id=norad_ids, orderby='epoch', limit=None, format='tle')
    tles = query.split('\n')
    full_lst.extend([tles[i:i+2] for i in range(0, len(tles), 2)])
    
    time.sleep(5)
    
except Exception as e:
    print(f"Error occurred: {e}")
    time.sleep(900)
"""

In [8]:
# Scratch walk-through of the batching arithmetic: slice the ID list into
# fixed-size batches and build the comma-separated ID string for each one.
# Nothing is fetched here; after the loop the variables hold the last batch.
norad_ids = nord_id_typeDate_filter
batch_size = 100
all_tles = []
# Ceiling division driven by batch_size, so the count cannot drift from the loop step.
total_batches = (len(norad_ids) + batch_size - 1) // batch_size

for i in range(0, len(norad_ids), batch_size):
    batch = norad_ids[i:i + batch_size]
    # norad_id rather than `id`, which would shadow the builtin.
    batch_str = ','.join(str(norad_id) for norad_id in batch)
    batch_num = (i // batch_size) + 1


In [None]:
def fetch_tle_batch(st_client, norad_ids, batch_size=50, epoch='>2022-02-01',
                    max_retries=4, retry_wait=900, pause=5):
    """Fetch TLEs for many NORAD IDs in batches, retrying failed batches.

    Args:
        st_client: Object exposing a ``tle(**query)`` method that returns the
            TLE text for the query (here a ``SpaceTrackClient``).
        norad_ids: Sequence of NORAD catalogue IDs.
        batch_size: Number of IDs requested per call.
        epoch: Epoch predicate passed to the query; ``None`` omits the filter.
        max_retries: Attempts per batch before the batch is given up.
        retry_wait: Seconds to sleep after a failed attempt.
        pause: Seconds to sleep after a successful batch.

    Returns:
        A list of ``[line1, line2]`` lists in response order. A batch that
        fails every attempt contributes nothing (a message is printed).
    """
    all_tles = []
    total_batches = (len(norad_ids) + batch_size - 1) // batch_size

    print(f"Processing {len(norad_ids)} NORAD IDs in {total_batches} batches")

    for start in range(0, len(norad_ids), batch_size):
        batch = norad_ids[start:start + batch_size]
        batch_str = ','.join(str(norad_id) for norad_id in batch)
        batch_num = (start // batch_size) + 1

        print(f"Fetching batch {batch_num}/{total_batches} ({len(batch)} IDs)")

        query_args = {'norad_cat_id': batch_str, 'orderby': 'epoch', 'limit': None, 'format': 'tle'}
        if epoch is not None:
            query_args['epoch'] = epoch

        for attempt in range(1, max_retries + 1):
            try:
                query = st_client.tle(**query_args)
                # Drop blank lines first: the response ends with a newline, and a
                # plain split('\n') would turn that into a bogus one-element "pair".
                lines = [line for line in query.splitlines() if line.strip()]
                batch_tles = [lines[k:k + 2] for k in range(0, len(lines), 2)]
                all_tles.extend(batch_tles)

                print(f"Successfully fetched {len(batch_tles)} TLE pairs")
                time.sleep(pause)
                break

            except Exception as e:
                print(f"Error on batch {batch_num}: {str(e)}")
                if attempt < max_retries:
                    print(f"Retrying in {retry_wait} seconds... (Attempt {attempt + 1}/{max_retries})")
                    time.sleep(retry_wait)
                else:
                    print(f"Failed to fetch batch {batch_num} after {max_retries} attempts")

    return all_tles

In [None]:
# The credentials file is one comma-separated line; fields are read by position
# into st_un, st_pw, udl_un and udl_pw.
with open(r"C:\Users\dk412\Desktop\David\Python Projects\RusSat\spacetrackcreds.txt", 'r') as f:
    content = f.read()
cred_fields = content.split(",")
st_un = cred_fields[0].strip()
st_pw = cred_fields[1].strip()
udl_un = cred_fields[2].strip()
udl_pw = cred_fields[3].strip()

st = SpaceTrackClient(identity=st_un, password=st_pw)

# Fetch TLEs for every selected NORAD ID, 100 IDs per request.
full_lst = fetch_tle_batch(st, nord_id_typeDate_filter, batch_size=100)

In [None]:
import pandas as pd

def parse_scientific_notation(string):
    """Decode a TLE "assumed decimal point" exponent field into a float.

    The field is laid out as ``[sign]NNNNN[exp sign]E`` and stands for
    ``+/-0.NNNNN x 10^(+/-E)``; for example ``' 12345-3'`` is ``0.12345e-3``.
    Leading whitespace and anything after the exponent digit are ignored, so
    callers may pass a slice slightly wider than the 8-character field.

    Args:
        string: Text that starts with the field.

    Returns:
        The decoded value, or ``0.0`` when the text does not match the layout
        (the best-effort default of the original implementation).
    """
    import re  # local import so this definition does not depend on other cells

    if not isinstance(string, str):
        return 0.0

    match = re.match(r'\s*([+-]?)(\d{5})([+\- ])(\d)', string)
    if match is None:
        return 0.0

    sign, digits, exp_sign, exp_digit = match.groups()
    # A blank exponent sign is read as '+'.
    exp_sign = '-' if exp_sign == '-' else '+'
    return float(f"{sign}0.{digits}e{exp_sign}{exp_digit}")

def parse_tle_to_df(tle_list):
    """Parse ``[line1, line2]`` TLE pairs into a DataFrame of orbital elements.

    Fields are cut out of the two lines by fixed column position. Entries that
    are not a two-element list, whose lines are too short to hold every field,
    or whose numeric fields do not parse are reported on stdout and skipped.

    Args:
        tle_list: Iterable of ``[line1, line2]`` lists of strings.

    Returns:
        A DataFrame with one row per parsed pair; it keeps the raw ``line1`` /
        ``line2`` text next to the decoded fields. Empty when nothing parsed.
    """
    # The slices below read indices 0..67 of each line.
    min_length = 68
    data = []

    for tle in tle_list:
        # Skip if not a proper TLE pair
        if not isinstance(tle, list) or len(tle) != 2:
            print(f"Skipping invalid TLE pair: {tle}")
            continue

        line1, line2 = tle
        if (not isinstance(line1, str) or not isinstance(line2, str)
                or len(line1) < min_length or len(line2) < min_length):
            print(f"Skipping invalid TLE pair: {tle}")
            continue

        try:
            record = {
                'line1': line1,
                'line2': line2,
                # Line 1 elements
                'catalog_number': int(line1[2:7]),
                'classification': line1[7],
                'launch_year': line1[9:11],
                'launch_number': line1[11:14],
                'launch_piece': line1[14:17].strip(),
                'epoch_year': int(line1[18:20]),
                'epoch_day': float(line1[20:32]),
                'mean_motion_dot': float(line1[33:43]),
                'mean_motion_ddot': parse_scientific_notation(line1[44:52]),
                'bstar': parse_scientific_notation(line1[53:61]),
                # Ephemeris type is TLE column 63, i.e. index 62 (index 63 is a blank separator).
                'ephemeris_type': int(line1[62]) if line1[62].strip() else 0,
                'element_number': int(line1[64:68]) if line1[64:68].strip() else 0,
                # Line 2 elements
                'satellite_number': int(line2[2:7]),
                'inclination': float(line2[8:16]),
                'ra_of_asc_node': float(line2[17:25]),
                # Eccentricity is stored without its leading "0."
                'eccentricity': float('0.' + line2[26:33]),
                'arg_of_perigee': float(line2[34:42]),
                'mean_anomaly': float(line2[43:51]),
                'mean_motion': float(line2[52:63]),
                'rev_at_epoch': int(line2[63:68]) if line2[63:68].strip() else 0
            }
        except ValueError as e:
            # One garbled record should not abort the whole parse.
            print(f"Skipping unparseable TLE pair: {tle} ({e})")
            continue

        data.append(record)

    return pd.DataFrame(data)

# Parse the fetched TLE pairs into a DataFrame.
# NOTE: this rebinds `df`, which earlier cells use for the frame read from udl_CIS_data.parquet.
df = parse_tle_to_df(full_lst)

# Write the result to parquet only when at least one pair was parsed.
if not df.empty:
    df.to_parquet(r"C:\Users\dk412\Desktop\David\Python Projects\RusSat\dataout_HPC\spacetrack_tle_df.parquet",engine='pyarrow', compression = 'gzip', index =True)

In [None]:
####################################################################
#                     PROD CODE- Space Track TLE Fetch
####################################################################

# Imports
import requests
from requests.exceptions import ReadTimeout, RequestException
from typing import Any, Dict, Optional
import logging
from datetime import datetime, timedelta
import pandas as pd
import glob
import os
from tqdm import tqdm
import pickle
import time
import spacetrack.operators as op
from spacetrack import SpaceTrackClient

  
# DIRECTORIES
# Input/output locations used by the rest of this cell:
#   parquet_file - source parquet read into `df`
#   sat_cat_pkl  - pickled satellite catalogue
#   login_creds  - comma-separated credentials file
#   parquet_out  - destination parquet path
# Local (active) configuration
print('LOCAL')
parquet_file = r"C:\Users\dk412\Desktop\David\Python Projects\RusSat\dataout_HPC\udl_CIS_data.parquet"
sat_cat_pkl = r"C:\Users\dk412\Desktop\David\Python Projects\RusSat\dataout_HPC\CIS_satcat.pkl"
login_creds = r"C:\Users\dk412\Desktop\David\Python Projects\RusSat\spacetrackcreds.txt"
parquet_out = r"C:\Users\dk412\Desktop\David\Python Projects\RusSat\dataout_HPC\spacetrack_tle_df.parquet"
# The bare string literal below holds the alternative (Beocat) paths. It is an
# expression statement with no effect, so none of those assignments run.
"""

#Beocat
print('BEOCAT')
parquet_file = "/homes/dkurtenb/projects/russat/output/udl_CIS_data.parquet"
sat_cat_pkl = '/homes/dkurtenb/projects/russat/output/CIS_satcat.pkl'
login_creds = '/homes/dkurtenb/projects/russat/spacetrackcreds.txt'
parquet_out = "/homes/dkurtenb/projects/russat/output/spacetrack_tle_df.parquet"
        """
def print_progress(message):
    """Print ``message`` prefixed with the current local time (YYYY-MM-DD HH:MM:SS)."""
    print(f"{datetime.now():%Y-%m-%d %H:%M:%S} - {message}")


# UDL data: collect the NORAD IDs that have TLEs during Feb-April 2022
print_progress("Reading Parquet file...")
df = pd.read_parquet(parquet_file)

filtered_df = df[(df['year'] == 2022) & df['month'].isin([2, 3, 4])]
sat_lst = filtered_df['satNo'].tolist()
norad_id_datefilter_lst = sorted(set(sat_lst))

# Space-Track satcat data: NORAD IDs whose object type is ROCKET BODY, PAYLOAD, or UNKNOWN
print_progress("Filtering satellites")
with open(sat_cat_pkl, 'rb') as f:
    data = pickle.load(f)

norad_list = []
for item in data:
    if item['OBJECT_TYPE'] in ('ROCKET BODY', 'PAYLOAD', 'UNKNOWN'):
        norad_list.append(item['NORAD_CAT_ID'])

# De-duplicate and sort the raw IDs first, then convert each one to int.
norad_id_typefilter_lst = [int(x) for x in sorted(set(norad_list))]

# NORAD IDs passing both the date filter (Feb-April 2022) and the type filter, ascending
nord_id_typeDate_filter = sorted(
    set(norad_id_datefilter_lst).intersection(set(norad_id_typefilter_lst))
)
print_progress(f"Found {len(nord_id_typeDate_filter)} satellites after date filtering")

# SpaceTrack fetch: read the login from the comma-separated credentials file
# (first field -> st_un, second field -> st_pw).
print_progress("Starting SpaceTrack fetch")    
with open(login_creds, 'r') as f:
    content = f.read()
cred_fields = content.split(",")
st_un = cred_fields[0].strip()
st_pw = cred_fields[1].strip()

def fetch_tle_batch(norad_ids, batch_size=50, max_retries=10, base_wait=300,
                    max_wait=3600, pause=15, failed_log='failed_batches.txt'):
    """Fetch TLEs for many NORAD IDs in batches with exponential backoff.

    A new ``SpaceTrackClient`` is created for every attempt from the
    module-level ``st_un`` / ``st_pw`` credentials.

    Args:
        norad_ids: Sequence of NORAD catalogue IDs.
        batch_size: Number of IDs requested per call.
        max_retries: Attempts per batch before the batch is recorded as failed.
        base_wait: Base of the backoff; the wait after the k-th failed attempt
            is ``base_wait * 2**k`` seconds.
        max_wait: Upper bound on a single backoff wait in seconds, or ``None``
            for no bound. Unbounded, the ninth retry alone would wait
            ``300 * 2**9`` seconds (about 42 hours).
        pause: Seconds to sleep after a successful batch.
        failed_log: File that failed batches are appended to.

    Returns:
        A list of ``[line1, line2]`` lists in response order. Batches that
        fail every attempt contribute nothing; they are appended to
        ``failed_log`` and summarised on stdout.
    """
    all_tles = []
    total_batches = (len(norad_ids) + batch_size - 1) // batch_size
    failed_batches = []

    print_progress(f"Processing {len(norad_ids)} NORAD IDs in {total_batches} batches")

    for start in range(0, len(norad_ids), batch_size):
        batch = norad_ids[start:start + batch_size]
        batch_str = ','.join(str(norad_id) for norad_id in batch)
        batch_num = (start // batch_size) + 1

        print_progress(f"Fetching batch {batch_num}/{total_batches} ({len(batch)} IDs)")

        for attempt in range(1, max_retries + 1):
            try:
                st = SpaceTrackClient(identity=f'{st_un}', password=f'{st_pw}')
                # NOTE(review): this only assigns an attribute on the client; confirm the
                # installed spacetrack version actually honours `timeout`.
                st.timeout = (30, 300)

                query = st.tle(norad_cat_id=batch_str, orderby='epoch', limit=None, format='tle')
                # Drop blank lines first: the response ends with a newline, and a
                # plain split('\n') would turn that into a bogus one-element "pair".
                lines = [line for line in query.splitlines() if line.strip()]
                batch_tles = [lines[k:k + 2] for k in range(0, len(lines), 2)]
                all_tles.extend(batch_tles)

                print_progress(f"Successfully fetched {len(batch_tles)} TLE pairs")
                time.sleep(pause)
                break

            except Exception as e:
                print_progress(f"Error on batch {batch_num}: {str(e)}")
                if attempt < max_retries:
                    wait_time = base_wait * (2 ** attempt)
                    if max_wait is not None:
                        wait_time = min(wait_time, max_wait)
                    print_progress(f"Retrying in {wait_time} seconds... (Attempt {attempt + 1}/{max_retries})")
                    time.sleep(wait_time)
                else:
                    print_progress(f"Failed to fetch batch {batch_num} after {max_retries} attempts")
                    failed_batches.append({'batch_num': batch_num,
                                           'norad_ids': batch
                                           })
                    # Append, so earlier failures in the same log are kept.
                    with open(failed_log, 'a') as f:
                        f.write(f"Batch {batch_num}:{batch_str}\n")

    if failed_batches:
        print("\nFailed Batches Summary:")
        print(f"Total failed batches: {len(failed_batches)}")
        print("Failed batch numbers:", [b['batch_num'] for b in failed_batches])

    return all_tles

# Run the fetch for every selected NORAD ID, 12 IDs per request.
full_lst = fetch_tle_batch(nord_id_typeDate_filter, batch_size=12)    

import os
file_path = os.path.join(r'C:\Users\dk412\Desktop\David\Python Projects\RusSat\dataout_HPC', 'spacetrack_tle.txt')
# Dump the raw result as text: one str() of each entry per line.
with open(file_path, 'w') as f:
    f.writelines(f"{item}\n" for item in full_lst)

Import and parse the Space-Track data. Note: the data is sparse, because the text file is missing recent data.

In [None]:
# Read the raw TLE text dump into a list of lines; readlines() keeps each line's trailing newline.
# A later cell parses every line with ast.literal_eval, so each line is expected to hold a list literal.
with open(r"D:\Russat\pull\raw_spacetrack_tle.txt", 'r') as file:
    full_lst = file.readlines()

In [None]:
full_lst[:5]

In [None]:
def split_list(lst, parts=4):
    """Split ``lst`` into consecutive chunks of ``len(lst) // parts`` items.

    When the length is not a multiple of ``parts``, the leftover items follow
    as extra trailing chunk(s), so more than ``parts`` chunks can be returned.
    Lists shorter than ``parts`` are split into single-item chunks, and an
    empty list gives ``[]``.

    Args:
        lst: Sequence to split (anything that supports ``len`` and slicing).
        parts: Divisor used to derive the chunk size.

    Returns:
        A list of slices of ``lst`` that concatenate back to ``lst``.
    """
    n = len(lst)
    # Never let the step reach 0: range() rejects a zero step.
    size = max(1, n // parts)
    return [lst[i:i + size] for i in range(0, n, size)]

batches = split_list(full_lst)

In [None]:
len(batches[4])

In [None]:
import pandas as pd
import ast
from datetime import datetime

# Earlier approach, kept for reference: read the text dump directly and split it in halves.
#with open(r'C:\Users\dk412\Desktop\David\Python Projects\RusSat\dataout_HPC\spacetrack_tle.txt', 'r') as file:
#    full_lst = file.readlines()

#length= (len(full_lst))//2
#first_half = full_lst[:length]
#second_half = full_lst[length:]

# Choose which chunk of `batches` this run parses; exactly one assignment is active.
# NOTE(review): the output file suffix (_E in main) appears to be changed by hand to match - confirm.
#tle_lst = batches[0] 
#tle_lst = batches[1] 
#tle_lst = batches[2] 
#tle_lst = batches[3] 
# batches[4] only exists when split_list produced a fifth (remainder) chunk.
tle_lst = batches[4] 

def print_progress(message):
    """Print ``message`` prefixed with the current local time (YYYY-MM-DD HH:MM:SS)."""
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"{stamp} - {message}")

def parse_scientific_notation(string):
    """Decode a TLE "assumed decimal point" exponent field into a float.

    The field is laid out as ``[sign]NNNNN[exp sign]E`` and stands for
    ``+/-0.NNNNN x 10^(+/-E)``; for example ``' 10270-3'`` is ``0.10270e-3``.
    Leading whitespace and anything after the exponent digit are ignored, so
    callers may pass a slice slightly wider than the 8-character field.

    The layout is matched as a pattern rather than by position after
    ``strip()``: stripping removes the blank sign column of positive values and
    shifts every digit by one, which made them fail to parse and return 0.0.

    Args:
        string: Text that starts with the field.

    Returns:
        The decoded value, or ``0.0`` when the text does not match the layout
        (the best-effort default of the original implementation).
    """
    import re  # local import so this definition does not depend on other cells

    if not isinstance(string, str):
        return 0.0

    match = re.match(r'\s*([+-]?)(\d{5})([+\- ])(\d)', string)
    if match is None:
        return 0.0

    sign, digits, exp_sign, exp_digit = match.groups()
    # A blank exponent sign is read as '+'.
    exp_sign = '-' if exp_sign == '-' else '+'
    return float(f"{sign}0.{digits}e{exp_sign}{exp_digit}")

def safe_parse(func, string, default=0):
    """Apply ``func`` to ``string``, returning ``default`` if it raises.

    Only ``Exception`` subclasses are absorbed; ``KeyboardInterrupt`` and
    ``SystemExit`` propagate so a long parse can still be interrupted.

    Args:
        func: Converter to call, e.g. ``int`` or ``float``.
        string: Value handed to ``func``.
        default: Value returned when the conversion fails.

    Returns:
        ``func(string)`` on success, otherwise ``default``.
    """
    try:
        return func(string)
    except Exception:
        return default

def parse_tle_strings(tle_strings):
    """Parse text lines holding ``[line1, line2]`` list literals into a DataFrame.

    Each input string is evaluated with ``ast.literal_eval`` and unpacked into
    the two TLE lines; fields are then cut out by fixed column position.
    Pairs with a line shorter than 69 characters are skipped silently; strings
    that cannot be evaluated or unpacked are reported through
    ``print_progress`` and skipped.

    Numeric fields that fail to convert are stored as 0. The ``is_valid``
    column is False for rows where ``epoch_year``, ``epoch_day`` or
    ``mean_motion`` failed to convert, or where ``catalog_number`` is not
    positive.

    Args:
        tle_strings: Iterable of strings, one list literal each.

    Returns:
        One row per parsed pair (raw lines, decoded fields, ``is_valid``), or
        an empty DataFrame when nothing parsed.
    """
    data = []
    parsed_ok = []

    for tle_string in tle_strings:
        try:
            tle = ast.literal_eval(tle_string.strip())
            line1, line2 = tle

            if len(line1) < 69 or len(line2) < 69:
                continue

            # Parse the validity-critical fields with a None sentinel. With the usual
            # default of 0 a failed conversion cannot be told apart from a real value.
            epoch_year = safe_parse(int, line1[18:20], None)
            epoch_day = safe_parse(float, line1[20:32], None)
            mean_motion = safe_parse(float, line2[52:63], None)

            line_data = {
                'line1': line1,
                'line2': line2,
                'catalog_number': safe_parse(int, line1[2:7]),
                'classification': line1[7],
                'launch_year': line1[9:11],
                'launch_number': line1[11:14],
                'launch_piece': line1[14:17].strip(),
                'epoch_year': 0 if epoch_year is None else epoch_year,
                'epoch_day': 0 if epoch_day is None else epoch_day,
                'mean_motion_dot': safe_parse(float, line1[33:43]),
                'mean_motion_ddot': parse_scientific_notation(line1[44:52]),
                'bstar': parse_scientific_notation(line1[53:61]),
                # Ephemeris type is TLE column 63, i.e. index 62 (index 63 is a blank separator).
                'ephemeris_type': safe_parse(int, line1[62:63]),
                'element_number': safe_parse(int, line1[64:68]),
                'satellite_number': safe_parse(int, line2[2:7]),
                'inclination': safe_parse(float, line2[8:16]),
                'ra_of_asc_node': safe_parse(float, line2[17:25]),
                # Eccentricity is stored without its leading "0."
                'eccentricity': safe_parse(float, '0.' + line2[26:33].strip()),
                'arg_of_perigee': safe_parse(float, line2[34:42]),
                'mean_anomaly': safe_parse(float, line2[43:51]),
                'mean_motion': 0 if mean_motion is None else mean_motion,
                'rev_at_epoch': safe_parse(int, line2[63:68])
            }
            data.append(line_data)
            parsed_ok.append(
                all(value is not None for value in (epoch_year, epoch_day, mean_motion))
            )
        except Exception as e:
            print_progress(f"Error parsing TLE: {str(e)}")
            continue

    if not data:  # Check if we have any data before creating DataFrame
        return pd.DataFrame()

    df = pd.DataFrame(data)
    df['is_valid'] = (
        pd.Series(parsed_ok, index=df.index)
        & (df['catalog_number'] > 0)
        & df['epoch_year'].notna()
        & df['epoch_day'].notna()
        & df['mean_motion'].notna()
    )

    return df

def main():
    """Parse the module-level ``tle_lst`` and write the result to parquet.

    Nothing is written when parsing yields an empty DataFrame; a progress
    message reports which of the two outcomes happened.

    Returns:
        The DataFrame produced by ``parse_tle_strings`` (possibly empty).
    """
    parquet_out = r"C:\Users\dk412\Desktop\David\Python Projects\RusSat\dataout_HPC\spacetrack_tle_df_E.parquet"

    df = parse_tle_strings(tle_lst)

    # Guard clause: nothing to persist.
    if df.empty:
        print_progress('No valid TLE data found')
        return df

    df.to_parquet(parquet_out, engine='pyarrow', compression='gzip', index=True)
    print_progress('DONE')
    return df

if __name__ == "__main__":
    df = main()

In [None]:
df = pd.read_parquet(r"C:\Users\dk412\Desktop\David\Python Projects\RusSat\dataout_HPC\spacetrack_tle_df_D.parquet")
df.shape