In [39]:
import pandas as pd
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from collections import Counter
import numpy as np
from typing import Tuple, List, Dict
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.gridspec import GridSpec
from matplotlib.dates import DateFormatter
import os
import joblib
from scipy.stats import median_abs_deviation
from skyfield.api import load, EarthSatellite
from datetime import datetime, timezone
import numpy as np
import sys
sys.path.append(r'C:\Users\dk412\Desktop\David\Python Projects\RusSat\prod_code')
from helpers import *

import warnings
warnings.filterwarnings('ignore')

In [40]:
# Load the full satellite table from parquet. Later cells slice it per satellite
# through its 'NORAD_CAT_ID' column.
# NOTE(review): the file name suggests a combined train/test export - confirm.
df = pd.read_parquet(r'C:\Users\dk412\Desktop\David\Python Projects\RusSat\dataout_HPC\model_test_train.parquet')

In [41]:
# Distinct NORAD catalogue ids in ascending order; the cell displays how many there are.
unique_ids = np.sort(df['NORAD_CAT_ID'].unique())
len(unique_ids)

2544

In [45]:
def build_anom_model(
    NORAD_ID_NUM,
    source_df=None,
    output_root=r"C:\Users\dk412\Desktop\David\Python Projects\RusSat\output\training_plots",
    model_path=r"C:\Users\dk412\Desktop\David\Python Projects\RusSat\anomaly_model",
):
    """Train the anomaly detector for a single satellite.

    Parameters
    ----------
    NORAD_ID_NUM
        Value matched against the 'NORAD_CAT_ID' column to select one satellite.
    source_df : pandas.DataFrame, optional
        Table holding 'NORAD_CAT_ID', 'datetime' and the six orbital element
        columns. Defaults to the notebook-level ``df``.
    output_root : str, optional
        Directory under which the per-satellite ``plots_training_<id>`` folder
        is created.
    model_path : str, optional
        Forwarded unchanged to ``run_anomaly_detection_pipeline``.

    Returns
    -------
    tuple
        ``(orb_df, detector, anomalies, explanations, samp_df)`` where
        ``samp_df`` is the satellite's rows sorted newest-first and ``orb_df``
        is the orbital-element subset of it indexed by 'datetime'. The
        remaining values come straight from the pipeline.

    Raises
    ------
    ValueError
        If no rows match ``NORAD_ID_NUM``.
    """
    if source_df is None:
        source_df = df  # fall back to the frame loaded at the top of the notebook

    samp_df = source_df[source_df['NORAD_CAT_ID'] == NORAD_ID_NUM]
    if samp_df.empty:
        # Fail early with a readable message instead of handing an empty
        # frame to the training pipeline.
        raise ValueError(f"No rows found for NORAD_CAT_ID {NORAD_ID_NUM!r}")

    # Build the plot directory once so the folder that is created and the
    # folder passed to the pipeline can never drift apart.
    plot_save_dir = os.path.join(output_root, f"plots_training_{NORAD_ID_NUM}")
    os.makedirs(plot_save_dir, exist_ok=True)

    # Newest observation first. The calling loop joins anomaly flags back onto
    # samp_df by row position, so this ordering must stay in sync with orb_df.
    samp_df = samp_df.sort_values(by='datetime', ascending=False)
    orb_df = samp_df[['datetime', 'inclination', 'ra_of_asc_node', 'eccentricity',
                      'arg_of_perigee', 'mean_anomaly', 'mean_motion']]
    orb_df = orb_df.set_index('datetime', drop=True)

    feature_names = list(orb_df)

    # The pipeline also returns timestamps and per-anomaly details; they are
    # not part of this function's return value.
    detector, anomalies, explanations, _timestamps, _anomaly_details = run_anomaly_detection_pipeline(
        orb_df,
        feature_names=feature_names,
        model_path=model_path,
        should_train=True,
        NORAD_ID_NUM=NORAD_ID_NUM,
        plot_save_dir=plot_save_dir,
    )

    return orb_df, detector, anomalies, explanations, samp_df

# Batch driver: train one model per satellite and append the per-row anomaly
# flags to a single CSV.
# NOTE: only the first five ids are processed here (trial-sized run); drop the
# slice to train on every satellite.
unique_ids = df['NORAD_CAT_ID'].unique()[:5]
total_sats = len(unique_ids)

all_orbital_features = ['inclination', 'ra_of_asc_node', 'eccentricity', 'arg_of_perigee', 'mean_anomaly', 'mean_motion']
anom_columns = [f'anom_{feat}' for feat in all_orbital_features]

for count, x in enumerate(unique_ids, 1):
    orb_df, detector, anomalies, explanations, samp_df = build_anom_model(x)

    # Map each flagged sample to the names of the features that triggered it.
    # Assumes exp['sample_index'] is the row position within orb_df / samp_df
    # (both share the same ordering) - TODO confirm against helpers.
    anom_dict = {exp['sample_index']: [feat['feature'] for feat in exp['anomalous_features']] for exp in explanations}

    # Fresh 0..n-1 index so the positional sample indices line up in the join.
    full_df = samp_df.reset_index(drop=True)

    # One row per flagged sample, one 0/1 column per orbital feature. The frame
    # always carries every anom_* column, even when nothing was flagged.
    anom_df = pd.DataFrame(0,
                           index=list(anom_dict.keys()),
                           columns=anom_columns)

    for key, features in anom_dict.items():
        anom_df.loc[key, [f'anom_{feat}' for feat in features]] = 1

    full_df = full_df.join(anom_df, how='left')

    # Sum only the flag columns created above rather than any column whose
    # name happens to contain 'anom_'.
    full_df['anom_count'] = full_df[anom_columns].sum(axis=1)
    # Rows without anomalies come out of the left join as NaN -> 0.
    # NOTE(review): this also zero-fills NaNs in the original data columns.
    full_df = full_df.fillna(0)

    # First satellite creates the file (with header); later ones append.
    mode = 'w' if count == 1 else 'a'
    header = count == 1
    full_df.to_csv(r'C:\Users\dk412\Desktop\David\Python Projects\RusSat\output\anom_df_TRAIN_all_sats.csv', mode=mode, header=header, index=False)

    # Report progress against the number of satellites this loop will process,
    # not the number of satellites in the whole dataset.
    progress = count / total_sats * 100
    print(f"\nModel number {count} out of {total_sats} complete. Progress: {progress:.3f}% done\n")

    # Release per-satellite objects before the next iteration.
    del full_df, orb_df, detector, anomalies, explanations, samp_df
    del anom_df, anom_dict

Training new model...
Initializing detector with input_dim=6, latent_dim=4
Using device: cpu
Starting training with data shape: (2918, 6)


KeyboardInterrupt: 

************************************ WORKING

In [1]:
import os

In [5]:
# Work out which satellites still need a model by looking at the files already
# present in the model directory.
files = os.listdir(r"C:\Users\dk412\Desktop\David\Python Projects\RusSat\anomaly_model")


def _leading_sat_id(filename):
    """Return the integer before the first '_' in *filename*, or None if it is not numeric."""
    prefix = filename.split("_")[0].strip("'\"")
    try:
        return int(prefix)
    except ValueError:
        # Not a per-satellite artefact (e.g. a stray or hidden file): ignore it
        # instead of aborting the whole cell.
        return None


trained = list({sat_id for sat_id in map(_leading_sat_id, files) if sat_id is not None})
sat = df['NORAD_CAT_ID'].unique().tolist()

# Ids present in the data that have no model file yet (set order, not sorted).
sats_to_train = list(set(sat) - set(trained))

In [6]:
# Training backlog at a glance: dataset size, models on disk, models still to build.
print(
    f"Total satellites: {len(sat)}",
    f"Already trained: {len(trained)}",
    f"Need training: {len(sats_to_train)}",
    sep="\n",
)

Total satellites: 2544
Already trained: 5
Need training: 2539


In [36]:
# Take the first ten ids that still lack a model as a small trial batch and display it.
test = sats_to_train[:10]
test

[8195, 16393, 16396, 16397, 16398, 32782, 16402, 16404, 16408, 16409]

In [37]:
# Put satellite 51511 at the front of the trial batch. The list is rebuilt
# rather than mutated in place so that re-running this cell cannot queue the
# same satellite more than once.
test = [51511] + [sat_id for sat_id in test if sat_id != 51511]

In [38]:
# Display the trial batch, which now starts with 51511.
test

[51511, 8195, 16393, 16396, 16397, 16398, 32782, 16402, 16404, 16408, 16409]