In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import pickle
import clean_data

# Columns kept from the raw training frame; the preprocessing cell selects
# exactly these (in this order) before normalization and feature building.
init_cols = [
    'BSTAR',
    'INCLINATION',
    'RA_OF_ASC_NODE',
    'ECCENTRICITY',
    'ARG_OF_PERICENTER',
    'MEAN_ANOMALY',
    'MEAN_MOTION',
    'NORAD_CAT_ID',
    'EPOCH',
]

def load_all(train_path='../model_0/data/train.pkl'):
    """Load the pickled training frame.

    Parameters
    ----------
    train_path : str or path-like, optional
        Location of the pickle to read. Defaults to the model_0 training
        set, so a bare ``load_all()`` call behaves as before.

    Returns
    -------
    object
        Whatever object the pickle contains (the rest of the notebook
        treats it as a pandas DataFrame).

    Notes
    -----
    ``pd.read_pickle`` unpickles arbitrary Python objects and can therefore
    execute code embedded in the file; only point this at trusted data.
    """
    # Only the training set is loaded; loading of the companion test set is
    # currently disabled.
    return pd.read_pickle(train_path)

# Load the training frame into the notebook namespace; the preprocessing
# cell further down rebinds this same name at each stage.
train_df = load_all()
# Disabled: joint train/test cleaning (load_all currently returns only the
# training frame, and no clean_all is defined in the visible cells).
#train_df,test_df = clean_all(train_df, test_df)

In [2]:
def truncate_data(df):
    """Drop rows that fall outside fixed per-column bounds.

    A row is kept only when every column listed in ``limits`` lies inside
    its inclusive ``[low, high]`` interval and its ``EPOCH`` is on or after
    1990-01-01.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the bounded columns and an ``EPOCH`` column.

    Returns
    -------
    pandas.DataFrame
        The surviving rows as a new frame with a fresh 0..n-1 index; the
        input frame is not modified.
    """
    # (column, low, high): both ends are inclusive (Series.between default).
    limits = (
        ('INCLINATION', 0, 180),
        ('RA_OF_ASC_NODE', 0, 360),
        ('ECCENTRICITY', 0, 0.25),
        ('ARG_OF_PERICENTER', 0, 360),
        ('MEAN_ANOMALY', 0, 360),
        ('MEAN_MOTION', 11.25, 17),
    )
    masks = [df[column].between(low, high) for column, low, high in limits]
    masks.append(df['EPOCH'] >= '1990-01-01')

    # AND all the row masks together, in the same order they were built.
    keep = masks[0]
    for mask in masks[1:]:
        keep = keep & mask

    return df[keep].reset_index(drop=True)

In [3]:
#train_df = train_df[train_df['NORAD_CAT_ID'] == 14631].reset_index(drop=True)

print('>>> Truncating data...')
%time train_df = truncate_data(train_df) # 14.5s
%time train_df = train_df[init_cols]  # 4s

print('>>> Normalizing data...')
%time train_df = clean_data.normalize_all_columns(train_df) # 53.4s

print('>>> Building index map...')
%time idx_map = clean_data.create_index_map(train_df, threaded=True, batch_size=50) # 3min 29s

print('>>> Building inputs and labels')
%time X_train,y_train = clean_data.build_xy(train_df, idx_map) # 59min 41s
%time X_train = clean_data.normalize_epoch_diff(X_train, drop_epoch=True) # 19s

print('>>> Saving data')
X_train.to_pickle('data/x_train1.pkl')
y_train.to_pickle('data/y_train1.pkl')

>>> Truncating data...
CPU times: user 10.3 s, sys: 4.19 s, total: 14.5 s
Wall time: 14.5 s
CPU times: user 2.84 s, sys: 1.3 s, total: 4.14 s
Wall time: 4.13 s
>>> Normalizing data...
CPU times: user 47.8 s, sys: 5.54 s, total: 53.3 s
Wall time: 53.3 s
>>> Building index map...


12800it [02:44, 77.67it/s]                            


CPU times: user 27min 42s, sys: 16min 5s, total: 43min 47s
Wall time: 3min 29s
>>> Building inputs and labels
CPU times: user 48min 26s, sys: 11min 21s, total: 59min 47s
Wall time: 59min 41s
CPU times: user 10.7 s, sys: 8.31 s, total: 19 s
Wall time: 19 s
