In [1]:
import os
import pandas 
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def res_file_to_df(file):
    '''
    Converts a res file into a pandas DataFrame for a single day

    Parameters
    ----------
    file : path to the res file to read. The first 12 lines are treated as
           header information and skipped; every following line must hold
           11 whitespace-separated numbers, one per column listed below.

    Returns
    -------
    pandas.DataFrame with one row per data line and the 11 named columns.

    Raises
    ------
    ValueError if the file has no data lines after the header, if a value
    cannot be parsed as a float, or if a line does not have 11 values.
    '''
    # header information
    columns = ['MJD', 
               'frac of a day', 
               'GPS range AB [m]', 
               'GPS range rate AB [m/s]', 
               'Kband range [m]',
               'Kband range rate [m/s]',
               'O-C range rate [m/s]',
               'Latitude [deg]',
               'Longitude [deg]',
               'Arg. of lat. [deg]',
               'beta [deg]']
    n_header_lines = 12 # number of leading lines that carry no data

    # collect data
    rows = [] # one float vector per data line
    with open(file) as f: # read the file that was passed in, not a global
        for i, line in enumerate(f):
            if i < n_header_lines: continue # skip header information
            # split() with no argument collapses runs of whitespace and drops
            # the trailing newline, so no empty tokens need to be removed
            rows.append(np.array(line.split(), dtype=float))

    if not rows:
        raise ValueError('no data lines found after the header in %s' % file)

    data_array = np.vstack(rows) # stack vectors into an array
    df = pandas.DataFrame(data=data_array, columns=columns) # convert stack to pandas DataFrame
    return df

## Calculate the mutual information (MI)  
Setup for the MINE network  
Use the post-fit residuals (7th column, O-C range rate [m/s]) as one random variable  
The other random variable is a vector of the remaining columns. This vector gives the position of the satellite over Earth, with some redundant information such as alternative coordinates and time stamps.  
For the upper channel we feed in vectors and residuals as they appear in the tables  
For the lower channel we shuffle the residuals  
The network will then try to distinguish between residuals tied to their actual positions and residuals that have been randomly reassigned   
If the network can tell them apart, then the position is informative about the residuals, i.e., some signal has been absorbed into the residuals

In [3]:
# create datasets by concatenating data across all days from all 12 months
# X collects every column except the residual; y collects the residual column
target_column = 'O-C range rate [m/s]' # target random variable residuals
root_path  = '/Users/brandonlpanos/Desktop/grace/datasets/'

X_days = [] # one (n_rows, 10) matrix per successfully parsed day
y_days = [] # one (n_rows,) residual vector per successfully parsed day

# sorted() makes the row order reproducible; os.listdir order is arbitrary
for month in sorted(os.listdir(root_path)):
    month_dir = os.path.join(root_path, month)
    if not os.path.isdir(month_dir):
        continue # ignore stray files sitting next to the month folders

    for day in sorted(os.listdir(month_dir)):
        path = os.path.join(month_dir, day)

        # some datasets are empty, if any problems just skip;
        # Exception (not a bare except) so Ctrl-C still interrupts the loop
        try:
            df = res_file_to_df(path) # convert res file into pandas DataFrame
        except Exception:
            continue

        y_days.append(df[target_column].to_numpy())
        # drop target variable and construct matrix out of remaining columns
        X_days.append(df.drop(columns=[target_column]).to_numpy())

if not X_days:
    raise RuntimeError('no res file could be parsed under ' + root_path)

# concatenate once at the end instead of re-copying the arrays for every file
X = np.concatenate(X_days, axis=0)
y = np.concatenate(y_days)
print(X.shape, y.shape)

(5981627, 10) (5981627,)


In [4]:
# One-off caching step, left disabled: uncomment to write X and y to a .npz archive
# np.savez('/Users/brandonlpanos/Desktop/grace/data', X=X, y=y)

In [5]:
# Reload the cached arrays from the .npz archive.
# np.load returns an NpzFile that keeps the archive open, so use it as a
# context manager; indexing reads each array fully into memory, so X and y
# remain valid after the archive is closed.
with np.load('/Users/brandonlpanos/Desktop/grace/data.npz') as fhand:
    X = fhand['X']
    y = fhand['y']
print(X.shape, y.shape)

(5981627, 10) (5981627,)
