# Planet Simulator with Tensorflow Pipeline

Author: Craig Boger
06/07/2020

This script takes the model prototyping and learning of v1.01 and tries to expand upon it in 2 key areas.

1) Perform data normalization and processing in TF data libraries.
2) Take predictions in normalized form and output them in their unormalized form for use in simulation.

## Straight Up Just Stealing Someone's Code and Trying to Run It

Credit to benrules2: https://gist.github.com/benrules2/220d56ea6fe9a85a4d762128b11adfba

In [1]:
import math
import random
%matplotlib widget
import matplotlib.pyplot as plot
from mpl_toolkits.mplot3d import Axes3D

class point:
    def __init__(self, x,y,z):
        self.x = x
        self.y = y
        self.z = z

class body:
    def __init__(self, location, mass, velocity, name = ""):
        self.location = location
        self.mass = mass
        self.velocity = velocity
        self.name = name

def calculate_single_body_acceleration(bodies, body_index):
    G_const = 6.67408e-11 #m3 kg-1 s-2
    acceleration = point(0,0,0)
    target_body = bodies[body_index]
    for index, external_body in enumerate(bodies):
        if index != body_index:
            r = (target_body.location.x - external_body.location.x)**2 + (target_body.location.y - external_body.location.y)**2 + (target_body.location.z - external_body.location.z)**2
            r = math.sqrt(r)
            tmp = G_const * external_body.mass / r**3
            acceleration.x += tmp * (external_body.location.x - target_body.location.x)
            acceleration.y += tmp * (external_body.location.y - target_body.location.y)
            acceleration.z += tmp * (external_body.location.z - target_body.location.z)

    return acceleration

def compute_velocity(bodies, time_step = 1):
    for body_index, target_body in enumerate(bodies):
        acceleration = calculate_single_body_acceleration(bodies, body_index)

        target_body.velocity.x += acceleration.x * time_step
        target_body.velocity.y += acceleration.y * time_step
        target_body.velocity.z += acceleration.z * time_step 


def update_location(bodies, time_step = 1):
    for target_body in bodies:
        target_body.location.x += target_body.velocity.x * time_step
        target_body.location.y += target_body.velocity.y * time_step
        target_body.location.z += target_body.velocity.z * time_step

def compute_gravity_step(bodies, time_step = 1):
    compute_velocity(bodies, time_step = time_step)
    update_location(bodies, time_step = time_step)

def plot_output(bodies, outfile = None):
    fig = plot.figure()
    colours = ['r','b','g','y','m','c']
    ax = fig.add_subplot(1,1,1, projection='3d')
    max_range = 0
    for current_body in bodies: 
        max_dim = max(max(current_body["x"]),max(current_body["y"]),max(current_body["z"]))
        if max_dim > max_range:
            max_range = max_dim
        ax.plot(current_body["x"], current_body["y"], current_body["z"], c = random.choice(colours), label = current_body["name"])        
    
    ax.set_xlim([-max_range,max_range])    
    ax.set_ylim([-max_range,max_range])
    ax.set_zlim([-max_range,max_range])
    ax.legend()        

    if outfile:
        plot.savefig(outfile)
    else:
        plot.show()

def run_simulation(bodies, names = None, time_step = 1, number_of_steps = 10000, report_freq = 100):

    #create output container for each body
    body_locations_hist = []
    for current_body in bodies:
        body_locations_hist.append({"x":[], "y":[], "z":[], "name":current_body.name})
        
    for i in range(1,number_of_steps):
        compute_gravity_step(bodies, time_step = 1000)            
        
        if i % report_freq == 0:
            for index, body_location in enumerate(body_locations_hist):
                body_location["x"].append(bodies[index].location.x)
                body_location["y"].append(bodies[index].location.y)           
                body_location["z"].append(bodies[index].location.z)       

    return body_locations_hist        
            
#planet data (location (m), mass (kg), velocity (m/s)
sun = {"location":point(0,0,0), "mass":2e30, "velocity":point(0,0,0)}
mercury = {"location":point(0,5.7e10,0), "mass":3.285e23, "velocity":point(47000,0,0)}
venus = {"location":point(0,1.1e11,0), "mass":4.8e24, "velocity":point(35000,0,0)}
earth = {"location":point(0,1.5e11,0), "mass":6e24, "velocity":point(30000,0,0)}
mars = {"location":point(0,2.2e11,0), "mass":2.4e24, "velocity":point(24000,0,0)}
jupiter = {"location":point(0,7.7e11,0), "mass":1e28, "velocity":point(13000,0,0)}
saturn = {"location":point(0,1.4e12,0), "mass":5.7e26, "velocity":point(9000,0,0)}
uranus = {"location":point(0,2.8e12,0), "mass":8.7e25, "velocity":point(6835,0,0)}
neptune = {"location":point(0,4.5e12,0), "mass":1e26, "velocity":point(5477,0,0)}
pluto = {"location":point(0,3.7e12,0), "mass":1.3e22, "velocity":point(4748,0,0)}
# TODO: Add random sattellite here.
satellite_1 = {"location":point(1e5,3.7e5,0), "mass":1.7e1, "velocity":point(4748,0,0)}

if __name__ == "__main__":

    #build list of planets in the simulation, or create your own
    bodies = [
        body( location = sun["location"], mass = sun["mass"], velocity = sun["velocity"], name = "sun"),
        body( location = earth["location"], mass = earth["mass"], velocity = earth["velocity"], name = "earth"),
        body( location = mars["location"], mass = mars["mass"], velocity = mars["velocity"], name = "mars"),
        body( location = venus["location"], mass = venus["mass"], velocity = venus["velocity"], name = "venus"),
        body( location = mercury["location"], mass = mercury["mass"], velocity = mercury["velocity"], name = "mercury"),
        body( location = jupiter["location"], mass = jupiter["mass"], velocity = jupiter["velocity"], name = "jupiter"),
        body( location = saturn["location"], mass = saturn["mass"], velocity = saturn["velocity"], name = "saturn"),
        
        #body( location = satellite_1["location"], mass = satellite_1["mass"], velocity = satellite_1["velocity"], name = "sattellite_1")
        ]
    
    # Original defaults of simulation
    # motions = run_simulation(bodies, time_step = 100, number_of_steps = 80000, report_freq = 1000)
    # Try messing with report frequency to get more data.
    motions = run_simulation(bodies, time_step = 100, number_of_steps = 300000, report_freq = 100)
    plot_output(motions, outfile = 'orbits.png')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Take motions data from the above simulation and convert it to a Pandas dataframe.  The "motions" output is a list of python dictionaries that can be converted into a dataframe and then manipulated.

In [2]:
import pandas as pd
import numpy as np

motions_df = pd.DataFrame(motions)
motions_df.head(100)

Unnamed: 0,x,y,z,name
0,"[6.172247210875407, 49.37608388489302, 166.613...","[6056.974666438165, 24107.571980251058, 54150....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",sun
1,"[2999802268.7576785, 5998418130.158717, 899466...","[149970049800.53976, 149880804272.02502, 14973...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",earth
2,"[2399949856.0043564, 4799598821.688435, 719864...","[219986083529.99896, 219944611139.58228, 21987...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",mars
3,"[3499415066.461402, 6995320910.112684, 1048421...","[109944304437.46532, 109778376868.05923, 10950...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",venus
4,"[4694355206.034602, 9354859805.633783, 1394777...","[56792631335.43017, 56175843268.08082, 5515329...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",mercury
5,"[1299999366.480654, 2599994931.4665866, 389998...","[769998863548.677, 769995476701.667, 769989839...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",jupiter
6,"[899999928.8208524, 1799999430.5240006, 269999...","[1399999647585.8357, 1399998597321.9666, 13999...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",saturn


In [3]:
# Trying to separate out each row of list or dataframe into its own dataframe.
# Will later put these dataframes back together into 1 large dataframe.

motions_df_list = []
for body in motions:
    motions_df_list.append(pd.DataFrame(body))

In [4]:
motions_df_list[3]

Unnamed: 0,x,y,z,name
0,3.499415e+09,1.099443e+11,0.0,venus
1,6.995321e+09,1.097784e+11,0.0,venus
2,1.048421e+10,1.095024e+11,0.0,venus
3,1.396259e+10,1.091166e+11,0.0,venus
4,1.742697e+10,1.086215e+11,0.0,venus
...,...,...,...,...
2994,-5.289650e+10,8.213631e+10,0.0,venus
2995,-5.030464e+10,8.447291e+10,0.0,venus
2996,-4.763994e+10,8.672745e+10,0.0,venus
2997,-4.490500e+10,8.889761e+10,0.0,venus


In [5]:
# Combine the dataframes into a single, large dataframe.
# Can later choose a planet to be the target we train to predict.
complete_motion_df = None

for body in motions_df_list:
    # Append name of body to each column and remove the name column
    body_name = body.loc[0, "name"]
    body.columns = [body_name + "_x",
                    body_name + "_y",
                    body_name + "_z",
                    "name"]
    # Add current body to the complete dataframe.
    complete_motion_df = pd.concat([complete_motion_df, body.iloc[:, 0:3]], axis=1)

complete_motion_df.head(100)

Unnamed: 0,sun_x,sun_y,sun_z,earth_x,earth_y,earth_z,mars_x,mars_y,mars_z,venus_x,...,venus_z,mercury_x,mercury_y,mercury_z,jupiter_x,jupiter_y,jupiter_z,saturn_x,saturn_y,saturn_z
0,6.172247e+00,6.056975e+03,0.0,2.999802e+09,1.499700e+11,0.0,2.399950e+09,2.199861e+11,0.0,3.499415e+09,...,0.0,4.694355e+09,5.679263e+10,0.0,1.299999e+09,7.699989e+11,0.0,8.999999e+08,1.400000e+12,0.0
1,4.937608e+01,2.410757e+04,0.0,5.998418e+09,1.498808e+11,0.0,4.799599e+09,2.199446e+11,0.0,6.995321e+09,...,0.0,9.354860e+09,5.617584e+10,0.0,2.599995e+09,7.699955e+11,0.0,1.799999e+09,1.399999e+12,0.0
2,1.666139e+02,5.415063e+04,0.0,8.994662e+09,1.497323e+11,0.0,7.198646e+09,2.198756e+11,0.0,1.048421e+10,...,0.0,1.394778e+10,5.515330e+10,0.0,3.899983e+09,7.699898e+11,0.0,2.699998e+09,1.399997e+12,0.0
3,3.948280e+02,9.618418e+04,0.0,1.198735e+10,1.495246e+11,0.0,9.596791e+09,2.197790e+11,0.0,1.396259e+10,...,0.0,1.843961e+10,5.373113e+10,0.0,5.199959e+09,7.699820e+11,0.0,3.599995e+09,1.399994e+12,0.0
4,7.708690e+02,1.502055e+05,0.0,1.497529e+10,1.492578e+11,0.0,1.199373e+10,2.196549e+11,0.0,1.742697e+10,...,0.0,2.279721e+10,5.191794e+10,0.0,6.499921e+09,7.699718e+11,0.0,4.499991e+09,1.399991e+12,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,4.138605e+06,5.394756e+07,0.0,1.441446e+11,-4.894462e+10,0.0,1.875072e+11,1.033965e+11,0.0,1.578892e+10,...,0.0,3.050288e+10,-4.133032e+10,0.0,1.242400e+11,7.596504e+11,0.0,8.633698e+10,1.396785e+12,0.0
96,4.259034e+06,5.506067e+07,0.0,1.431961e+11,-5.174263e+10,0.0,1.885864e+11,1.011770e+11,0.0,1.238219e+10,...,0.0,2.606635e+10,-4.407853e+10,0.0,1.255223e+11,7.594341e+11,0.0,8.723499e+10,1.396718e+12,0.0
97,4.381576e+06,5.618472e+07,0.0,1.421936e+11,-5.452104e+10,0.0,1.896398e+11,9.894374e+10,0.0,8.963735e+09,...,0.0,2.137176e+10,-4.638960e+10,0.0,1.268043e+11,7.592156e+11,0.0,8.813296e+10,1.396650e+12,0.0
98,4.506234e+06,5.731969e+07,0.0,1.411373e+11,-5.727882e+10,0.0,1.906674e+11,9.669701e+10,0.0,5.536788e+09,...,0.0,1.646399e+10,-4.823713e+10,0.0,1.280859e+11,7.589949e+11,0.0,8.903089e+10,1.396581e+12,0.0


In [6]:
complete_motion_df.shape

(2999, 21)

At this point, we have a single dataframe with all bodies and all positions with each time step as the index of our rows.

# Trying to Create a tf.data Dataset from the Constructed, Unrandomized, Unnormalized Data

### Imports

In [7]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
# Probably not needed since not using regressor or doing any feature engineering.
import sklearn
from sklearn.preprocessing import StandardScaler  # Scaler for normalizing data.
from sklearn.preprocessing import MinMaxScaler  # Scaler for normalizing data.
assert sklearn.__version__ >= "0.20"

# TensorFlow ≥2.0 is required
import tensorflow as tf
assert tf.__version__ >= "2.0"
# Recommended to enable eager execution when developing model.
# Processing data: https://www.youtube.com/watch?v=oFFbKogYdfc
# tf.enable_eager_execution()

# Import Keras
from tensorflow import keras

# to make this notebook's output stable across runs
#np.random.seed(42)

# Use sklearn for data processing


# Common imports
import numpy as np
import os

In [8]:
tf.__version__

'2.2.0'

In [9]:
keras.__version__

'2.3.0-tf'

## Start Here with Trying to Process Data with Tensorflow Datasets

One of the difficulties is using a mixture of numpy, pandas, and sklearn to take input data (influx), arrange it, split it out, normalize it, and then train a model.  With tf.data input pipelines (similar to sklearn pipelines), we can create data and machine learning pipelines for training or inference.  This allows us to encapsulate not only the machine learning into a Tensorflow model, but the necessary transformations to that data.  That allows us to deploy the model and data transformations as a single object to the later simulator. \
The input pipeline let's us take raw data from any source, like csv, numpy arrays, distributed file system, etc, and convert it into the tensors we will use to train our model.

Intro to tensors:
https://www.tensorflow.org/guide/tensor

Good source on how data loading and preprocessing is usually done: https://stackoverflow.com/questions/55321905/want-to-split-train-and-test-data-gotten-from-a-csv-with-tensorflow
1) Load the data into memory with numpy
2) Split the data into train and validation

Since we are not using a massive dataset, then we might be able to use tf.split to split an exsting tf dataset into train and validation.
https://docs.w3cub.com/tensorflow~python/tf/split/

### Creating a tf dataset from slices (numpy array, pandas dataframe, etc). 
https://www.tensorflow.org/tutorials/load_data/pandas_dataframe

Probably one of the better articles on using tensorflow datasets: \
https://adventuresinmachinelearning.com/tensorflow-dataset-tutorial/

TF documentation on tf.data: Building Tensorflow Input Pipelines: \
https://www.tensorflow.org/guide/data


In [10]:
# Start with randomizing the data.  This is going to be a bit redundant, but fuck it.
# I can't figure out right now how to split a tensorflow dataset into train, validation, and test sets.
complete_motion_df = complete_motion_df.sample(frac=1).reset_index(drop=True)
complete_motion_df.head(10)

Unnamed: 0,sun_x,sun_y,sun_z,earth_x,earth_y,earth_z,mars_x,mars_y,mars_z,venus_x,...,venus_z,mercury_x,mercury_y,mercury_z,jupiter_x,jupiter_y,jupiter_z,saturn_x,saturn_y,saturn_z
0,19130520000.0,5697825000.0,0.0,103122500000.0,-122359400000.0,0.0,-73020010000.0,-171626300000.0,0.0,-80029640000.0,...,0.0,-2017515000.0,58372420000.0,0.0,-663100500000.0,-275300300000.0,0.0,1107118000000.0,-248785000000.0,0.0
1,714060700.0,1970896000.0,0.0,-85906860000.0,124820300000.0,0.0,182593500000.0,115851500000.0,0.0,29192430000.0,...,0.0,-4852436000.0,58683330000.0,0.0,656303400000.0,383233900000.0,0.0,534222000000.0,1270662000000.0,0.0
2,5277441000.0,6153926000.0,0.0,-70634720000.0,135778500000.0,0.0,114099600000.0,-161983200000.0,0.0,116188500000.0,...,0.0,12740790000.0,-44206870000.0,0.0,594912000000.0,-430391400000.0,0.0,985020000000.0,869518600000.0,0.0
3,16095210000.0,7067926000.0,0.0,-27396950000.0,-140017800000.0,0.0,64726780000.0,221223600000.0,0.0,47183030000.0,...,0.0,-18972950000.0,51081780000.0,0.0,-434439400000.0,-566121200000.0,0.0,1181920000000.0,45808410000.0,0.0
4,285824300.0,1090659000.0,0.0,97748860000.0,-116888000000.0,0.0,-158711400000.0,147685400000.0,0.0,109025200000.0,...,0.0,-16074030000.0,-47249510000.0,0.0,524804100000.0,556024800000.0,0.0,395023300000.0,1330987000000.0,0.0
5,6100623000.0,6546513000.0,0.0,128390600000.0,94615510000.0,0.0,-73516980000.0,-176460000000.0,0.0,-71148230000.0,...,0.0,-16535750000.0,-39350610000.0,0.0,528410600000.0,-505361600000.0,0.0,1023429000000.0,808404200000.0,0.0
6,3514854000.0,5051286000.0,0.0,121112600000.0,-92510510000.0,0.0,90392250000.0,206102200000.0,0.0,87925900000.0,...,0.0,-41142150000.0,38290030000.0,0.0,709412300000.0,-217565600000.0,0.0,878551100000.0,1006023000000.0,0.0
7,20202140000.0,4964760000.0,0.0,-129334200000.0,28977580000.0,0.0,-175580900000.0,89009540000.0,0.0,72002600000.0,...,0.0,-31271320000.0,-7908808000.0,0.0,-711417900000.0,-121722600000.0,0.0,1050034000000.0,-372371300000.0,0.0
8,2505772000.0,4203653000.0,0.0,-30622490000.0,150544900000.0,0.0,-176354700000.0,123048300000.0,0.0,-107975600000.0,...,0.0,-1467028000.0,61056850000.0,0.0,745487000000.0,-53017340000.0,0.0,794254400000.0,1090635000000.0,0.0
9,4006126000.0,5400038000.0,0.0,-19874840000.0,-146158900000.0,0.0,186052100000.0,119003600000.0,0.0,-76285370000.0,...,0.0,-36747930000.0,-26828820000.0,0.0,682571100000.0,-285031400000.0,0.0,912317800000.0,966948200000.0,0.0


In [11]:
# Assuming last 3 columns in the dataframe are the target x,y, and z values.  
target = complete_motion_df.iloc[:,-3:]
# Drop target from main dataframe.
complete_motion_df.drop(complete_motion_df.iloc[:,-3:], axis = 1, inplace = True)
# Split the x, y, and z coordinates out for the target to use a specific dataset for each possible coordinate output.
# Convert targets to numpy arrays as well so we can use them in the model.
target_x_np = target.iloc[:,0].to_numpy()
target_y_np = target.iloc[:,1].to_numpy()
target_z_np = target.iloc[:,2].to_numpy()
# Convert dataframe with whole dataset to a numpy array for use in both testing and converting to tf.dataset for training.
# Usually training, validation, and test data would be coming from different CSV files or sources.
# complete_motion_df only consists of input data at this point.
complete_motion_np = complete_motion_df.to_numpy()
#Split into train, validation, and test sets.
# Setup train, validation, and test splits
DATASET_SIZE = len(complete_motion_df)
train_size = int(0.7 * DATASET_SIZE)
val_size = int(0.15 * DATASET_SIZE)
test_size = int(0.15 * DATASET_SIZE)
#Split data
X_train, X_valid, X_test = complete_motion_np[:train_size], complete_motion_np[train_size:(train_size+val_size)], complete_motion_np[(train_size + val_size):]
y_train_x, y_valid_x, y_test_x = target_x_np[:train_size], target_x_np[train_size:(train_size+val_size)], target_x_np[(train_size + val_size):]
y_train_y, y_valid_y, y_test_y = target_y_np[:train_size], target_y_np[train_size:(train_size+val_size)], target_y_np[(train_size + val_size):]
y_train_z, y_valid_z, y_test_z = target_z_np[:train_size], target_z_np[train_size:(train_size+val_size)], target_z_np[(train_size + val_size):]

In [16]:
# Create tensorflow datasets from numpy arrays for train and va;idation data.
train_data = tf.data.Dataset.from_tensor_slices((X_train, y_train_x.reshape(-1,1), y_train_y.reshape(-1,1), y_train_z.reshape(-1,1)))
validation_data = tf.data.Dataset.from_tensor_slices((X_valid, y_valid_x.reshape(-1,1), y_valid_y.reshape(-1,1), y_valid_z.reshape(-1,1)))

In [None]:
# Extract the input and targets while also converting to a numpy array.
#complete_target_x = complete_motion_df.iloc[:, -3].values.reshape(-1,1)
#complete_target_y = complete_motion_df.iloc[:, -2].values.reshape(-1,1)
#complete_target_z = complete_motion_df.iloc[:, -1].values.reshape(-1,1)
#complete_X_input = complete_motion_df.iloc[:, 0:-3].values
# Create a tensorflow dataset from the numpy array "slices" of the raw dataframe.
#dataset = tf.data.Dataset.from_tensor_slices((complete_X_input, complete_target_x, complete_target_y, complete_target_z))

In [17]:
# Iterate through dataset and print the input and targets.
# Will select top 5 to iterate through.
for feat, targ_x, targ_y, targ_z in train_data.take(5):
    print('Features: {} Target_X: {} Target_Y: {} Target_Z: {}'.format(feat, targ_x, targ_y, targ_z))

Features: [ 1.91305181e+10  5.69782500e+09  0.00000000e+00  1.03122544e+11
 -1.22359366e+11  0.00000000e+00 -7.30200126e+10 -1.71626298e+11
  0.00000000e+00 -8.00296384e+10  5.46822814e+10  0.00000000e+00
 -2.01751456e+09  5.83724159e+10  0.00000000e+00 -6.63100477e+11
 -2.75300342e+11  0.00000000e+00] Target_X: [1.10711815e+12] Target_Y: [-2.48784994e+11] Target_Z: [0.]
Features: [ 7.14060685e+08  1.97089623e+09  0.00000000e+00 -8.59068594e+10
  1.24820252e+11  0.00000000e+00  1.82593503e+11  1.15851466e+11
  0.00000000e+00  2.91924336e+10  1.08257820e+11  0.00000000e+00
 -4.85243646e+09  5.86833262e+10  0.00000000e+00  6.56303439e+11
  3.83233919e+11  0.00000000e+00] Target_X: [5.34222009e+11] Target_Y: [1.27066174e+12] Target_Z: [0.]
Features: [ 5.27744127e+09  6.15392572e+09  0.00000000e+00 -7.06347222e+10
  1.35778460e+11  0.00000000e+00  1.14099593e+11 -1.61983171e+11
  0.00000000e+00  1.16188484e+11  3.78256828e+07  0.00000000e+00
  1.27407942e+10 -4.42068683e+10  0.00000000e+00

### Randomize the Data and Create Batches

Since the dataset is not that large, a batch size of 1 for processing the data should be ok.  Just randomizing training data for shits and giggles even though already randomized from pandas dataframe.  Hopfully in the future I can figure out how to read the whole dataset into a tensforflow dataset, then split up that dataset into multiple.

In [21]:
train_data = train_data.shuffle(buffer_size=100).batch(1)

In [23]:
for feat, targ_x, targ_y, targ_z in train_data.take(5):
    print('Features: {} Target_X: {} Target_Y: {} Target_Z: {}'.format(feat, targ_x, targ_y, targ_z))

Features: [[ 1.86430047e+09  3.54901340e+09  0.00000000e+00 -1.28758496e+11
  -7.53271950e+10  0.00000000e+00 -1.68560097e+11 -1.06995372e+11
   0.00000000e+00  1.12683880e+11 -4.32772765e+09  0.00000000e+00
  -2.50563357e+10 -4.01237018e+10  0.00000000e+00  7.47532526e+11
   7.47924725e+10  0.00000000e+00]] Target_X: [[7.25337426e+11]] Target_Y: [[1.14871212e+12]] Target_Z: [[0.]]
Features: [[ 2.20634147e+10  3.18474861e+09  0.00000000e+00 -1.03384927e+11
  -8.38927282e+10  0.00000000e+00  2.02629479e+11 -9.15594642e+10
   0.00000000e+00 -7.78932919e+10  5.05564305e+10  0.00000000e+00
  -2.33843382e+10  3.52166996e+10  0.00000000e+00 -6.90472192e+11
   2.49423723e+11  0.00000000e+00]] Target_X: [[8.57381081e+11]] Target_Y: [[-6.37352384e+11]] Target_Z: [[0.]]
Features: [[ 1.99753454e+10  5.13380497e+09  0.00000000e+00 -1.22205437e+11
  -4.95788590e+10  0.00000000e+00 -1.88943919e+11  2.32520785e+10
   0.00000000e+00  1.27627395e+11 -2.30576754e+10  0.00000000e+00
   7.22307712e+10 -4.

In [24]:
train_data.shape()

AttributeError: 'BatchDataset' object has no attribute 'shape'

In [None]:
# Try using tf.split to split the tf dataset into training, validation, and test datasets.
DATASET_SIZE = len(complete_motion_df)
train_size = int(0.7 * DATASET_SIZE)
val_size = int(0.15 * DATASET_SIZE)
test_size = int(0.15 * DATASET_SIZE)
# Split into 3 tensors with sizes calculated above along dimension 0 (rows).
# https://docs.w3cub.com/tensorflow~python/tf/split/
training_data, validation_data, test_data = tf.split(dataset, num_or_size_splits=3, axis=1)

# Try a Quick Neural Net for Predicting Jupiter's Position

## Create Preprocessing Layer

Try creating a preprocessing layer of the neural network that will standardize the data without external normalization.

In [25]:
# Create standardization class we inheret from keras.layers
# Can possibly be replaced with keras.layers.Normalization
# Stolen from https://github.com/ageron/handson-ml2/blob/master/13_loading_and_preprocessing_data.ipynb
class Standardization(keras.layers.Layer):
    def adapt(self, data_sample):
        self.means_ = np.mean(data_sample, axis=0, keepdims=True)
        self.stds_ = np.std(data_sample, axis=0, keepdims=True)
    def call(self, inputs):
        return (inputs - self.means_) / (self.stds_ + keras.backend.epsilon())

In [28]:
# Now we need to adapt the standardization class to each column of our training data.
#Set the input shape
std_layer = Standardization(input_shape = complete_motion_np.shape[1:])
std_layer.adapt(X_train)

## Try Creating Single Input, Multiple Output Regression Model

Trying to create a regression NN where instead of designating an output layer of 3 nodes, 3 output layers of a single node are used to designate specific datasets and loss functions.  Still need to figure out later how to get a 3 node output to correspond to the input training data.

Use functional API to build basic NN architecture.

In [None]:
# Use functional API to build basic NN architecture.
#input_main = keras.layers.Input(shape=complete_motion_np.shape[1:])
#hidden1 = keras.layers.Dense(300, activation="tanh")(input_main)
#hidden2 = keras.layers.Dense(300, activation="tanh")(hidden1)
#output_x = keras.layers.Dense(1, name="output_x")(hidden2)
#output_y = keras.layers.Dense(1, name="output_y")(hidden2)
#output_z = keras.layers.Dense(1, name="output_z")(hidden2)

In [29]:
# Use functional API to build basic NN architecture.
# CONTINUE FROM HERE.  MAYBE SWITCH TO BATCH NORMALIZATION FROM BOOK?
input_main = std_layer
hidden1 = keras.layers.Dense(300, activation="tanh")(input_main)
hidden2 = keras.layers.Dense(300, activation="tanh")(hidden1)
output_x = keras.layers.Dense(1, name="output_x")(hidden2)
output_y = keras.layers.Dense(1, name="output_y")(hidden2)
output_z = keras.layers.Dense(1, name="output_z")(hidden2)

AttributeError: 'Standardization' object has no attribute 'shape'

Create model with specified input and output layers

In [None]:
# Create model with specified input and output layers
model = keras.Model(inputs=[input_main], outputs=[output_x, output_y, output_z])
model.summary()

In [None]:
# Compile model with specified loss functions for each output and specify weighting to provide each output.
# Weighting X and Y output more than Z
model.compile(loss=["mse", "mse", "mse"], 
              loss_weights=[0.4, 0.4, 0.2], 
              optimizer=keras.optimizers.Adam(learning_rate=1e-4),
              metrics=["mse"])

Train the model with separate x, y, z training sets.

In [None]:
history = model.fit(
    [X_train], [y_train_x, y_train_y, y_train_z],
    epochs=1000,
    validation_data=([X_valid], [y_valid_x, y_valid_y, y_valid_z])
)

In [None]:
# Convert training history to dataframe for analysis and plotting.
complete_history_data = pd.DataFrame(history.history)
complete_history_data.head(-9)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Create figure of subplots to plot total loss, x coordinate loss, y coordinate loss, and z coordinate MSEs.
fig2, mse_plots = plt.subplots(2,2)


#plot losses in each quadrant of the figure.
mse_plots[0][0].plot(complete_history_data[["output_x_mse", "val_output_x_mse"]])
#mse_plots[0][0].set_ylim(0,1)

mse_plots[0][1].plot(complete_history_data[["output_y_mse", "val_output_y_mse"]])
mse_plots[0][1].set_ylim(0,1)

mse_plots[1][0].plot(complete_history_data[["output_z_mse", "val_output_z_mse"]])
mse_plots[1][0].set_ylim(0,1)

plt.show()


In [None]:
# Create figure of subplots to plot total loss, x coordinate loss, y coordinate loss, and z coordinate loss.
fig, loss_plots = plt.subplots(2,2)


#plot losses in each quadrant of the figure.
loss_plots[0][0].plot(complete_history_data[["loss", "val_loss"]])
loss_plots[0][0].set_ylim(0,1)

loss_plots[0][1].plot(complete_history_data[["output_x_loss", "val_output_x_loss"]])
loss_plots[0][1].set_ylim(0,1)

loss_plots[1][0].plot(complete_history_data[["output_y_loss", "val_output_y_loss"]])
loss_plots[1][0].set_ylim(0,1)

loss_plots[1][1].plot(complete_history_data[["output_z_loss", "val_output_z_loss"]])
loss_plots[1][1].set_ylim(0,1)


plt.show()


### Evaluate the Model with Test Data

In [None]:
y_test_x.shape

In [None]:
model.evaluate([X_test],[y_test_x, y_test_y, y_test_z])

### Predict Values and Inspect Differences

In [None]:
x_pred, y_pred, z_pred = model.predict([X_test])

In [None]:
pred_model_comparison = pd.DataFrame(data=np.concatenate((x_pred, y_test_x.reshape(-1,1), y_pred, y_test_y.reshape(-1,1), z_pred, y_test_z.reshape(-1,1)), axis=1),
                                    columns=['pred_x', 'model_x', 'pred_y', 'model_y', 'pred_z', 'model_z'])
pred_model_comparison.head(10)

In [None]:
pred_model_comparison[["pred_x", "model_x"]].plot()

In [None]:
pred_model_comparison[["pred_y", "model_y"]].plot()

In [None]:
pred_model_comparison[["pred_z", "model_z"]].plot()