# 2D Planet Simulator to Generate Data for ML Inferred Physics

Author: Craig Boger
06/01/2020

Looks like the simulator is 2D, but can be changed to 3D if needed.

This is a script to generate some quick simulated data for orbiting objects in a 2D space to fead into a neural network to predict the next position of a body traveling through a system.

## Straight Up Just Stealing Someone's Code and Trying to Run It

Credit to benrules2: https://gist.github.com/benrules2/220d56ea6fe9a85a4d762128b11adfba

In [1]:
import math
import random
%matplotlib widget
import matplotlib.pyplot as plot
from mpl_toolkits.mplot3d import Axes3D

class point:
    def __init__(self, x,y,z):
        self.x = x
        self.y = y
        self.z = z

class body:
    def __init__(self, location, mass, velocity, name = ""):
        self.location = location
        self.mass = mass
        self.velocity = velocity
        self.name = name

def calculate_single_body_acceleration(bodies, body_index):
    G_const = 6.67408e-11 #m3 kg-1 s-2
    acceleration = point(0,0,0)
    target_body = bodies[body_index]
    for index, external_body in enumerate(bodies):
        if index != body_index:
            r = (target_body.location.x - external_body.location.x)**2 + (target_body.location.y - external_body.location.y)**2 + (target_body.location.z - external_body.location.z)**2
            r = math.sqrt(r)
            tmp = G_const * external_body.mass / r**3
            acceleration.x += tmp * (external_body.location.x - target_body.location.x)
            acceleration.y += tmp * (external_body.location.y - target_body.location.y)
            acceleration.z += tmp * (external_body.location.z - target_body.location.z)

    return acceleration

def compute_velocity(bodies, time_step = 1):
    for body_index, target_body in enumerate(bodies):
        acceleration = calculate_single_body_acceleration(bodies, body_index)

        target_body.velocity.x += acceleration.x * time_step
        target_body.velocity.y += acceleration.y * time_step
        target_body.velocity.z += acceleration.z * time_step 


def update_location(bodies, time_step = 1):
    for target_body in bodies:
        target_body.location.x += target_body.velocity.x * time_step
        target_body.location.y += target_body.velocity.y * time_step
        target_body.location.z += target_body.velocity.z * time_step

def compute_gravity_step(bodies, time_step = 1):
    compute_velocity(bodies, time_step = time_step)
    update_location(bodies, time_step = time_step)

def plot_output(bodies, outfile = None):
    fig = plot.figure()
    colours = ['r','b','g','y','m','c']
    ax = fig.add_subplot(1,1,1, projection='3d')
    max_range = 0
    for current_body in bodies: 
        max_dim = max(max(current_body["x"]),max(current_body["y"]),max(current_body["z"]))
        if max_dim > max_range:
            max_range = max_dim
        ax.plot(current_body["x"], current_body["y"], current_body["z"], c = random.choice(colours), label = current_body["name"])        
    
    ax.set_xlim([-max_range,max_range])    
    ax.set_ylim([-max_range,max_range])
    ax.set_zlim([-max_range,max_range])
    ax.legend()        

    if outfile:
        plot.savefig(outfile)
    else:
        plot.show()

def run_simulation(bodies, names = None, time_step = 1, number_of_steps = 10000, report_freq = 100):

    #create output container for each body
    body_locations_hist = []
    for current_body in bodies:
        body_locations_hist.append({"x":[], "y":[], "z":[], "name":current_body.name})
        
    for i in range(1,number_of_steps):
        compute_gravity_step(bodies, time_step = 1000)            
        
        if i % report_freq == 0:
            for index, body_location in enumerate(body_locations_hist):
                body_location["x"].append(bodies[index].location.x)
                body_location["y"].append(bodies[index].location.y)           
                body_location["z"].append(bodies[index].location.z)       

    return body_locations_hist        
            
#planet data (location (m), mass (kg), velocity (m/s)
sun = {"location":point(0,0,0), "mass":2e30, "velocity":point(0,0,0)}
mercury = {"location":point(0,5.7e10,0), "mass":3.285e23, "velocity":point(47000,0,0)}
venus = {"location":point(0,1.1e11,0), "mass":4.8e24, "velocity":point(35000,0,0)}
earth = {"location":point(0,1.5e11,0), "mass":6e24, "velocity":point(30000,0,0)}
mars = {"location":point(0,2.2e11,0), "mass":2.4e24, "velocity":point(24000,0,0)}
jupiter = {"location":point(0,7.7e11,0), "mass":1e28, "velocity":point(13000,0,0)}
saturn = {"location":point(0,1.4e12,0), "mass":5.7e26, "velocity":point(9000,0,0)}
uranus = {"location":point(0,2.8e12,0), "mass":8.7e25, "velocity":point(6835,0,0)}
neptune = {"location":point(0,4.5e12,0), "mass":1e26, "velocity":point(5477,0,0)}
pluto = {"location":point(0,3.7e12,0), "mass":1.3e22, "velocity":point(4748,0,0)}
# TODO: Add random sattellite here.
satellite_1 = {"location":point(1e5,3.7e5,0), "mass":1.7e1, "velocity":point(4748,0,0)}

if __name__ == "__main__":

    #build list of planets in the simulation, or create your own
    bodies = [
        body( location = sun["location"], mass = sun["mass"], velocity = sun["velocity"], name = "sun"),
        body( location = earth["location"], mass = earth["mass"], velocity = earth["velocity"], name = "earth"),
        body( location = mars["location"], mass = mars["mass"], velocity = mars["velocity"], name = "mars"),
        body( location = venus["location"], mass = venus["mass"], velocity = venus["velocity"], name = "venus"),
        body( location = mercury["location"], mass = mercury["mass"], velocity = mercury["velocity"], name = "mercury"),
        body( location = jupiter["location"], mass = jupiter["mass"], velocity = jupiter["velocity"], name = "jupiter"),
        body( location = saturn["location"], mass = saturn["mass"], velocity = saturn["velocity"], name = "saturn"),
        
        #body( location = satellite_1["location"], mass = satellite_1["mass"], velocity = satellite_1["velocity"], name = "sattellite_1")
        ]
    
    # Original defaults of simulation
    # motions = run_simulation(bodies, time_step = 100, number_of_steps = 80000, report_freq = 1000)
    # Try messing with report frequency to get more data.
    motions = run_simulation(bodies, time_step = 100, number_of_steps = 300000, report_freq = 100)
    plot_output(motions, outfile = 'orbits.png')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Take motions data from the above simulation and convert it to a Pandas dataframe.  The "motions" output is a list of python dictionaries that can be converted into a dataframe and then manipulated.

In [2]:
import pandas as pd
import numpy as np

motions_df = pd.DataFrame(motions)
motions_df.head(100)

Unnamed: 0,x,y,z,name
0,"[6.172247210875407, 49.37608388489302, 166.613...","[6056.974666438165, 24107.571980251058, 54150....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",sun
1,"[2999802268.7576785, 5998418130.158717, 899466...","[149970049800.53976, 149880804272.02502, 14973...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",earth
2,"[2399949856.0043564, 4799598821.688435, 719864...","[219986083529.99896, 219944611139.58228, 21987...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",mars
3,"[3499415066.461402, 6995320910.112684, 1048421...","[109944304437.46532, 109778376868.05923, 10950...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",venus
4,"[4694355206.034602, 9354859805.633783, 1394777...","[56792631335.43017, 56175843268.08082, 5515329...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",mercury
5,"[1299999366.480654, 2599994931.4665866, 389998...","[769998863548.677, 769995476701.667, 769989839...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",jupiter
6,"[899999928.8208524, 1799999430.5240006, 269999...","[1399999647585.8357, 1399998597321.9666, 13999...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",saturn


In [3]:
# Trying to separate out each row of list or dataframe into its own dataframe.
# Will later put these dataframes back together into 1 large dataframe.

motions_df_list = []
for body in motions:
    motions_df_list.append(pd.DataFrame(body))

In [4]:
motions_df_list[3]

Unnamed: 0,x,y,z,name
0,3.499415e+09,1.099443e+11,0.0,venus
1,6.995321e+09,1.097784e+11,0.0,venus
2,1.048421e+10,1.095024e+11,0.0,venus
3,1.396259e+10,1.091166e+11,0.0,venus
4,1.742697e+10,1.086215e+11,0.0,venus
...,...,...,...,...
2994,-5.289650e+10,8.213631e+10,0.0,venus
2995,-5.030464e+10,8.447291e+10,0.0,venus
2996,-4.763994e+10,8.672745e+10,0.0,venus
2997,-4.490500e+10,8.889761e+10,0.0,venus


In [5]:
# Combine the dataframes into a single, large dataframe.
# Can later choose a planet to be the target we train to predict.
complete_motion_df = None

for body in motions_df_list:
    # Append name of body to each column and remove the name column
    body_name = body.loc[0, "name"]
    body.columns = [body_name + "_x",
                    body_name + "_y",
                    body_name + "_z",
                    "name"]
    # Add current body to the complete dataframe.
    complete_motion_df = pd.concat([complete_motion_df, body.iloc[:, 0:3]], axis=1)

complete_motion_df.head(100)

Unnamed: 0,sun_x,sun_y,sun_z,earth_x,earth_y,earth_z,mars_x,mars_y,mars_z,venus_x,...,venus_z,mercury_x,mercury_y,mercury_z,jupiter_x,jupiter_y,jupiter_z,saturn_x,saturn_y,saturn_z
0,6.172247e+00,6.056975e+03,0.0,2.999802e+09,1.499700e+11,0.0,2.399950e+09,2.199861e+11,0.0,3.499415e+09,...,0.0,4.694355e+09,5.679263e+10,0.0,1.299999e+09,7.699989e+11,0.0,8.999999e+08,1.400000e+12,0.0
1,4.937608e+01,2.410757e+04,0.0,5.998418e+09,1.498808e+11,0.0,4.799599e+09,2.199446e+11,0.0,6.995321e+09,...,0.0,9.354860e+09,5.617584e+10,0.0,2.599995e+09,7.699955e+11,0.0,1.799999e+09,1.399999e+12,0.0
2,1.666139e+02,5.415063e+04,0.0,8.994662e+09,1.497323e+11,0.0,7.198646e+09,2.198756e+11,0.0,1.048421e+10,...,0.0,1.394778e+10,5.515330e+10,0.0,3.899983e+09,7.699898e+11,0.0,2.699998e+09,1.399997e+12,0.0
3,3.948280e+02,9.618418e+04,0.0,1.198735e+10,1.495246e+11,0.0,9.596791e+09,2.197790e+11,0.0,1.396259e+10,...,0.0,1.843961e+10,5.373113e+10,0.0,5.199959e+09,7.699820e+11,0.0,3.599995e+09,1.399994e+12,0.0
4,7.708690e+02,1.502055e+05,0.0,1.497529e+10,1.492578e+11,0.0,1.199373e+10,2.196549e+11,0.0,1.742697e+10,...,0.0,2.279721e+10,5.191794e+10,0.0,6.499921e+09,7.699718e+11,0.0,4.499991e+09,1.399991e+12,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,4.138605e+06,5.394756e+07,0.0,1.441446e+11,-4.894462e+10,0.0,1.875072e+11,1.033965e+11,0.0,1.578892e+10,...,0.0,3.050288e+10,-4.133032e+10,0.0,1.242400e+11,7.596504e+11,0.0,8.633698e+10,1.396785e+12,0.0
96,4.259034e+06,5.506067e+07,0.0,1.431961e+11,-5.174263e+10,0.0,1.885864e+11,1.011770e+11,0.0,1.238219e+10,...,0.0,2.606635e+10,-4.407853e+10,0.0,1.255223e+11,7.594341e+11,0.0,8.723499e+10,1.396718e+12,0.0
97,4.381576e+06,5.618472e+07,0.0,1.421936e+11,-5.452104e+10,0.0,1.896398e+11,9.894374e+10,0.0,8.963735e+09,...,0.0,2.137176e+10,-4.638960e+10,0.0,1.268043e+11,7.592156e+11,0.0,8.813296e+10,1.396650e+12,0.0
98,4.506234e+06,5.731969e+07,0.0,1.411373e+11,-5.727882e+10,0.0,1.906674e+11,9.669701e+10,0.0,5.536788e+09,...,0.0,1.646399e+10,-4.823713e+10,0.0,1.280859e+11,7.589949e+11,0.0,8.903089e+10,1.396581e+12,0.0


In [6]:
complete_motion_df.shape

(2999, 21)

At this point, we have a single dataframe with all bodies and all positions with each time step as the index of our rows.

### Checking for any NA values that could be messing up optimizer calculations in Tensorflow

In [7]:
complete_motion_df.isnull().any()
# Doesn't look like any null or missing values.

sun_x        False
sun_y        False
sun_z        False
earth_x      False
earth_y      False
earth_z      False
mars_x       False
mars_y       False
mars_z       False
venus_x      False
venus_y      False
venus_z      False
mercury_x    False
mercury_y    False
mercury_z    False
jupiter_x    False
jupiter_y    False
jupiter_z    False
saturn_x     False
saturn_y     False
saturn_z     False
dtype: bool

# Try a Quick Neural Net for Predicting Jupiter's Position

Basic Regression with Tensorflow: https://www.tensorflow.org/tutorials/keras/regression


### Imports

In [8]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
# Probably not needed since not using regressor or doing any feature engineering.
import sklearn
from sklearn.preprocessing import StandardScaler  # Scaler for normalizing data.
from sklearn.preprocessing import MinMaxScaler  # Scaler for normalizing data.
from sklearn.compose import ColumnTransformer   # Used to apply scaling and standardization per column in a dataframe.
assert sklearn.__version__ >= "0.20" 

# TensorFlow ≥2.0 is required
import tensorflow as tf
assert tf.__version__ >= "2.0"
# Recommended to enable eager execution when developing model.
# Processing data: https://www.youtube.com/watch?v=oFFbKogYdfc
# tf.enable_eager_execution()

# Import Keras
from tensorflow import keras
# Plot NN model
from tensorflow.keras.utils import plot_model


# to make this notebook's output stable across runs
#np.random.seed(42)

# Use sklearn for data processing


# Common imports
import numpy as np
import os

In [9]:
tf.__version__

'2.2.0'

In [10]:
keras.__version__

'2.3.0-tf'

## Randomize Data, Normalize Data, and Convert Pandas Dataframe to Numpy Arrays for Input to NN

Randomize rows in dataframe before training, validation, and test splitting of data.  Shuffle dataframe in place and reset index.\ 
https://stackoverflow.com/questions/29576430/shuffle-dataframe-rows

In [11]:
complete_motion_df = complete_motion_df.sample(frac=1).reset_index(drop=True)
complete_motion_df.head(10)

Unnamed: 0,sun_x,sun_y,sun_z,earth_x,earth_y,earth_z,mars_x,mars_y,mars_z,venus_x,...,venus_z,mercury_x,mercury_y,mercury_z,jupiter_x,jupiter_y,jupiter_z,saturn_x,saturn_y,saturn_z
0,8399934000.0,7316674000.0,0.0,-142230800000.0,-12588100000.0,0.0,-87952180000.0,203628900000.0,0.0,-30590190000.0,...,0.0,57633970000.0,-11438330000.0,0.0,318872100000.0,-649885500000.0,0.0,1105117000000.0,641299300000.0,0.0
1,6961340000.0,6887434000.0,0.0,113440300000.0,-102893000000.0,0.0,-194519800000.0,-38452390000.0,0.0,-2813562000.0,...,0.0,-35342540000.0,-23462580000.0,0.0,453256900000.0,-569964600000.0,0.0,1057962000000.0,745456900000.0,0.0
2,8984389000.0,7444292000.0,0.0,-100697600000.0,110529200000.0,0.0,17073130000.0,227181900000.0,0.0,-100158200000.0,...,0.0,4132690000.0,64225760000.0,0.0,262058000000.0,-673139700000.0,0.0,1121013000000.0,598997300000.0,0.0
3,9883581000.0,7592700000.0,0.0,87296650000.0,136327200000.0,0.0,163308000000.0,160430800000.0,0.0,85913120000.0,...,0.0,4414575000.0,64314910000.0,0.0,172951800000.0,-699119900000.0,0.0,1142160000000.0,533562400000.0,0.0
4,18797360000.0,5894679000.0,0.0,162597100000.0,-44064550000.0,0.0,13488040000.0,-192816700000.0,0.0,-73290350000.0,...,0.0,41853410000.0,-39787310000.0,0.0,-643384100000.0,-316693200000.0,0.0,1120558000000.0,-213046100000.0,0.0
5,7376933000.0,7028936000.0,0.0,25524450000.0,-145323700000.0,0.0,-199602300000.0,47036070000.0,0.0,99724640000.0,...,0.0,46387370000.0,47250690000.0,0.0,415379100000.0,-596518600000.0,0.0,1072852000000.0,715278700000.0,0.0
6,14696200000.0,7426983000.0,0.0,159834700000.0,49749190000.0,0.0,-166516700000.0,122493500000.0,0.0,-49505820000.0,...,0.0,68480660000.0,12789540000.0,0.0,-305604600000.0,-644893600000.0,0.0,1190583000000.0,164705800000.0,0.0
7,15067970.0,138302600.0,0.0,16616230000.0,-152367300000.0,0.0,202667700000.0,-40814030000.0,0.0,-110277600000.0,...,0.0,53570280000.0,-2272714000.0,0.0,199146700000.0,743109900000.0,0.0,139234500000.0,1391623000000.0,0.0
8,15017720000.0,7357603000.0,0.0,163840500000.0,-23697650000.0,0.0,-128437000000.0,170270900000.0,0.0,33933400000.0,...,0.0,-23224620000.0,-27584940000.0,0.0,-336018700000.0,-629471700000.0,0.0,1189663000000.0,138023500000.0,0.0
9,11435170000.0,7715802000.0,0.0,64775040000.0,-136034300000.0,0.0,179653500000.0,-106032800000.0,0.0,-91581260000.0,...,0.0,-37183400000.0,-12470830000.0,0.0,17047540000.0,-716931700000.0,0.0,1169653000000.0,419174700000.0,0.0


In [12]:
# Assuming last 3 columns in the dataframe are the target x,y, and z values.  
target = complete_motion_df.iloc[:,-3:]
# Drop target from main dataframe.
complete_motion_df.drop(complete_motion_df.iloc[:,-3:], axis = 1, inplace = True)
target.head(5)

Unnamed: 0,saturn_x,saturn_y,saturn_z
0,1105117000000.0,641299300000.0,0.0
1,1057962000000.0,745456900000.0,0.0
2,1121013000000.0,598997300000.0,0.0
3,1142160000000.0,533562400000.0,0.0
4,1120558000000.0,-213046100000.0,0.0


Split the x, y, and z coordinates out for the target to use a specific dataset for each possible coordinate output.

In [13]:
target_x = target.iloc[:,0]
target_y = target.iloc[:,1]
target_z = target.iloc[:,2]

Convert all pandas dataframes to numpy arrays so they are compatible with Tensorflow.

In [14]:
complete_motion_np = complete_motion_df.to_numpy()
target_np = target.to_numpy()
target_x_np = target_x.to_numpy()
target_y_np = target_y.to_numpy()
target_z_np = target_z.to_numpy()

Split into train, validation, and test datasets.

In [15]:
#Split into train, validation, and test sets.
# Setup train, validation, and test splits
DATASET_SIZE = len(complete_motion_df)
train_size = int(0.7 * DATASET_SIZE)
val_size = int(0.15 * DATASET_SIZE)
test_size = int(0.15 * DATASET_SIZE)

X_train, X_valid, X_test = complete_motion_np[:train_size], complete_motion_np[train_size:(train_size+val_size)], complete_motion_np[(train_size + val_size):]
y_train_x, y_valid_x, y_test_x = target_x_np[:train_size], target_x_np[train_size:(train_size+val_size)], target_x_np[(train_size + val_size):]
y_train_y, y_valid_y, y_test_y = target_y_np[:train_size], target_y_np[train_size:(train_size+val_size)], target_y_np[(train_size + val_size):]
y_train_z, y_valid_z, y_test_z = target_z_np[:train_size], target_z_np[train_size:(train_size+val_size)], target_z_np[(train_size + val_size):]

Use sklearn standard scaler to normalize all columns of the dataframe.  Could also provide numpy ndarrays as input, but most likely will later need to move to normalization methods supported by Tensorflow pipelines.\
https://stackoverflow.com/questions/24645153/pandas-dataframe-columns-scaling-with-sklearn \
https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html \
https://stackoverflow.com/questions/35723472/how-to-use-sklearn-fit-transform-with-pandas-and-return-dataframe-instead-of-num 

This is a pretty horrible way to do it since the fit_transform object returns a numpy ndarray.  Essentially converting from dataframe, to ndarray, and back to dataframe so I can be lazy and not rewrite the code to split out the data.  I apologize in advance for being a lazy bastard. 

I tried only scaling the input data, but the target data also needs to be in the range of the output activation function.  For example, if my output acivation function is only in the range of 0 to 1, then I have to have target values in the range of 0 to 1.  I'll need to use both the sklearn fit(), transform(), and inverse_transform() for the MinMaxScaler() to take the data in, normalize both the training and target values, train the model, then use the inverse_transform() to output the data on a normal scale. \
https://machinelearningmastery.com/how-to-improve-neural-network-stability-and-modeling-performance-with-data-scaling/

Found out later that data needs to be split up before performing Scaling or Standardization. \
https://towardsdatascience.com/preprocessing-with-sklearn-a-complete-and-comprehensive-guide-670cb98fcfb9
Use the ColumnTransformer to scale each column of the dataframe and save the scaler object for each column.  Can later be used to convert the data back to a normal space.

### Data Scaling Method

We will fit a column transformer to the training data only.  We will then use the transformer to tranform the validation and test data as well for the input data. 

We will make separate transformers for the output values so we can later call the inverse_transforms to get data back into our original coordinate system.

In [16]:
# We will create a transformer fit to the input training data.

# Using column transformer to scale each column of the dataframe independently.
train_cols_to_transform = list(range(0,np.size(X_train,1)))   #Create a list of all the columns in the numpy array
t = [('scale', MinMaxScaler(feature_range=(-1,1)), train_cols_to_transform)]

input_transformer = ColumnTransformer(transformers=t,
                                     remainder='passthrough')

# Fit the column transformer to the training data.
input_transformer.fit(X_train)

# Use the fitted transformer to scale the training data
X_train = input_transformer.transform(X_train)
# Use the transformer fitted to the training data to also scale the validation and test data
X_valid = input_transformer.transform(X_valid)
X_test = input_transformer.transform(X_test)

In [17]:
pd.DataFrame(X_train)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,-0.266688,0.895480,-1.0,-0.942156,-0.094770,-1.0,-0.450111,0.888719,-1.0,-0.341876,-0.875485,-1.0,0.709740,-0.318078,-1.0,0.416301,-0.909797,-1.0
1,-0.392277,0.784280,-1.0,0.625552,-0.675863,-1.0,-0.934892,-0.249042,-1.0,-0.114655,0.989166,-1.0,-0.716349,-0.526326,-1.0,0.598515,-0.802300,-1.0
2,-0.215665,0.928541,-1.0,-0.687485,0.697463,-1.0,0.027654,0.999416,-1.0,-0.910964,0.210112,-1.0,-0.110871,0.992345,-1.0,0.339266,-0.941075,-1.0
3,-0.137166,0.966988,-1.0,0.465246,0.863468,-1.0,0.692884,0.685692,-1.0,0.611157,0.736774,-1.0,-0.106548,0.993889,-1.0,0.218445,-0.976019,-1.0
4,0.641005,0.527093,-1.0,0.926968,-0.297315,-1.0,0.011346,-0.974540,-1.0,-0.691176,-0.523025,-1.0,0.467695,-0.809053,-1.0,-0.888434,-0.461641,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2094,0.996928,-0.470716,-1.0,0.961757,-0.289613,-1.0,-0.896154,-0.055073,-1.0,-0.670172,0.504580,-1.0,-0.495535,0.511966,-1.0,-0.780987,0.618941,-1.0
2095,-0.817940,-0.018814,-1.0,-0.977095,0.144800,-1.0,-0.974726,-0.182932,-1.0,0.297370,-0.882809,-1.0,0.289526,0.794733,-1.0,0.999972,0.002305,-1.0
2096,0.135291,0.986892,-1.0,-0.872761,0.333888,-1.0,-0.472516,-0.830602,-1.0,0.916990,-0.078239,-1.0,0.638120,-0.560102,-1.0,-0.206502,-0.978531,-1.0
2097,-0.872027,-0.202725,-1.0,0.229662,-0.932464,-1.0,-0.110991,-0.985461,-1.0,-0.568118,0.808253,-1.0,-0.428685,-0.894000,-1.0,0.980884,0.188800,-1.0


In [18]:
#Save copy of y_valid_x to later check inverse transform
y_valid_x_copy = y_valid_x.copy()
y_train_x_copy = y_train_x.copy()

scaled_range = (-1000,1000)
# Next, we create scalers for each target value so that we can later use the inverse transform.
# We go ahead and scale the training data.  We will use the created scalers in the next cell to scale the validation and test data.
target_x_scaler = MinMaxScaler(feature_range=scaled_range)   # Create the transformer
y_train_x = target_x_scaler.fit_transform(y_train_x.reshape(-1,1))   # Fit the transformer to the training data and transform the data at the same time.
target_y_scaler = MinMaxScaler(feature_range=scaled_range)   # Create the transformer
y_train_y = target_y_scaler.fit_transform(y_train_y.reshape(-1,1))   # Fit the transformer to the training data and transform the data at the same time.
target_z_scaler = MinMaxScaler(feature_range=scaled_range)   # Create the transformer
y_train_z = target_z_scaler.fit_transform(y_train_z.reshape(-1,1))   # Fit the transformer to the training data and transform the data at the same time.


In [19]:
# Use scalers from previous block to scale validation and test data.

# Scale validation data
y_valid_x = target_x_scaler.fit_transform(y_valid_x.reshape(-1,1))
y_valid_y = target_y_scaler.fit_transform(y_valid_y.reshape(-1,1))
y_valid_z = target_z_scaler.fit_transform(y_valid_z.reshape(-1,1))

# Scale test data
y_test_x = target_x_scaler.fit_transform(y_test_x.reshape(-1,1))
y_test_y = target_y_scaler.fit_transform(y_test_y.reshape(-1,1))
y_test_z = target_z_scaler.fit_transform(y_test_z.reshape(-1,1))

In [20]:
# For shits and giggles.  Let's try and see if inverse_transform works.
pd.DataFrame(y_train_x_copy)

Unnamed: 0,0
0,1.105117e+12
1,1.057962e+12
2,1.121013e+12
3,1.142160e+12
4,1.120558e+12
...,...
2094,6.693107e+11
2095,7.509007e+11
2096,1.186206e+12
2097,6.728040e+11


In [21]:
# Try inverse_transform on one of the datasets.
pd.DataFrame(target_x_scaler.inverse_transform(y_train_x))

Unnamed: 0,0
0,1.105902e+12
1,1.059213e+12
2,1.121641e+12
3,1.142579e+12
4,1.121190e+12
...,...
2094,6.744034e+11
2095,7.551869e+11
2096,1.186189e+12
2097,6.778621e+11


In [None]:
# Create an sklearn scaler and fit our data to the scaler with a range of -1 to 1
###scaler = MinMaxScaler(feature_range=(-1,1))
# Fit the scaler to the provided data set.
###scaler.fit(complete_motion_df.values)
# Apply the transformation.  Create Numpy ndarrays with scaled data from dataframe.  Indexes and column labels don't survive.
###scaled_data = StandardScaler().fit_transform(complete_motion_df.values)
# Overwrite previous dataframe with scaled data and carry indexes and column labels over.
###complete_motion_df = pd.DataFrame(scaled_data, index=complete_motion_df.index, columns=complete_motion_df.columns)




# Create Numpy ndarrays with scaled data from dataframe.  Indexes and column labels don't survive.
#scaled_data = StandardScaler().fit_transform(complete_motion_df.values)
# Overwrite previous dataframe with scaled data and carry indexes and column labels over.
#complete_motion_df = pd.DataFrame(scaled_data, index=complete_motion_df.index, columns=complete_motion_df.columns)
complete_motion_df.head(100)

## Try Creating Single Input, Multiple Output Regression Model

Trying to create a regression NN where instead of designating an output layer of 3 nodes, 3 output layers of a single node are used to designate specific datasets and loss functions.  Still need to figure out later how to get a 3 node output to correspond to the input training data.

Use functional API to build basic NN architecture.

In [None]:
# Use functional API to build basic NN architecture.
input_main = keras.layers.Input(shape=complete_motion_np.shape[1:])
hidden1 = keras.layers.Dense(300, activation="tanh")(input_main)
hidden2 = keras.layers.Dense(300, activation="tanh")(hidden1)
output_x = keras.layers.Dense(1, name="output_x")(hidden2)
output_y = keras.layers.Dense(1, name="output_y")(hidden2)
output_z = keras.layers.Dense(1, name="output_z")(hidden2)

Create model with specified input and output layers

In [None]:
# Create model with specified input and output layers
model = keras.Model(inputs=[input_main], outputs=[output_x, output_y, output_z])
model.summary()

In [None]:
# Compile model with specified loss functions for each output and specify weighting to provide each output.
# Weighting X and Y output more than Z
model.compile(loss=["mse", "mse", "mse"], 
              loss_weights=[0.4, 0.4, 0.2], 
              optimizer=keras.optimizers.Adam(learning_rate=1e-3),
              metrics=["mse"])

Train the model with separate x, y, z training sets.

In [None]:
history = model.fit(
    [X_train], [y_train_x, y_train_y, y_train_z],
    epochs=200,
    validation_data=([X_valid], [y_valid_x, y_valid_y, y_valid_z])
)

In [None]:
# Convert training history to dataframe for analysis and plotting.
complete_history_data = pd.DataFrame(history.history)
complete_history_data.head(-9)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Create figure of subplots to plot total loss, x coordinate loss, y coordinate loss, and z coordinate MSEs.
fig2, mse_plots = plt.subplots(2,2)


#plot losses in each quadrant of the figure.
mse_plots[0][0].plot(complete_history_data[["output_x_mse", "val_output_x_mse"]])
#mse_plots[0][0].set_ylim(0,1)

mse_plots[0][1].plot(complete_history_data[["output_y_mse", "val_output_y_mse"]])
#mse_plots[0][1].set_ylim(0,1)

mse_plots[1][0].plot(complete_history_data[["output_z_mse", "val_output_z_mse"]])
#mse_plots[1][0].set_ylim(0,1)

plt.show()


In [None]:
# Create figure of subplots to plot total loss, x coordinate loss, y coordinate loss, and z coordinate loss.
fig, loss_plots = plt.subplots(2,2)


#plot losses in each quadrant of the figure.
loss_plots[0][0].plot(complete_history_data[["loss", "val_loss"]])
#loss_plots[0][0].set_ylim(0,1)

loss_plots[0][1].plot(complete_history_data[["output_x_loss", "val_output_x_loss"]])
#loss_plots[0][1].set_ylim(0,1)

loss_plots[1][0].plot(complete_history_data[["output_y_loss", "val_output_y_loss"]])
#loss_plots[1][0].set_ylim(0,1)

loss_plots[1][1].plot(complete_history_data[["output_z_loss", "val_output_z_loss"]])
#loss_plots[1][1].set_ylim(0,1)


plt.show()


### Evaluate the Model with Test Data

In [None]:
y_test_x.shape

In [None]:
model.evaluate([X_test],[y_test_x, y_test_y, y_test_z])

### Predict Values and Inspect Differences

In [None]:
x_pred, y_pred, z_pred = model.predict([X_test])

In [None]:
scaler.inverse_transform(scaled_data)

Take predicted and model values and scale them back to the real coordinate system using the sklearn inverse_transform.

In [None]:
# Inverse transform the predictions
x_pred = scaler.inverse_transform(x_pred)
y_pred = scaler.inverse_transform(y_pred)
z_pred = scaler.inverse_transform(z_pred)
# Inverse transform the model target values.  The benchmark for reality.
y_test_x = scaler.inverse_transform(y_test_x)
y_test_y = scaler.inverse_transform(y_test_y)
y_test_z = scaler.inverse_transform(y_test_z)

In [None]:
pred_model_comparison = pd.DataFrame(data=np.concatenate((x_pred, y_test_x.reshape(-1,1), y_pred, y_test_y.reshape(-1,1), z_pred, y_test_z.reshape(-1,1)), axis=1),
                                    columns=['pred_x', 'model_x', 'pred_y', 'model_y', 'pred_z', 'model_z'])
pred_model_comparison.head(10)

In [None]:
pred_model_comparison[["pred_x", "model_x"]].plot()

In [None]:
pred_model_comparison[["pred_y", "model_y"]].plot()

In [None]:
pred_model_comparison[["pred_z", "model_z"]].plot()