# 2D Planet Simulator to Generate Data for ML Inferred Physics

Author: Craig Boger
06/01/2020

Looks like the simulator is 2D, but can be changed to 3D if needed.

This is a script to generate some quick simulated data for orbiting objects in a 2D space to feed into a neural network to predict the next position of a body traveling through a system.

## Straight Up Just Stealing Someone's Code and Trying to Run It

Credit to benrules2: https://gist.github.com/benrules2/220d56ea6fe9a85a4d762128b11adfba

In [1]:
import math
import random
%matplotlib widget
import matplotlib.pyplot as plot
from mpl_toolkits.mplot3d import Axes3D

class point:
    """Mutable 3-D vector used for positions, velocities and accelerations."""

    def __init__(self, x,y,z):
        # Store the three Cartesian components as plain, writable attributes.
        self.x, self.y, self.z = x, y, z

class body:
    """A simulated mass: where it is, how fast it moves and what it is called."""

    def __init__(self, location, mass, velocity, name = ""):
        # The kinematic state is kept by reference, not copied.
        self.location, self.velocity = location, velocity
        self.mass, self.name = mass, name

def calculate_single_body_acceleration(bodies, body_index):
    """Return the gravitational acceleration acting on one body.

    The pull of every other entry in ``bodies`` on ``bodies[body_index]`` is
    summed component-wise as a = G * m * d / |d|**3, where ``d`` points from
    the target body towards the attracting body.

    Args:
        bodies: indexable collection of objects exposing ``location`` (with
            ``x``/``y``/``z`` attributes) and ``mass``.
        body_index: position in ``bodies`` of the body being accelerated.

    Returns:
        A new ``point`` holding the acceleration components.

    Raises:
        ZeroDivisionError: with plain Python numbers, if another body sits at
            exactly the same location as the target.
    """
    G_const = 6.67408e-11 #m3 kg-1 s-2
    here = bodies[body_index].location
    total = point(0,0,0)
    for other_index, other in enumerate(bodies):
        if other_index == body_index:
            continue  # a body does not attract itself
        there = other.location
        # Separation vector from the target towards the attracting body.
        dx = there.x - here.x
        dy = there.y - here.y
        dz = there.z - here.z
        distance = math.sqrt(dx**2 + dy**2 + dz**2)
        scale = G_const * other.mass / distance**3
        total.x += scale * dx
        total.y += scale * dy
        total.z += scale * dz

    return total

def compute_velocity(bodies, time_step = 1):
    """Advance every body's velocity in place by acceleration * time_step.

    Accelerations come from ``calculate_single_body_acceleration`` and depend
    only on the bodies' current locations and masses.
    """
    for position, current in enumerate(bodies):
        pull = calculate_single_body_acceleration(bodies, position)
        speed = current.velocity
        speed.x += pull.x * time_step
        speed.y += pull.y * time_step
        speed.z += pull.z * time_step


def update_location(bodies, time_step = 1):
    """Move every body in place by velocity * time_step along each axis."""
    for current in bodies:
        where, speed = current.location, current.velocity
        where.x += speed.x * time_step
        where.y += speed.y * time_step
        where.z += speed.z * time_step

def compute_gravity_step(bodies, time_step = 1):
    """Run one integration step: update velocities first, then locations.

    Because velocities are updated before positions, each position update
    uses the freshly computed velocity.
    """
    for advance in (compute_velocity, update_location):
        advance(bodies, time_step = time_step)

def plot_output(bodies, outfile = None):
    """Draw the recorded 3-D trajectory of every body on one set of axes.

    Args:
        bodies: list of history dicts, each with coordinate lists under
            "x", "y" and "z" and a label under "name" (the structure
            returned by ``run_simulation``).
        outfile: path the figure is saved to; when falsy the figure is shown
            interactively instead.
    """
    fig = plot.figure()
    colours = ['r','b','g','y','m','c']
    ax = fig.add_subplot(1,1,1, projection='3d')
    max_range = 0
    for index, current_body in enumerate(bodies):
        xs, ys, zs = current_body["x"], current_body["y"], current_body["z"]
        # The limits below are symmetric about zero, so size them from the
        # largest absolute coordinate; a plain max() would clip trajectories
        # that extend further in the negative direction. default=0 keeps a
        # body with no recorded samples from raising.
        extent = max((abs(value) for axis in (xs, ys, zs) for value in axis), default=0)
        if extent > max_range:
            max_range = extent
        # Cycle through the palette so neighbouring bodies never share a colour.
        ax.plot(xs, ys, zs, c = colours[index % len(colours)], label = current_body["name"])

    # Leave matplotlib's automatic limits alone when nothing gives a range.
    if max_range > 0:
        ax.set_xlim([-max_range,max_range])
        ax.set_ylim([-max_range,max_range])
        ax.set_zlim([-max_range,max_range])
    ax.legend()

    if outfile:
        plot.savefig(outfile)
    else:
        plot.show()

def run_simulation(bodies, names = None, time_step = 1, number_of_steps = 10000, report_freq = 100):
    """Integrate the system and record sampled positions of every body.

    Args:
        bodies: list of objects with ``name``, ``location`` and whatever
            ``compute_gravity_step`` requires; they are advanced in place.
        names: unused; kept so existing callers keep working.
        time_step: step size handed to ``compute_gravity_step`` on every
            iteration. (Previously this argument was ignored and a fixed
            value of 1000 was used.)
        number_of_steps: upper bound of the step counter. The counter runs
            from 1 to ``number_of_steps - 1``, so one step fewer than this
            value is performed.
        report_freq: a sample is recorded whenever the step counter is a
            multiple of this value.

    Returns:
        One dict per body, in input order, with the sampled coordinates under
        "x", "y", "z" and the body's name under "name".
    """
    #create output container for each body
    body_locations_hist = [
        {"x":[], "y":[], "z":[], "name":current_body.name}
        for current_body in bodies
    ]

    for i in range(1,number_of_steps):
        # Honour the caller's step size instead of a hard-coded constant.
        compute_gravity_step(bodies, time_step = time_step)

        if i % report_freq == 0:
            for current_body, history in zip(bodies, body_locations_hist):
                history["x"].append(current_body.location.x)
                history["y"].append(current_body.location.y)
                history["z"].append(current_body.location.z)

    return body_locations_hist
            
# Planet data: location (m), mass (kg), velocity (m/s).
sun = dict(location=point(0,0,0), mass=2e30, velocity=point(0,0,0))
mercury = dict(location=point(0,5.7e10,0), mass=3.285e23, velocity=point(47000,0,0))
venus = dict(location=point(0,1.1e11,0), mass=4.8e24, velocity=point(35000,0,0))
earth = dict(location=point(0,1.5e11,0), mass=6e24, velocity=point(30000,0,0))
mars = dict(location=point(0,2.2e11,0), mass=2.4e24, velocity=point(24000,0,0))
jupiter = dict(location=point(0,7.7e11,0), mass=1e28, velocity=point(13000,0,0))
saturn = dict(location=point(0,1.4e12,0), mass=5.7e26, velocity=point(9000,0,0))
uranus = dict(location=point(0,2.8e12,0), mass=8.7e25, velocity=point(6835,0,0))
neptune = dict(location=point(0,4.5e12,0), mass=1e26, velocity=point(5477,0,0))
pluto = dict(location=point(0,3.7e12,0), mass=1.3e22, velocity=point(4748,0,0))
# TODO: Add random satellite here.
satellite_1 = dict(location=point(1e5,3.7e5,0), mass=1.7e1, velocity=point(4748,0,0))

if __name__ == "__main__":

    def _copy_point(template):
        """Return an independent ``point`` with the same components."""
        return point(template.x, template.y, template.z)

    #build list of planets in the simulation, or create your own
    simulated = [
        ("sun", sun),
        ("earth", earth),
        ("mars", mars),
        ("venus", venus),
        ("mercury", mercury),
        ("jupiter", jupiter),
        ("saturn", saturn),
        # ("satellite_1", satellite_1),
    ]
    # The simulation updates locations and velocities in place, so give every
    # body its own copies; the template dicts then keep their initial values
    # and a second simulation built from them starts from the same state.
    bodies = [
        body(location = _copy_point(spec["location"]),
             mass = spec["mass"],
             velocity = _copy_point(spec["velocity"]),
             name = label)
        for label, spec in simulated
    ]

    # Original defaults of simulation
    # motions = run_simulation(bodies, time_step = 100, number_of_steps = 80000, report_freq = 1000)
    # Try messing with report frequency to get more data.
    # 1000 s per step is the step size the outputs recorded in this notebook
    # were generated with (the first Earth sample, after 100 steps at
    # 30000 m/s, sits at x ~ 3e9 m).
    motions = run_simulation(bodies, time_step = 1000, number_of_steps = 300000, report_freq = 100)
    plot_output(motions, outfile = 'orbits.png')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Take motions data from the above simulation and convert it to a Pandas dataframe.  The "motions" output is a list of python dictionaries that can be converted into a dataframe and then manipulated.

In [2]:
import pandas as pd
import numpy as np

motions_df = pd.DataFrame(motions)
motions_df.head(100)

Unnamed: 0,x,y,z,name
0,"[6.172247210875407, 49.37608388489302, 166.613...","[6056.974666438165, 24107.571980251058, 54150....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",sun
1,"[2999802268.7576785, 5998418130.158717, 899466...","[149970049800.53976, 149880804272.02502, 14973...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",earth
2,"[2399949856.0043564, 4799598821.688435, 719864...","[219986083529.99896, 219944611139.58228, 21987...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",mars
3,"[3499415066.461402, 6995320910.112684, 1048421...","[109944304437.46532, 109778376868.05923, 10950...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",venus
4,"[4694355206.034602, 9354859805.633783, 1394777...","[56792631335.43017, 56175843268.08082, 5515329...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",mercury
5,"[1299999366.480654, 2599994931.4665866, 389998...","[769998863548.677, 769995476701.667, 769989839...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",jupiter
6,"[899999928.8208524, 1799999430.5240006, 269999...","[1399999647585.8357, 1399998597321.9666, 13999...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",saturn


In [3]:
# Turn each body's history dict into its own dataframe (columns x, y, z, name).
# These are put back together into one wide dataframe further down.
# A comprehension is used so no loop variable is left behind at notebook
# scope; the previous loop variable was called `body` and replaced the
# simulator's `body` class after this cell ran.
motions_df_list = [pd.DataFrame(body_history) for body_history in motions]

In [4]:
motions_df_list[3]

Unnamed: 0,x,y,z,name
0,3.499415e+09,1.099443e+11,0.0,venus
1,6.995321e+09,1.097784e+11,0.0,venus
2,1.048421e+10,1.095024e+11,0.0,venus
3,1.396259e+10,1.091166e+11,0.0,venus
4,1.742697e+10,1.086215e+11,0.0,venus
...,...,...,...,...
2994,-5.289650e+10,8.213631e+10,0.0,venus
2995,-5.030464e+10,8.447291e+10,0.0,venus
2996,-4.763994e+10,8.672745e+10,0.0,venus
2997,-4.490500e+10,8.889761e+10,0.0,venus


In [5]:
# Combine the dataframes into a single, large dataframe.
# Can later choose a planet to be the target we train to predict.
coordinate_frames = []

for body_df in motions_df_list:
    # Prefix the three coordinate columns with the body's name and leave the
    # name column behind. Work on a copy so the per-body frames in
    # motions_df_list keep their original column labels.
    body_name = body_df.loc[0, "name"]
    coordinates = body_df.iloc[:, 0:3].copy()
    coordinates.columns = [body_name + suffix for suffix in ("_x", "_y", "_z")]
    coordinate_frames.append(coordinates)

# One concat over all bodies instead of growing the frame inside the loop.
complete_motion_df = pd.concat(coordinate_frames, axis=1)

complete_motion_df.head(100)

Unnamed: 0,sun_x,sun_y,sun_z,earth_x,earth_y,earth_z,mars_x,mars_y,mars_z,venus_x,...,venus_z,mercury_x,mercury_y,mercury_z,jupiter_x,jupiter_y,jupiter_z,saturn_x,saturn_y,saturn_z
0,6.172247e+00,6.056975e+03,0.0,2.999802e+09,1.499700e+11,0.0,2.399950e+09,2.199861e+11,0.0,3.499415e+09,...,0.0,4.694355e+09,5.679263e+10,0.0,1.299999e+09,7.699989e+11,0.0,8.999999e+08,1.400000e+12,0.0
1,4.937608e+01,2.410757e+04,0.0,5.998418e+09,1.498808e+11,0.0,4.799599e+09,2.199446e+11,0.0,6.995321e+09,...,0.0,9.354860e+09,5.617584e+10,0.0,2.599995e+09,7.699955e+11,0.0,1.799999e+09,1.399999e+12,0.0
2,1.666139e+02,5.415063e+04,0.0,8.994662e+09,1.497323e+11,0.0,7.198646e+09,2.198756e+11,0.0,1.048421e+10,...,0.0,1.394778e+10,5.515330e+10,0.0,3.899983e+09,7.699898e+11,0.0,2.699998e+09,1.399997e+12,0.0
3,3.948280e+02,9.618418e+04,0.0,1.198735e+10,1.495246e+11,0.0,9.596791e+09,2.197790e+11,0.0,1.396259e+10,...,0.0,1.843961e+10,5.373113e+10,0.0,5.199959e+09,7.699820e+11,0.0,3.599995e+09,1.399994e+12,0.0
4,7.708690e+02,1.502055e+05,0.0,1.497529e+10,1.492578e+11,0.0,1.199373e+10,2.196549e+11,0.0,1.742697e+10,...,0.0,2.279721e+10,5.191794e+10,0.0,6.499921e+09,7.699718e+11,0.0,4.499991e+09,1.399991e+12,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,4.138605e+06,5.394756e+07,0.0,1.441446e+11,-4.894462e+10,0.0,1.875072e+11,1.033965e+11,0.0,1.578892e+10,...,0.0,3.050288e+10,-4.133032e+10,0.0,1.242400e+11,7.596504e+11,0.0,8.633698e+10,1.396785e+12,0.0
96,4.259034e+06,5.506067e+07,0.0,1.431961e+11,-5.174263e+10,0.0,1.885864e+11,1.011770e+11,0.0,1.238219e+10,...,0.0,2.606635e+10,-4.407853e+10,0.0,1.255223e+11,7.594341e+11,0.0,8.723499e+10,1.396718e+12,0.0
97,4.381576e+06,5.618472e+07,0.0,1.421936e+11,-5.452104e+10,0.0,1.896398e+11,9.894374e+10,0.0,8.963735e+09,...,0.0,2.137176e+10,-4.638960e+10,0.0,1.268043e+11,7.592156e+11,0.0,8.813296e+10,1.396650e+12,0.0
98,4.506234e+06,5.731969e+07,0.0,1.411373e+11,-5.727882e+10,0.0,1.906674e+11,9.669701e+10,0.0,5.536788e+09,...,0.0,1.646399e+10,-4.823713e+10,0.0,1.280859e+11,7.589949e+11,0.0,8.903089e+10,1.396581e+12,0.0


In [6]:
complete_motion_df.shape

(2999, 21)

At this point, we have a single dataframe with all bodies and all positions with each time step as the index of our rows.

### Checking for any NA values that could be messing up optimizer calculations in Tensorflow

In [7]:
complete_motion_df.isnull().any()
# Doesn't look like any null or missing values.

sun_x        False
sun_y        False
sun_z        False
earth_x      False
earth_y      False
earth_z      False
mars_x       False
mars_y       False
mars_z       False
venus_x      False
venus_y      False
venus_z      False
mercury_x    False
mercury_y    False
mercury_z    False
jupiter_x    False
jupiter_y    False
jupiter_z    False
saturn_x     False
saturn_y     False
saturn_z     False
dtype: bool

# Try a Quick Neural Net for Predicting a Planet's Position (Saturn in This Run)

Basic Regression with Tensorflow: https://www.tensorflow.org/tutorials/keras/regression


### Imports

In [8]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)


def _version_tuple(version):
    """Return the leading numeric components of a version string as ints.

    Parsing stops at the first component that does not start with a digit,
    e.g. "2.3.0-tf" -> (2, 3, 0) and "0.20rc1" -> (0, 20). Comparing these
    tuples avoids the pitfalls of comparing version strings character by
    character (where "10.0" sorts before "2.0").
    """
    parts = []
    for piece in version.split("."):
        digits = ""
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        if not digits:
            break
        parts.append(int(digits))
    return tuple(parts)


# Scikit-Learn ≥0.20 is required
# Probably not needed since not using regressor or doing any feature engineering.
import sklearn
from sklearn.preprocessing import StandardScaler  # Scaler for normalizing data.
from sklearn.preprocessing import MinMaxScaler  # Scaler for normalizing data.
assert _version_tuple(sklearn.__version__) >= (0, 20)

# TensorFlow ≥2.0 is required
import tensorflow as tf
assert _version_tuple(tf.__version__) >= (2, 0)
# Recommended to enable eager execution when developing model.
# Processing data: https://www.youtube.com/watch?v=oFFbKogYdfc
# tf.enable_eager_execution()

# Import Keras
from tensorflow import keras

# to make this notebook's output stable across runs
#np.random.seed(42)

# Use sklearn for data processing


# Common imports
import numpy as np
import os

In [9]:
tf.__version__

'2.2.0'

In [10]:
keras.__version__

'2.3.0-tf'

## Randomize Data, Normalize Data, and Convert Pandas Dataframe to Numpy Arrays for Input to NN

Randomize rows in dataframe before training, validation, and test splitting of data.  Shuffle dataframe in place and reset index.\
https://stackoverflow.com/questions/29576430/shuffle-dataframe-rows

In [11]:
# Shuffle all rows and renumber them. The fixed random_state makes the shuffle,
# and therefore the train/validation/test membership, repeatable across runs.
# NOTE(review): rows are consecutive simulation samples, so a random shuffle
# places near-identical neighbouring time steps in different splits - confirm
# this is acceptable for the evaluation you want.
complete_motion_df = complete_motion_df.sample(frac=1, random_state=42).reset_index(drop=True)
complete_motion_df.head(10)

Unnamed: 0,sun_x,sun_y,sun_z,earth_x,earth_y,earth_z,mars_x,mars_y,mars_z,venus_x,...,venus_z,mercury_x,mercury_y,mercury_z,jupiter_x,jupiter_y,jupiter_z,saturn_x,saturn_y,saturn_z
0,7327582000.0,7012888000.0,0.0,37057180000.0,-143485600000.0,0.0,-200825600000.0,37048420000.0,0.0,91224640000.0,...,0.0,31106510000.0,58457450000.0,0.0,419922700000.0,-593517500000.0,0.0,1071140000000.0,718856600000.0,0.0
1,13567160.0,127983300.0,0.0,33959710000.0,-149441000000.0,0.0,205893000000.0,-25861980000.0,0.0,-110775800000.0,...,0.0,48561570000.0,26488450000.0,0.0,191609100000.0,745141300000.0,0.0,133864200000.0,1392258000000.0,0.0
2,22602790000.0,2466220000.0,0.0,62755930000.0,147062000000.0,0.0,-81646750000.0,-168424500000.0,0.0,63510780000.0,...,0.0,-22950530000.0,-23283150000.0,0.0,-624130600000.0,398768300000.0,0.0,748052000000.0,-737070300000.0,0.0
3,1876543000.0,3562609000.0,0.0,-131646500000.0,-70176690000.0,0.0,-171500700000.0,-102702300000.0,0.0,112054600000.0,...,0.0,-33505610000.0,-34063760000.0,0.0,747714000000.0,72137980000.0,0.0,726811200000.0,1147560000000.0,0.0
4,14201230000.0,7519172000.0,0.0,90581440000.0,136840700000.0,0.0,-194372200000.0,32940440000.0,0.0,-93801530000.0,...,0.0,-37354470000.0,-5082980000.0,0.0,-258064100000.0,-665608600000.0,0.0,1190855000000.0,205107100000.0,0.0
5,10245130000.0,7636535000.0,0.0,143143500000.0,79056900000.0,0.0,200299100000.0,105568600000.0,0.0,121229000000.0,...,0.0,48805340000.0,-26940920000.0,0.0,136758700000.0,-706288000000.0,0.0,1149567000000.0,507106400000.0,0.0
6,19578810000.0,5410729000.0,0.0,-31222610000.0,-139292200000.0,0.0,-162506000000.0,-86822370000.0,0.0,63526830000.0,...,0.0,-22788620000.0,-24858200000.0,0.0,-686462200000.0,-215065400000.0,0.0,1086091000000.0,-298750300000.0,0.0
7,19941030000.0,5158676000.0,0.0,-117688900000.0,-60450030000.0,0.0,-189095400000.0,13182370000.0,0.0,130132500000.0,...,0.0,73336440000.0,15077270000.0,0.0,-702174800000.0,-162269800000.0,0.0,1066223000000.0,-340890400000.0,0.0
8,22724160000.0,2283544000.0,0.0,145111500000.0,90200860000.0,0.0,-147098900000.0,-109295600000.0,0.0,-46651180000.0,...,0.0,65162300000.0,38521090000.0,0.0,-600456400000.0,436741600000.0,0.0,715671000000.0,-762123800000.0,0.0
9,7488408000.0,7064440000.0,0.0,-710359900.0,-146160300000.0,0.0,-195135400000.0,69141610000.0,0.0,113346600000.0,...,0.0,61240940000.0,7173277000.0,0.0,405074000000.0,-603149000000.0,0.0,1076662000000.0,707201800000.0,0.0


Use sklearn standard scaler to normalize all columns of the dataframe.  Could also provide numpy ndarrays as input, but most likely will later need to move to normalization methods supported by Tensorflow pipelines.\
https://stackoverflow.com/questions/24645153/pandas-dataframe-columns-scaling-with-sklearn \
https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html \
https://stackoverflow.com/questions/35723472/how-to-use-sklearn-fit-transform-with-pandas-and-return-dataframe-instead-of-num

This is a pretty horrible way to do it since the fit_transform method returns a numpy ndarray.  Essentially converting from dataframe, to ndarray, and back to dataframe so I can be lazy and not rewrite the code to split out the data.  I apologize in advance for being a lazy bastard.

In [12]:
# Keep the fitted scaler: scaler.inverse_transform() is what maps scaled values
# back to metres, and it needs the per-column min/max learned here.
# NOTE(review): the scaler is fitted on every row, i.e. before the data is
# split into train/validation/test, so the held-out rows influence the scaling.
scaler = MinMaxScaler()
# Create Numpy ndarrays with scaled data from dataframe.  Indexes and column labels don't survive.
scaled_data = scaler.fit_transform(complete_motion_df.values)
# Overwrite previous dataframe with scaled data and carry indexes and column labels over.
complete_motion_df = pd.DataFrame(scaled_data, index=complete_motion_df.index, columns=complete_motion_df.columns)
complete_motion_df.head(100)

Unnamed: 0,sun_x,sun_y,sun_z,earth_x,earth_y,earth_z,mars_x,mars_y,mars_z,venus_x,...,venus_z,mercury_x,mercury_y,mercury_z,jupiter_x,jupiter_y,jupiter_z,saturn_x,saturn_y,saturn_z
0,0.319848,0.908390,0.0,0.578602,0.031466,0.0,0.018219,0.552880,0.0,0.827254,...,0.0,0.651429,0.946136,0.0,0.776658,0.083010,0.0,0.899350,0.690814,0.0
1,0.000592,0.016577,0.0,0.569106,0.012305,0.0,0.943303,0.405052,0.0,0.001090,...,0.0,0.785293,0.669483,0.0,0.621872,0.983283,0.0,0.111733,0.996486,0.0
2,0.986609,0.319453,0.0,0.657390,0.966272,0.0,0.289292,0.070056,0.0,0.713907,...,0.0,0.236861,0.238770,0.0,0.068836,0.750341,0.0,0.627851,0.029936,0.0
3,0.081911,0.461470,0.0,0.061387,0.267330,0.0,0.084919,0.224492,0.0,0.912447,...,0.0,0.155913,0.145477,0.0,0.998886,0.530676,0.0,0.610002,0.885412,0.0
4,0.619882,0.973970,0.0,0.742698,0.933386,0.0,0.032897,0.543227,0.0,0.070514,...,0.0,0.126396,0.396271,0.0,0.317013,0.034527,0.0,0.999950,0.457611,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.207457,0.759828,0.0,0.015405,0.537381,0.0,0.923807,0.313070,0.0,0.494505,...,0.0,0.036280,0.529639,0.0,0.921616,0.229920,0.0,0.803630,0.777218,0.0
96,0.104379,0.530377,0.0,0.234693,0.920383,0.0,0.034686,0.671104,0.0,0.088947,...,0.0,0.019180,0.536922,0.0,0.998646,0.460822,0.0,0.657248,0.864143,0.0
97,0.433748,0.984407,0.0,0.763603,0.910800,0.0,0.861266,0.825623,0.0,0.844792,...,0.0,0.590264,0.985005,0.0,0.605604,0.011199,0.0,0.959982,0.604933,0.0
98,0.112245,0.552445,0.0,0.453252,0.989023,0.0,0.102209,0.798073,0.0,0.017749,...,0.0,0.703305,0.855995,0.0,0.996480,0.438467,0.0,0.671938,0.856970,0.0


In [13]:
# Body whose coordinates the network is trained to predict. This used to be
# "whatever the last 3 columns are", which is Saturn with the current body
# list; naming it makes the choice explicit and makes an accidental re-run of
# this cell fail with a KeyError instead of silently stripping the next body.
TARGET_BODY = "saturn"
target_columns = [TARGET_BODY + suffix for suffix in ("_x", "_y", "_z")]
# Copy so the target is independent of the feature dataframe.
target = complete_motion_df[target_columns].copy()
# Drop target from main dataframe.
complete_motion_df = complete_motion_df.drop(columns=target_columns)
target.head(5)

Unnamed: 0,saturn_x,saturn_y,saturn_z
0,0.89935,0.690814,0.0
1,0.111733,0.996486,0.0
2,0.627851,0.029936,0.0
3,0.610002,0.885412,0.0
4,0.99995,0.457611,0.0


Split the x, y, and z coordinates out for the target to use a specific dataset for each possible coordinate output.

In [14]:
# One Series per coordinate, taken by column position (0 = x, 1 = y, 2 = z).
target_x, target_y, target_z = (target.iloc[:, column] for column in range(3))

Convert all pandas dataframes to numpy arrays.

In [15]:
# Features and the full three-column target as 2-D arrays.
complete_motion_np, target_np = complete_motion_df.to_numpy(), target.to_numpy()
# Each coordinate of the target as its own 1-D array.
target_x_np, target_y_np, target_z_np = (
    series.to_numpy() for series in (target_x, target_y, target_z)
)

Split into train, validation, and test datasets.

In [16]:
# Carve the rows into consecutive train / validation / test segments.
# 70% and 15% fix the first two segments; the test segment is whatever
# remains, so it absorbs any rounding leftovers.
DATASET_SIZE = len(complete_motion_df)
train_size = int(0.7 * DATASET_SIZE)
val_size = int(0.15 * DATASET_SIZE)
test_size = int(0.15 * DATASET_SIZE)

# Row positions at which each array is cut: [train | validation | test].
split_points = [train_size, train_size + val_size]

X_train, X_valid, X_test = np.split(complete_motion_np, split_points)
y_train_x, y_valid_x, y_test_x = np.split(target_x_np, split_points)
y_train_y, y_valid_y, y_test_y = np.split(target_y_np, split_points)
y_train_z, y_valid_z, y_test_z = np.split(target_z_np, split_points)

In [17]:
#Block to output and troubleshoot datatypes.  For some reason, getting nan with model loss calculation.
type(X_train[0, 0])
# y_train_x

numpy.float64

## Try Creating Single Input, Multiple Output Regression Model

Trying to create a regression NN where instead of designating an output layer of 3 nodes, 3 output layers of a single node are used to designate specific datasets and loss functions.  Still need to figure out later how to get a 3 node output to correspond to the input training data.

Use functional API to build basic NN architecture.

In [18]:
# Functional-API network: two shared 300-unit tanh layers feed three separate
# single-unit output layers, one per coordinate.
input_main = keras.layers.Input(shape=complete_motion_np.shape[1:])
hidden1 = keras.layers.Dense(units=300, activation="tanh")(input_main)
hidden2 = keras.layers.Dense(units=300, activation="tanh")(hidden1)
output_x, output_y, output_z = (
    keras.layers.Dense(units=1, name="output_" + axis)(hidden2) for axis in "xyz"
)

Create model with specified input and output layers

In [19]:
# Create model with specified input and output layers
model = keras.Model(inputs=[input_main], outputs=[output_x, output_y, output_z])
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 18)]         0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 300)          5700        input_1[0][0]                    
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 300)          90300       dense[0][0]                      
__________________________________________________________________________________________________
output_x (Dense)                (None, 1)            301         dense_1[0][0]                    
______________________________________________________________________________________________

In [20]:
# Compile model with specified loss functions for each output and specify weighting to provide each output.
# Weighting X and Y output more than Z
model.compile(loss=["mse", "mse", "mse"], 
              loss_weights=[0.4, 0.4, 0.2], 
              optimizer=keras.optimizers.Adam(learning_rate=1e-4),
              metrics=["mse"])

Train the model with separate x, y, z training sets.

In [21]:
history = model.fit(
    [X_train], [y_train_x, y_train_y, y_train_z],
    epochs=1000,
    validation_data=([X_valid], [y_valid_x, y_valid_y, y_valid_z])
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [22]:
# Convert training history to dataframe for analysis and plotting.
complete_history_data = pd.DataFrame(history.history)
complete_history_data.head(-9)

Unnamed: 0,loss,output_x_loss,output_y_loss,output_z_loss,output_x_mse,output_y_mse,output_z_mse,val_loss,val_output_x_loss,val_output_y_loss,val_output_z_loss,val_output_x_mse,val_output_y_mse,val_output_z_mse
0,0.062579,0.082012,0.069430,1.000729e-02,0.082012,0.069430,1.000729e-02,0.008030,0.009961,0.009380,1.467551e-03,0.009961,0.009380,1.467551e-03
1,0.005898,0.005650,0.008849,4.918633e-04,0.005650,0.008849,4.918633e-04,0.003935,0.002448,0.007356,6.568433e-05,0.002448,0.007356,6.568433e-05
2,0.003809,0.001993,0.007512,3.514256e-05,0.001993,0.007512,3.514256e-05,0.003064,0.001421,0.006225,2.846680e-05,0.001421,0.006225,2.846680e-05
3,0.002991,0.001464,0.006007,1.617736e-05,0.001464,0.006007,1.617736e-05,0.002669,0.001506,0.005162,8.356568e-06,0.001506,0.005162,8.356568e-06
4,0.002491,0.001461,0.004761,1.220002e-05,0.001461,0.004761,1.220002e-05,0.002104,0.001469,0.003788,6.834039e-06,0.001469,0.003788,6.834039e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
986,0.000004,0.000003,0.000007,7.642404e-07,0.000003,0.000007,7.642404e-07,0.000015,0.000010,0.000027,2.654637e-07,0.000010,0.000027,2.654637e-07
987,0.000011,0.000007,0.000022,7.221171e-07,0.000007,0.000022,7.221171e-07,0.000005,0.000007,0.000006,9.011930e-07,0.000007,0.000006,9.011930e-07
988,0.000010,0.000006,0.000020,5.098674e-07,0.000006,0.000020,5.098674e-07,0.000026,0.000020,0.000045,1.341201e-06,0.000020,0.000045,1.341201e-06
989,0.000037,0.000049,0.000042,1.242625e-06,0.000049,0.000042,1.242625e-06,0.000054,0.000112,0.000021,6.507198e-07,0.000112,0.000021,6.507198e-07


In [23]:
import matplotlib.pyplot as plt

In [24]:
# Create a 2x2 figure and plot the training/validation MSE of the x, y and z
# outputs in the first three panels; the fourth panel is unused and hidden.
fig2, mse_plots = plt.subplots(2,2)

mse_panels = [
    (mse_plots[0][0], "output_x_mse"),
    (mse_plots[0][1], "output_y_mse"),
    (mse_plots[1][0], "output_z_mse"),
]

for panel_axes, metric_name in mse_panels:
    metric_columns = [metric_name, "val_" + metric_name]
    panel_axes.plot(complete_history_data[metric_columns])
    # The recorded values fall from about 1e-1 to about 1e-6; a fixed 0-1
    # linear axis flattens them onto the baseline, so use a log axis instead.
    panel_axes.set_yscale("log")
    panel_axes.set_title(metric_name)
    panel_axes.legend(metric_columns)

mse_plots[1][1].set_visible(False)

plt.show()


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [25]:
# Create a 2x2 figure and plot total loss, x coordinate loss, y coordinate
# loss, and z coordinate loss (training and validation) one per panel.
fig, loss_plots = plt.subplots(2,2)

loss_panels = [
    (loss_plots[0][0], "loss"),
    (loss_plots[0][1], "output_x_loss"),
    (loss_plots[1][0], "output_y_loss"),
    (loss_plots[1][1], "output_z_loss"),
]

for panel_axes, loss_name in loss_panels:
    loss_columns = [loss_name, "val_" + loss_name]
    panel_axes.plot(complete_history_data[loss_columns])
    # The recorded losses fall from about 1e-1 to about 1e-6; a fixed 0-1
    # linear axis flattens them onto the baseline, so use a log axis instead.
    panel_axes.set_yscale("log")
    panel_axes.set_title(loss_name)
    panel_axes.legend(loss_columns)


plt.show()


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Evaluate the Model with Test Data

In [26]:
y_test_x.shape

(451,)

In [27]:
model.evaluate([X_test],[y_test_x, y_test_y, y_test_z])



[3.822768576355884e-06,
 6.816439963586163e-06,
 1.3646830439029145e-06,
 2.7515986857906682e-06,
 6.816439963586163e-06,
 1.3646830439029145e-06,
 2.7515986857906682e-06]

### Predict Values and Inspect Differences

In [28]:
x_pred, y_pred, z_pred = model.predict([X_test])

In [29]:
# Put each prediction next to the value it should match, one row per test
# sample. The "model_*" columns hold the held-out targets from the test split.
comparison_columns = np.column_stack(
    (x_pred, y_test_x, y_pred, y_test_y, z_pred, y_test_z)
)
pred_model_comparison = pd.DataFrame(
    data=comparison_columns,
    columns=['pred_x', 'model_x', 'pred_y', 'model_y', 'pred_z', 'model_z'],
)
pred_model_comparison.head(10)

Unnamed: 0,pred_x,model_x,pred_y,model_y,pred_z,model_z
0,0.348529,0.349857,0.966569,0.964938,0.0004071583,0.0
1,0.813266,0.815923,0.768258,0.767925,0.002131606,0.0
2,0.041952,0.04083,1.000534,0.999521,-0.0007146086,0.0
3,0.712017,0.715151,0.836259,0.834126,0.0008625513,0.0
4,0.988962,0.988194,0.369778,0.368709,-0.0006717156,0.0
5,0.797625,0.796888,0.125009,0.123892,8.688075e-07,0.0
6,0.847538,0.846254,0.162578,0.162518,-0.000639855,0.0
7,0.613683,0.61679,0.883019,0.882515,0.001877944,0.0
8,0.864075,0.863789,0.179523,0.178193,-0.0002838793,0.0
9,0.793825,0.792717,0.122576,0.120938,-0.0002958748,0.0


In [30]:
pred_model_comparison[["pred_x", "model_x"]].plot()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.axes._subplots.AxesSubplot at 0x214ffa62f08>

In [31]:
pred_model_comparison[["pred_y", "model_y"]].plot()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.axes._subplots.AxesSubplot at 0x2160aba0548>

In [32]:
pred_model_comparison[["pred_z", "model_z"]].plot()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.axes._subplots.AxesSubplot at 0x2160fccee08>