### Solution outline:
We split the training data 50/50. We use the last point of the first 50% of the data to predict the orbit of each satellite over the following 50% of the training data. We also use the very last point of the training data to predict the orbit of each satellite for the test data. In the 1st stage we predict using pure physics, with no ML, using the plyades library. 

In [1]:
import pandas as pd
import numpy as np
import gc
from tqdm import tqdm_notebook

# libs for 1st stage simulation
import plyades
import astropy
from astropy import units as units
# libs for 2nd stage prediction
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor

# Directory prefix that is prepended to the CSV file names when the data is loaded.
PATH = '../'
# Seed constant; it is not referenced by the 1st-stage cells — presumably meant
# for the 2nd-stage models (TODO confirm).
RND_STATE = 13

In [2]:
%%time
# Read the three input tables (train set, Track 1 test set, Track 1 submission
# template) from the directory given by PATH.
train, test, submission = (
    pd.read_csv(PATH + relative_name)
    for relative_name in ('train.csv', 'Track 1/test.csv', 'Track 1/submission.csv')
)

Wall time: 2.62 s


In [3]:
# metric function for score calculation
def smape(satellite_predicted_values, satellite_true_values):
    """Mean of |predicted - true| / (|predicted| + |true|), taken elementwise.

    Both arguments must be array-likes that support pointwise arithmetic and
    broadcast against each other. No factor of 2 or 100 is applied here.
    Elements where both values are zero produce 0/0 and therefore NaN.
    """
    absolute_error = np.abs(satellite_predicted_values - satellite_true_values)
    magnitude_sum = np.abs(satellite_predicted_values) + np.abs(satellite_true_values)
    return np.mean(absolute_error / magnitude_sum)

### 1st Stage: Simulation of orbit

In [9]:
# Empty frame with the same columns and dtypes as the submission template; the
# validation loop fills it with the simulated train rows. A zero-row slice is
# used instead of copying one row and dropping label 0 in place, so this does
# not depend on the index containing the label 0.
simulationTrain = submission.iloc[:0].copy()

In [4]:
def findClosest(timesTest, timesSim):
    """For every target time, find the simulated time nearest to it.

    Parameters
    ----------
    timesTest : pandas.Series
        Target epochs; each entry is parsed with ``pd.to_datetime``.
    timesSim : pandas.Series
        Simulated epochs; each entry is converted with ``str`` and then parsed
        with ``pd.to_datetime``.

    Returns
    -------
    list
        One index label of ``timesSim`` per entry of ``timesTest``, pointing at
        the simulated epoch with the smallest absolute time difference. When
        several are equally close the first one wins (``np.argmin`` semantics).
    """
    wanted = timesTest.apply(lambda raw: pd.to_datetime(raw))
    simulated = timesSim.apply(lambda raw: pd.to_datetime(str(raw)))
    sim_values = simulated.values
    sim_labels = simulated.index
    return [
        sim_labels[np.argmin(np.abs(sim_values - moment))]
        for moment in wanted.values
    ]

In [15]:
# Validation: for every satellite, take the head of its training track as the
# initial state, propagate the orbit with plyades, and score the simulation
# against the remaining rows of the same track.
import astropy.units as u
trainSize = 0.01
# Integrator step limits, tried in this order until one propagation succeeds.
MAX_STEP_CANDIDATES = (100000, 10000, 1000, 300)
smp = []
# Per-satellite simulated frames; concatenated once after the loop instead of
# growing a DataFrame row-block by row-block (DataFrame.append is gone in pandas 2.0).
simulatedParts = []
simColumns = list(simulationTrain.columns)
for ID in tqdm_notebook(test['sat_id'].unique()):
    dataTrain = train[train['sat_id'] == ID]
    # NOTE(review): the outline describes a 50/50 split and `trainSize` is defined
    # above, but only the first row is used as the starting state — confirm intent.
    size = 1
    dataTest = dataTrain.iloc[size:]
    dataTrain = dataTrain.iloc[:size]
    if dataTest.empty:
        # Nothing to validate against (no rows after the starting state).
        continue

    dt = dataTrain['epoch'].iloc[-1]
    # Columns 3..8 of the last row are read as x, y, z, Vx, Vy, Vz. Converting to
    # a NumPy array avoids positional [] lookups on a label-indexed Series.
    state = dataTrain.iloc[-1, 3:9].to_numpy(dtype=float)
    iss_r = state[:3] * astropy.units.km
    iss_v = state[3:] * astropy.units.km/astropy.units.s
    iss_t = astropy.time.Time(dt)
    frame = 'ECI'
    # Same central body as the test-set prediction cell, so that both simulated
    # feature sets come from the same model.
    body = plyades.bodies.EARTH
    iss = plyades.State(iss_r, iss_v, iss_t, frame, body)

    @iss.gravity
    def newton_j2(f, t, y, params):
        """Add point-mass plus J2 dynamics to the derivative vector `f` in place.

        `y` holds the state as [rx, ry, rz, vx, vy, vz]; `params['body']`
        supplies `mu`, `j2` and `mean_radius`.
        """
        r = np.sqrt(np.square(y[:3]).sum())
        mu = params['body'].mu.value
        j2 = params['body'].j2
        r_m = params['body'].mean_radius.value
        rx, ry, rz = y[:3]
        f[:3] += y[3:]
        pj = -3/2*mu*j2*r_m**2/r**5
        f[3] += -mu*rx/r**3 + pj*rx*(1-5*rz**2/r**2)
        f[4] += -mu*ry/r**3 + pj*ry*(1-5*rz**2/r**2)
        f[5] += -mu*rz/r**3 + pj*rz*(3-5*rz**2/r**2)

    # Time span from the initial state to the last target epoch, expressed as a
    # fraction of 360 days and then multiplied by `units.year`.
    # NOTE(review): if `units.year` is longer than 360 days the propagation runs
    # slightly past the last target epoch — confirm this margin is intended.
    frac = (pd.to_datetime(dataTest['epoch']).iloc[-1] - pd.to_datetime(dataTrain['epoch']).iloc[-1]) / pd.Timedelta('360 days')
    # Retry with progressively smaller step limits. Only `Exception` is caught so
    # that interrupting the kernel stops the cell instead of triggering a retry;
    # a failure of the last attempt is re-raised.
    for attempt, maxStep in enumerate(MAX_STEP_CANDIDATES):
        try:
            j2_orbit = iss.propagate(dt = frac * units.year, max_step = maxStep, interpolate=200*dataTest.shape[0])
            break
        except Exception:
            if attempt == len(MAX_STEP_CANDIDATES) - 1:
                raise

    timesSim = pd.DataFrame(np.asarray(j2_orbit.table['epoch'])).iloc[:, 0]
    timesTest = dataTest['epoch']

    # One simulated row per target epoch: the simulated time closest to it.
    idx = findClosest(timesTest, timesSim)
    predictions = pd.DataFrame(np.asarray(j2_orbit.table['rx', 'ry', 'rz', 'vx', 'vy', 'vz'])).loc[idx, :]

    real = dataTest[['x', 'y', 'z', 'Vx', 'Vy', 'Vz']].values
    # Keep the first submission column from the real rows and overwrite the
    # remaining columns with the simulated state (explicit labels, not `1:`).
    simulated = dataTest[simColumns].copy()
    simulated[simColumns[1:]] = predictions.values
    simulatedParts.append(simulated)
    smp.append(100*(1 - np.mean(smape(real, predictions.values))))
    print(ID, smp[-1])
if simulatedParts:
    simulationTrain = pd.concat(simulatedParts)
print(f'LB: {np.mean(smp)}')

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "<ipython-input-15-625984d2c177>", line 41, in <module>
    j2_orbit = iss.propagate(dt = frac * units.year, max_step = 100000, interpolate=3*dataTest.shape[0])
  File "C:\Users\Артем\Desktop\IDAO 2020\QualificationsGit\art\plyades\core.py", line 206, in propagate
    for t, y in p:
  File "C:\Users\Артем\Desktop\IDAO 2020\QualificationsGit\art\plyades\propagator.py", line 37, in __iter__
    yield self.step()
  File "C:\Users\Артем\Desktop\IDAO 2020\QualificationsGit\art\plyades\propagator.py", line 31, in step
    self.solver.integrate(self.dt, step=True)
  File "C:\Anaconda3\lib\site-packages\scipy\integrate\_ode.py", line 432, in integrate
    self.f_params, self.jac_params)
  File "C:\Anaconda3\lib\site-packages\scipy\integrate\_ode.py", line 1172, in run
    tuple(self.call_args) + (f_params,)))
  File "C:\Users\Артем\Desktop\IDAO 2020\QualificationsGit\art\plyades\propagator.py", line 17, in _rhs
    def _rhs(self, t, y, params):
Keyboar

KeyboardInterrupt: 

In [None]:
# Write the 1st-stage simulation of the train rows to disk with a fresh 0..n-1
# index and without an index column in the CSV
# (1 - trainSize will be the 2nd stage training set).
simulationTrain = simulationTrain.reset_index(drop=True)
simulationTrain.to_csv('simulationTrain.csv', index=False)

In [None]:
# Prediction for the test set: for every satellite, start from the last row of
# its training track and propagate the orbit with plyades over the test epochs.
# Integrator step limits, tried in this order until one propagation succeeds.
MAX_STEP_CANDIDATES = (100000, 10000, 1000, 300)
for ID in tqdm_notebook(test['sat_id'].unique()):
    dataTrain = train[train['sat_id'] == ID]
    dataTest = test[test['sat_id'] == ID]

    dt = dataTrain['epoch'].iloc[-1]
    # Columns 3..8 of the last train row are read as x, y, z, Vx, Vy, Vz.
    # Converting to a NumPy array avoids positional [] lookups on a
    # label-indexed Series.
    state = dataTrain.iloc[-1, 3:9].to_numpy(dtype=float)
    iss_r = state[:3] * astropy.units.km
    iss_v = state[3:] * astropy.units.km/astropy.units.s
    iss_t = astropy.time.Time(dt)
    frame = 'ECI'
    body = plyades.bodies.EARTH
    iss = plyades.State(iss_r, iss_v, iss_t, frame, body)

    @iss.gravity
    def newton_j2(f, t, y, params):
        """Add point-mass plus J2 dynamics to the derivative vector `f` in place.

        `y` holds the state as [rx, ry, rz, vx, vy, vz]; `params['body']`
        supplies `mu`, `j2` and `mean_radius`.
        """
        r = np.sqrt(np.square(y[:3]).sum())
        mu = params['body'].mu.value
        j2 = params['body'].j2
        r_m = params['body'].mean_radius.value
        rx, ry, rz = y[:3]
        f[:3] += y[3:]
        pj = -3/2*mu*j2*r_m**2/r**5
        f[3] += -mu*rx/r**3 + pj*rx*(1-5*rz**2/r**2)
        f[4] += -mu*ry/r**3 + pj*ry*(1-5*rz**2/r**2)
        f[5] += -mu*rz/r**3 + pj*rz*(3-5*rz**2/r**2)

    # Time span from the last train epoch to the last test epoch, expressed as a
    # fraction of 360 days and then multiplied by `units.year`.
    # NOTE(review): if `units.year` is longer than 360 days the propagation runs
    # slightly past the last test epoch — confirm this margin is intended.
    frac = (pd.to_datetime(dataTest['epoch']).iloc[-1] - pd.to_datetime(dataTrain['epoch']).iloc[-1]) / pd.Timedelta('360 days')
    # Retry with progressively smaller step limits. Only `Exception` is caught so
    # that interrupting the kernel stops the cell instead of triggering a retry;
    # a failure of the last attempt is re-raised.
    for attempt, maxStep in enumerate(MAX_STEP_CANDIDATES):
        try:
            j2_orbit = iss.propagate(dt = frac * units.year, max_step = maxStep, interpolate=300*dataTest.shape[0])
            break
        except Exception:
            if attempt == len(MAX_STEP_CANDIDATES) - 1:
                raise

    timesSim = pd.DataFrame(np.asarray(j2_orbit.table['epoch'])).iloc[:, 0]
    timesTest = dataTest['epoch']

    # One simulated row per test epoch: the simulated time closest to it.
    idx = findClosest(timesTest, timesSim)
    predictions = pd.DataFrame(np.asarray(j2_orbit.table['rx', 'ry', 'rz', 'vx', 'vy', 'vz'])).loc[idx, :]

    # Overwrite every submission column except the first with the simulated
    # state, selecting the columns by label rather than by the integer slice `1:`.
    submission.loc[dataTest.index, submission.columns[1:]] = predictions.values

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

In [11]:
# Write the 1st-stage simulation for the test rows (stored in `submission`) to
# disk, without an index column in the CSV.
submission.to_csv('simulationTest.csv', index=False)