In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import seaborn as sns
import gc
from tqdm import tqdm

%matplotlib inline

# Directory prefix prepended to the input CSV file names read by this notebook.
PATH = '../'

In [5]:
%%time
# Read, in this order, the training set, the Track 1 test set and the Track 1
# submission file; every path is resolved relative to PATH.
train, test, submission = (
    pd.read_csv(PATH + relative_name)
    for relative_name in ('train.csv', 'Track 1/test.csv', 'Track 1/submission.csv')
)

Wall time: 2.55 s


In [6]:
def smape(satellite_predicted_values, satellite_true_values): 
    """Return the mean symmetric absolute percentage error of a prediction.

    For every point the error is ``|pred - true| / (|pred| + |true|)``, so each
    term lies in [0, 1]; the result is the plain mean of those terms (no
    factor of 2 or 100 is applied).

    Points where prediction and truth are both exactly zero would be 0/0;
    they are counted as zero error instead of producing NaN.

    Parameters
    ----------
    satellite_predicted_values : array-like
        Predicted values.
    satellite_true_values : array-like
        Ground-truth values; combined with the predictions positionally
        (both inputs are converted to float NumPy arrays first).

    Returns
    -------
    float
        Mean of the pointwise errors.
    """
    predicted = np.atleast_1d(np.asarray(satellite_predicted_values, dtype=float))
    actual = np.atleast_1d(np.asarray(satellite_true_values, dtype=float))

    # the division, addition and subtraction are pointwise
    abs_error = np.abs(predicted - actual)
    scale = np.abs(predicted) + np.abs(actual)

    # Only divide where the denominator is non-zero; the remaining entries keep
    # the 0.0 they were initialised with (a perfect prediction of zero).
    ratio = np.divide(abs_error, scale, out=np.zeros_like(abs_error), where=scale != 0)
    return np.mean(ratio)

In [7]:
# Target columns, and the feature columns built from them: 'epoch', 'sat_id'
# and one '<target>_sim' column per target.
prediction_columns = ['x','y','z','Vx', 'Vy', 'Vz']
training_columns = ['epoch', 'sat_id'] + [target + '_sim' for target in prediction_columns]

# Feature frames for train and test, and the target frame for train.
X, Xtest = (frame.loc[:, training_columns] for frame in (train, test))
y = train.loc[:, prediction_columns]

In [8]:
# Remove the 'epoch' column from both feature frames.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: running it a second time no longer raises KeyError.
X = X.drop(columns = 'epoch', errors = 'ignore')
Xtest = Xtest.drop(columns = 'epoch', errors = 'ignore')

In [9]:
def splitData(X,y, sat_id, perc = 0.8):
    """Split one satellite's rows into a leading and a trailing part.

    Rows of ``X`` whose ``'sat_id'`` equals ``sat_id`` are selected (the same
    boolean mask is applied to ``y``), and cut at ``int(n_rows * perc)``.

    Parameters
    ----------
    X : DataFrame with a ``'sat_id'`` column.
    y : DataFrame indexed like ``X``.
    sat_id : value compared against ``X['sat_id']``.
    perc : fraction of the satellite's rows placed in the leading part.

    Returns
    -------
    tuple
        ``(Xtr, Xval, ytr, yval)`` - leading/trailing rows of ``X`` followed by
        leading/trailing rows of ``y``.
    """
    sat_mask = X['sat_id'] == sat_id
    sat_features = X[sat_mask]
    sat_targets = y[sat_mask]

    # Position of the first row that belongs to the trailing part.
    cut = int(sat_features.shape[0] * perc)

    return (
        sat_features.iloc[:cut, :],
        sat_features.iloc[cut:, :],
        sat_targets.iloc[:cut, :],
        sat_targets.iloc[cut:, :],
    )

In [10]:
# Satellite ids that the modelling loops test with `sat_id in shiftedSat`.
# NOTE(review): presumably satellites whose simulated series is offset
# relative to the observed one - confirm how this list was derived.
shiftedSat = [
    0, 1, 2, 3, 4, 7, 10, 12, 13, 15,
    17, 18, 19, 21, 23, 25, 27, 32, 33, 37,
    38, 41, 45, 48, 50, 52, 59, 62, 63, 65,
    66, 70, 73, 80, 82, 84, 85, 86, 88, 89,
    90, 92, 94, 98, 99, 100, 101, 109, 112, 118,
    123, 124, 125, 126, 127, 133, 134, 136, 141, 149,
    150, 151, 155, 166, 167, 173, 174, 175, 177, 184,
    186, 188, 190, 195, 202, 204, 206, 208, 209, 213,
    217, 223, 230, 236, 241, 242, 243, 244, 246, 247,
    248, 249, 251, 255, 256, 257, 258, 264, 266, 270,
    272, 274, 275, 278, 280, 282, 283, 286, 290, 291,
    308, 315, 318, 320, 323, 325, 328, 329, 334, 340,
    348, 351, 361, 363, 364, 365, 366, 368, 371, 372,
    375, 376, 380, 381, 382, 385, 390, 392, 406, 407,
    409, 412, 415, 416, 418, 420, 421, 422, 429, 437,
    439, 440, 441, 450, 451, 452, 454, 461, 464, 468,
    470, 472, 473, 476, 477, 478, 480, 482, 483, 486,
    490, 491, 493, 494, 495, 498, 500, 501, 502, 503,
    505, 508, 509, 511, 512, 514, 518, 521, 523, 527,
    529, 530, 531, 533, 535, 536, 538, 539, 541, 542,
    546, 548, 549, 550, 552, 555, 558, 559, 560, 565,
    568, 569, 575, 576, 577, 580, 581, 584, 585, 587,
    588, 590, 592, 595, 596, 597, 598,
]

In [22]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.cluster import KMeans

# Hold-out validation: for every satellite present in the test set, fit one
# LinearRegression per target on the leading `trainSize` fraction of its
# training rows and score the remaining rows with smape().

RND_SEED = 44
trainSize = 0.5    # fraction of each satellite's rows used for fitting
nTargets = 6       # number of target columns in `y` that are modelled
# Row offset applied to the validation features of satellites in shiftedSat.
# NOTE(review): presumably two days of 24 samples each - confirm.
SHIFT_ROWS = 2 * 24

# NOTE(review): `cluster` and `submissionValid` are not used by the loop below;
# they are kept in case another cell relies on them.
cluster = KMeans(n_clusters=24, tol = 1e-3, algorithm = 'elkan', random_state=RND_SEED)
submissionValid = train.copy()[['id', 'sat_id', 'x', 'y', 'z', 'Vx', 'Vy', 'Vz']]

model = LinearRegression()
shiftedSatSet = set(shiftedSat)   # set lookup instead of scanning the list per satellite
SMP = []   # scores of every (satellite, target) pair
smp = []   # scores of the satellite currently being processed

for sat_id in tqdm(Xtest['sat_id'].unique()):
    Xtr, Xval, ytr, yval = splitData(X, y, sat_id=sat_id, perc=trainSize)

    if sat_id in shiftedSatSet:
        # Replace each validation row's features with those found SHIFT_ROWS
        # rows earlier in the satellite's series (train part included); rows
        # without such a predecessor are filled with 0.
        Xall = pd.concat([Xtr, Xval])
        Xval = Xall.shift(SHIFT_ROWS).fillna(0).loc[Xval.index, :]

    smp = []   # reset for every satellite, shifted or not
    for i in range(nTargets):
        model.fit(Xtr, ytr.iloc[:, i])
        ypred = model.predict(Xval)
        score = smape(ypred, yval.iloc[:, i])
        smp.append(score)
        SMP.append(score)
    #print(f'SatId:',sat_id, 'Score:', 100*(1- np.mean(smp)))

# Overall score: 100 * (1 - mean SMAPE over all satellites and targets).
print(100*(1- np.mean(SMP)))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:07<00:00, 39.11it/s]


75.86127856012645


In [None]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.ensemble import RandomForestRegressor


def addLagFeatures(frame, nLags = 4):
    """Return `frame` with `nLags` row-shifted copies of itself appended as columns.

    The k-th copy (k = 1..nLags) holds the values found k rows earlier; rows
    that have no such predecessor are filled with 0. Column names of the
    copies are the same as in `frame`, so the result has repeated names.
    """
    lagged = [frame] + [frame.shift(lag).fillna(0) for lag in range(1, nLags + 1)]
    return pd.concat(lagged, axis = 1)


# Final fit: for every satellite in the test set, train one random forest per
# target on all of that satellite's training rows (with lag features) and
# write the test predictions into `submission`.
# The forest is seeded with RND_SEED so that re-running yields the same file.
model = RandomForestRegressor(random_state = RND_SEED)
nTargets = 6
sh= 3   # NOTE(review): not used in this cell; kept in case another cell reads it

# The original code branched on `sat_id not in shiftedSat`, but both branches
# performed exactly the same steps, so every satellite now takes one path.
for sat_id in tqdm(Xtest['sat_id'].unique()):
    satMask = X['sat_id'] == sat_id
    satX = addLagFeatures(X[satMask].drop(columns = ['sat_id']))
    satY = y[satMask]
    satXtest = addLagFeatures(Xtest[Xtest['sat_id'] == sat_id].drop(columns = ['sat_id']))

    for i in range(nTargets):
        model.fit(satX, satY.iloc[:, i])
        ypred = model.predict(satXtest)
        # Target i goes to submission column i+1 (column 0 is skipped).
        # NOTE(review): assumes `submission` shares its row index with the test
        # frame - confirm both files list rows in the same order.
        submission.loc[satXtest.index, submission.columns[i+1]] = ypred

  3%|██████                                                                                                                                                                                                    | 9/300 [00:03<01:12,  4.02it/s]

In [27]:
# Write the predictions to 'submission.csv' in the working directory, without
# the row index (`index` is documented as a bool, so pass False rather than None).
submission.to_csv('submission.csv', index = False)