In [84]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import seaborn as sns
import gc
from tqdm import tqdm
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.cluster import KMeans
RND_SEED = 44


%matplotlib inline

PATH = '../'

In [85]:
%%time
train = pd.read_csv(PATH + 'train.csv')
test = pd.read_csv(PATH + 'Track 1/test.csv')
submission = pd.read_csv(PATH + 'Track 1/submission.csv')

Wall time: 2.6 s


In [86]:
def smape(satellite_predicted_values, satellite_true_values): 
    # the division, addition and subtraction are pointwise 
    return np.mean(np.abs(satellite_predicted_values - satellite_true_values) / (np.abs(satellite_predicted_values) + np.abs(satellite_true_values)))

In [87]:
training_columns = ['epoch', 'sat_id', 'x_sim', 'y_sim', 'z_sim', 'Vx_sim', 'Vy_sim', 'Vz_sim']
prediction_columns = ['x','y','z','Vx', 'Vy', 'Vz']
X = train.loc[:, training_columns]
y = train.loc[:, prediction_columns]
Xtest = test.loc[:, training_columns]

In [88]:
X.drop(columns = 'epoch', inplace = True)
Xtest.drop(columns = 'epoch', inplace = True)

In [89]:
linearSat = [6,
 9,
 24,
 26,
 28,
 29,
 34,
 35,
 36,
 39,
 40,
 42,
 44,
 49,
 51,
 53,
 54,
 57,
 64,
 68,
 75,
 91,
 93,
 102,
 106,
 113,
 114,
 121,
 128,
 130,
 132,
 135,
 138,
 142,
 143,
 144,
 145,
 152,
 156,
 158,
 159,
 160,
 162,
 165,
 170,
 172,
 183,
 187,
 191,
 193,
 203,
 207,
 210,
 211,
 220,
 225,
 227,
 229,
 233,
 239,
 240,
 245,
 253,
 254,
 261,
 263,
 268,
 277,
 281,
 284,
 292,
 293,
 294,
 298,
 299,
 306,
 309,
 310,
 312,
 314,
 316,
 319,
 326,
 332,
 333,
 342,
 343,
 344,
 347,
 356,
 358,
 362,
 372,
 373,
 377,
 384,
 387,
 389,
 391,
 393,
 396,
 399,
 400,
 411,
 419,
 423,
 424,
 426,
 428,
 435,
 436,
 443,
 446,
 448,
 456,
 460,
 462,
 465,
 467,
 471,
 474,
 479,
 488,
 489,
 499,
 510,
 516,
 517,
 519,
 522,
 528,
 543,
 544,
 547,
 566,
 572,
 578,
 579,
 583,
 593]

In [93]:
model = LinearRegression()
trainSize = 0.3
cluster = KMeans(n_clusters=24, tol = 1e-3, algorithm = 'elkan', random_state=RND_SEED)
submissionValid = train.copy()[['id', 'sat_id', 'x', 'y', 'z', 'Vx', 'Vy', 'Vz']]
smp = []
SMP = []
nTargets = 6
for sat_id in Xtest['sat_id'].unique():
    if sat_id not in linearSat:
        smp = []
        satX = X[X['sat_id'] == sat_id].drop(columns = ['sat_id'])
        satX = pd.concat([satX, satX.shift(1).fillna(0), 
                          satX.shift(2).fillna(0), satX.shift(3).fillna(0),
                         satX.shift(4).fillna(0)], axis = 1)
        satY = y[X['sat_id'] == sat_id]
        size = int(satX.shape[0] * trainSize)
        
        Xtr, Xval = satX.iloc[:size, :], satX.iloc[size:, :]
        ytr, yval = satY.iloc[:size, :], satY.iloc[size:, :]
        for i in range(nTargets):
            model.fit(Xtr, ytr.iloc[:, i])
            ypred = model.predict(Xval)
            smp.append(smape(ypred, yval.iloc[:, i]))
            SMP.append(smape(ypred, yval.iloc[:, i]))
        print(f'SatId:',sat_id, 'Score:', 100*(1- np.mean(smp)))
    else:
        satX = X[X['sat_id'] == sat_id].drop(columns = ['sat_id'])
        satY = y[X['sat_id'] == sat_id]
        satXtest = Xtest[Xtest['sat_id'] == sat_id].drop(columns = ['sat_id'])
        
        size = int(satX.shape[0] * trainSize)
        Xtr, Xval = satX.iloc[:size, :], satX.iloc[size:, :]
        ytr, yval = satY.iloc[:size, :], satY.iloc[size:, :]

        labelsTrain = cluster.fit_predict(Xtr)
        labelsTest = cluster.predict(Xval)
        smp = []
        for label in np.unique(labelsTest):
            for i in range(nTargets):
                model.fit(Xtr[labelsTrain == label], ytr[labelsTrain == label].iloc[:, i])
                ypred = model.predict(Xval[labelsTest == label])
                smp.append(smape(ypred, yval[labelsTest == label].iloc[:, i]))
                SMP.append(smape(ypred, yval[labelsTest == label].iloc[:, i]))
        print(f'SatId:',sat_id, 'Score:', 100*(1- np.mean(smp)))
print('Final', 100*(1- np.mean(SMP)))

SatId: 1 Score: 77.5372122726189
SatId: 2 Score: 77.96593429328539
SatId: 3 Score: 79.8173228610597
SatId: 4 Score: 77.77301520776733
SatId: 6 Score: 92.13663286931155
SatId: 9 Score: 98.51823043334706
SatId: 16 Score: 91.97079249176385
SatId: 20 Score: 60.412154486230804
SatId: 22 Score: 62.19726636939704
SatId: 24 Score: 98.98074089246758
SatId: 25 Score: 81.72949335167694
SatId: 26 Score: 77.00781330419714
SatId: 27 Score: 86.31656957720548
SatId: 28 Score: 88.19442842586344
SatId: 29 Score: 99.13218625331818
SatId: 32 Score: 80.38877891111879
SatId: 34 Score: 99.55694927761805
SatId: 35 Score: 67.52130551807679
SatId: 36 Score: 98.9076201363261
SatId: 37 Score: 30.59748678395686
SatId: 38 Score: 71.9941907297324
SatId: 39 Score: 99.16584393586214
SatId: 40 Score: 99.62364309471597
SatId: 41 Score: 84.27798941016077
SatId: 42 Score: 99.56367611213774
SatId: 44 Score: 97.79470745512361
SatId: 45 Score: 77.36681334591871
SatId: 49 Score: 98.8542978065398
SatId: 51 Score: 99.2782890488

SatId: 462 Score: 99.79150519503133
SatId: 464 Score: 89.32574943414305
SatId: 465 Score: 97.56074649773586
SatId: 466 Score: 76.27873575422885
SatId: 467 Score: 98.82807279098697
SatId: 468 Score: 78.27535663965872
SatId: 470 Score: 88.83421280994412
SatId: 471 Score: 89.50842521944608
SatId: 473 Score: 59.12699604856855
SatId: 474 Score: 97.81920298899776
SatId: 475 Score: 98.87849003422635
SatId: 476 Score: 47.738566862230215
SatId: 477 Score: 75.95435158533039
SatId: 479 Score: 97.3678339587921
SatId: 480 Score: 81.31857116103184
SatId: 481 Score: 53.663051537929405
SatId: 482 Score: 79.35163993023157
SatId: 483 Score: 89.09750964462226
SatId: 486 Score: 76.70000461386455
SatId: 488 Score: 98.95206665599817
SatId: 489 Score: 97.38893900468798
SatId: 491 Score: 80.11207306100009
SatId: 495 Score: 84.72201608764503
SatId: 498 Score: 70.05462124391563
SatId: 499 Score: 99.4212798240892
SatId: 502 Score: 63.68575908585796
SatId: 504 Score: 57.9120204606294
SatId: 505 Score: 81.67269017

In [91]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.ensemble import AdaBoostRegressor
model = LinearRegression()
cluster = KMeans(n_clusters=24, tol = 1e-3, algorithm = 'elkan', random_state=RND_SEED)
nTargets = 6
for sat_id in tqdm(Xtest['sat_id'].unique()):
    if sat_id not in linearSat:
        satX = X[X['sat_id'] == sat_id].drop(columns = ['sat_id'])
        satX = pd.concat([satX, satX.shift(1).fillna(0), 
                              satX.shift(2).fillna(0), satX.shift(3).fillna(0),
                             satX.shift(4).fillna(0)], axis = 1)
        satY = y[X['sat_id'] == sat_id]
        satXtest = Xtest[Xtest['sat_id'] == sat_id].drop(columns = ['sat_id'])
        satXtest = pd.concat([satXtest, satXtest.shift(1).fillna(0),satXtest.shift(2).fillna(0), satXtest.shift(3).fillna(0),
                             satXtest.shift(4).fillna(0)], axis = 1)
        
        
        for i in range(nTargets):
            model.fit(satX, satY.iloc[:, i])
            ypred = model.predict(satXtest)
            submission.loc[satXtest.index, submission.columns[i+1]] = ypred
    else:
        satX = X[X['sat_id'] == sat_id].drop(columns = ['sat_id'])
        satY = y[X['sat_id'] == sat_id]
        satXtest = Xtest[Xtest['sat_id'] == sat_id].drop(columns = ['sat_id'])
        

        labelsTrain = cluster.fit_predict(satX)
        labelsTest = cluster.predict(satXtest)
        smp = []
        for label in np.unique(labelsTest):
            for i in range(nTargets):
                model.fit(satX[labelsTrain == label], satY[labelsTrain == label].iloc[:, i])
                ypred = model.predict(satXtest[labelsTest == label])
                submission.loc[satXtest[labelsTest == label].index, submission.columns[i+1]] = ypred

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [01:44<00:00,  2.88it/s]


In [92]:
submission.to_csv('submission.csv', index = None)