In [76]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import seaborn as sns
import gc
from tqdm import tqdm
from warnings import filterwarnings
import time
filterwarnings('ignore')

%matplotlib inline

PATH = '../'

In [77]:
%%time
train = pd.read_csv(PATH + 'train.csv')
test = pd.read_csv(PATH + 'Track 1/test.csv')
submission = pd.read_csv(PATH + 'Track 1/submission.csv')

Wall time: 2.56 s


In [78]:
def smape(satellite_predicted_values, satellite_true_values): 
    # the division, addition and subtraction are pointwise 
    return np.mean(np.abs(satellite_predicted_values - satellite_true_values) / (np.abs(satellite_predicted_values) + np.abs(satellite_true_values)))

In [79]:
training_columns = ['id', 'epoch', 'sat_id', 'x_sim', 'y_sim', 'z_sim', 'Vx_sim', 'Vy_sim', 'Vz_sim']
prediction_columns = ['x','y','z','Vx', 'Vy', 'Vz']
X = train.loc[:, training_columns]
y = train.loc[:, prediction_columns]
Xtest = test.loc[:, training_columns]

In [80]:
def extractTimeFeats(data):
    timeFeats = data['epoch'].apply(lambda x: str(x).split('T')[1].split('.')[0].split(':')).values
    h,m,s = [],[],[]
    for feat in timeFeats:
        h.append(feat[0]); m.append(feat[1]); s.append(feat[2])
    return h,m,s

In [81]:
def generateSatFeats(X, Xtest):
    h,m,s = extractTimeFeats(X)
    
    X.loc[:, 'h'] = list(map(int, h)); X.loc[:, 'm'] = list(map(int, m)); X.loc[:, 's'] =list(map(int, s))
    X.drop(columns = ['sat_id', 'id', 'epoch'], inplace = True)

    h,m,s = extractTimeFeats(Xtest)
    
    Xtest.loc[:, 'h'] = list(map(int, h)); Xtest.loc[:, 'm'] = list(map(int, m)); Xtest.loc[:, 's'] = list(map(int, s))
    Xtest.drop(columns = ['sat_id', 'id', 'epoch'], inplace = True)
    
    satX = pd.concat([X, X.shift(1).fillna(0), 
                      X.shift(2).fillna(0), X.shift(3).fillna(0),
                     X.shift(4).fillna(0)], axis = 1)
    satXtest = pd.concat([Xtest, Xtest.shift(1).fillna(0), Xtest.shift(2).fillna(0),
                         Xtest.shift(3).fillna(0), Xtest.shift(4).fillna(0)], axis = 1)
    return satX, satXtest

In [86]:
def train_model(X, X_test, y, folds, nTargets=6, model=None, foldWeights = [1/6, 2/6, 3/6]):
    oofSubmission = X[["id", "x_sim", "y_sim", "z_sim", "Vx_sim", "Vy_sim", "Vz_sim"]]
    oofSubmission.columns = ["id", "x", "y", "z", "Vx", "Vy", "Vz"]
    
    submission = X_test[["id", "x_sim", "y_sim", "z_sim", "Vx_sim", "Vy_sim", "Vz_sim"]]
    submission.columns = ["id", "x", "y", "z", "Vx", "Vy", "Vz"]

    oofSubmission.iloc[:, 1:] = np.zeros(oofSubmission.iloc[:, 1:].shape)
    submission.iloc[:, 1:] = np.zeros(submission.iloc[:, 1:].shape)
    
    model = model
    SMAPE = []
    for sat_id in X_test['sat_id'].unique():
        satSmape = []
        satX, satXtest = X[X['sat_id'] == sat_id], X_test[X_test['sat_id'] == sat_id]
        satX, satXtest = generateSatFeats(satX, satXtest)
        
        for fold_n, (train_index, valid_index) in enumerate(folds.split(satX)):
            foldSmape = []
            X_train, X_valid = satX.iloc[train_index], satX.iloc[valid_index]
            y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]
            for target in range(nTargets):
                model.fit(X_train, y_train.iloc[:, target])
                y_pred_valid = model.predict(X_valid)
                y_pred = model.predict(satXtest)
                foldSmape.append(smape(y_pred_valid, y_valid.iloc[:, target]))
                oofSubmission.loc[valid_index, oofSubmission.columns[target+1]]+=y_pred_valid * foldWeights[fold_n]
                submission.loc[satXtest.index, submission.columns[target+1]]+= y_pred* foldWeights[fold_n]
            satSmape.append(np.mean(foldSmape))
            #print(f'Fold {fold_n}. Sat {sat_id}. Score: {100*(1-np.mean(foldSmape)):.4f}')
        print(f'Sat {sat_id}. Mean Score {100*(1-np.mean(satSmape))}. Average score {100*(1-np.average(satSmape, weights = foldWeights))} ')
        print('---------------------------')
        SMAPE.append(np.mean(satSmape))
    print(f'Mean Score: {100*(1 - np.mean(SMAPE))}')
    return oofSubmission, submission

In [None]:
from sklearn.model_selection import TimeSeriesSplit
from sklearn.linear_model import LinearRegression
folds = TimeSeriesSplit(n_splits=3)
oofSubmission, submission = train_model(X,Xtest,y, folds = folds, model = LinearRegression())

Sat 1. Mean Score 63.41317304678093. Average score 60.860806895772804 
---------------------------
Sat 2. Mean Score 91.01378484524709. Average score 88.53111413714261 
---------------------------
Sat 3. Mean Score 91.0661842866026. Average score 87.21243512302509 
---------------------------
Sat 4. Mean Score 70.3223500773254. Average score 63.377139596700374 
---------------------------
Sat 6. Mean Score 91.2936707791284. Average score 89.08365779617667 
---------------------------
Sat 9. Mean Score 93.143831765495. Average score 90.13295953341594 
---------------------------
Sat 16. Mean Score 93.99791485081562. Average score 96.12501114195977 
---------------------------
Sat 20. Mean Score 92.4521038790961. Average score 94.80007397795542 
---------------------------
Sat 22. Mean Score 99.06375125735735. Average score 99.09868807825158 
---------------------------
Sat 24. Mean Score 89.99586585871059. Average score 88.83146339115932 
---------------------------
Sat 25. Mean Score 8

In [9]:
submission.to_csv('submission.csv', index = None)