In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import seaborn as sns
import gc
from tqdm import tqdm

%matplotlib inline

# Directory prefix that is prepended to the CSV file names loaded by this notebook.
PATH = '../'

In [2]:
%%time
train = pd.read_csv(PATH + 'train.csv')
test = pd.read_csv(PATH + 'Track 1/test.csv')
submission = pd.read_csv(PATH + 'Track 1/submission.csv')

Wall time: 2.56 s


### LB Score Calc

Для подсчёта скора используйте функцию `smape`, определённую ниже: итоговый скор равен `100 * (1 - SMAPE)`.

In [3]:
def smape(satellite_predicted_values, satellite_true_values):
    """Return the mean of |pred - true| / (|pred| + |true|) over all elements.

    This is the symmetric MAPE variant without the factor of 2, so each
    term lies in [0, 1]. All operations are element-wise (positional).

    Parameters
    ----------
    satellite_predicted_values : array-like of numbers
        Predicted values.
    satellite_true_values : array-like of numbers
        Ground-truth values, same shape as the predictions.

    Returns
    -------
    numpy.float64
        The mean ratio. Elements where prediction and truth are both zero
        contribute 0 (a perfect prediction) instead of the NaN produced by
        a raw 0/0 division, which would otherwise poison the whole mean.
    """
    predicted = np.asarray(satellite_predicted_values, dtype=float)
    actual = np.asarray(satellite_true_values, dtype=float)

    abs_error = np.abs(predicted - actual)
    scale = np.abs(predicted) + np.abs(actual)

    # Divide only where the denominator is non-zero; the remaining slots keep
    # their initial 0.0 (scale == 0 implies both values are 0, i.e. no error).
    ratio = np.zeros_like(scale)
    np.divide(abs_error, scale, out=ratio, where=scale != 0)
    return np.mean(ratio)

### Model training

In [4]:
# Model inputs: timestamp, satellite id and the simulated position/velocity.
training_columns = ['epoch', 'sat_id', 'x_sim', 'y_sim', 'z_sim', 'Vx_sim', 'Vy_sim', 'Vz_sim']
# Model targets: the position/velocity columns without the `_sim` suffix.
prediction_columns = ['x','y','z','Vx', 'Vy', 'Vz']

# Take explicit copies: X and Xtest get columns added and dropped in place
# further down, and mutating a selection of `train`/`test` directly can raise
# SettingWithCopyWarning.
X = train.loc[:, training_columns].copy()
y = train.loc[:, prediction_columns].copy()
Xtest = test.loc[:, training_columns].copy()

In [5]:
def extractTimeFeats(data):
    """Split the time-of-day part of each `epoch` value into its components.

    Each value of ``data['epoch']`` is converted to a string, the text after
    the first 'T' and before the following '.' is taken, and that text is
    split on ':'.

    Returns a tuple ``(h, m, s)`` of three lists holding the first, second
    and third ':'-separated pieces as strings, one entry per row.
    """
    hours, minutes, seconds = [], [], []
    for stamp in data['epoch']:
        clock = str(stamp).split('T')[1].split('.')[0]
        pieces = clock.split(':')
        hours.append(pieces[0])
        minutes.append(pieces[1])
        seconds.append(pieces[2])
    return hours, minutes, seconds

In [6]:
# Replace the raw `epoch` column by integer hour / minute / second columns,
# first on the training features and then on the test features.
for frame in (X, Xtest):
    h, m, s = extractTimeFeats(frame)
    for column, pieces in zip(('h', 'm', 's'), (h, m, s)):
        frame.loc[:, column] = [int(piece) for piece in pieces]
    frame.drop(columns = 'epoch', inplace = True)

In [7]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor

# Per-satellite hold-out validation.
# For every satellite present in the test set, the satellite's rows of X are
# extended with lagged copies of themselves, the first `trainSize` fraction of
# rows is used for fitting and the remainder for scoring, one fit per target.

# Fixed random_state so that repeated runs give the same validation score.
# NOTE(review): scikit-learn 1.0 renamed criterion 'mae' to 'absolute_error'
# and 1.2 removed the old name; switch the value when running on a newer
# scikit-learn.
model = ExtraTreesRegressor(criterion='mae', random_state=0)
trainSize = 0.8
nLags = 4  # number of previous rows appended as extra features
submissionValid = train.copy()[['id', 'sat_id', 'x', 'y', 'z', 'Vx', 'Vy', 'Vz']]
smp = []   # scores of the satellite currently being processed
SMP = []   # scores of all satellites and targets
nTargets = 6
for sat_id in tqdm(Xtest['sat_id'].unique()):
    if sat_id >= 0:
        smp = []
        satMask = X['sat_id'] == sat_id
        satX = X[satMask].drop(columns = ['sat_id'])
        # Lag features: the same columns shifted down by 1..nLags rows; the
        # leading rows that have no predecessor are filled with 0.
        satX = pd.concat([satX] + [satX.shift(lag).fillna(0) for lag in range(1, nLags + 1)],
                         axis = 1)
        satY = y[satMask]

        # Split by row position: the leading block trains, the tail validates.
        size = int(satX.shape[0] * trainSize)
        Xtr, Xval = satX.iloc[:size, :], satX.iloc[size:, :]
        ytr, yval = satY.iloc[:size, :], satY.iloc[size:, :]
        for i in range(nTargets):
            model.fit(Xtr.values, ytr.iloc[:, i])
            ypred = model.predict(Xval.values)
            # Compute the metric once and record it in both score lists.
            score = smape(ypred, yval.iloc[:, i])
            smp.append(score)
            SMP.append(score)
#         satDataTrain = train[train['sat_id'] == sat_id].loc[:, ['x', 'y', 'z']]
#         satDataTrainSim = train[train['sat_id'] == sat_id].loc[:, ['x_sim', 'y_sim', 'z_sim']].shift(1).fillna(0)
#         plt.figure(figsize = (10,5))
#         plt.title(f'Satellite {sat_id}, SCORE: {100*(1- np.mean(smp))}')
#         plt.plot(np.linalg.norm(satDataTrain.values - satDataTrainSim.values, axis = 1))
#         plt.show()
print('Final', 100*(1- np.mean(SMP)))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [25:02<00:00,  5.73s/it]


Final 89.88288330467032


In [8]:
# NOTE(review): no active cell in this notebook appears to write predictions
# into `submission`, so this saves the table as it was loaded — confirm that
# this is intended before uploading the file.
# `index` is documented as a boolean; False omits the row index from the CSV.
submission.to_csv('submission.csv', index = False)


In [None]:
# ids = []
# for sat_id in X['sat_id'].unique():
#     if sat_id not in Xtest['sat_id'].unique():
#         ids.append(sat_id)

In [26]:
# from sklearn.linear_model import LinearRegression, Lasso, Ridge
# from sklearn.ensemble import AdaBoostRegressor
# import pickle
# model = LinearRegression()
# nTargets = 6
# for sat_id in tqdm(ids):
#     satX = X[X['sat_id'] == sat_id].drop(columns = ['sat_id'])
#     satX = pd.concat([satX, satX.shift(1).fillna(0), 
#                           satX.shift(2).fillna(0), satX.shift(3).fillna(0),
#                          satX.shift(4).fillna(0)], axis = 1)
#     satY = y[X['sat_id'] == sat_id]
# #     satXtest = Xtest[Xtest['sat_id'] == sat_id].drop(columns = ['sat_id'])
# #     satXtest = pd.concat([satXtest, satXtest.iloc[:, 1:].shift(1).fillna(0),satXtest.iloc[:, 1:].shift(2).fillna(0), satXtest.iloc[:, 1:].shift(3).fillna(0),
# #                          satXtest.iloc[:, 1:].shift(4).fillna(0)], axis = 1)
    
#     for i in range(nTargets):
#         model.fit(satX, satY.iloc[:, i])
#         modelWeights = f'models/model_{sat_id}_{i}.pkl'  

#         with open(modelWeights, 'wb') as file:  
#             pickle.dump(model, file)
# #         ypred = model.predict(satXtest)
# #         submission.loc[satXtest.index, submission.columns[i+1]] = ypred

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:07<00:00, 39.11it/s]


In [9]:
# submission.to_csv('submission.csv', index = None)