In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import seaborn as sns
import gc
from tqdm import tqdm

%matplotlib inline

PATH = '../'

In [2]:
%%time
# Read the three CSV inputs, all resolved relative to PATH.
train = pd.read_csv(PATH + 'train.csv')
test = pd.read_csv(PATH + 'Track 1/test.csv')
# NOTE(review): `submission` is reassigned later by the model-training cell — confirm this load is still needed.
submission = pd.read_csv(PATH + 'Track 1/submission.csv')

Wall time: 3.39 s


### LB Score Calc

Для подсчёта скора используйте функцию `smape`, определённую ниже: итоговый скор равен `100 * (1 - smape)`.

In [3]:
def smape(satellite_predicted_values, satellite_true_values):
    """Mean of |pred - true| / (|pred| + |true|), computed pointwise.

    Parameters
    ----------
    satellite_predicted_values, satellite_true_values : array-like
        Predicted and reference values. They are converted to float arrays and
        compared position by position (NumPy broadcasting applies).

    Returns
    -------
    float
        The averaged ratio, in [0, 1] for finite inputs. A pair where both the
        prediction and the reference are exactly zero contributes 0 (a perfect
        match) instead of the NaN that 0/0 would produce.
    """
    predicted = np.asarray(satellite_predicted_values, dtype=float)
    actual = np.asarray(satellite_true_values, dtype=float)
    numerator = np.abs(predicted - actual)
    denominator = np.abs(predicted) + np.abs(actual)
    # Divide only where the denominator is non-zero; the remaining slots keep the
    # 0.0 they were initialised with, so one 0/0 pair cannot turn the mean into NaN.
    ratio = np.divide(numerator, denominator,
                      out=np.zeros_like(numerator), where=denominator != 0)
    return np.mean(ratio)

### Model training

In [4]:
# Input columns (epoch, satellite id and the *_sim columns) and the columns the
# models are fitted to.
training_columns = ['epoch', 'sat_id', 'x_sim', 'y_sim', 'z_sim', 'Vx_sim', 'Vy_sim', 'Vz_sim']
prediction_columns = ['x','y','z','Vx', 'Vy', 'Vz']
# Explicit copies: X and Xtest get columns added and 'epoch' dropped in place by the
# next cell, so they must own their data instead of being selections of train/test
# (this avoids SettingWithCopyWarning and keeps the source frames untouched).
X = train.loc[:, training_columns].copy()
y = train.loc[:, prediction_columns].copy()
Xtest = test.loc[:, training_columns].copy()

In [5]:
def extractTimeFeats(data):
    """Split the time-of-day part of every 'epoch' value into its components.

    Each value is converted to a string; the text after 'T' and before the first
    '.' is split on ':'.

    Returns three lists of strings (hours, minutes, seconds), one entry per row
    of `data`, in row order.
    """
    hours, minutes, seconds = [], [], []
    for stamp in data['epoch']:
        clock = str(stamp).split('T')[1].split('.')[0]
        parts = clock.split(':')
        hours.append(parts[0])
        minutes.append(parts[1])
        seconds.append(parts[2])
    return hours, minutes, seconds

In [6]:
def _withTimeFeats(frame):
    """Return `frame` with integer 'h', 'm', 's' columns appended and 'epoch' removed.

    A frame that no longer has an 'epoch' column is returned unchanged, so this
    cell can be re-run without raising KeyError.
    """
    if 'epoch' not in frame.columns:
        return frame
    h, m, s = extractTimeFeats(frame)
    # extractTimeFeats yields strings; store them as integers.
    return frame.assign(
        h=[int(v) for v in h],
        m=[int(v) for v in m],
        s=[int(v) for v in s],
    ).drop(columns='epoch')


X = _withTimeFeats(X)
Xtest = _withTimeFeats(Xtest)

In [7]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor

# Hold-out validation per satellite: for every satellite id found in the test
# frame, fit one linear model per target on the first `trainSize` share of that
# satellite's training rows and score it with smape on the remaining rows.
model = LinearRegression()
trainSize = 0.8
submissionValid = train.copy()[['id', 'sat_id', 'x', 'y', 'z', 'Vx', 'Vy', 'Vz']]
submission = test.copy()[['id', 'sat_id', 'x', 'y', 'z', 'Vx', 'Vy', 'Vz']]
smp = []  # smape values (one per target) of the satellite being processed
SMP = []  # smape values over all satellites and targets
nTargets = 6
for sat_id in Xtest['sat_id'].unique():
    if sat_id >= 0:  # NOTE(review): looks like a leftover id filter — confirm before removing
        smp = []
        satMask = X['sat_id'] == sat_id
        satFeats = X[satMask]
        # Append copies of every column except the first, shifted down by 1..4 rows;
        # rows without enough history get 0.
        lagSource = satFeats.iloc[:, 1:]
        satX = pd.concat([satFeats] + [lagSource.shift(lag).fillna(0) for lag in range(1, 5)],
                         axis = 1)
        satY = y[satMask]
        # NOTE(review): the test features use 24/48-row shifts, unlike the 1..4-row
        # shifts used for training, and are not consumed in this cell — confirm intent.
        satXtest = Xtest[Xtest['sat_id'] == sat_id]
        satXtest = pd.concat([satXtest, satXtest.iloc[:, 1:].shift(24).fillna(0),
                              satXtest.iloc[:, 1:].shift(48).fillna(0)], axis = 1)

        size = int(satX.shape[0] * trainSize)
        Xtr, Xval = satX.iloc[:size, :], satX.iloc[size:, :]
        ytr, yval = satY.iloc[:size, :], satY.iloc[size:, :]
        for i in range(nTargets):
            target = ytr.columns[i]
            model.fit(Xtr, ytr[target])
            ypred = model.predict(Xval)
            # Write by row label and target column name. Positional column `i` would
            # land in 'id'/'sat_id' (submissionValid has two leading id columns) and
            # shift every prediction two columns to the left.
            submissionValid.loc[Xval.index, target] = ypred
            score = smape(ypred, yval[target])
            smp.append(score)
            SMP.append(score)
        print(f'Sat id{sat_id} Score: {100*(1 - np.mean(smp))}')
print('Final', 100*(1- np.mean(SMP)))

Sat id1 Score: 86.28497641598007
Sat id2 Score: 80.83072373906063
Sat id3 Score: 83.2486066692097
Sat id4 Score: 88.37027679523956
Sat id6 Score: 84.2534206141141
Sat id9 Score: 97.58625074335609
Sat id16 Score: 97.56275639649355
Sat id20 Score: 69.60684568099455
Sat id22 Score: 78.80363248402986
Sat id24 Score: 98.43140073439147
Sat id25 Score: 86.00725492830382
Sat id26 Score: 74.21830819603487
Sat id27 Score: 88.23172641745224
Sat id28 Score: 77.2820265727567
Sat id29 Score: 98.27053854473118
Sat id32 Score: 83.59442840780787
Sat id34 Score: 99.79414773307438
Sat id35 Score: 69.79451992655976
Sat id36 Score: 99.9457485667806
Sat id37 Score: 19.090880787144883
Sat id38 Score: 89.37467905685916
Sat id39 Score: 99.36628143570793
Sat id40 Score: 99.97708084570915
Sat id41 Score: 88.81955389260887
Sat id42 Score: 99.59088078788255
Sat id44 Score: 97.19244436504096
Sat id45 Score: 85.68205332442233
Sat id49 Score: 98.37982572323737
Sat id51 Score: 99.64157924542634
Sat id52 Score: 90.1950

Sat id471 Score: 86.5965770544782
Sat id473 Score: 50.217199573394126
Sat id474 Score: 98.40483469209333
Sat id475 Score: 98.84669208867261
Sat id476 Score: 85.65045371613513
Sat id477 Score: 86.6365685892279
Sat id479 Score: 99.60819607510157
Sat id480 Score: 90.34122388747443
Sat id481 Score: 77.55193472616877
Sat id482 Score: 90.55936392188897
Sat id483 Score: 82.86400794109011
Sat id486 Score: 90.67957221594585
Sat id488 Score: 99.90639222770817
Sat id489 Score: 99.05427913312401
Sat id491 Score: 85.0555464872765
Sat id495 Score: 92.30863632461224
Sat id498 Score: 80.72541000032577
Sat id499 Score: 99.40210081763976
Sat id502 Score: 79.19446706914133
Sat id504 Score: 73.8490377953637
Sat id505 Score: 88.14664916140885
Sat id506 Score: 99.74678469373312
Sat id508 Score: 82.19796883657534
Sat id509 Score: 85.91561495470525
Sat id510 Score: 94.44259020188838
Sat id511 Score: 86.61174712072783
Sat id514 Score: 24.57620362335523
Sat id515 Score: 54.64673745874522
Sat id516 Score: 96.604

### Visualizing predictions

In [11]:
import plotly.express as px
import plotly.graph_objects as go

In [31]:
# Draw one satellite id at random and collect its rows from each frame.
# NOTE(review): the draw is unseeded, so a re-run selects a different satellite.
satIds = train['sat_id'].unique()
SAT_ID = ID = np.random.choice(satIds, 1)[0]

data = train.loc[train['sat_id'] == ID].reset_index(drop = True)
satTrain = train.loc[train['sat_id'] == SAT_ID]
satTest = test.loc[test['sat_id'] == SAT_ID]

# Rows of the validation frame whose sat_id matches the chosen satellite.
satPredicted = submissionValid.loc[submissionValid['sat_id'] == SAT_ID]

print(f'ID: {SAT_ID}, Total observations: {satTrain.shape[0]}; In test {satTest.shape[0]}')

ID: 522, Total observations: 631; In test 570


In [32]:
# Real, simulated and predicted coordinates of the selected satellite in 3-D.

# Plot-local names are used on purpose: binding the real coordinates to `x, y, z`
# would replace the target frame `y` read by the training cell and break a re-run of it.
xReal, yReal, zReal = (satTrain[axis].values for axis in ('x', 'y', 'z'))
xSim, ySim, zSim = (satTrain[axis].values for axis in ('x_sim', 'y_sim', 'z_sim'))

xTestSim, yTestSim, zTestSim = (satTest[axis].values for axis in ('x_sim', 'y_sim', 'z_sim'))

xPredicted, yPredicted, zPredicted = (satPredicted[axis].values for axis in ('x', 'y', 'z'))

# "Real" points are coloured by their row position within satTrain.
fig1 = go.Scatter3d(x=xReal,
                    y=yReal,
                    z=zReal,
                    mode='markers',
                    marker=dict(
                        color = np.arange(satTrain.shape[0]),
                        colorscale = 'Viridis',
                        showscale = True,
                        size=8,
                        symbol='circle',
                        line=dict(
                            color='rgb(204, 204, 204)',
                            width=0.5),
                        opacity=0.9),
                    name = 'Real')

# The other three traces differ only in data, legend name and marker outline colour.
plainTraceSpecs = [
    ('Simulated', xSim, ySim, zSim, 'rgba(217, 217, 217, 0.14)'),
    ('SimulatedTest', xTestSim, yTestSim, zTestSim, 'rgba(255, 87, 123, 0.05)'),
    ('Prediction', xPredicted, yPredicted, zPredicted, 'rgba(255, 57, 0, 0.05)'),
]
fig2, fig3, fig4 = [
    go.Scatter3d(x=xs,
                 y=ys,
                 z=zs,
                 mode = 'markers',
                 marker=dict(
                     size=8,
                     line=dict(color=outline, width=0.5),
                     opacity=0.8),
                 name = traceName)
    for traceName, xs, ys, zs, outline in plainTraceSpecs
]

fig = go.Figure(data = [fig1, fig2, fig3, fig4])
fig.show()

In [33]:
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler, MinMaxScaler
RND_SEED = 44
simFeatureColumns = ['x_sim', 'y_sim', 'z_sim', 'Vx_sim', 'Vy_sim', 'Vz_sim']
# Dedicated name so the regression estimator bound to `model` by the training cell
# is not replaced by the clustering estimator.
clusterModel = KMeans(n_clusters=24, tol = 1e-3, algorithm = 'elkan', random_state=RND_SEED)
# Alternative tried earlier: DBSCAN(eps = 1000)
labels = clusterModel.fit_predict(satTrain[simFeatureColumns])
print('Num unique labels: ', len(np.unique(labels)))


# Show all x, y, z and x_sim, y_sim, z_sim points that belong to the selected cluster(s).
CLUSTER = 1
CLUSTER2 = 1
CLUSTER3 = 1
CLUSTER4 = 1
selectedClusters = [CLUSTER, CLUSTER2, CLUSTER3, CLUSTER4]
clusterDataTrain = satTrain[np.isin(labels, selectedClusters)]

testLabels = clusterModel.predict(satTest[simFeatureColumns])
clusterDataTest = satTest[np.isin(testLabels, selectedClusters)]
# Look up each predicted row's cluster through its row index. Slicing the tail of
# `labels` is only right when satPredicted is exactly the tail of satTrain; rows
# that have no label (NaN after reindex) are simply not selected.
labelByRow = pd.Series(labels, index = satTrain.index)
predictedLabels = labelByRow.reindex(satPredicted.index)
clusterDataPredicted = satPredicted[predictedLabels.isin(selectedClusters).values]

# Real, simulated and predicted coordinates restricted to the selected cluster(s).

# Plot-local names are used on purpose: binding the real coordinates to `x, y, z`
# would replace the target frame `y` read by the training cell and break a re-run of it.
xReal, yReal, zReal = (clusterDataTrain[axis].values for axis in ('x', 'y', 'z'))
xSim, ySim, zSim = (clusterDataTrain[axis].values for axis in ('x_sim', 'y_sim', 'z_sim'))

xTestSim, yTestSim, zTestSim = (clusterDataTest[axis].values for axis in ('x_sim', 'y_sim', 'z_sim'))
xPredicted, yPredicted, zPredicted = (clusterDataPredicted[axis].values for axis in ('x', 'y', 'z'))

# "Real" points are coloured by their row position within the cluster subset.
fig1 = go.Scatter3d(x=xReal,
                    y=yReal,
                    z=zReal,
                    mode='markers',
                    marker=dict(
                        color = np.arange(clusterDataTrain.shape[0]),
                        colorscale = 'Viridis',
                        showscale = True,
                        size=8,
                        symbol='circle',
                        line=dict(
                            color='rgb(204, 204, 204)',
                            width=0.5),
                        opacity=0.9),
                    name = 'Real')

# The other three traces differ only in data, legend name and marker outline colour.
plainTraceSpecs = [
    ('Simulated', xSim, ySim, zSim, 'rgba(217, 217, 217, 0.14)'),
    ('SimulatedTest', xTestSim, yTestSim, zTestSim, 'rgba(255, 87, 123, 0.05)'),
    ('Prediction', xPredicted, yPredicted, zPredicted, 'rgba(255, 57, 0, 0.05)'),
]
fig2, fig3, fig4 = [
    go.Scatter3d(x=xs,
                 y=ys,
                 z=zs,
                 mode = 'markers',
                 marker=dict(
                     size=8,
                     line=dict(color=outline, width=0.5),
                     opacity=0.8),
                 name = traceName)
    for traceName, xs, ys, zs, outline in plainTraceSpecs
]

fig = go.Figure(data = [fig1, fig2, fig3, fig4])
fig.show()

Num unique labels:  24
