In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import seaborn as sns
import gc
from tqdm import tqdm_notebook

%matplotlib inline

PATH = '../'

In [2]:
%%time
train = pd.read_csv(PATH + 'train.csv')
test = pd.read_csv(PATH + 'Track 1/test.csv')
submission = pd.read_csv(PATH + 'Track 1/submission.csv')

Wall time: 2.56 s


### LB Score Calc

Для подсчета скора просто используйте функцию getLBScore

In [3]:
def smape(satellite_predicted_values, satellite_true_values): 
    # the division, addition and subtraction are pointwise 
    return np.mean(np.abs(satellite_predicted_values - satellite_true_values) / (np.abs(satellite_predicted_values) + np.abs(satellite_true_values)))

### Model training

In [4]:
training_columns = ['epoch', 'sat_id', 'x_sim', 'y_sim', 'z_sim', 'Vx_sim', 'Vy_sim', 'Vz_sim']
prediction_columns = ['x','y','z','Vx', 'Vy', 'Vz']
X = train.loc[:, training_columns]
y = train.loc[:, prediction_columns]
Xtest = test.loc[:, training_columns]

In [5]:
X.drop(columns = 'epoch', inplace = True)
Xtest.drop(columns = 'epoch', inplace = True)

In [6]:
X['r'] = np.sqrt(X.x_sim**2 + X.y_sim**2 + X.z_sim**2)
Xtest['r'] = np.sqrt(Xtest.x_sim**2 + Xtest.y_sim**2 + Xtest.z_sim**2)

X['phi'] = np.arctan(X.y_sim/X.x_sim)
Xtest['phi'] = np.arctan(Xtest.y_sim/Xtest.x_sim)

X['theta'] = np.arccos(X.z_sim/X.r)
Xtest['theta'] = np.arccos(Xtest.z_sim/Xtest.r)



X['Vr'] = np.sqrt(X.Vx_sim**2 + X.Vy_sim**2 + X.Vz_sim**2)
Xtest['Vr'] = np.sqrt(Xtest.Vx_sim**2 + Xtest.Vy_sim**2 + Xtest.Vz_sim**2)

X['Vphi'] = np.arctan(X.Vy_sim/X.Vx_sim)
Xtest['Vphi'] = np.arctan(Xtest.Vy_sim/Xtest.Vx_sim)

X['Vtheta'] = np.arccos(X.Vz_sim/X.Vr)
Xtest['Vtheta'] = np.arccos(Xtest.Vz_sim/Xtest.Vr)

In [21]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
model = Ridge(alpha=0.01)
trainSize = 0.5
submissionValid = train.copy()[['id', 'sat_id', 'x', 'y', 'z', 'Vx', 'Vy', 'Vz']]
smp = []
SMP = []
nTargets = 6
for sat_id in tqdm_notebook(Xtest['sat_id'].unique()):
    if sat_id >= 0:
        smp = []
        satX = X[X['sat_id'] == sat_id]
        satX = pd.concat([satX, satX.shift(1).fillna(0), 
                          satX.shift(2).fillna(0), satX.shift(3).fillna(0),
                         satX.shift(4).fillna(0)], axis = 1)
        satY = y[X['sat_id'] == sat_id]
        
        size = int(satX.shape[0] * trainSize)
        Xtr, Xval = satX.iloc[:size, :], satX.iloc[size:, :]
        ytr, yval = satY.iloc[:size, :], satY.iloc[size:, :]
        for i in range(nTargets):
            model.fit(Xtr, ytr.iloc[:, i])
            ypred = model.predict(Xval)
            smp.append(smape(ypred, yval.iloc[:, i]))
            SMP.append(smape(ypred, yval.iloc[:, i]))
    print(sat_id, 100*(1-np.mean(smp)))
print('Final', 100*(1- np.mean(SMP)))

HBox(children=(IntProgress(value=0, max=300), HTML(value='')))

1 85.93369343769739
2 82.63898116753323
3 86.33780586444158
4 86.5424707826702
6 81.54826650915432
9 96.90944021243115
16 96.36710253823459
20 72.2333912552715
22 74.74436359633486
24 97.96406096831119
25 84.68127422550687
26 65.75740693390702
27 81.37643719789604
28 75.92104065627527
29 98.40160394166178
32 87.28123253698699
34 99.43928884384839
35 62.4943734191959
36 99.65900613982541
37 21.43118863960155
38 88.60596257656579
39 99.00295546548783
40 99.96207794277447
41 85.45721550707077
42 99.56543630055069
44 96.11453149734925
45 80.81780846352729
49 97.00335883681174
51 99.59913035702733
52 80.22468559597372
53 99.61102674500289
54 92.35242907407249
57 99.80881875447263
59 75.12552323950092
63 86.00749952913898
64 94.86179215027578
68 98.67615848680872
70 98.70439490717654
75 97.58087833168616
82 80.89009581890404
84 90.9018493582176
86 84.30390543393837
89 99.41860442106925
90 87.4355713380345
91 96.12115942113286
92 86.94700806837304
93 99.85424036425444
96 99.8332481020137
98 8

In [13]:
test

Unnamed: 0,id,sat_id,epoch,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim
0,3927,1,2014-02-01T00:01:45.162,-13366.891347,-14236.753503,6386.774555,4.333815,-0.692764,0.810774
1,3928,1,2014-02-01T00:22:57.007,-7370.434039,-14498.771520,7130.411325,5.077413,0.360609,0.313402
2,3929,1,2014-02-01T00:44:08.852,-572.068654,-13065.289498,7033.794876,5.519106,2.012830,-0.539412
3,3930,1,2014-02-01T01:05:20.697,6208.945257,-9076.852425,5548.296900,4.849212,4.338955,-1.869600
4,3931,1,2014-02-01T01:26:32.542,10768.200284,-2199.706707,2272.014862,1.940505,6.192887,-3.167724
5,3932,1,2014-02-01T01:47:44.386,10811.062601,5601.275788,-1958.108672,-1.703730,5.680489,-3.254421
6,3933,1,2014-02-01T02:08:56.231,7207.568370,11719.756441,-5639.974363,-3.657275,3.897901,-2.482004
7,3934,1,2014-02-01T02:30:08.076,2087.178056,15604.788815,-8261.164504,-4.242309,2.279618,-1.661818
8,3935,1,2014-02-01T02:51:19.921,-3328.823919,17694.467481,-9937.160206,-4.211431,1.069290,-1.001519
9,3936,1,2014-02-01T03:12:31.766,-8521.041748,18460.550588,-10872.042252,-3.927017,0.180062,-0.490210


In [8]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.ensemble import AdaBoostRegressor
model = Ridge(alpha=0.01)
nTargets = 6
for sat_id in tqdm_notebook(Xtest['sat_id'].unique()):
    satX = X[X['sat_id'] == sat_id].drop(columns = ['sat_id'])
    satX = pd.concat([satX, satX.shift(1).fillna(0), 
                          satX.shift(2).fillna(0), satX.shift(3).fillna(0),
                         satX.shift(4).fillna(0)], axis = 1)
    satY = y[X['sat_id'] == sat_id]
    satXtest = Xtest[Xtest['sat_id'] == sat_id].drop(columns = ['sat_id'])
    satXtest = pd.concat([satXtest, satXtest.shift(1).fillna(0),satXtest.shift(2).fillna(0), satXtest.shift(3).fillna(0),
                         satXtest.shift(4).fillna(0)], axis = 1)
    
    for i in range(nTargets):
        model.fit(satX, satY.iloc[:, i])
        ypred = model.predict(satXtest)
        submission.loc[satXtest.index, submission.columns[i+1]] = ypred

HBox(children=(IntProgress(value=0, max=300), HTML(value='')))




In [9]:
submission.to_csv('submission.csv', index = None)