In [108]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [1]:
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler

In [2]:
TRAIN_PATH = 'train.csv'
TEST_PATH = './Track_1/test.csv'

SEED = 42
HOLDOUT_SIZE = 0.15

np.random.seed(SEED)
random.seed(SEED)

In [3]:
def SMAPE(y_pred, y_true, id_s): 
    unique_ids = np.unique(id_s)
    
    def _smape(y_pred, y_true):
        return np.mean(np.abs((y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true))))
    
    res = 0
    for sat_id in unique_ids:
        res += _smape(y_pred[id_s == sat_id], y_true[id_s == sat_id])
        
    return res / float(len(unique_ids))

In [4]:
def get_features(df):
    return df

In [5]:
train_df = pd.read_csv(TRAIN_PATH)
train_df['epoch'] = pd.to_datetime(train_df['epoch'])

train_df = get_features(train_df)

sc_x = StandardScaler()
sc_y = StandardScaler()

y_cols = ['x', 'y', 'z', 'Vx', 'Vy', 'Vz']
x_cols = train_df.columns.drop(y_cols + ['id', 'epoch', 'sat_id'])

train_df.loc[:, x_cols] = sc_x.fit_transform(train_df.loc[:, x_cols])
train_df.loc[:, y_cols] = sc_y.fit_transform(train_df.loc[:, y_cols])

train_df.head()

Unnamed: 0,id,epoch,sat_id,x,y,z,Vx,Vy,Vz,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim
0,0,2014-01-01 00:00:00.000,0,-0.316586,0.495279,-1.000062,-0.343963,-1.466583,-0.955466,-0.311082,0.500388,-0.999704,-0.343891,-1.463405,-0.957448
1,1,2014-01-01 00:46:43.000,0,-0.384004,0.046689,-1.180357,-0.114443,-1.645311,-0.289528,-0.378567,0.051732,-1.180247,-0.114952,-1.642185,-0.291497
2,2,2014-01-01 01:33:26.001,0,-0.384438,-0.413691,-1.170018,0.105344,-1.558641,0.341756,-0.379212,-0.408885,-1.170478,0.104417,-1.556416,0.340189
3,3,2014-01-01 02:20:09.001,0,-0.328102,-0.82221,-0.999676,0.271375,-1.299216,0.8138,-0.323161,-0.817838,-1.000836,0.270316,-1.298146,0.813059
4,4,2014-01-01 03:06:52.002,0,-0.232434,-1.145158,-0.719729,0.376303,-0.970379,1.107995,-0.227778,-1.141341,-0.721533,0.37532,-0.970269,1.108234


In [6]:
# make holdout from 10% data
holdout_df = train_df.groupby('sat_id').tail(50)
train_df = pd.concat([train_df, holdout_df]).drop_duplicates(keep=False)

In [7]:
pred = []
for sat_id, df in tqdm(train_df.groupby('sat_id')):
    
    buf = []
    models = [KNeighborsRegressor(n_jobs=-1) for _ in range(6)]
    for i in range(6):
        models[i].fit(df.loc[:, x_cols].values, df.loc[:, y_cols[i]].values)
        
        val_df = holdout_df[holdout_df['sat_id'] == sat_id]
        buf.append(models[i].predict(val_df.loc[:, x_cols]))
    buf = np.array(buf).T
    pred.extend(buf)

100%|██████████| 600/600 [06:36<00:00,  1.49it/s]


In [8]:
pred = np.array(pred)

In [9]:
score = 100 * (1 - SMAPE(pred, holdout_df.loc[:, y_cols].values, holdout_df['sat_id'].values))
print(score)

92.4650882497159


## Make prediction

In [10]:
train_df = pd.read_csv(TRAIN_PATH)
train_df['epoch'] = pd.to_datetime(train_df['epoch'])

train_df = get_features(train_df)

sc_x = StandardScaler()
sc_y = StandardScaler()

y_cols = ['x', 'y', 'z', 'Vx', 'Vy', 'Vz']
x_cols = train_df.columns.drop(y_cols + ['id', 'epoch', 'sat_id'])

train_df.loc[:, x_cols] = sc_x.fit_transform(train_df.loc[:, x_cols])
train_df.loc[:, y_cols] = sc_y.fit_transform(train_df.loc[:, y_cols])

train_df.head()

Unnamed: 0,id,epoch,sat_id,x,y,z,Vx,Vy,Vz,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim
0,0,2014-01-01 00:00:00.000,0,-0.316586,0.495279,-1.000062,-0.343963,-1.466583,-0.955466,-0.311082,0.500388,-0.999704,-0.343891,-1.463405,-0.957448
1,1,2014-01-01 00:46:43.000,0,-0.384004,0.046689,-1.180357,-0.114443,-1.645311,-0.289528,-0.378567,0.051732,-1.180247,-0.114952,-1.642185,-0.291497
2,2,2014-01-01 01:33:26.001,0,-0.384438,-0.413691,-1.170018,0.105344,-1.558641,0.341756,-0.379212,-0.408885,-1.170478,0.104417,-1.556416,0.340189
3,3,2014-01-01 02:20:09.001,0,-0.328102,-0.82221,-0.999676,0.271375,-1.299216,0.8138,-0.323161,-0.817838,-1.000836,0.270316,-1.298146,0.813059
4,4,2014-01-01 03:06:52.002,0,-0.232434,-1.145158,-0.719729,0.376303,-0.970379,1.107995,-0.227778,-1.141341,-0.721533,0.37532,-0.970269,1.108234


In [11]:
test_df = pd.read_csv(TEST_PATH)
test_df['epoch'] = pd.to_datetime(test_df['epoch'])

test_df = get_features(test_df)

test_df.loc[:, x_cols] = sc_x.transform(test_df.loc[:, x_cols])

In [12]:
pred = []
for sat_id, df in tqdm(train_df.groupby('sat_id')):
    pred_df = test_df[test_df['sat_id'] == sat_id]
    if len(pred_df) == 0:
        continue
    
    buf = []
    models = [KNeighborsRegressor(n_jobs=-1) for _ in range(6)]
    for i in range(6):
        models[i].fit(df.loc[:, x_cols].values, df.loc[:, y_cols[i]].values)
        
        buf.append(models[i].predict(pred_df.loc[:, x_cols]))
    buf = np.array(buf).T
    pred.extend(buf)

100%|██████████| 600/600 [03:19<00:00,  3.00it/s]


In [13]:
pred = np.array(pred)
pred = sc_y.inverse_transform(pred)

submit = pd.DataFrame(data=pred, columns=y_cols)
submit['id'] = test_df['id']
submit.head()
submit.loc[:, ['id'] + y_cols].to_csv("knn_submit.csv", index=None)