# Imports

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
import plotly.graph_objs as go
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from utils import CustomDataset, smape, spherical_from_cartesian
from models import BaselineNN, LSTM
from tqdm.notebook import tqdm
from IPython.display import clear_output, display
import ipywidgets as widgets

# Loading data

In [2]:
# Read the training table; the 'epoch' column is parsed into datetimes on load.
data = pd.read_csv(
    'train.csv',
    parse_dates=['epoch'],
)

In [3]:
# Columns used as the prediction targets (passed as `y` to CustomDataset below).
targets = ['ro', 'theta', 'fi', 'dro/dt', 'dtheta/dt', 'dfi/dt']
# Model inputs: the timestamp plus the '_sim'-suffixed counterpart of every target.
features = ['epoch'] + [column + '_sim' for column in targets]

In [4]:
# Re-express 'epoch' as seconds elapsed since the earliest timestamp in the table.
# The subtraction is done in datetime space and only then converted to seconds:
#   * datetime.timestamp() on a naive datetime interprets it as *local* time, so
#     the previous per-row conversion depended on the machine's timezone (and
#     could jump by an hour across a DST change);
#   * the timedelta accessor is vectorized, avoiding a Python call per row.
# NOTE: like the original, this cell is not idempotent - it expects 'epoch' to
# still hold datetimes, i.e. to be run once after loading the data.
data['epoch'] = (data['epoch'] - data['epoch'].min()).dt.total_seconds()

In [5]:
# Convert the table with the project helper (see utils.spherical_from_cartesian),
# then scale every feature / target column by its own largest absolute value.
data = spherical_from_cartesian(data)
for column_group in (features, targets):
    peak_magnitude = data[column_group].abs().max(axis=0)
    # NOTE(review): a column that is identically zero would divide by zero here.
    data[column_group] = data[column_group].div(peak_magnitude, axis='columns')

# Separating satellites

In [6]:
# One frame per satellite, with the identifier columns removed.
data_grouped = data.groupby('sat_id')
sat_datas = [
    group_frame.drop(columns=['id', 'sat_id'])
    for _sat_id, group_frame in data_grouped
]

# Split each satellite's rows in their existing order (no shuffling):
# the first 75% go to training, the remaining 25% to testing.
sat_datas_train, sat_datas_test = [], []
for sat_data in sat_datas:
    leading_rows, trailing_rows = train_test_split(sat_data, test_size=0.25, shuffle=False)
    sat_datas_train.append(leading_rows)
    sat_datas_test.append(trailing_rows)

# Model

In [7]:
# Objects shared by the training and evaluation cells below.
seq_len = 5  # window length handed to both the model and CustomDataset
# The meaning of the first positional argument (14) is defined in models.LSTM,
# which is not visible from this notebook - TODO confirm it matches the inputs.
model = LSTM(14, seq_len=seq_len)
criterion = smape
optimizer = torch.optim.SGD(params=model.parameters(), momentum=0.5, lr=0.001)
# Bar widget (range 0..1) that the loops update with the most recent batch loss.
loss_widget = widgets.FloatProgress(value=0, min=0, max=1, step=0.01, description='Loss')

# Train

In [8]:
# Train the shared model on one satellite after another.
display(loss_widget)
desc = 'satelline number'
model.train()
for train_data in tqdm(sat_datas_train, desc=desc, total=len(sat_datas_train)):
    # Build a dataset / loader restricted to the current satellite's rows.
    train_dataset = CustomDataset(train_data[features], train_data[targets], seq_len=seq_len)
    train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=10)

    for epoch_idx in range(10):  # TODO: adjust number of epochs
        for batch_inputs, batch_targets in train_dataloader:
            # Clear gradients left over from the previous step.
            model.zero_grad()
            predictions = model(batch_inputs)
            loss = criterion(predictions, batch_targets)
            # Show the current batch loss in the widget.
            loss_widget.value = loss
            # Back-propagate, then apply the SGD update.
            loss.backward()
            optimizer.step()

FloatProgress(value=0.0, description='Loss', max=1.0)

HBox(children=(FloatProgress(value=0.0, description='satelline number', max=600.0, style=ProgressStyle(descrip…




# Test

In [None]:
# Evaluate the trained model on the held-out tail of every satellite and report
# the mean per-batch loss.
#
# Fixes relative to the previous version of this cell:
#   * the loader is built from `test_dataset` and the loop iterates
#     `test_dataloader` (it used to reuse the *training* dataset/loader left
#     over from the last satellite of the training cell);
#   * each batch is scored once - the copied 10-"epoch" repetition is removed;
#   * everything runs under torch.no_grad() and losses are stored as Python
#     floats, so no autograd graphs are built or kept alive;
#   * batches are not shuffled, since order is irrelevant for evaluation.
model.eval()
display(loss_widget)
desc = 'sattelite number'
losses = []
with torch.no_grad():
    for test_data in tqdm(sat_datas_test, desc=desc):
        # x_test / y_test stay bound to the last satellite after the loop;
        # a later cell reads them.
        x_test = test_data[features]
        y_test = test_data[targets]
        test_dataset = CustomDataset(x_test, y_test, seq_len=seq_len)
        test_dataloader = DataLoader(test_dataset, batch_size=10, shuffle=False)

        for seq_test_x, test_y in test_dataloader:
            predictions = model(seq_test_x)
            loss = criterion(predictions, test_y).item()
            loss_widget.value = loss
            losses.append(loss)
# Unweighted mean over batches (nan if no batch was produced at all).
score = torch.tensor(losses).mean()
print(score)

In [None]:
# Debug inspection: display the shape of the second entry of the model's
# `hidden_cell` attribute (presumably the LSTM cell state - confirm against
# models.LSTM, which is not visible from this notebook).
model.hidden_cell[1].shape

In [None]:
# Score the model on the last satellite's test split and keep the predictions
# in `y_pred` for inspection.
#
# `x_test` / `y_test` are the DataFrame slices left over from the evaluation
# loop above. During training the model was only ever called on windowed
# batches produced by CustomDataset + DataLoader, so the frames are routed
# through the same pipeline here instead of being passed to the model directly.
# NOTE(review): assumes the model output and the targets are batched along
# dim 0 so that torch.cat can stitch the batches together - confirm against
# models.LSTM / utils.CustomDataset.
model.eval()
eval_dataset = CustomDataset(x_test, y_test, seq_len=seq_len)
eval_dataloader = DataLoader(eval_dataset, batch_size=10, shuffle=False)
pred_batches, true_batches = [], []
with torch.no_grad():  # inference only: no autograd graph needed
    for seq_eval_x, eval_y in eval_dataloader:
        pred_batches.append(model(seq_eval_x))
        true_batches.append(eval_y)
y_pred = torch.cat(pred_batches)
y_true = torch.cat(true_batches)
after_train = criterion(y_pred.squeeze(), y_true)
print('Test loss after Training' , after_train.item())

In [None]:
# Display the shape of the predictions computed in the previous cell.
y_pred.shape