# Imports

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
import plotly.graph_objs as go
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from utils import CustomDataset, smape, process, features, targets, spherical_from_cartesian
from models import BaselineNN, LSTM, ResNet18
from tqdm.notebook import tqdm
from IPython.display import clear_output, display
import ipywidgets as widgets
import os
import spaceopt

# Loading data

In [2]:
# Load the raw training table; the 'epoch' column is parsed into datetimes on read.
data = pd.read_csv(
    'train.csv',
    parse_dates=['epoch'],
)

# Data processing

In [3]:
# `process` (imported from utils) turns the raw table into two collections, bound here as the
# train and test sets. Later cells iterate over each collection one item at a time.
# NOTE(review): presumably one frame per satellite — confirm against utils.process.
sat_datas_train, sat_datas_test = process(data)

In [4]:
# Sanity check: show the first rows of the first training item.
sat_datas_train[0].head()

Unnamed: 0,epoch,x,y,z,Vx,Vy,Vz,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim,ro_sim,theta_sim,fi_sim,dro/dt_sim,dtheta/dt_sim,dfi/dt_sim
0,-1.730756,-8855.823863,13117.780146,-20728.353233,-0.908303,-3.808436,-2.022083,-0.311082,0.500388,-0.999704,-0.343891,-1.463405,-0.957448,-0.259073,0.430197,-1.291078,-0.004444,-0.93709,0.747771
1,-1.727132,-10567.672384,1619.746066,-24451.813271,-0.30259,-4.272617,-0.612796,-0.378567,0.051732,-1.180247,-0.114952,-1.642185,-0.291497,-0.235384,0.511332,-1.908559,0.434614,-1.353263,0.125439
2,-1.723508,-10578.684043,-10180.46746,-24238.280949,0.277435,-4.047522,0.723155,-0.379212,-0.408885,-1.170478,0.104417,-1.556416,0.340189,-0.171059,0.47889,1.416777,0.757259,-1.006825,-0.538343
3,-1.719884,-9148.251857,-20651.43746,-20720.381279,0.7156,-3.373762,1.722115,-0.323161,-0.817838,-1.000836,0.270316,-1.298146,0.813059,-0.080953,0.373143,0.937656,0.927449,-0.680071,-0.604404
4,-1.71626,-6719.092336,-28929.061629,-14938.907967,0.992507,-2.519732,2.344703,-0.227778,-1.141341,-0.721533,0.37532,-0.970269,1.108234,0.020066,0.240443,0.55224,0.972421,-0.534778,-0.546743


# Model

In [34]:
# Hyper-parameters handed to the LSTM constructor.
seq_len = 20
hidden_dim = 200

# Network, loss function and optimiser used by the training and test cells below.
model = LSTM(hidden_dim=hidden_dim, seq_len=seq_len)
criterion = smape
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, eps=1e-8)

# Train

In [35]:
# NOTE(review): scratch cell — every line below is commented out, so nothing here executes.
# It sketches a single-batch forward pass and loss computation on the first training item.
# train_data = sat_datas_train[0]
# x_train = train_data[features]
# y_train = train_data[targets]
# train_dataset = CustomDataset(x_train, y_train, seq_len=seq_len)
# batch_size=10
# train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# seq_train_x, train_y = next(iter(train_dataloader))
# model.zero_grad()  # refresh gradients
# model.init_hidden_cell(input_dim=13, hidden_dim=100)
# predictions = model(seq_train_x)
# loss = criterion(predictions, train_y)

In [None]:
# Train the shared `model` on every training item in turn, for EPOCH passes over the collection.
loss_widget = widgets.FloatProgress(min=0, max=1, step=0.01, description='Loss', value=0)  # jupyter widget
display(loss_widget)
desc = 'satellite number'  # label of the per-item progress bar
EPOCH = 10
os.makedirs('models', exist_ok=True)  # target directory of the (currently disabled) checkpoint below
model.train()  # the test cell switches to eval mode; switch back so this cell can be re-run
for epoch in range(EPOCH):
    for i, train_data in tqdm(enumerate(sat_datas_train), desc=desc,
                              total=len(sat_datas_train)):
        x_train = train_data[features]
        y_train = train_data[targets]
        train_dataset = CustomDataset(x_train, y_train, seq_len=seq_len)
        train_dataloader = DataLoader(train_dataset, batch_size=10, shuffle=True)
        for seq_train_x, seq_train_y in train_dataloader:
            model.zero_grad()  # refresh gradients
            model.init_hidden_cell()  # reset the recurrent state before each batch
            predictions = model(seq_train_x)
            # Only the last time step of each target window is compared with the prediction.
            loss = criterion(predictions, seq_train_y[:, -1, :])
            loss_widget.value = loss.item()  # hand the widget a plain float, not a graph-attached tensor
            loss.backward()  # compute gradients
            optimizer.step()  # update network parameters
#     torch.save(model.state_dict(), f'models//lstm-3_{i}.pt')

FloatProgress(value=0.0, description='Loss', max=1.0)

HBox(children=(FloatProgress(value=0.0, description='sattelite number', max=600.0, style=ProgressStyle(descrip…

In [32]:
# Shape of the target slice that the training loop passes to the loss (last time step of each window).
seq_train_y[:, -1, :].shape

torch.Size([10, 6])

In [None]:
# Candidate values for each hyper-parameter; the keys are the ones `evaluate` reads from a point.
search_space = dict(
    lr=[0.001, 0.01, 0.1, 1.0],             # Adam learning rate
    eps=[1e-8, 1e-6, 1e-4, 1e-2, 1.0],      # Adam epsilon
    seq_len=[2, 3, 5, 10, 20, 30],          # window length passed to the dataset and the model
    hidden_dim=[13, 26, 50, 100],           # hidden size passed to the LSTM constructor
)

In [None]:
def evaluate(point, epochs=50, batch_size=10):
    """Train a fresh LSTM with the hyper-parameters in ``point`` and score it.

    The model is fitted on the first item of ``sat_datas_train`` and scored on
    the first item of ``sat_datas_test``. Progress is shown in two notebook
    widgets (running loss, running score).

    Parameters
    ----------
    point : mapping
        Must provide the keys ``'seq_len'``, ``'hidden_dim'``, ``'lr'`` and ``'eps'``.
    epochs : int, optional
        Number of passes over the training loader (default 50).
    batch_size : int, optional
        Batch size used for both the training and the test loader (default 10).

    Returns
    -------
    float
        ``1 - mean(loss)`` over all test batches, with ``smape`` as the loss.

    Raises
    ------
    ValueError
        If the test loader yields no batches, so no score can be computed.
    """
    seq_len = point['seq_len']
    criterion = smape

    loss_widget = widgets.FloatProgress(min=0, max=1, step=0.01, description='Loss', value=0)  # jupyter widget
    display(loss_widget)

    model = LSTM(point['hidden_dim'], seq_len=seq_len)
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=point['lr'], eps=point['eps'])

    # --- fit on the first training item -------------------------------------------------
    train_data = next(iter(sat_datas_train))
    train_dataset = CustomDataset(train_data[features], train_data[targets], seq_len=seq_len)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    for epoch in range(epochs):
        for seq_train_x, seq_train_y in train_dataloader:
            model.zero_grad()  # refresh gradients
            # Same per-batch reset as the notebook's main training loop.
            model.init_hidden_cell()
            predictions = model(seq_train_x)
            # The loader yields a whole target window per sample (the training cell's shape
            # probe shows a 3-D tensor); only its last time step is scored, as in that cell.
            loss = criterion(predictions, seq_train_y[:, -1, :])
            loss_widget.value = loss.item()  # plain float for the widget
            loss.backward()  # compute gradients
            optimizer.step()  # update network parameters

    # --- score on the first test item ---------------------------------------------------
    model.eval()
    score_widget = widgets.FloatProgress(min=0, max=1, step=0.01, description='Score', value=0)
    display(score_widget)

    test_data = next(iter(sat_datas_test))
    test_dataset = CustomDataset(test_data[features], test_data[targets], seq_len=seq_len)
    # Shuffling is pointless for a plain average over all batches.
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    loss_sum = 0.0
    n_batches = 0
    score = None
    with torch.no_grad():
        # Iterate the TEST loader: scoring on the training loader would report training fit.
        for seq_test_x, seq_test_y in test_dataloader:
            # NOTE(review): reset mirrors the training loop; confirm the model expects it in eval too.
            model.init_hidden_cell()
            predictions = model(seq_test_x)
            loss_sum += criterion(predictions, seq_test_y[:, -1, :]).item()
            n_batches += 1
            score = 1 - loss_sum / n_batches
            score_widget.value = score
            score_widget.description = f'Score: {round(score, 2)}'

    if n_batches == 0:
        raise ValueError('test loader produced no batches; cannot compute a score')
    return score

In [None]:
from spaceopt import SpaceOpt

# Optimiser over `search_space` that maximises the value stored under the 'score' key.
# NOTE(review): this rebinds the name `spaceopt` — imported as a module in the imports cell —
# to a SpaceOpt instance; the search-loop cell calls methods on this instance.
spaceopt = SpaceOpt(search_space=search_space,
                    target_name='score',
                    objective='max')

In [None]:
# Hyper-parameter search: a random warm-up phase followed by model-guided proposals.
n_iterations = 200  # total number of evaluated points
n_random = 20       # iterations that draw a random point before fit_predict takes over
for iteration in range(n_iterations):
    clear_output()
    print(f'Iteration: {iteration}')
    # exploration during warm-up, exploitation afterwards
    spoint = spaceopt.get_random() if iteration < n_random else spaceopt.fit_predict()
    print(spoint)
    spoint['score'] = evaluate(spoint)
    spaceopt.append_evaluated_spoint(spoint)

In [None]:
# Scratch check: print the input batch shape produced for a window length of 13.
# NOTE(review): this rebinds the notebook-level names `train_data`, `x_train`, `y_train`,
# `train_dataset` and `train_dataloader`.
train_data = next(iter(sat_datas_train))
x_train, y_train = train_data[features], train_data[targets]
train_dataset = CustomDataset(x_train, y_train, seq_len=13)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=10)
for epoch in range(10):
    # Only the first batch of each pass is inspected.
    for seq_train_x, train_y in train_dataloader:
        print(seq_train_x.shape)
        break

# Test

In [None]:
# Score the trained `model` on every test item: score = 1 - mean loss over all test batches so far.
model.eval()
score_widget = widgets.FloatProgress(min=0, max=1, step=0.01, description='Score', value=0)
display(score_widget)
loss_sum = 0.0
i = 0  # number of test batches seen so far
with torch.no_grad():
    for test_data in tqdm(sat_datas_test, desc='satellite number'):
        x_test = test_data[features]
        y_test = test_data[targets]
        test_dataset = CustomDataset(x_test, y_test, seq_len=seq_len)
        # Order does not matter for an average, so the loader is not shuffled.
        test_dataloader = DataLoader(test_dataset, batch_size=10, shuffle=False)

        # Iterate the loader built from the TEST item; iterating `train_dataloader` here would
        # re-score whatever training data that name was last bound to.
        for seq_test_x, seq_test_y in test_dataloader:
            # NOTE(review): reset mirrors the training loop; confirm the model expects it in eval too.
            model.init_hidden_cell()
            predictions = model(seq_test_x)
            # Score only the last time step of each target window, as the training loop does.
            loss = criterion(predictions, seq_test_y[:, -1, :])
            loss_sum += loss.item()
            i += 1
            score = 1 - loss_sum / i
            score_widget.value = score
            score_widget.description = f'Score: {round(score, 2)}'

In [None]:
# Persist the model's state_dict (not the module object) to the working directory.
torch.save(model.state_dict(), 'LSTM-5.pt')

In [None]:
# One-shot loss on `x_test` / `y_test` as last bound by the test-loop cell above.
# NOTE(review): those names hold `test_data[features]` / `test_data[targets]` of the final test
# item, not batches from a DataLoader like the loops above feed the model — confirm that the
# model and the criterion accept them directly, otherwise this cell is stale.
model.eval()
y_pred = model(x_test)
after_train = criterion(y_pred.squeeze(), y_test) 
print('Test loss after Training' , after_train.item())

In [None]:
# Inspect the shape of the predictions produced by the previous cell.
y_pred.shape