In [1]:
# Project root; the figure and model output folders are resolved against it.
base_dir = './'
fig_dir = base_dir + 'figures/'
model_dir = base_dir + 'models/'

In [2]:
import os
import sys

# Make the local ``src`` package directory importable.
# os.path.join picks the correct separator for the host OS (the previous
# hard-coded '\\src' suffix only produced a valid path on Windows).
src_dir = os.path.join(os.getcwd(), 'src')
if src_dir not in sys.path:
    # Guarded so that re-running the cell does not stack duplicate entries.
    sys.path.insert(0, src_dir)

In [3]:
import matplotlib.pyplot as plt
from tqdm.notebook import trange

---

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [5]:
print('CUDA available :', torch.cuda.is_available())
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA.
# To force CPU execution, replace the expression with torch.device('cpu').
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

CUDA available : True


# Data
- USD is excluded

In [6]:
n_timestamps = 5
lag = 1

# NOTE: despite its name, `data_dir` holds the full path of the serialized split file.
data_dir = f'./data/n_timestamps_{n_timestamps}__lag_{lag}/' + 'data_split.pt'

# NOTE(review): torch.load unpickles the file; only load files from a trusted source.
data_split = torch.load(data_dir)

# Each split's values unpack, in stored order, into (data, positional encoding, dates).
(
    (train_data, train_pos_enc, train_dates),
    (valid_data, valid_pos_enc, valid_dates),
    (test_data, test_pos_enc, test_dates),
) = (data_split[split_name].values() for split_name in ('train', 'valid', 'test'))

# Concat Date Positional Encoding (appended along the last axis)
X_train, X_valid, X_test = (
    torch.cat(pair, dim=-1)
    for pair in (
        (train_data, train_pos_enc),
        (valid_data, valid_pos_enc),
        (test_data, test_pos_enc),
    )
)

# The raw container is no longer needed once the splits are unpacked.
del data_split

In [7]:
# Number of samples per split and in total (first axis of each tensor).
n_train, n_valid, n_test = len(X_train), len(X_valid), len(X_test)
N = n_train + n_valid + n_test

# The two trailing dimensions are shared by all samples.
_, T, D = X_train.shape

print(f'Data shape : (*, {T}, {D})')
print(f'    - Train : {train_dates[0]} ~ {train_dates[-1]} ({n_train:>4d}, {n_train/N*100:.2f}%)')
print(f'    - Valid : {valid_dates[0]} ~ {valid_dates[-1]} ({n_valid:>4d}, {n_valid/N*100:.2f}%)')
print(f'    - Test  : {test_dates[0]} ~ {test_dates[-1]} ({n_test:>4d}, {n_test/N*100:.2f}%)')

Data shape : (*, 5, 36)
    - Train : 2002-01-08 ~ 2015-05-07 (3412, 63.99%)
    - Valid : 2015-05-08 ~ 2018-09-03 ( 853, 16.00%)
    - Test  : 2018-09-04 ~ 2022-10-31 (1067, 20.01%)


# Modules

In [8]:
from manifold import *
from utils_layer import *

## Manifolds
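- Manifolds
    - EuclideanSpace('euclidean')
    - Hypersphere('sphere')
    - HypersphereStereographic('sphere_sp')
    - PoincareHalfplane('p_plane')
    - PoincareBall('p_ball') (not registered in the `Encoder` class defined in this notebook)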
- Methods
    - .forward : output of auxiliary encoder -> local coordinates
    - .metric : local coordinates -> Riemannian metric
    - .distance : two points in local coordinates -> distance

## Encoder

In [9]:
from encoder import EncodingLayer

In [10]:
class Encoder(nn.Module):
    """Encoding network paired with a target manifold.

    ``forward`` runs the input through an ``EncodingLayer``. The remaining
    methods hand their argument to the manifold object selected by
    ``target_manifold``.

    Args:
        dim_data: Forwarded to ``EncodingLayer`` as its first argument
            (this notebook passes a ``(T, D)`` tuple).
        dim_embedding: Forwarded to ``EncodingLayer`` and to the manifold
            constructor.
        target_manifold: Name of the manifold; one of ``'euclidean'``,
            ``'sphere'``, ``'sphere_sp'`` or ``'p_plane'``.
        layer_configs: Optional configuration forwarded to ``EncodingLayer``.
            ``None`` (the default) is treated as an empty dict.
        device: Device the module is moved to; inputs are moved to it as well.

    Raises:
        ValueError: If ``target_manifold`` is not one of the supported names.
    """

    def __init__(self, dim_data, dim_embedding, target_manifold,
                 layer_configs=None, device='cpu'):
        super().__init__()

        # A fresh dict per instance: a ``{}`` default in the signature would be
        # shared by every Encoder created without an explicit ``layer_configs``.
        if layer_configs is None:
            layer_configs = {}

        # Encoding Layer
        self.encoding_layer = EncodingLayer(dim_data, dim_embedding, layer_configs)

        # Target Manifold
        self.manifold = self._set_target_manifold(target_manifold, dim_embedding)

        # Misc.
        self.dim_data = dim_data
        self.dim_embedding = dim_embedding
        self.layer_configs = layer_configs

        self.device = device
        self.to(device)

    def forward(self, x):
        """Return the output of the encoding layer for ``x`` (moved to ``self.device``)."""
        return self.encoding_layer(x.to(self.device))

    def _set_target_manifold(self, target_manifold, dim_embedding):
        """Instantiate the manifold registered under ``target_manifold``.

        Raises:
            ValueError: If ``target_manifold`` is not a registered name.
        """
        manifolds = {'euclidean':EuclideanSpace,
                     'sphere':Hypersphere,
                     'sphere_sp':HypersphereStereographic,
                     'p_plane':PoincareHalfplane
                    }

        if target_manifold not in manifolds:
            raise ValueError(f'Unknown value : "target_manifold={target_manifold}"\n    - Consider one of ["euclidean", "sphere", "sphere_sp", "p_plane"].')

        return manifolds[target_manifold](dim_embedding)

    def distance(self, x1, x2):
        """Return ``self.manifold.distance`` between the manifold images of ``x1`` and ``x2``.

        Both inputs are moved to ``self.device`` and passed through
        ``self.manifold(...)`` before the distance is computed.
        NOTE(review): the encoding layer is not applied here, so the inputs are
        presumably outputs of ``forward`` -- confirm against callers.
        """
        local_coords1 = self.manifold(x1.to(self.device))
        local_coords2 = self.manifold(x2.to(self.device))

        return self.manifold.distance(local_coords1, local_coords2)

    def pullback_metric(self, x):
        """Placeholder; always raises ``NotImplementedError``."""
        # TODO: compute local coordinates via ``self.manifold(x.to(self.device))``
        # and a Jacobian-based pullback. The unreachable sketch that used to
        # follow the ``raise`` has been removed.
        raise NotImplementedError('Method "pullback_metric" is not implemented yet.')

    def get_points_on_manifold(self, x):
        """Map ``x`` to local coordinates, then to points via ``manifold.local_to_manifold``."""
        local_coords = self.manifold(x.to(self.device))

        return self.manifold.local_to_manifold(local_coords)

# Trainer

In [11]:
from trainer import Trainer
from loss import ContrastiveLoss

In [12]:
from data_loader import DataLoader
from utils_layer import jacobian

In [13]:
# dim_data = (T, D)

# optim_configs = {'lr':1e-3}
# layer_configs = {}

# batch_size = 32
# n_epochs = 1000

In [14]:
# dim_embedding = 2**5
# target_manifold = 'sphere'

In [15]:
# model = Encoder(dim_data, dim_embedding, target_manifold=target_manifold,
#                 layer_configs=layer_configs, device=device)

# trainer = Trainer(model, ContrastiveLoss, model_dir=model_dir)

# loss_train, loss_valid = trainer.fit(X_train, batch_size, n_epochs, valid_data=X_valid, 
#                                      optim_configs=optim_configs
#                                     )

In [16]:
# plt.plot(loss_train, label='Train')
# plt.plot(loss_valid, label='Valid')
# plt.legend()
# plt.title(f'Target Manifold : {target_manifold.title()} of Dimension {dim_embedding}')
# plt.xlabel('epochs')
# plt.ylabel('contrastive loss')

In [17]:
# model = Encoder(dim_data, dim_embedding, target_manifold=target_manifold,
#                 layer_configs=layer_configs, device=device)

# loader = DataLoader(X_train, batch_size)

# batch = loader.get_batch(as_triplet=True)

In [18]:
# batch_ref = batch[0]
# J_ref = jacobian(model, batch_ref)

In [19]:
from itertools import product

In [20]:
# Per-sample input shape, taken from the two trailing dims of X_train.
dim_data = (T, D)

# Settings forwarded to Trainer.fit (optimizer) and Encoder (layers).
optim_configs = dict(lr=5e-5)
layer_configs = dict()

batch_size = 128
n_epochs = 1000

# Sweep grid: every (manifold name, embedding dimension) pair to train,
# ordered with the manifold name varying slowest.
manifold_names = ['sphere']
embedding_dims = range(20, 52, 4)  # 20, 24, ..., 48
candidates = [(name, dim) for name in manifold_names for dim in embedding_dims]

In [21]:
# Output sub-directory for this run.
run_id = '221210_01/'

# Guarded so that re-running this cell does not append the run id a second time.
if not fig_dir.endswith(run_id):
    fig_dir = fig_dir + run_id
if not model_dir.endswith(run_id):
    model_dir = model_dir + run_id

os.makedirs(fig_dir, exist_ok=True)
# The training loop saves loss histories under model_dir, so make sure it exists too.
os.makedirs(model_dir, exist_ok=True)

In [22]:
# Train one Encoder per (manifold, embedding dimension) candidate, saving the
# loss history and a loss-curve figure for each. A failing configuration is
# reported and skipped so the remaining candidates still run.
for target_manifold, dim_embedding in candidates:

    print('target_manifold :', target_manifold, ', dim_embedding :', dim_embedding)
    try:
        model = Encoder(dim_data, dim_embedding, target_manifold=target_manifold,
                        layer_configs=layer_configs, device=device)
        trainer = Trainer(model, ContrastiveLoss, model_dir=model_dir)

        loss_train, loss_valid = trainer.fit(X_train, batch_size, n_epochs, valid_data=X_valid,
                                             optim_configs=optim_configs
                                            )
        # Persist the loss curves for this configuration.
        # NOTE(review): 'loss_history' is concatenated directly with 'Loss_...',
        # giving names like 'loss_historyLoss_<name>_dim-<d>.pt'; a separator may
        # be missing. Left unchanged so existing files keep their names.
        torch.save({'loss_train':loss_train, 'loss_valid':loss_valid},
                   model_dir + 'loss_history' + '_'.join(['Loss', model.manifold.name, f'dim-{model.dim_embedding}.pt'])
                   )

        # Draw on an explicit figure so a failure while plotting cannot leak
        # half-drawn state into the next candidate's plot.
        fig, ax = plt.subplots()
        try:
            ax.plot(loss_train, label='Train', linewidth=.7)
            ax.plot(loss_valid, label='Valid', linewidth=.7, alpha=.7)
            ax.legend()
            ax.set_title(f'{model.manifold.name.title()} of dimension {dim_embedding}')
            ax.set_xlabel('epochs')
            ax.set_ylabel('loss')
            fig.savefig(fig_dir + target_manifold.title()+f'_dim_embedding_{dim_embedding}.png')
        finally:
            plt.close(fig)
    except Exception as err:
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt stop the sweep.
        print('Failed : (target_manifold, dim_embedding) = ({}, {})'.format(target_manifold, dim_embedding))
        print(f'    - {type(err).__name__}: {err}')
        

target_manifold : sphere , dim_embedding : 20


  0%|          | 0/1000 [00:00<?, ?it/s]

target_manifold : sphere , dim_embedding : 24


  0%|          | 0/1000 [00:00<?, ?it/s]

target_manifold : sphere , dim_embedding : 28


  0%|          | 0/1000 [00:00<?, ?it/s]

target_manifold : sphere , dim_embedding : 32


  0%|          | 0/1000 [00:00<?, ?it/s]

target_manifold : sphere , dim_embedding : 36


  0%|          | 0/1000 [00:00<?, ?it/s]

target_manifold : sphere , dim_embedding : 40


  0%|          | 0/1000 [00:00<?, ?it/s]

target_manifold : sphere , dim_embedding : 44


  0%|          | 0/1000 [00:00<?, ?it/s]

target_manifold : sphere , dim_embedding : 48


  0%|          | 0/1000 [00:00<?, ?it/s]