In [35]:
import os
from argparse import Namespace
from datetime import datetime
from operator import itemgetter

import comet_ml
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.models as models
from torch import nn
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader, random_split, Subset

# working directory
# All locations are plain strings. DATA_DIR and CHECKPOINT_TEMP_DIR are built from ROOT_DIR.
ROOT_DIR = '.'
DATA_DIR = f'{ROOT_DIR}/data'
# NOTE(review): absolute, machine-specific path that is not derived from ROOT_DIR.
CHECKPOINT_DIR = 'd:/checkpoints/earnings-call'
CHECKPOINT_TEMP_DIR = f'{ROOT_DIR}/checkpoint/earnings-call/temp'

# Base

## `Dataset`

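> `val` and `train` are drawn from the same period: the transcripts of the training window are shuffled and split 90/10 into `train` and `val`.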

In [36]:
# Dataset: Txt + Fin-ratio
class CCDataset(Dataset):
    '''Earnings-call samples (targets, financial ratios and optionally text embeddings) for one rolling window.

    `train` and `val` both come from the training period of the window: its
    transcripts are shuffled with a fixed seed, the first 90% form `train`
    and the remainder forms `val`. `test` uses the test period of the window.
    Samples are ordered by number of sentences (ascending).
    '''

    # Financial-ratio covariates, in the order they are emitted by `__getitem__`.
    COVARIATES = ('alpha', 'car_m1_m1', 'car_m2_m2', 'car_m30_m3', 'sest', 'sue', 'numest',
                  'sstdest', 'smedest', 'mcap', 'roa', 'bm', 'debt_asset', 'volatility', 'volume')

    # Share of the training period that goes to `train`; the rest goes to `val`.
    TRAIN_FRACTION = 0.9

    def __init__(self, split_window, split_type, text_in_dataset, roll_type, print_window, preembeddings, targets_df, split_df, valid_transcriptids=None):
        '''
        Args:
            split_window: str. e.g., "roll-09"
            split_type: str. 'train', 'val' or 'test'
            text_in_dataset: bool. If true, items carry the transcript embeddings and all
                targets; otherwise items carry `docid`, CAR and the financial ratios only
            roll_type: str. Together with `split_window` it selects the row of `split_df`
            print_window: bool. If true, print the date ranges of the selected window
            preembeddings: mapping transcriptid -> embeddings. Only `.keys()`, item lookup and
                `.shape[0]` (taken as the number of sentences S of a call) are used here
            targets_df: DataFrame of target variables; needs `transcriptid`, `ciq_call_date`
                and every column read in `__getitem__`
            split_df: DataFrame of split dates with `window` and `roll_type` columns. The
                matching row is unpacked by position as
                (<ignored>, train_start, train_end, test_start, test_end, <ignored>),
                with the four dates formatted as '%Y-%m-%d'
            valid_transcriptids: list. If provided, only the given transcripts are kept.
                It is applied **on top of** `split_window` and `split_type`

        Raises:
            ValueError: if `split_type` is not 'train', 'val' or 'test'.
        '''

        # get split dates from `split_df`
        window_row = split_df.loc[(split_df.window==split_window) & (split_df.roll_type==roll_type)].iloc[0]
        _, train_start, train_end, test_start, test_end, _ = tuple(window_row)

        # print current window
        if print_window:
            print(f'Current window: {split_window} ({roll_type}) \n(train: {train_start} to {train_end}) (test: {test_start} to {test_end})')

        train_start, train_end, test_start, test_end = (
            datetime.strptime(day, '%Y-%m-%d').date()
            for day in (train_start, train_end, test_start, test_end)
        )

        # select valid transcriptids (preemb_keys) according to split dates
        if split_type in ('train', 'val'):
            # identical seeded shuffle for both splits, so they partition the training period
            in_train_period = targets_df.ciq_call_date.between(train_start, train_end)
            shuffled = targets_df[in_train_period].transcriptid.sample(frac=1, random_state=42).tolist()
            cut = int(len(shuffled)*self.TRAIN_FRACTION)
            transcriptids = shuffled[:cut] if split_type=='train' else shuffled[cut:]
        elif split_type=='test':
            transcriptids = targets_df[targets_df.ciq_call_date.between(test_start, test_end)].transcriptid.tolist()
        else:
            raise ValueError(f"split_type must be 'train', 'val' or 'test', got {split_type!r}")

        # only transcripts that also have embeddings can be served
        self.valid_preemb_keys = set(transcriptids).intersection(set(preembeddings.keys()))

        if valid_transcriptids is not None:
            self.valid_preemb_keys = self.valid_preemb_keys.intersection(set(valid_transcriptids))

        # position of the first `targets_df` row of every selected transcript, so that
        # `__getitem__` does not have to scan the whole frame for each sample
        self._row_pos = {}
        for pos, tid in enumerate(targets_df.transcriptid.tolist()):
            if tid in self.valid_preemb_keys and tid not in self._row_pos:
                self._row_pos[tid] = pos

        # self attributes
        self.text_in_dataset = text_in_dataset
        if text_in_dataset:
            self.preembeddings = preembeddings
        self.targets_df = targets_df
        # (transcriptid, n_sentences) pairs sorted by n_sentences; defines the sample order
        self.sent_len = sorted([(k, preembeddings[k].shape[0]) for k in self.valid_preemb_keys], key=itemgetter(1))
        self.train_start = train_start
        self.train_end = train_end
        self.test_start = test_start
        self.test_end = test_end
        self.n_samples = len(self.sent_len)
        self.split_window = split_window
        self.split_type = split_type

    def __len__(self):
        '''Number of transcripts in this split.'''
        return self.n_samples

    def __getitem__(self, idx):
        '''Return the sample at position `idx` of the length-sorted transcript list.

        Returns:
            With text: (car_0_30, car_0_30_norm, inflow, inflow_norm, revision, revision_norm,
            transcriptid, embeddings, fin_ratios) where `fin_ratios` is a list ordered as
            `COVARIATES`.
            Without text: (docid, car_0_30, car_0_30_norm, fin_ratios) where the last three
            are float32 tensors.
        '''
        if torch.is_tensor(idx):
            idx = idx.tolist()

        transcriptid = self.sent_len[idx][0]
        # first row of `targets_df` carrying this transcriptid
        targets = self.targets_df.iloc[self._row_pos[transcriptid]]

        # `mcap` is divided by 1e6; every other covariate is passed through unchanged
        fin_ratios = [targets[name]/1e6 if name=='mcap' else targets[name] for name in self.COVARIATES]

        if self.text_in_dataset:
            # inputs: preembeddings
            embeddings = self.preembeddings[transcriptid]

            return targets.car_0_30, targets.car_0_30_norm, targets.inflow, targets.inflow_norm, targets.revision, targets.revision_norm, \
                   transcriptid, embeddings, \
                   fin_ratios
        else:
            return targets.docid, \
                   torch.tensor(targets.car_0_30,dtype=torch.float32), \
                   torch.tensor(targets.car_0_30_norm,dtype=torch.float32), \
                   torch.tensor(fin_ratios, dtype=torch.float32)

## `Model`

In [37]:
# Model: position encoder
class PositionalEncoding(nn.Module):
    '''Adds fixed sinusoidal position encodings to a sequence-first input, then applies dropout.

    Even feature indices hold sines and odd feature indices hold cosines of
    `position * exp(-log(10000) * i / d_model)`. Odd values of `d_model` are
    supported: the cosine half simply has one column fewer than the sine half.

    Args:
        d_model: size of the last (feature) dimension of the inputs.
        dropout: dropout probability applied after the encodings are added.
        max_len: number of positions for which encodings are precomputed.
    '''

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)  # (max_len, 1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))  # (ceil(d_model/2),)
        angles = position * div_term  # (max_len, ceil(d_model/2))

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # there are only floor(d_model/2) odd columns, so drop the surplus angle column when d_model is odd
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])

        # pe: (max_len, 1, d_model) so that it broadcasts over the batch dimension
        self.register_buffer('pe', pe.unsqueeze(1))

    def forward(self, x):
        '''Return `dropout(x + pe[:S])` for `x` of shape (S, N, E).

        Raises:
            ValueError: if S exceeds the `max_len` the module was built with.
        '''
        seq_len = x.size(0)
        if seq_len > self.pe.size(0):
            raise ValueError(f'sequence length {seq_len} exceeds max_len {self.pe.size(0)}')
        x = x + self.pe[:seq_len, :] # (S, N, E)
        return self.dropout(x)
    
# Model: Base
class CC(pl.LightningModule):
    '''Shared scaffolding for the earnings-call models: datasets, dataloaders, batching,
    epoch-level metric aggregation and the optimizer.

    Subclasses are expected to set `self.text_in_dataset` in their constructor and to
    implement `forward` plus the `*_step` methods. The data comes from the module-level
    globals `preembeddings`, `targets_df` and `split_df`, which must exist before a model
    is instantiated.
    '''

    # Per-task suffixes a step may report next to the combined loss (`<stage>_loss_<task>`).
    _TASKS = ('car', 'inflow', 'revision')

    def __init__(self, hparams):
        '''
        Args:
            hparams: mapping of hyper-parameter names to values, or an already-built
                `Namespace` (used as is).
        '''
        super().__init__()

        self.hparams = hparams if isinstance(hparams, Namespace) else Namespace(**hparams)
        # self.text_in_dataset will be filled in by the subclass constructor.

        global preembeddings, targets_df, split_df
        self.preembeddings = preembeddings
        self.targets_df = targets_df
        self.split_df = split_df

    # forward
    def forward(self):
        '''Placeholder; concrete models override this.'''
        pass

    # loss
    def mse_loss(self, y, t):
        '''Mean squared error between predictions `y` and targets `t`.'''
        return F.mse_loss(y, t)

    def _summarise_epoch(self, outputs, stage):
        '''Aggregate the per-batch losses of one epoch.

        Args:
            outputs: list of the dicts returned by the `<stage>_step` calls.
            stage: 'val' or 'test'; prefix of the keys that are read and written.

        Returns:
            (mse, log_dict): the mean of the per-batch `<stage>_loss` values and a dict
            with `<stage>_rmse`, plus `<stage>_rmse_<task>` for every task the steps reported.
        '''
        mse = torch.stack([out[f'{stage}_loss'] for out in outputs]).mean()
        log_dict = {f'{stage}_rmse': torch.sqrt(mse)}

        for task in self._TASKS:
            key = f'{stage}_loss_{task}'
            if key in outputs[0]:
                task_mse = torch.stack([out[key] for out in outputs]).mean()
                log_dict[f'{stage}_rmse_{task}'] = torch.sqrt(task_mse)

        return mse, log_dict

    # validation step
    def validation_epoch_end(self, outputs):
        '''Turn the per-batch validation losses into epoch-level RMSE metrics.'''
        mse, log_dict = self._summarise_epoch(outputs, 'val')
        return {'val_loss': mse, 'log': log_dict}

    # test step
    def test_epoch_end(self, outputs):
        '''Turn the per-batch test losses into epoch-level RMSE metrics (also shown in the progress bar).'''
        mse, log_dict = self._summarise_epoch(outputs, 'test')
        return {'test_loss': mse, 'log': log_dict, 'progress_bar':log_dict}

    def _build_dataset(self, split_type, print_window):
        '''Create the `CCDataset` of the configured window for one split.'''
        return CCDataset(self.hparams.window, split_type=split_type, text_in_dataset=self.text_in_dataset,
                         roll_type=self.hparams.roll_type, print_window=print_window, preembeddings=self.preembeddings,
                         targets_df=self.targets_df, split_df=self.split_df)

    # Dataset
    def prepare_data(self):
        '''Build the train / val / test datasets; the window is printed once, for the train split.'''
        self.train_dataset = self._build_dataset('train', print_window=True)
        self.val_dataset = self._build_dataset('val', print_window=False)
        self.test_dataset = self._build_dataset('test', print_window=False)

    # DataLoader
    def train_dataloader(self):
        '''Shuffled training loader.'''
        # Caution:
        # - If you enable `BatchNorm`, then must set `drop_last=True`.

        collate_fn = self.collate_fn if self.text_in_dataset else None
        return DataLoader(self.train_dataset, batch_size=self.hparams.batch_size, shuffle=True, drop_last=True, num_workers=0, pin_memory=True, collate_fn=collate_fn)

    def val_dataloader(self):
        '''Validation loader (not shuffled, incomplete last batch dropped).'''
        # Caution:
        # - Keep `val_batch_size` small (e.g., 4): with `drop_last=True` a large value
        #   (e.g., 16) throws away precious validation data points.
        # - Must set `drop_last=True`, otherwise the `val_loss` tensors for different batches won't match and hence give you error.

        collate_fn = self.collate_fn if self.text_in_dataset else None
        return DataLoader(self.val_dataset, batch_size=self.hparams.val_batch_size, num_workers=0, pin_memory=True, collate_fn=collate_fn, drop_last=True)

    def test_dataloader(self):
        '''Test loader; no `batch_size` is passed, so the DataLoader default applies.'''
        collate_fn = self.collate_fn if self.text_in_dataset else None
        return DataLoader(self.test_dataset, num_workers=0, pin_memory=True, collate_fn=collate_fn)

    def collate_fn(self, data):
        '''Create a mini-batch from text-mode `CCDataset` items.

        Returns:
            car_0_30, car_0_30_norm, inflow, inflow_norm, revision, revision_norm,
            transcriptid: float32 tensors, (N,)
            embeddings: float tensor, (N, T, E), zero-padded to the longest call in the batch
            mask: bool tensor, (N, T), True at padded positions
            fin_ratios: float32 tensor, (N, n_covariates)
        '''

        car_0_30, car_0_30_norm, inflow, inflow_norm, revision, revision_norm, \
        transcriptid, embeddings, \
        fin_ratios = zip(*data)

        # pad sequence
        # the value of `padding_value` is irrelevant for the Transformer encoder,
        # which receives `mask` and ignores the padded positions.
        valid_seq_len = torch.tensor([emb.shape[-2] for emb in embeddings])
        embeddings = pad_sequence(embeddings, batch_first=True, padding_value=0) # (N, T, E)

        # mask: (N, T), True wherever the position index is beyond the call's own length
        positions = torch.arange(embeddings.shape[1]).unsqueeze(0) # (1, T)
        mask = positions >= valid_seq_len.unsqueeze(1)

        # NOTE(review): transcript ids are returned as float32, which represents integers
        # exactly only up to 2**24; switch to an integer dtype if ids can be larger.
        return torch.tensor(car_0_30, dtype=torch.float32), torch.tensor(car_0_30_norm, dtype=torch.float32), \
               torch.tensor(inflow, dtype=torch.float32), torch.tensor(inflow_norm, dtype=torch.float32), \
               torch.tensor(revision, dtype=torch.float32), torch.tensor(revision_norm, dtype=torch.float32), \
               torch.tensor(transcriptid, dtype=torch.float32), embeddings.float(), mask, \
               torch.tensor(fin_ratios, dtype=torch.float32)

    # optimizer
    def configure_optimizers(self):
        '''Adam over all parameters with the configured learning rate.'''
        optimizer = optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
        return optimizer

In [38]:
# helpers: load targets
def load_targets(targets_name):
    '''Read `{DATA_DIR}/{targets_name}.feather` into the module-level `targets_df`.

    Nothing happens when a global named `targets_df` already exists, so an
    already-loaded frame is never re-read.
    '''
    module_vars = globals()
    if 'targets_df' in module_vars:
        return

    # NOTE(review): `Now` is not defined in the visible notebook; presumably a timestamp helper. Confirm.
    print(f'Loading targets...@{Now()}')
    module_vars['targets_df'] = pd.read_feather(f'{DATA_DIR}/{targets_name}.feather')
    print(f'Loading finished. @{Now()}')
        
# helpers: load preembeddings
def load_preembeddings(preembedding_type):
    '''Load `{DATA_DIR}/embeddings/preembeddings_{preembedding_type}.pt` into the module-level `preembeddings`.

    Nothing happens when a global named `preembeddings` already exists.
    '''
    module_vars = globals()
    if 'preembeddings' in module_vars:
        return

    # NOTE(review): `Now` is not defined in the visible notebook; presumably a timestamp helper. Confirm.
    print(f'Loading preembeddings...@{Now()}')
    # NOTE(review): `torch.load` unpickles the file; only point it at trusted files.
    module_vars['preembeddings'] = torch.load(f"{DATA_DIR}/embeddings/preembeddings_{preembedding_type}.pt")
    print(f'Loading finished. @{Now()}')
        
# helpers: load split_df
def load_split_df(roll_type):
    '''Read `{DATA_DIR}/split_dates.csv` and publish the rows of one roll type as the module-level `split_df`.

    Unlike the other loaders this one always re-reads the file and overwrites
    any existing `split_df`.
    '''
    all_windows = pd.read_csv(f'{DATA_DIR}/split_dates.csv')
    matches_roll_type = all_windows.roll_type==roll_type
    globals()['split_df'] = all_windows.loc[matches_roll_type]

In [39]:
# Settings that decide which artefacts are loaded into the kernel.
model_hparams = dict(
    preembedding_type='all_sbert_roberta_nlistsb_encoded',  # key!
    targets_name='f_sue_keydevid_car_finratio_vol_transcriptid_sim_inflow_revision_text_norm',  # key!
    roll_type='3y',  # key!
)

# Populate the module-level `split_df`, `targets_df` and `preembeddings`, in that order.
load_split_df(model_hparams['roll_type'])
load_targets(model_hparams['targets_name'])
load_preembeddings(model_hparams['preembedding_type'])

# Model

In [100]:
# (MTL, hardshare) x*car + (1-x)*inf ~ txt + fr
class CCTransformerMTLInfHard(CC):
    '''Multi-task Transformer with a hard-shared text encoder predicting CAR and inflow.

    The sentence embeddings of a call go through one Transformer encoder and are
    pooled with a learned attention over sentences. The inflow head reads the
    pooled text representation only; the CAR head reads the text representation
    concatenated with the financial-ratio covariates.

    Hyper-parameters read from `self.hparams`: d_model, attn_dropout, n_head_encoder,
    dff, n_layers_encoder, final_tdim, dropout, normalize_layer, normalize_batch,
    max_seq_len, car_weight, inflow_weight.
    '''

    def __init__(self, hparams):
        # `self.hparams` will be created by super().__init__
        super().__init__(hparams)

        # specify model type
        self.model_type = 'TSFM'
        self.target_type = 'car+inf'
        self.feature_type = 'txt+fr'
        self.emb_share = 'hard'
        self.normalize_target = True

        self.attn_type = 'dotprod'
        self.text_in_dataset = True if self.feature_type!='fr' else False
        self.n_covariate = 15

        # NOTE: the statements below keep their original order so that parameter
        # names and registration order stay the same as before.

        # positional encoding
        self.encoder_pos = PositionalEncoding(self.hparams.d_model, self.hparams.attn_dropout)

        # encoder layer that is stacked into `encoder_expert` below
        encoder_layers_expert = nn.TransformerEncoderLayer(self.hparams.d_model, self.hparams.n_head_encoder, self.hparams.dff, self.hparams.attn_dropout)

        # attention pooling: one score per sentence
        self.attn_layers_car = nn.Linear(self.hparams.d_model, 1)
        self.attn_dropout_1 = nn.Dropout(self.hparams.attn_dropout)

        # Build Encoder
        self.encoder_expert = nn.TransformerEncoder(encoder_layers_expert, self.hparams.n_layers_encoder)

        # linear layers to produce the final result
        # (`linear_car_4` is not used by `forward`)
        self.linear_car_1 = nn.Linear(self.hparams.d_model, self.hparams.d_model)
        self.linear_car_2 = nn.Linear(self.hparams.d_model, self.hparams.final_tdim)
        self.linear_car_3 = nn.Linear(self.hparams.final_tdim+self.n_covariate, self.hparams.final_tdim+self.n_covariate)
        self.linear_car_4 = nn.Linear(self.hparams.final_tdim+self.n_covariate, self.hparams.final_tdim+self.n_covariate)
        self.linear_car_5 = nn.Linear(self.hparams.final_tdim+self.n_covariate, 1)

        self.linear_inflow = nn.Linear(self.hparams.final_tdim, 1)
        # self.linear_revision = nn.Linear(hparam.final_tdim, 1)

        # dropout for final fc layers (`final_dropout_3` is not used by `forward`)
        self.final_dropout_1 = nn.Dropout(self.hparams.dropout)
        self.final_dropout_2 = nn.Dropout(self.hparams.dropout)
        self.final_dropout_3 = nn.Dropout(self.hparams.dropout)

        # layer normalization (created on request; not used by `forward`)
        if self.hparams.normalize_layer:
            self.layer_norm = nn.LayerNorm(self.hparams.final_tdim+self.n_covariate)

        # batch normalization of the covariates
        if self.hparams.normalize_batch:
            self.batch_norm = nn.BatchNorm1d(self.n_covariate)

    # forward
    def forward(self, embeddings, src_key_padding_mask, fin_ratios):
        '''
        Args:
            embeddings: (N, S, E) padded sentence embeddings.
            src_key_padding_mask: (N, S) bool, True at padded positions.
            fin_ratios: (N, n_covariate) covariates.

        Returns:
            (y_car, y_inflow), each of shape (N, 1).
        '''

        # if S is longer than max_seq_len, cut
        embeddings = embeddings[:, :self.hparams.max_seq_len] # (N, S, E)
        src_key_padding_mask = src_key_padding_mask[:, :self.hparams.max_seq_len] # (N, S)

        # positional encoding on the sequence-first layout
        x = self.encoder_pos(embeddings.transpose(0, 1)) # (S, N, E)

        # encode
        x_expert = self.encoder_expert(x, src_key_padding_mask=src_key_padding_mask).transpose(0,1) # (N, S, E)

        # attention pooling over sentences. Padded positions get a score of -inf so they
        # receive zero weight; without this the pooled vector of a call would depend on
        # how much padding its batch-mates force onto it.
        attn_scores = self.attn_layers_car(x_expert) # (N, S, 1)
        attn_scores = attn_scores.masked_fill(src_key_padding_mask.unsqueeze(-1), float('-inf'))
        x_attn = self.attn_dropout_1(F.softmax(attn_scores, dim=1)) # (N, S, 1)
        x_expert = torch.bmm(x_expert.transpose(-1,-2), x_attn).squeeze(-1) # (N, E)

        # project the text representation down to final_tdim
        x_expert = self.final_dropout_1(F.relu(self.linear_car_1(x_expert))) # (N, E)
        x_expert = F.relu(self.linear_car_2(x_expert)) # (N, final_tdim)

        # batch normalization; the result must replace `fin_ratios`, otherwise the
        # raw covariates are what gets concatenated below
        if self.hparams.normalize_batch:
            fin_ratios = self.batch_norm(fin_ratios)

        # mix with covariate
        x_car = torch.cat([x_expert, fin_ratios], dim=-1) # (N, X + final_tdim) where X is the number of covariate (n_covariate)

        # output y
        y_inflow = self.linear_inflow(x_expert) # (N, 1)

        x_car = self.final_dropout_2(F.relu(self.linear_car_3(x_car))) # (N, X + final_tdim)
        y_car = self.linear_car_5(x_car) # (N, 1)

        # final output
        return y_car, y_inflow

    def _task_losses(self, batch):
        '''Run the model on one collated batch and return `(loss_car, loss_inflow)`, each of shape (1,).

        Both losses are MSE against the normalised targets (`car_norm`, `inflow_norm`).
        '''
        _car, car_norm, _inflow, inflow_norm, _revision, _revision_norm, \
        _transcriptid, embeddings, mask, \
        fin_ratios = batch

        # forward
        y_car, y_inflow = self.forward(embeddings, mask, fin_ratios) # (N, 1)

        # compute loss
        loss_car = self.mse_loss(y_car, car_norm.unsqueeze(-1)).unsqueeze(-1) # (1,)
        loss_inflow = self.mse_loss(y_inflow, inflow_norm.unsqueeze(-1)).unsqueeze(-1) # (1,)
        return loss_car, loss_inflow

    # training step
    def training_step(self, batch, idx):
        '''Weighted sum of the two task losses: `car_weight*loss_car + inflow_weight*loss_inflow`.'''
        loss_car, loss_inflow = self._task_losses(batch)

        # compare with a tolerance: an exact `== 1` can fail for valid float weights
        total_weight = self.hparams.car_weight+self.hparams.inflow_weight
        assert abs(total_weight-1) < 1e-6, 'car_weight + inflow_weight != 1'

        loss = self.hparams.car_weight*loss_car + self.hparams.inflow_weight*loss_inflow

        # logging
        return {'loss': loss, 'log': {'train_loss': loss}}

    # validation step
    def validation_step(self, batch, idx):
        '''Per-batch validation losses; the combined loss is the unweighted sum of both tasks.'''
        loss_car, loss_inflow = self._task_losses(batch)
        loss = loss_car + loss_inflow

        # logging
        return {'val_loss': loss, 'val_loss_car': loss_car, 'val_loss_inflow': loss_inflow}

    # test step
    def test_step(self, batch, idx):
        '''Per-batch test losses; the combined loss is the unweighted sum of both tasks.'''
        loss_car, loss_inflow = self._task_losses(batch)
        loss = loss_car + loss_inflow

        # logging
        return {'test_loss': loss, 'test_loss_car': loss_car, 'test_loss_inflow': loss_inflow}

In [103]:
# STL-text-fr
class CCTransformerSTLTxtFr(CC):
    '''Single-task Transformer predicting CAR from call text plus financial ratios.

    The sentence embeddings of a call go through a Transformer encoder and are
    pooled with a learned attention over sentences; the pooled vector (width
    `d_model`) is concatenated with the covariates and fed to two linear layers.

    Hyper-parameters read from `self.hparams`: d_model, attn_dropout, n_head_encoder,
    dff, n_layers_encoder, final_tdim, dropout, normalize_layer, normalize_batch,
    max_seq_len.
    '''

    def __init__(self, hparams):
        # `self.hparams` will be created by super().__init__
        super().__init__(hparams)

        # specify model type
        self.model_type = 'TSFM'
        self.target_type = 'car'
        self.feature_type = 'txt+fr'
        self.normalize_target = True
        self.attn_type = 'dotprod'
        self.text_in_dataset = True if self.feature_type!='fr' else False

        self.n_covariate = 15

        # NOTE: the statements below keep their original order so that parameter
        # names and registration order stay the same as before.

        # positional encoding
        self.encoder_pos = PositionalEncoding(self.hparams.d_model, self.hparams.attn_dropout)

        # encoder layer that is stacked into `encoder_expert` below
        encoder_layers_expert = nn.TransformerEncoderLayer(self.hparams.d_model, self.hparams.n_head_encoder, self.hparams.dff, self.hparams.attn_dropout)

        # attention pooling: one score per sentence
        self.attn_layers_car = nn.Linear(self.hparams.d_model, 1)
        self.attn_dropout_1 = nn.Dropout(self.hparams.attn_dropout)

        # Build Encoder
        self.encoder_expert = nn.TransformerEncoder(encoder_layers_expert, self.hparams.n_layers_encoder)

        # linear layers to produce the final result. Their input is the pooled encoder
        # output (width d_model) plus the covariates; sizing them with `final_tdim`
        # only worked when final_tdim == d_model, in which case the shapes are unchanged.
        fc_width = self.hparams.d_model+self.n_covariate
        self.fc_1 = nn.Linear(fc_width, fc_width)
        self.fc_2 = nn.Linear(fc_width, 1)

        # dropout for final fc layers
        self.fc_dropout_1 = nn.Dropout(self.hparams.dropout)

        # layer normalization (created on request; not used by `forward`)
        if self.hparams.normalize_layer:
            self.layer_norm = nn.LayerNorm(self.hparams.final_tdim+self.n_covariate)

        # batch normalization of the covariates
        if self.hparams.normalize_batch:
            self.batch_norm = nn.BatchNorm1d(self.n_covariate)

    # forward
    def forward(self, embeddings, src_key_padding_mask, fin_ratios):
        '''
        Args:
            embeddings: (N, S, E) padded sentence embeddings.
            src_key_padding_mask: (N, S) bool, True at padded positions.
            fin_ratios: (N, n_covariate) covariates.

        Returns:
            y_car of shape (N, 1).
        '''

        # if S is longer than max_seq_len, cut
        embeddings = embeddings[:, :self.hparams.max_seq_len] # (N, S, E)
        src_key_padding_mask = src_key_padding_mask[:, :self.hparams.max_seq_len] # (N, S)

        # positional encoding on the sequence-first layout
        x = self.encoder_pos(embeddings.transpose(0, 1)) # (S, N, E)

        # encode
        x_expert = self.encoder_expert(x, src_key_padding_mask=src_key_padding_mask).transpose(0,1) # (N, S, E)

        # attention pooling over sentences. Padded positions get a score of -inf so they
        # receive zero weight; without this the pooled vector of a call would depend on
        # how much padding its batch-mates force onto it.
        attn_scores = self.attn_layers_car(x_expert) # (N, S, 1)
        attn_scores = attn_scores.masked_fill(src_key_padding_mask.unsqueeze(-1), float('-inf'))
        x_attn = self.attn_dropout_1(F.softmax(attn_scores, dim=1)) # (N, S, 1)
        x_expert = torch.bmm(x_expert.transpose(-1,-2), x_attn).squeeze(-1) # (N, E)

        # batch norm fin-ratios
        if self.hparams.normalize_batch:
            fin_ratios = self.batch_norm(fin_ratios)

        x_final = torch.cat([x_expert, fin_ratios], dim=-1) # (N, E+X) where X is the number of covariate (n_covariate)

        # final FC
        x_final = self.fc_dropout_1(F.relu(self.fc_1(x_final))) # (N, E+X)
        y_car = self.fc_2(x_final) # (N, 1)

        # final output
        return y_car

    def _car_loss(self, batch):
        '''Run the model on one collated batch and return the MSE against `car_norm`, shape (1,).'''
        _car, car_norm, _inflow, _inflow_norm, _revision, _revision_norm, \
        _transcriptid, embeddings, mask, \
        fin_ratios = batch

        # forward
        y_car = self.forward(embeddings, mask, fin_ratios) # (N, 1)

        # compute loss
        return self.mse_loss(y_car, car_norm.unsqueeze(-1)).unsqueeze(-1) # (1,)

    # training step
    def training_step(self, batch, idx):
        '''Training loss of one batch, also logged as `train_loss`.'''
        loss_car = self._car_loss(batch)

        # logging
        return {'loss': loss_car, 'log': {'train_loss': loss_car}}

    # validation step
    def validation_step(self, batch, idx):
        '''Validation loss of one batch.'''
        # logging
        return {'val_loss': self._car_loss(batch)}

    # test step
    def test_step(self, batch, idx):
        '''Test loss of one batch.'''
        # logging
        return {'test_loss': self._car_loss(batch)}

In [112]:
# Restore the trained multi-task model from its checkpoint below CHECKPOINT_DIR.
# The path is assembled with os.path.join instead of a non-raw 'D:\...' literal, whose
# backslash sequences (`\C`, `\e`, `\M`, `\T`) are invalid string escapes.
# NOTE(review): CHECKPOINT_DIR is lower-case ('d:/checkpoints/...') while the old literal
# used 'D:\Checkpoints\...'; equivalent only on a case-insensitive file system.
model = CCTransformerMTLInfHard.load_from_checkpoint(
    os.path.join(CHECKPOINT_DIR, 'MTL-01-3y-(0.5car+0.5inf~txt+fr)-hardshare-norm', 'TSFM_roll-01_epoch=9_v0.ckpt')
)

In [113]:
# NOTE(review): this call fails. The recorded output of this cell is
# "AttributeError: 'CCTransformerMTLInfHard' object has no attribute 'to_json'".
model.to_json()

AttributeError: 'CCTransformerMTLInfHard' object has no attribute 'to_json'

In [108]:
# NOTE(review): `params` is not assigned anywhere in the visible notebook. The keys in the
# recorded output look like a checkpoint dictionary; presumably it was loaded from the
# .ckpt file in an earlier session. Confirm.
params.keys()

dict_keys(['epoch', 'global_step', 'pytorch-lightning_version', 'checkpoint_callback_best_model_score', 'checkpoint_callback_best_model_path', 'early_stop_callback_state_dict', 'optimizer_states', 'lr_schedulers', 'state_dict', 'hparams_name', 'hyper_parameters'])

In [None]:
# Scratch cell without an execution count: a bare reference to the `torch` module.
torch

In [102]:
# Lists every (name, parameter) pair of `model`. The tensors themselves are part of the
# repr, so the cell output is very long.
list(model.named_parameters())

[('attn_layers_car.weight',
  Parameter containing:
  tensor([[ 0.0046,  0.0055, -0.0117,  ...,  0.0156, -0.0165,  0.0100]])),
 ('attn_layers_car.bias',
  Parameter containing:
  tensor([0.0113])),
 ('encoder_expert.layers.0.self_attn.in_proj_weight',
  Parameter containing:
  tensor([[ 0.0253,  0.0111,  0.0024,  ..., -0.0128,  0.0160,  0.0131],
          [-0.0076,  0.0186,  0.0307,  ...,  0.0360,  0.0151, -0.0067],
          [ 0.0061, -0.0210, -0.0229,  ...,  0.0360,  0.0047,  0.0012],
          ...,
          [-0.0027, -0.0244,  0.0181,  ..., -0.0186,  0.0206, -0.0331],
          [-0.0326,  0.0123, -0.0187,  ..., -0.0085,  0.0278,  0.0149],
          [-0.0099, -0.0041,  0.0366,  ..., -0.0118,  0.0052, -0.0353]])),
 ('encoder_expert.layers.0.self_attn.in_proj_bias',
  Parameter containing:
  tensor([ 2.0374e-04, -1.1255e-03,  4.5907e-04,  ...,  7.6145e-05,
           6.2435e-04, -7.0612e-04])),
 ('encoder_expert.layers.0.self_attn.out_proj.weight',
  Parameter containing:
  tensor([[ 

In [57]:
# NOTE(review): `children` and `modules` are not assigned anywhere in the visible notebook;
# presumably they hold list(model.children()) and list(model.modules()). Confirm.
len(children)
len(modules)

14

57

In [None]:
# Scratch cell without an execution count; `modules` is not assigned in the visible notebook.
modules

In [83]:
class myNet(nn.Module):
    '''Toy network used to contrast `children()` with `modules()`.

    It has two direct children: `convBN`, a Sequential of a 3x3 convolution
    (10 -> 10 channels) followed by batch norm, and `linear` (10 -> 2).
    '''

    def __init__(self):
        super(myNet, self).__init__()
        conv = nn.Conv2d(10, 10, 3)
        batch_norm = nn.BatchNorm2d(10)
        self.convBN = nn.Sequential(conv, batch_norm)
        self.linear = nn.Linear(10, 2)

    def forward(self, x):
        # intentionally does nothing: only the module structure is inspected
        return None

# Instantiate the toy network, then print its direct children followed by the
# full module tree (the network itself and every nested sub-module).
Net = myNet()

print("Printing children\n------------------------------", list(Net.children()), sep="\n")
print("\n\nPrinting Modules\n------------------------------", list(Net.modules()), sep="\n")

Printing children
------------------------------
[Sequential(
  (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
  (1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
), Linear(in_features=10, out_features=2, bias=True)]


Printing Modules
------------------------------
[myNet(
  (convBN): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (linear): Linear(in_features=10, out_features=2, bias=True)
), Sequential(
  (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1))
  (1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
), Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1)), BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), Linear(in_features=10, out_features=2, bias=True)]
