In [3]:
# Import os and pin the OMP_NUM_THREADS environment variable to "1".
# This runs before numpy / torch are imported below, presumably so that
# OpenMP-backed libraries read the limit when they initialise — TODO confirm.
# NOTE(review): this does not select a GPU; no CUDA device variable is set here.
import os
os.environ["OMP_NUM_THREADS"] = "1"

# import common libraries
import gc
import random
import argparse
import pandas as pd
import numpy as np
from functools import partial

# import pytorch related libraries
import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.optim.optimizer import Optimizer
from torch.utils.data import TensorDataset, DataLoader,Dataset
from torch.utils.data.sampler import RandomSampler, SequentialSampler
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR, _LRScheduler
from tensorboardX import SummaryWriter
from pytorch_pretrained_bert.optimization import BertAdam
from transformers import get_linear_schedule_with_warmup

# import apex for mixed-precision training
from apex import amp
from apex.parallel import DistributedDataParallel as DDP
from apex.optimizers import FusedAdam

# import dataset class
from dataset.dataset import *

# import utils
from utils.ranger import *
from utils.lrs_scheduler import * 
from utils.loss_function import *
from utils.metric import *
from utils.file import *

# import model
from model.model_bert import *

matplotlib.get_backend :  TkAgg


In [4]:
# Instantiate QuestNet for the "albert-xxlarge-v2" model type with n_classes=30.
# hidden_layers is passed through unchanged; presumably these are indices of
# encoder hidden states counted from the end — confirm in model/model_bert.py.
model = QuestNet(
    model_type="albert-xxlarge-v2",
    n_classes=30,
    hidden_layers=[-1, -3, -5, -7, -9],
)

W0127 18:33:32.079421 139937887930112 modeling_utils.py:326] There is currently an upstream reproducibility issue with ALBERT v2 models. Please see https://github.com/google-research/google-research/issues/119 for more information.
I0127 18:33:32.231390 139937887930112 file_utils.py:331] https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v2-config.json not found in cache or force_download set to True, downloading to /tmp/tmp8gti1egu


HBox(children=(IntProgress(value=0, description='Downloading', max=505, style=ProgressStyle(description_width=…

I0127 18:33:32.473910 139937887930112 file_utils.py:346] copying /tmp/tmp8gti1egu to cache at /home/jionie/.cache/torch/transformers/b3eed512e24335a76694282193217608ead013caa55330de3ff236d1f5695e6c.98f2df4a57b10458e65046f312a110b53115e56294bb20da7599630d708641dc
I0127 18:33:32.474906 139937887930112 file_utils.py:350] creating metadata file for /home/jionie/.cache/torch/transformers/b3eed512e24335a76694282193217608ead013caa55330de3ff236d1f5695e6c.98f2df4a57b10458e65046f312a110b53115e56294bb20da7599630d708641dc
I0127 18:33:32.476576 139937887930112 file_utils.py:359] removing temp file /tmp/tmp8gti1egu
I0127 18:33:32.477216 139937887930112 configuration_utils.py:160] loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v2-config.json from cache at /home/jionie/.cache/torch/transformers/b3eed512e24335a76694282193217608ead013caa55330de3ff236d1f5695e6c.98f2df4a57b10458e65046f312a110b53115e56294bb20da7599630d708641dc
I0127 18:33:32.478204 13993788793




HBox(children=(IntProgress(value=0, description='Downloading', max=892728632, style=ProgressStyle(description_…

I0127 18:34:08.535514 139937887930112 file_utils.py:346] copying /tmp/tmpeybmw8gz to cache at /home/jionie/.cache/torch/transformers/c8f990f22da3ddf461b7e0d30a079014b20ad2859f352a9f18421485f63a69e7.9ac42d6fae7d18840d74eaf2a6d817700ffdd5af9ae1a12c3e96e239e23f76f4





I0127 18:34:09.045233 139937887930112 file_utils.py:350] creating metadata file for /home/jionie/.cache/torch/transformers/c8f990f22da3ddf461b7e0d30a079014b20ad2859f352a9f18421485f63a69e7.9ac42d6fae7d18840d74eaf2a6d817700ffdd5af9ae1a12c3e96e239e23f76f4
I0127 18:34:09.045813 139937887930112 file_utils.py:359] removing temp file /tmp/tmpeybmw8gz
I0127 18:34:09.114085 139937887930112 modeling_utils.py:401] loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-v2-pytorch_model.bin from cache at /home/jionie/.cache/torch/transformers/c8f990f22da3ddf461b7e0d30a079014b20ad2859f352a9f18421485f63a69e7.9ac42d6fae7d18840d74eaf2a6d817700ffdd5af9ae1a12c3e96e239e23f76f4


In [5]:
# Move the model to the GPU (assumes QuestNet is a torch.nn.Module — the
# printed layer tree below is consistent with that). Because this call is the
# cell's last bare expression, its return value's repr is echoed as the cell
# output; that is the long QuestNet(...) listing that follows.
model.cuda()

QuestNet(
  (albert_model): AlbertModel(
    (embeddings): AlbertEmbeddings(
      (word_embeddings): Embedding(30000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): AlbertTransformer(
      (embedding_hidden_mapping_in): Linear(in_features=128, out_features=4096, bias=True)
      (albert_layer_groups): ModuleList(
        (0): AlbertLayerGroup(
          (albert_layers): ModuleList(
            (0): AlbertLayer(
              (full_layer_layer_norm): LayerNorm((4096,), eps=1e-12, elementwise_affine=True)
              (attention): AlbertAttention(
                (query): Linear(in_features=4096, out_features=4096, bias=True)
                (key): Linear(in_features=4096, out_features=4096, bias=True)
                (value): Linear(in_features=4096, out_features=4096, bias=T