In [1]:
import os
import sys
import itertools
import pickle
from glob import glob
from tqdm import tqdm_notebook as tqdm

import numpy as np
import pandas as pd
from scipy.stats import spearmanr

from matplotlib import pyplot as plt
from matplotlib_venn import venn2, venn3
import seaborn as sns

import torch
from torch import nn, optim
from transformers import BertConfig, BertTokenizer, BertModel, BertForMaskedLM#, BertLayer, BertEmbeddings
from transformers import XLNetModel, GPT2Model, RobertaModel
from transformers.modeling_bert import BertLayer, BertEmbeddings

In [2]:
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

# re-load functions
%load_ext autoreload
%autoreload 2

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
%config InlineBackend.figure_formats = {'png', 'retina'}

In [3]:
# Device identifier constant. Presumably the torch device for later cells;
# none of the cells visible here read it -- TODO confirm against the rest of
# the notebook.
DEVICE = "cpu"

## Save pretrained model configs

In [5]:
# Load one pretrained base checkpoint per architecture. The variable names
# are read by later cells (their `.config` attributes get pickled), so they
# must stay as they are.
bert_model = BertModel.from_pretrained(
    "bert-base-uncased"
)
roberta_model = RobertaModel.from_pretrained(
    "roberta-base"
)
xlnet_model = XLNetModel.from_pretrained(
    "xlnet-base-cased"
)
gpt2_model = GPT2Model.from_pretrained(
    "gpt2"
)

In [7]:
ls ../mnt/datasets/model_configs

bert-model-uncased-config.pkl  dataset-metadata.json


In [9]:
# Pickle the config object of each loaded pretrained model.
#
# The files are written into the `model_configs` directory because that is
# where the load cell reads `roberta-model-base-config.pkl` from; writing them
# one level up (as this cell did before) leaves the load cell unable to find
# the file on a fresh Restart & Run All.
#
# Only the RoBERTa, XLNet and GPT-2 configs are written; `bert_model` is not
# pickled in this cell.
CONFIG_DIR = '../mnt/datasets/model_configs'
os.makedirs(CONFIG_DIR, exist_ok=True)  # no-op when the directory already exists

for config_filename, pretrained_model in [
    ('roberta-model-base-config.pkl', roberta_model),
    ('xlnet-model-base-cased-config.pkl', xlnet_model),
    ('gpt2-model-config.pkl', gpt2_model),
]:
    with open(os.path.join(CONFIG_DIR, config_filename), 'wb') as fout:
        pickle.dump(pretrained_model.config, fout)

In [12]:
# Read the pickled RoBERTa config back from disk. The file is expected to be
# one produced by this notebook; pickle must not be used on untrusted files.
config_path = '../mnt/datasets/model_configs/roberta-model-base-config.pkl'
with open(config_path, 'rb') as config_file:
    config = pickle.load(config_file)

In [13]:
# Build a RobertaModel directly from the unpickled config object
# (no `from_pretrained` call is made in this cell).
model = RobertaModel(config=config)

In [14]:
# Move the model to the GPU when one is available and fall back to the CPU
# otherwise, so the cell does not raise on machines without CUDA.
# `.to(...)` is the last expression, so the model repr is still displayed.
model.to('cuda' if torch.cuda.is_available() else 'cpu')

RobertaModel(
  (embeddings): RobertaEmbeddings(
    (word_embeddings): Embedding(50265, 768, padding_idx=1)
    (position_embeddings): Embedding(514, 768, padding_idx=1)
    (token_type_embeddings): Embedding(1, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inpl

In [16]:
# Show the installed transformers release, so the environment this notebook
# ran against is recorded in the cell output.
import transformers

transformers.__version__

'2.3.0'