In [1]:
from IPython.core.interactiveshell import InteractiveShell

# Display the value of every top-level expression in a cell, not just the last one
InteractiveShell.ast_node_interactivity = "all"

# Currently Running:
DANN with lambda 5.0, dir 12

In [2]:
# External Lib imports
import os
import re
import html
import pickle
import argparse
import pandas as pd
from pathlib import Path
from functools import partial

os.environ['QT_QPA_PLATFORM'] = 'offscreen'

# FastAI Imports
from fastai import text, core, lm_rnn

# Torch imports
import torch.nn as nn
import torch.optim as optim

# Mytorch imports
from mytorch import loops, lriters as mtlr, dataiters as mtdi
from mytorch.utils.goodies import *

# Local imports
from options import Phase3 as params

# Configurations

In [None]:
# Run switches: QUICK sub-samples the data, PRETRAINED loads the encoder weights,
# MODEL_NUM names the sub-directory that holds the unsupervised model.
QUICK, DEBUG, MODEL_NUM = False, True, '12'
PRETRAINED = True
MODEL_SUFFIX = '_lowaux'

# Validate the suffix before anything is derived from the configuration.
assert MODEL_SUFFIX in ['_lowaux', '_hightrn', ''], 'Incorrect Suffix given with which to load model'

# `PATH` is only defined in a later cell, so referencing it here fails on a fresh
# kernel. The processed-data root is spelled out instead; keep it in sync with `PATH`.
UNSUP_MODEL_DIR = Path('resources/proc/imdb') / 'models' / MODEL_NUM

In [3]:
# Device used for the model, and fixed seeds for numpy / torch.
device = torch.device('cuda')
np.random.seed(42)
torch.manual_seed(42)

'''
    Paths and macros
'''
# Special tokens prepended to every text before tokenisation
BOS = 'xbos'  # beginning-of-sentence tag
FLD = 'xfld'  # data field tag

# Raw dataset location. parents=True is required because the path is nested:
# without it mkdir raises FileNotFoundError when 'raw/imdb' does not exist yet.
DATA_PATH = Path('raw/imdb/aclImdb/')
DATA_PATH.mkdir(parents=True, exist_ok=True)
PATH = Path('resources/proc/imdb')
DATA_PROC_PATH = PATH / 'data'
DATA_LM_PATH = PATH / 'datalm'

# Language-model directory (same nested-path consideration as above).
LM_PATH = Path('resources/models')
LM_PATH.mkdir(parents=True, exist_ok=True)
PRE_PATH = LM_PATH / 'wt103'
PRE_LM_PATH = PRE_PATH / 'fwd_wt103.h5'
# Folder names under the dataset split directories; the list index is used as the label.
CLASSES = ['neg', 'pos', 'unsup']


'''
    Model code
'''
class CustomEncoder(lm_rnn.MultiBatchRNN):
    """MultiBatchRNN that exposes its recurrent layers as groups via `layers`."""

    @property
    def layers(self):
        """
            Three groups, group i holding `self.rnns[i]` and `self.dropouths[i]`.

            :return: a ModuleList of three two-element ModuleLists
        """
        # TODO: ADD ENCODERR!!!!!!!!!!
        groups = [torch.nn.ModuleList([self.rnns[i], self.dropouths[i]]) for i in range(3)]
        return torch.nn.ModuleList(groups)


class TextClassifier(nn.Module):
    """
        Text classifier made of a `CustomEncoder` followed by a
        `text.PoolingLinearClassifier` head that outputs two class scores.
    """

    def __init__(self,
                 _device: torch.device,
                 ntoken: int,
                 dps: list,
                 enc_wgts = None,
                 _debug=False):
        """
            :param _device: device the encoder and the head are moved to
            :param ntoken: vocabulary size passed to the encoder
            :param dps: five dropout values; dps[0..3] are given to the encoder as
                dropouti, wdrop, dropoute, dropouth and dps[4] is the first drop of the head
            :param enc_wgts: optional state dict loaded into the encoder (skipped when falsy)
            :param _debug: accepted for interface compatibility; not used by this class
        """
        super(TextClassifier, self).__init__()

        self.device = _device

        # Build the encoder, then optionally load the pre-trained weights into it.
        # NOTE(review): pad_token is 0 here while the notebook's data sampler is built
        # with _padidx=1 - confirm which padding index the saved encoder expects.
        args = {'ntoken': ntoken, 'emb_sz': 400, 'n_hid': 1150,
                'n_layers': 3, 'pad_token': 0, 'qrnn': False, 'bptt': 70, 'max_seq': 1400,
                'dropouti': dps[0], 'wdrop': dps[1], 'dropoute': dps[2], 'dropouth': dps[3]}
        self.encoder = CustomEncoder(**args).to(self.device)
        if enc_wgts:
            self.encoder.load_state_dict(enc_wgts)

        # New classifier head.
        #   400 * 3 : input is [ h_T, maxpool, meanpool ]
        #   50      : hidden layer dim
        #   2       : n_classes
        #   dps[4], 0.1 : drops of the two head layers
        self.linear = text.PoolingLinearClassifier(layers=[400 * 3, 50, 2], drops=[dps[4], 0.1]).to(self.device)
        self.encoder.reset()

    @property
    def layers(self):
        """Encoder layer groups followed by the head's layers, as one ModuleList."""
        layers = [x for x in self.encoder.layers]
        layers += [x for x in self.linear.layers]
        return torch.nn.ModuleList(layers)

    @property
    def layers_rev(self):
        """Same modules as `layers`, in reverse order."""
        layers = list(self.layers)
        layers.reverse()
        return torch.nn.ModuleList(layers)

    def forward(self, x):
        """
            Score a batch.

            :param x: batch of token ids; its first two axes are swapped before encoding
                (original note: "inputs are S*B" - TODO confirm the layout callers pass)
            :return: first element of the head's output
        """
        # Encoding all the data
        op_p = self.encoder(x.transpose(1, 0))
        # pos_batch = op_p[1][-1][-1]
        score = self.linear(op_p)[0]

        return score

    def predict(self, x):
        """
            Run `forward` in eval mode with gradients disabled.

            The train/eval mode the module had before the call is restored afterwards,
            also when `forward` raises. Previously the module was always left in train mode.

            :param x: same input as `forward`
            :return: the output of `forward`
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                return self.forward(x)
        finally:
            self.train(was_training)


'''
    Prepare data
'''
# Two or more consecutive spaces
re1 = re.compile(r'  +')

# Literal substitutions applied by fixup, in this exact order.
_FIXUP_SUBS = (
    ('#39;', "'"),
    ('amp;', '&'),
    ('#146;', "'"),
    ('nbsp;', ' '),
    ('#36;', '$'),
    ('\\n', "\n"),
    ('quot;', "'"),
    ('<br />', "\n"),
    ('\\"', '"'),
    ('<unk>', 'u_n'),
    (' @.@ ', '.'),
    (' @-@ ', '-'),
    ('\\', ' \\ '),
)


def fixup(x):
    """
        Clean one raw text string.

        Applies the literal substitutions of `_FIXUP_SUBS` one after another,
        un-escapes HTML entities, then collapses every run of two or more
        spaces into a single space.

        :param x: raw text
        :return: cleaned text
    """
    for old, new in _FIXUP_SUBS:
        x = x.replace(old, new)
    unescaped = html.unescape(x)
    return re1.sub(' ', unescaped)


def get_texts(df, n_lbls=1):
    """
        Tokenise the text column of `df` and extract its label columns.

        The first `n_lbls` columns are read as labels and the column right after
        them as the text. The text column used to be hard-coded to index 1, which
        is only correct for n_lbls == 1.

        :param df: DataFrame whose first `n_lbls` columns are labels, followed by the text column
        :param n_lbls: number of leading label columns
        :return: (tok, labels): the tokenizer output and a list with one int64 label row per text
    """
    labels = df.iloc[:, range(n_lbls)].values.astype(np.int64)
    # Prefix every text with the BOS / field markers before cleaning it.
    texts = f'\n{BOS} {FLD} 1 ' + df.iloc[:, n_lbls].astype(str)
    texts = list(texts.apply(fixup).values)

    tok = text.Tokenizer().proc_all_mp(core.partition_by_cores(texts))
    return tok, list(labels)


def get_all(df, n_lbls):
    """
        Tokenise `df` via `get_texts`.

        :param df: DataFrame forwarded to `get_texts`
        :param n_lbls: number of label columns; now forwarded instead of being ignored
        :return: (tok, labels) exactly as returned by `get_texts`
    """
    tok, labels = get_texts(df, n_lbls)
    return tok, labels


def get_texts_org(path):
    """
        Read every file matching '*.*' under `path/<class>` for each class in CLASSES.

        :param path: pathlib.Path of a directory with one sub-directory per entry of CLASSES
        :return: (texts, labels) numpy arrays; labels[i] is the index in CLASSES of
            the folder texts[i] was read from
    """
    texts, labels = [], []
    for idx, label in enumerate(CLASSES):
        for fname in (path / label).glob('*.*'):
            # read_text opens, reads and closes the file; the previous
            # open().read() never closed the handle explicitly.
            texts.append(fname.read_text(encoding='utf-8'))
            labels.append(idx)
    return np.array(texts), np.array(labels)


def epoch_end_hook() -> None:
    """
        Reset the notebook-level `lr_schedule`.

        `lr_schedule` is a global looked up at call time, so the hook always acts
        on whichever scheduler the name is currently bound to.
    """
    lr_schedule.reset()


def eval(y_pred, y_true):
    """
        Accuracy of a batch of predictions.

        :param y_pred: tensor of shape (b, nc) with one score per class
        :param y_true: tensor of shape (b,) or (b, 1) with the true class indices
        :return: scalar tensor, the fraction of rows whose argmax equals the label
    """
    # Flatten the labels first: a (b, 1) y_true would otherwise broadcast against
    # the (b,) argmax into a (b, b) comparison and yield a wrong accuracy.
    return torch.mean((torch.argmax(y_pred, dim=1) == y_true.reshape(-1)).float())

<torch._C.Generator at 0x7f292343b330>

'\n    Paths and macros\n'

'\n    Model code\n'

'\n    Prepare data\n'

In [4]:
# Read the raw reviews; a label is the index of the review's folder in CLASSES.
trn_texts, trn_labels = get_texts_org(DATA_PATH / 'train')
val_texts, val_labels = get_texts_org(DATA_PATH / 'test')

# Lose label 2 from train
labelled = trn_labels < 2
trn_texts, trn_labels = trn_texts[labelled], trn_labels[labelled]

# Shuffle data (QUICK keeps only the first 1000 shuffled indices of each split)
np.random.seed(42)
n_keep = 1000 if QUICK else None
trn_idx = np.random.permutation(len(trn_texts))[:n_keep]
val_idx = np.random.permutation(len(val_texts))[:n_keep]

trn_texts, trn_labels = trn_texts[trn_idx], trn_labels[trn_idx]
val_texts, val_labels = val_texts[val_idx], val_labels[val_idx]

# Column order matters downstream: labels first, text second.
col_names = ['labels', 'text']
df_trn = pd.DataFrame(dict(text=trn_texts, labels=trn_labels), columns=col_names)
df_val = pd.DataFrame(dict(text=val_texts, labels=val_labels), columns=col_names)

# Vocabulary saved next to the unsupervised model: itos2 is index -> token,
# stoi2 the inverse mapping.
itos_path = UNSUP_MODEL_DIR / 'itos.pkl'
# NOTE: pickle can execute arbitrary code on load - only open files you trust.
# The with-block closes the file, which the previous bare open() never did.
with itos_path.open('rb') as itos_file:
    itos2 = pickle.load(itos_file)
stoi2 = {v: k for k, v in enumerate(itos2)}

# Tokenise both splits
trn_clas, trn_labels = get_all(df_trn, 1)
val_clas, val_labels = get_all(df_val, 1)

# Map tokens to vocabulary indices; tokens missing from the vocabulary map to 0.
trn_clas = np.array([[stoi2.get(w, 0) for w in para] for para in trn_clas])
val_clas = np.array([[stoi2.get(w, 0) for w in para] for para in val_clas])
# Flatten the per-row label arrays into plain lists
trn_labels = [x for y in trn_labels for x in y]
val_labels = [x for y in val_labels for x in y]

'''
    Make model
'''
dps = list(params.encoder_dropouts)
# Build the file name first, then join it to the directory. The previous
# `DIR / 'unsup_model_enc' + MODEL_SUFFIX + '.torch'` parsed as `(DIR / str) + str`
# because `/` binds tighter than `+`, which raises TypeError (Path + str).
enc_wgts_path = UNSUP_MODEL_DIR / f'unsup_model_enc{MODEL_SUFFIX}.torch'
# map_location keeps the loaded tensors on the storage they are deserialised to (CPU)
enc_wgts = torch.load(enc_wgts_path, map_location=lambda storage, loc: storage)
clf = TextClassifier(device, len(itos2), dps, enc_wgts=enc_wgts if PRETRAINED else None)

'''
    Setup things for training (data, loss, opt, lr schedule etc
'''
bs = params.bs
loss_fn = torch.nn.CrossEntropyLoss()

# Optimizer: Adam on every param group at 0.001, with the last group set to 0.01
opt_fn = partial(optim.Adam, betas=params.adam_betas)
opt = make_opt(clf, opt_fn, lr=0.001)
opt.param_groups[-1]['lr'] = 0.01

# Data iterator factory and the two splits it will be applied to
data_fn = partial(mtdi.SortishSampler, _batchsize=bs, _padidx=1)
data = dict(train=dict(x=trn_clas, y=trn_labels),
            valid=dict(x=val_clas, y=val_labels))

# Learning-rate schedule: one cosine cycle spanning one pass over the training iterator
lr_args = dict(iterations=len(data_fn(data['train'])), cycles=1)
lr_schedule = mtlr.LearningRateScheduler(optimizer=opt, lr_args=lr_args, lr_iterator=mtlr.CosineAnnealingLR)

# Keyword arguments handed to loops.generic_loop
args = dict(epochs=1, data=data, device=device,
            opt=opt, loss_fn=loss_fn, model=clf,
            train_fn=clf, predict_fn=clf.predict,
            epoch_end_hook=epoch_end_hook, weight_decay=params.weight_decay,
            clip_grads_at=params.clip_grads_at, lr_schedule=lr_schedule,
            data_fn=data_fn, eval_fn=eval)

'\n    Make model\n'

'\n    Setup things for training (data, loss, opt, lr schedule etc\n'

In [5]:
'''
    Training schedule:
    
    1. Unfreeze one layer. Train for 1 epoch
    2 - 5. Unfreeze one layer, train for 1 epoch
    3. Train for 15 epochs (after all layers are unfrozen). Use 15 cycles for cosine annealing.
'''
# Learning rates assigned to opt.param_groups[-1], [-2], ... [-5], in that order.
STAGE_GROUP_LRS = (0.01, 0.005, 0.001, 0.001, 0.001)
# One entry per stage: (number of trailing param groups whose lr is re-assigned, epochs).
# The epoch count is also used as the number of cosine-annealing cycles.
TRAINING_STAGES = ((1, 1), (2, 1), (3, 1), (4, 1), (5, 1), (5, 15))

traces = None
for n_groups, n_epochs in TRAINING_STAGES:
    # Re-assign the lrs because the previous stage's schedule has changed them.
    for depth, group_lr in enumerate(STAGE_GROUP_LRS[:n_groups], start=1):
        opt.param_groups[-depth]['lr'] = group_lr

    # Set both explicitly in every stage: the old copy-pasted version left
    # epochs=15 / cycles=15 behind, so re-running the cell started stage 1 with 15 epochs.
    lr_args['cycles'] = n_epochs
    args['epochs'] = n_epochs

    # A fresh schedule per stage; `lr_schedule` stays a notebook-level name because
    # epoch_end_hook resets whatever it is bound to.
    lr_schedule = mtlr.LearningRateScheduler(optimizer=opt, lr_args=lr_args, lr_iterator=mtlr.CosineAnnealingLR)
    args['lr_schedule'] = lr_schedule

    traces_new = loops.generic_loop(**args)
    # Append this stage's traces to the running ones, series by series.
    traces = traces_new if traces is None else [a+b for a, b in zip(traces, traces_new)]

'\n    Training schedule:\n    \n    1. Unfreeze one layer. Train for 1 epoch\n    2 - 5. Unfreeze one layer, train for 1 epoch\n    3. Train for 15 epochs (after all layers are unfrozen). Use 15 cycles for cosine annealing.\n'

100%|██████████| 1042/1042 [05:01<00:00,  3.43it/s]
100%|██████████| 1042/1042 [01:52<00:00,  9.28it/s]


Epoch: 000 | Loss: 0.31983 | Tr_c: 0.86428 | Vl_c: 0.91503 | Time: 5.050 min


100%|██████████| 1042/1042 [05:01<00:00,  3.78it/s]
100%|██████████| 1042/1042 [01:50<00:00,  9.42it/s]


Epoch: 000 | Loss: 0.23688 | Tr_c: 0.90811 | Vl_c: 0.91479 | Time: 5.049 min


100%|██████████| 1042/1042 [05:02<00:00,  3.19it/s]
100%|██████████| 1042/1042 [01:51<00:00,  7.57it/s]


Epoch: 000 | Loss: 0.23675 | Tr_c: 0.91001 | Vl_c: 0.91769 | Time: 5.066 min


 58%|█████▊    | 600/1042 [02:57<02:38,  2.79it/s]ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/data/priyansh/conda/anaconda3/envs/fastai/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-c285844546d7>", line 32, in <module>
    traces_new = loops.generic_loop(**args)
  File "/data/priyansh/lm-transferlearning/mytorch/loops.py", line 187, in generic_loop
    _x = torch.tensor(x, dtype=torch.long, device=device)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/data/priyansh/conda/anaconda3/envs/fastai/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 1863, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/data/priyansh/conda/anaco

KeyboardInterrupt: 

In [None]:
# Artefacts to store in the model directory: the classifier weights via torch,
# the training traces and the options object via pickle.
# NOTE(review): the options file is named 'unsup_options.pkl' although `params`
# is the Phase3 options object - confirm this does not overwrite a file from an earlier phase.
torch_artifacts = [tosave('sup_model.torch', clf.state_dict())]
pickle_artifacts = [tosave('final_sup_traces.pkl', traces), tosave('unsup_options.pkl', params)]
mt_save(UNSUP_MODEL_DIR, torch_stuff=torch_artifacts, pickle_stuff=pickle_artifacts)

In [None]:
# Display the directory that was passed to mt_save in the previous cell
UNSUP_MODEL_DIR

In [None]:
from matplotlib import pyplot as plt
def plot(trcs):
    """
        Plot every column of a 2-D trace array as its own line.

        :param trcs: array indexable as trcs[:, l] (e.g. a 2-D numpy array);
            each column is drawn as one series labelled "layer <l>"
    """
    layers = len(trcs[0])
    for l in range(layers):
        plt.plot(trcs[:,l], label=f"layer {l}")
    # The labels above are only rendered when a legend is drawn.
    plt.legend()
    plt.show()
    
# Plot the last trace series, skipping its first 100 entries
plot(np.asarray(traces[-1][100:]))

In [None]:
# Keep only the last value of every entry of the last trace series, as a one-column array
last_values = [[entry[-1]] for entry in traces[-1]]
plot(np.asarray(last_values))

In [None]:
print(lr_args)
# Preview a 5-cycle schedule on a COPY of the arguments and under separate names.
# The earlier version edited `lr_args` in place and rebound the global `lr_schedule`
# (the one epoch_end_hook resets), which silently changed later training runs.
# NOTE(review): the scheduler is still built on `opt`; if get() writes learning rates
# into the optimizer, re-assign them before training again - confirm.
preview_lr_args = dict(lr_args, cycles=5, iterations=42*15)
preview_schedule = mtlr.LearningRateScheduler(optimizer=opt, lr_args=preview_lr_args, lr_iterator=mtlr.CosineAnnealingLR)
lrs = []
# Pull values until the scheduler signals exhaustion with CustomError
while True:
    try:
        lrs.append(preview_schedule.get())
    except CustomError:
        break
plot(np.asarray(lrs))

In [None]:
from matplotlib import pyplot as plt
from matplotlib import style as pltstyle
%pylab inline
# Default figure size (width, height) for figures created after this line
pylab.rcParams['figure.figsize'] = (16, 8)

def plot_accs(tra, vla, style=None):
    """
        Draw training and validation accuracy curves on one 16x8 figure.

        :param tra: sequence of training accuracies
        :param vla: sequence of validation accuracies
        :param style: matplotlib style name; 'seaborn-deep' is used when falsy
    """
    pltstyle.use(style or 'seaborn-deep')
    fig = plt.figure(figsize=(16, 8))
    ax = fig.add_axes((0.1, 0.2, 0.8, 0.7))
    # Hide the right and top borders of the axes
    for side in ('right', 'top'):
        ax.spines[side].set_color('none')
    for series, name in ((tra, "Train Acc"), (vla, "Valid Acc")):
        plt.plot(series, label=name, linewidth=3)
    plt.legend()
    plt.show()
    
def plot(trcs):
    """
        Plot every column of a 2-D trace array as its own line.

        NOTE: a function with this name is also defined in an earlier cell;
        running this cell rebinds `plot` to this definition.

        :param trcs: array indexable as trcs[:, l] (e.g. a 2-D numpy array);
            each column is drawn as one series labelled "layer <l>"
    """
    layers = len(trcs[0])
    for l in range(layers):
        plt.plot(trcs[:,l], label=f"layer {l}")
    # The labels above are only rendered when a legend is drawn.
    plt.legend()
    plt.show()
    
# traces[0] and traces[2] are passed as the train / validation accuracy series -
# presumably the order in which generic_loop returns them; verify against mytorch.loops
plot_accs(traces[0], traces[2])