In [5]:
import torch
import torch.nn as nn
from torch import optim
import time, random
import os
from tqdm import tqdm
from lstm import LSTMSentiment
from bilstm import BiLSTMSentiment
from torchtext import data
import numpy as np
import argparse
from sklearn.metrics import accuracy_score,f1_score

torch.set_num_threads(8)
torch.manual_seed(1)
random.seed(1)





def get_accuracy(truth, pred):
    assert len(truth) == len(pred)
    return accuracy_score(truth,pred)

def get_f1(truth,pred):
    assert len(truth) == len(pred)
    return f1_score(truth,pred,average='weighted')

def train_epoch_progress(model, train_iter, loss_function, optimizer, text_field, label_field, epoch):
    model.train()
    avg_loss = 0.0
    truth_res = []
    pred_res = []
    count = 0
    for batch in tqdm(train_iter, desc='Train epoch '+str(epoch+1)):
        sent, label = batch.text, batch.label
        label.data.sub_(1)
        truth_res += list(label.data)
        model.batch_size = len(label.data)
        model.hidden = model.init_hidden()
        pred = model(sent)
        pred_label = pred.data.max(1)[1].numpy()
        pred_res += [x for x in pred_label]
        model.zero_grad()
        loss = loss_function(pred, label)
        avg_loss += loss.data[0]
        count += 1
        loss.backward()
        optimizer.step()
    avg_loss /= len(train_iter)
    acc = get_accuracy(truth_res, pred_res)
    f1 = get_f1(truth_res, pred_res)
    return avg_loss, acc,f1


def train_epoch(model, train_iter, loss_function, optimizer):
    model.train()
    avg_loss = 0.0
    truth_res = []
    pred_res = []
    count = 0
    for batch in train_iter:
        sent, label = batch.text, batch.label
        label.data.sub_(1)
        truth_res += list(label.data)
        model.batch_size = len(label.data)
        model.hidden = model.init_hidden()
        pred = model(sent)
        pred_label = pred.data.max(1)[1].numpy()
        pred_res += [x for x in pred_label]
        model.zero_grad()
        loss = loss_function(pred, label)
        avg_loss += loss.data[0]
        count += 1
        loss.backward()
        optimizer.step()
    avg_loss /= len(train_iter)
    acc = get_accuracy(truth_res, pred_res)
    f1 = get_f1(truth_res, pred_res)
    return avg_loss, acc,f1


def evaluate(model, data, loss_function, name):
    model.eval()
    avg_loss = 0.0
    truth_res = []
    pred_res = []
    for batch in data:
        sent, label = batch.text, batch.label
        label.data.sub_(1)
        truth_res += list(label.data)
        model.batch_size = len(label.data)
        model.hidden = model.init_hidden()
        pred = model(sent)
        pred_label = pred.data.max(1)[1].numpy()
        pred_res += [x for x in pred_label]
        loss = loss_function(pred, label)
        avg_loss += loss.data[0]
    avg_loss /= len(data)
    acc = get_accuracy(truth_res, pred_res)
    f1 = get_f1(truth_res, pred_res)
    print(name + ': loss %.2f acc %.1f f1 %.2f' % (avg_loss, acc*100,f1))
    return acc,f1


def load_sst(text_field, label_field, batch_size):
    train, dev, test = data.TabularDataset.splits(path='./data/SST2/', train='train.csv',
                                                  validation='dev.csv', test='test.csv', format='csv',
                                                  fields=[('text', text_field), ('label', label_field)])
    text_field.build_vocab(train, dev, test)
    label_field.build_vocab(train, dev, test)
    train_iter, dev_iter, test_iter = data.BucketIterator.splits((train, dev, test),
                batch_sizes=(batch_size, len(dev), len(test)), sort_key=lambda x: len(x.text), repeat=False, device=-1)
    return train_iter, dev_iter, test_iter


# def adjust_learning_rate(learning_rate, optimizer, epoch):
#     lr = learning_rate * (0.1 ** (epoch // 10))
#     for param_group in optimizer.param_groups:
#         param_group['lr'] = lr
#     return optimizer






In [6]:
EPOCHS = 10
USE_GPU = torch.cuda.is_available()
EMBEDDING_DIM = 300
HIDDEN_DIM = 150

BATCH_SIZE = 5
timestamp = str(int(time.time()))
best_dev_acc = 0.0


text_field = data.Field(lower=True)
label_field = data.Field(sequential=False)
train_iter, dev_iter, test_iter = load_sst(text_field, label_field, BATCH_SIZE)



In [7]:
vars(label_field.vocab)

{'freqs': Counter({'0': 19347,
          '1': 9808,
          '2': 730,
          '3': 153,
          '4': 906,
          '5': 1187,
          'label': 3}),
 'itos': ['<unk>', '0', '1', '5', '4', '2', '3', 'label'],
 'stoi': defaultdict(<function torchtext.vocab._default_unk_index>,
             {'0': 1,
              '1': 2,
              '2': 5,
              '3': 6,
              '4': 4,
              '5': 3,
              '<unk>': 0,
              'label': 7}),
 'vectors': None}

In [None]:
#load dictionary
import pickle
emoji_dict =pickle.load(open('./data/SST2/emoji_dict.p','rb'))
print("emoji dictionary load successfully")
# load embedding
word_to_idx = text_field.vocab.stoi
pretrained_embeddings = np.random.uniform(-0.25, 0.25, (len(text_field.vocab), 300))
pretrained_embeddings[0] = 0
#word2vec = load_bin_vec('./data/GoogleNews-vectors-negative300.bin', word_to_idx)
from gensim.models.keyedvectors import KeyedVectors
emoji2vec =KeyedVectors.load_word2vec_format('./embedding/emoji2vec.bin', binary=True)

for word in emoji2vec.vocab:
    s = word.encode('unicode-escape').decode('ASCII')
    if s in emoji_dict.keys():
        print(s)
        pretrained_embeddings[word_to_idx[emoji_dict[s]]-1] = emoji2vec[word]
print("emoji2vec load successfully")




word2vec= KeyedVectors.load_word2vec_format('./embedding/GoogleNews-vectors-negative300.bin', binary=True)
for word in word2vec.vocab:
    pretrained_embeddings[word_to_idx[word]-1] = word2vec[word]
print("word2vec load successfully")


emoticon2vec = KeyedVectors.load_word2vec_format('./embedding/emoticon2vec.txt', binary=False)

for word in emoticon2vec.vocab:
    pretrained_embeddings[word_to_idx[word]-1] = emoticon2vec[word]
print("emoticon2vec load successfully")    
print('Loading complete')

emoji dictionary load successfully
\U0001f454
\U0001f300
\U0001f6be
\U0001f479
\U0001f6bb
\U0001f46c
\U0001f3a7
\U0001f43d
\U0001f69c
\u264b
\U0001f4c5
\U0001f488
\U0001f378
\U0001f937
\U0001f302
\U0001f693
\U0001f364
\U0001f498
\U0001f694
\U0001f45a
\U0001f427
\U0001f365
\U0001f375
\U0001f453
\u26d4
\U0001f615
\U0001f38e
\U0001f3ca\U0001f3fb
\u2757
\U0001f4ad
\U0001f4ac
\u2696
\U0001f1ee\U0001f1e9
\U0001f621
\U0001f1e8\U0001f1ed
\U0001f366
\U0001f310
\U0001f3a1
\U0001f513
\U0001f3bb
\U0001f639
\U0001f47d
\U0001f31d
\U0001f1ed\U0001f1fa
\U0001f406
\U0001f494
\U0001f447\U0001f3fd
\U0001f346
\U0001f416
\U0001f62e
\u231a
\U0001f47b
\U0001f5fb
\U0001f1e8\U0001f1fa
\U0001f61f
\U0001f4a3
\U0001f481
\U0001f980
\U0001f3a9
\U0001f449\U0001f3fe
\U0001f64a
\U0001f35a
\U0001f359
\U0001f6af
\U0001f1eb\U0001f1f7
\U0001f478\U0001f3fc
\U0001f63f
\U0001f632
\U0001f6ac
\U0001f485
\U0001f3c0
\U0001f377
\U0001f648
\U0001f46d
\U0001f36f
\U0001f3f0
\U0001f5a8
\U0001f368
\U0001f197
\U0001f629
\U0001f389
\U00

In [8]:
model = LSTMSentiment(embedding_dim=EMBEDDING_DIM, hidden_dim=HIDDEN_DIM, vocab_size=len(text_field.vocab), label_size=len(label_field.vocab)-1,\
                          use_gpu=USE_GPU, batch_size=BATCH_SIZE)


In [6]:
model.embeddings.weight.data.copy_(torch.from_numpy(pretrained_embeddings))




 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
-0.1564  0.1313 -0.0573  ...   0.2062 -0.0458  0.1574
 0.0801  0.1050  0.0498  ...   0.0037  0.0476 -0.0688
          ...             ⋱             ...          
 0.1075  0.0449  0.2172  ...  -0.1078  0.1145  0.2146
-0.2470 -0.1418 -0.0502  ...  -0.0088 -0.0527 -0.1548
-0.0400  0.0009 -0.0988  ...   0.0423 -0.0371 -0.0146
[torch.FloatTensor of size 51322x300]

In [7]:
#define optimizer and loss function
best_model = model
optimizer = optim.Adam(model.parameters(), lr=1e-3)
loss_function = nn.NLLLoss()



In [8]:
print('Training...')
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
for epoch in range(EPOCHS):
    avg_loss, acc,f1 = train_epoch_progress(model, train_iter, loss_function, optimizer, text_field, label_field, epoch)
    tqdm.write('Train: loss %.2f acc %.1f f1 %.3f' % (avg_loss, acc*100,f1))
    dev_acc , dev_f1= evaluate(model, dev_iter, loss_function, 'Dev')
    if dev_acc > best_dev_acc:
        if best_dev_acc > 0:
            os.system('rm '+ out_dir + '/best_model' + '.pth')
        best_dev_acc = dev_acc
        best_model = model
        torch.save(best_model.state_dict(), out_dir + '/best_model' + '.pth')
        # evaluate on test with the best dev performance model
        test_acc,test_f1 = evaluate(best_model, test_iter, loss_function, 'Test')
test_acc,test_f1 = evaluate(best_model, test_iter, loss_function, 'Final Test')




Train epoch 1:   0%|          | 0/4049 [00:00<?, ?it/s]

Training...
Writing to /Users/nihaozheng/Desktop/NLP/project/model/pytorch-sentiment-classification-master/runs/1523664846



  log_probs = F.log_softmax(y)
Train epoch 1: 100%|██████████| 4049/4049 [1:10:20<00:00,  1.04s/it]
  'precision', 'predicted', average, warn_for)


Train: loss 0.84 acc 67.0 f1 0.636




Dev: loss 1.19 acc 42.7 f1 0.37


Train epoch 2:   0%|          | 0/4049 [00:00<?, ?it/s]

Test: loss 1.24 acc 33.7 f1 0.21


Train epoch 2: 100%|██████████| 4049/4049 [1:35:40<00:00,  1.42s/it]


Train: loss 0.41 acc 86.0 f1 0.855
Dev: loss 0.92 acc 59.3 f1 0.58


Train epoch 3:   0%|          | 0/4049 [00:00<?, ?it/s]

Test: loss 1.02 acc 50.5 f1 0.48


Train epoch 3: 100%|██████████| 4049/4049 [1:23:58<00:00,  1.24s/it]


Train: loss 0.21 acc 92.3 f1 0.922
Dev: loss 0.78 acc 69.0 f1 0.69


Train epoch 4:   0%|          | 0/4049 [00:00<?, ?it/s]

Test: loss 0.80 acc 66.0 f1 0.66


Train epoch 4: 100%|██████████| 4049/4049 [1:03:23<00:00,  1.06it/s]


Train: loss 0.13 acc 94.2 f1 0.942
Dev: loss 0.79 acc 71.1 f1 0.72


Train epoch 5:   0%|          | 0/4049 [00:00<?, ?it/s]

Test: loss 0.84 acc 68.2 f1 0.69


Train epoch 5: 100%|██████████| 4049/4049 [56:04<00:00,  1.20it/s]


Train: loss 0.11 acc 94.8 f1 0.948


Train epoch 6:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 1.17 acc 62.5 f1 0.67


Train epoch 6: 100%|██████████| 4049/4049 [3:25:20<00:00,  3.04s/it]  


Train: loss 0.09 acc 95.0 f1 0.950
Dev: loss 0.93 acc 72.2 f1 0.72


Train epoch 7:   0%|          | 0/4049 [00:00<?, ?it/s]

Test: loss 0.98 acc 69.4 f1 0.70


Train epoch 7: 100%|██████████| 4049/4049 [2:59:05<00:00,  2.65s/it]  


Train: loss 0.08 acc 95.2 f1 0.952


Train epoch 8:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 1.06 acc 70.0 f1 0.71


Train epoch 8: 100%|██████████| 4049/4049 [54:36<00:00,  1.24it/s]


Train: loss 0.08 acc 95.5 f1 0.955


Train epoch 9:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 1.16 acc 67.7 f1 0.69


Train epoch 9: 100%|██████████| 4049/4049 [3:15:39<00:00,  2.90s/it]  


Train: loss 0.08 acc 95.2 f1 0.952


Train epoch 10:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 1.53 acc 57.4 f1 0.62


Train epoch 10: 100%|██████████| 4049/4049 [1:13:20<00:00,  1.09s/it]


Train: loss 0.07 acc 95.6 f1 0.955
Dev: loss 1.09 acc 70.2 f1 0.71
Final Test: loss 1.15 acc 66.8 f1 0.69


# try different dim of hidden layer (after load dictionary)

In [9]:
EPOCHS = 10
USE_GPU = torch.cuda.is_available()
EMBEDDING_DIM = 300
HIDDEN_DIM = 500

BATCH_SIZE = 5
timestamp = str(int(time.time()))
best_dev_acc = 0.0


text_field = data.Field(lower=True)
label_field = data.Field(sequential=False)
train_iter, dev_iter, test_iter = load_sst(text_field, label_field, BATCH_SIZE)




In [10]:
model = LSTMSentiment(embedding_dim=EMBEDDING_DIM, hidden_dim=HIDDEN_DIM, vocab_size=len(text_field.vocab), label_size=len(label_field.vocab)-1,\
                          use_gpu=USE_GPU, batch_size=BATCH_SIZE)



In [11]:
model.embeddings.weight.data.copy_(torch.from_numpy(pretrained_embeddings))





 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
-0.1564  0.1313 -0.0573  ...   0.2062 -0.0458  0.1574
 0.0801  0.1050  0.0498  ...   0.0037  0.0476 -0.0688
          ...             ⋱             ...          
 0.1075  0.0449  0.2172  ...  -0.1078  0.1145  0.2146
-0.2470 -0.1418 -0.0502  ...  -0.0088 -0.0527 -0.1548
-0.0400  0.0009 -0.0988  ...   0.0423 -0.0371 -0.0146
[torch.FloatTensor of size 51322x300]

In [12]:
#define optimizer and loss function
best_model = model
optimizer = optim.Adam(model.parameters(), lr=1e-3)
loss_function = nn.NLLLoss()




In [13]:
print('Training...')
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
for epoch in range(EPOCHS):
    avg_loss, acc,f1 = train_epoch_progress(model, train_iter, loss_function, optimizer, text_field, label_field, epoch)
    tqdm.write('Train: loss %.2f acc %.1f f1 %.3f' % (avg_loss, acc*100,f1))
    dev_acc , dev_f1= evaluate(model, dev_iter, loss_function, 'Dev')
    if dev_acc > best_dev_acc:
        if best_dev_acc > 0:
            os.system('rm '+ out_dir + '/best_model' + '.pth')
        best_dev_acc = dev_acc
        best_model = model
        torch.save(best_model.state_dict(), out_dir + '/best_model' + '.pth')
        # evaluate on test with the best dev performance model
        test_acc,test_f1 = evaluate(best_model, test_iter, loss_function, 'Test')
test_acc,test_f1 = evaluate(best_model, test_iter, loss_function, 'Final Test')





Train epoch 1:   0%|          | 0/4049 [00:00<?, ?it/s]

Training...
Writing to /Users/nihaozheng/Desktop/NLP/project/model/pytorch-sentiment-classification-master/runs/1523730993



  log_probs = F.log_softmax(y)
Train epoch 1: 100%|██████████| 4049/4049 [42:42<00:00,  1.58it/s]
  'precision', 'predicted', average, warn_for)


Train: loss 0.89 acc 65.0 f1 0.605




Dev: loss 1.15 acc 65.0 f1 0.56


Train epoch 2:   0%|          | 0/4049 [00:00<?, ?it/s]

Test: loss 1.20 acc 62.5 f1 0.50


Train epoch 2: 100%|██████████| 4049/4049 [1:14:23<00:00,  1.10s/it]


Train: loss 0.48 acc 83.4 f1 0.822
Dev: loss 0.87 acc 67.3 f1 0.66


Train epoch 3:   0%|          | 0/4049 [00:00<?, ?it/s]

Test: loss 0.94 acc 64.7 f1 0.63


Train epoch 3: 100%|██████████| 4049/4049 [1:34:31<00:00,  1.40s/it]


Train: loss 0.25 acc 91.2 f1 0.910
Dev: loss 0.72 acc 74.5 f1 0.73


Train epoch 4:   0%|          | 0/4049 [00:00<?, ?it/s]

Test: loss 0.74 acc 72.8 f1 0.70


Train epoch 4: 100%|██████████| 4049/4049 [2:34:58<00:00,  2.30s/it]  


Train: loss 0.16 acc 93.6 f1 0.936


Train epoch 5:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 1.36 acc 52.9 f1 0.61


Train epoch 5: 100%|██████████| 4049/4049 [2:14:51<00:00,  2.00s/it]  


Train: loss 0.12 acc 94.3 f1 0.943


Train epoch 6:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 1.01 acc 65.8 f1 0.69


Train epoch 6: 100%|██████████| 4049/4049 [2:55:46<00:00,  2.60s/it]


Train: loss 0.10 acc 94.9 f1 0.949


Train epoch 7:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 1.14 acc 65.8 f1 0.69


Train epoch 7: 100%|██████████| 4049/4049 [2:56:47<00:00,  2.62s/it]  


Train: loss 0.09 acc 95.1 f1 0.951


Train epoch 8:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 0.89 acc 73.9 f1 0.73


Train epoch 8: 100%|██████████| 4049/4049 [3:59:37<00:00,  3.55s/it]  


Train: loss 0.08 acc 95.4 f1 0.954


Train epoch 9:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 1.02 acc 68.8 f1 0.71


Train epoch 9: 100%|██████████| 4049/4049 [2:28:48<00:00,  2.21s/it]  


Train: loss 0.08 acc 95.4 f1 0.954


Train epoch 10:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 0.93 acc 70.4 f1 0.70


Train epoch 10: 100%|██████████| 4049/4049 [2:10:45<00:00,  1.94s/it]  


Train: loss 0.08 acc 95.5 f1 0.955
Dev: loss 0.87 acc 72.7 f1 0.73
Final Test: loss 0.89 acc 71.6 f1 0.72


# try L2 penalty on 500 hidden

In [5]:
EPOCHS = 10
USE_GPU = torch.cuda.is_available()
EMBEDDING_DIM = 300
HIDDEN_DIM = 500

BATCH_SIZE = 5
timestamp = str(int(time.time()))
best_dev_acc = 0.0


text_field = data.Field(lower=True)
label_field = data.Field(sequential=False)
train_iter, dev_iter, test_iter = load_sst(text_field, label_field, BATCH_SIZE)





In [6]:
model = LSTMSentiment(embedding_dim=EMBEDDING_DIM, hidden_dim=HIDDEN_DIM, vocab_size=len(text_field.vocab), label_size=len(label_field.vocab)-1,\
                          use_gpu=USE_GPU, batch_size=BATCH_SIZE)




In [7]:
model.embeddings.weight.data.copy_(torch.from_numpy(pretrained_embeddings))






 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 0.0452  0.0593  0.0739  ...  -0.0481 -0.1746  0.1170
 0.0801  0.1050  0.0498  ...   0.0037  0.0476 -0.0688
          ...             ⋱             ...          
-0.2312  0.0737 -0.1603  ...   0.2040  0.1060  0.1579
 0.0512 -0.1998 -0.0526  ...   0.1661  0.1353  0.1909
-0.0400  0.0009 -0.0988  ...   0.0423 -0.0371 -0.0146
[torch.FloatTensor of size 51322x300]

In [8]:
#define optimizer and loss function
# with smaller lr and add regularizaition
best_model = model
optimizer = optim.Adam(model.parameters(), lr=1e-4,weight_decay=1e-5)
loss_function = nn.NLLLoss()





In [9]:
print('Training...')
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
for epoch in range(EPOCHS):
    avg_loss, acc,f1 = train_epoch_progress(model, train_iter, loss_function, optimizer, text_field, label_field, epoch)
    tqdm.write('Train: loss %.2f acc %.1f f1 %.3f' % (avg_loss, acc*100,f1))
    dev_acc , dev_f1= evaluate(model, dev_iter, loss_function, 'Dev')
    if dev_acc > best_dev_acc:
        if best_dev_acc > 0:
            os.system('rm '+ out_dir + '/best_model' + '.pth')
        best_dev_acc = dev_acc
        best_model = model
        torch.save(best_model.state_dict(), out_dir + '/best_model' + '.pth')
        # evaluate on test with the best dev performance model
        test_acc,test_f1 = evaluate(best_model, test_iter, loss_function, 'Test')
test_acc,test_f1 = evaluate(best_model, test_iter, loss_function, 'Final Test')






Train epoch 1:   0%|          | 0/4049 [00:00<?, ?it/s]

Training...
Writing to /Users/nihaozheng/Desktop/NLP/project/model/pytorch-sentiment-classification-master/runs/1523824793



  log_probs = F.log_softmax(y)
Train epoch 1: 100%|██████████| 4049/4049 [46:34<00:00,  1.45it/s]
  'precision', 'predicted', average, warn_for)


Train: loss 0.97 acc 61.7 f1 0.536




Dev: loss 1.16 acc 30.6 f1 0.15


Train epoch 2:   0%|          | 0/4049 [00:00<?, ?it/s]

Test: loss 1.16 acc 30.6 f1 0.14


Train epoch 2: 100%|██████████| 4049/4049 [54:01<00:00,  1.25it/s]


Train: loss 0.76 acc 71.4 f1 0.685


Train epoch 3:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 1.30 acc 30.6 f1 0.14


Train epoch 3: 100%|██████████| 4049/4049 [1:25:04<00:00,  1.26s/it]


Train: loss 0.58 acc 79.2 f1 0.768
Dev: loss 1.04 acc 60.2 f1 0.46


Train epoch 4:   0%|          | 0/4049 [00:00<?, ?it/s]

Test: loss 1.05 acc 60.2 f1 0.45


Train epoch 4: 100%|██████████| 4049/4049 [1:09:21<00:00,  1.03s/it]


Train: loss 0.44 acc 84.5 f1 0.829


Train epoch 5:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 1.88 acc 31.1 f1 0.15


Train epoch 5: 100%|██████████| 4049/4049 [1:10:26<00:00,  1.04s/it]


Train: loss 0.35 acc 87.9 f1 0.871
Dev: loss 0.99 acc 69.6 f1 0.67


Train epoch 6:   0%|          | 0/4049 [00:00<?, ?it/s]

Test: loss 0.97 acc 69.3 f1 0.65


Train epoch 6: 100%|██████████| 4049/4049 [1:09:28<00:00,  1.03s/it]


Train: loss 0.29 acc 90.0 f1 0.895


Train epoch 7:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 1.69 acc 38.4 f1 0.29


Train epoch 7: 100%|██████████| 4049/4049 [1:07:21<00:00,  1.00it/s]


Train: loss 0.25 acc 91.1 f1 0.908


Train epoch 8:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 0.93 acc 65.4 f1 0.65


Train epoch 8: 100%|██████████| 4049/4049 [2:39:09<00:00,  2.36s/it]  


Train: loss 0.22 acc 92.0 f1 0.918


Train epoch 9:   0%|          | 0/4049 [00:00<?, ?it/s]

Dev: loss 0.91 acc 62.2 f1 0.61


Train epoch 9: 100%|██████████| 4049/4049 [3:30:42<00:00,  3.12s/it]  


Train: loss 0.19 acc 92.6 f1 0.925
Dev: loss 1.13 acc 71.9 f1 0.70


Train epoch 10:   0%|          | 0/4049 [00:00<?, ?it/s]

Test: loss 1.22 acc 70.4 f1 0.69


Train epoch 10: 100%|██████████| 4049/4049 [1:22:15<00:00,  1.22s/it]


Train: loss 0.18 acc 93.3 f1 0.932
Dev: loss 1.69 acc 52.8 f1 0.51
Final Test: loss 1.91 acc 45.8 f1 0.43


# no emoji embedding

In [10]:
#load dictionary
import pickle
emoji_dict =pickle.load(open('./data/SST2/emoji_dict.p','rb'))
print("emoji dictionary load successfully")
# load embedding
word_to_idx = text_field.vocab.stoi
pretrained_embeddings = np.random.uniform(-0.25, 0.25, (len(text_field.vocab), 300))
pretrained_embeddings[0] = 0
#word2vec = load_bin_vec('./data/GoogleNews-vectors-negative300.bin', word_to_idx)
from gensim.models.keyedvectors import KeyedVectors
word2vec= KeyedVectors.load_word2vec_format('./embedding/GoogleNews-vectors-negative300.bin', binary=True)
for word in word2vec.vocab:
    pretrained_embeddings[word_to_idx[word]-1] = word2vec[word]
print("word2vec load successfully")
emoji2vec =KeyedVectors.load_word2vec_format('./embedding/emoji2vec.bin', binary=True)
for word in emoji2vec.vocab:
    if word in emoji_dict.keys():
        pretrained_embeddings[word_to_idx[emoji_dict[word]]-1] = np.zeros(300)
print("emoji2vec set to 0 successfully")
emoticon2vec = KeyedVectors.load_word2vec_format('./embedding/emoticon2vec.txt', binary=False)



for word in emoticon2vec.vocab:
    pretrained_embeddings[word_to_idx[word]-1] = emoticon2vec[word]
print("emoticon2vec load successfully")    
print('Loading complete')

emoji dictionary load successfully
word2vec load successfully
emoji2vec set to 0 successfully
emoticon2vec load successfully
Loading complete


In [11]:
EPOCHS = 10
USE_GPU = torch.cuda.is_available()
EMBEDDING_DIM = 300
HIDDEN_DIM = 500

BATCH_SIZE = 5
timestamp = str(int(time.time()))
best_dev_acc = 0.0


text_field = data.Field(lower=True)
label_field = data.Field(sequential=False)
train_iter, dev_iter, test_iter = load_sst(text_field, label_field, BATCH_SIZE)






In [12]:
model = LSTMSentiment(embedding_dim=EMBEDDING_DIM, hidden_dim=HIDDEN_DIM, vocab_size=len(text_field.vocab), label_size=len(label_field.vocab)-1,\
                          use_gpu=USE_GPU, batch_size=BATCH_SIZE)



In [13]:
model.embeddings.weight.data.copy_(torch.from_numpy(pretrained_embeddings))







 0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 0.1256 -0.1908  0.0809  ...  -0.2132 -0.2020 -0.0540
 0.0801  0.1050  0.0498  ...   0.0037  0.0476 -0.0688
          ...             ⋱             ...          
 0.0373 -0.0412  0.0839  ...  -0.0790 -0.2086 -0.1758
 0.2105 -0.0310 -0.2465  ...   0.0832  0.2307 -0.0782
-0.0400  0.0009 -0.0988  ...   0.0423 -0.0371 -0.0146
[torch.FloatTensor of size 51322x300]

In [15]:
#define optimizer and loss function
best_model = model
optimizer = optim.Adam(model.parameters(), lr=1e-3)
loss_function = nn.NLLLoss()





In [16]:
print('Training...')
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
for epoch in range(EPOCHS):
    avg_loss, acc,f1 = train_epoch_progress(model, train_iter, loss_function, optimizer, text_field, label_field, epoch)
    tqdm.write('Train: loss %.2f acc %.1f f1 %.3f' % (avg_loss, acc*100,f1))
    dev_acc , dev_f1= evaluate(model, dev_iter, loss_function, 'Dev')
    if dev_acc > best_dev_acc:
        if best_dev_acc > 0:
            os.system('rm '+ out_dir + '/best_model' + '.pth')
        best_dev_acc = dev_acc
        best_model = model
        torch.save(best_model.state_dict(), out_dir + '/best_model' + '.pth')
        # evaluate on test with the best dev performance model
        test_acc,test_f1 = evaluate(best_model, test_iter, loss_function, 'Test')
test_acc,test_f1 = evaluate(best_model, test_iter, loss_function, 'Final Test')







Train epoch 1:   0%|          | 0/4049 [00:00<?, ?it/s]

Training...
Writing to /Users/nihaozheng/Desktop/NLP/project/model/pytorch-sentiment-classification-master/runs/1523887552



  log_probs = F.log_softmax(y)
Train epoch 1: 100%|██████████| 4049/4049 [50:00<00:00,  1.35it/s]
  'precision', 'predicted', average, warn_for)


Train: loss 0.87 acc 66.4 f1 0.622




Dev: loss 1.28 acc 39.2 f1 0.31


Train epoch 2:   0%|          | 0/4049 [00:00<?, ?it/s]

Test: loss 1.38 acc 33.7 f1 0.21


Train epoch 2:   1%|          | 36/4049 [00:39<1:13:00,  1.09s/it]

KeyboardInterrupt: 

In [1]:
!ls

BiLSTM.ipynb   [1m[31mREADME.md[m[m      [1m[36mdata[m[m           [1m[31mlstm.py[m[m
LSTM.ipynb     [1m[36m__pycache__[m[m    [1m[31mdata_helper.py[m[m [1m[36mruns[m[m
Other ML.ipynb [1m[31mbilstm.py[m[m      [1m[36membedding[m[m      [1m[31mtrain_batch.py[m[m


In [2]:
! python train_batch.py 

Traceback (most recent call last):
  File "train_batch.py", line 160, in <module>
    train_iter, dev_iter, test_iter = load_sst(text_field, label_field, BATCH_SIZE)
  File "train_batch.py", line 131, in load_sst
    label_field.build_vocab(train, dev, test)
  File "/Users/nihaozheng/anaconda/lib/python3.6/site-packages/torchtext/data/field.py", line 248, in build_vocab
    for x in data:
  File "/Users/nihaozheng/anaconda/lib/python3.6/site-packages/torchtext/data/dataset.py", line 96, in __getattr__
    yield getattr(x, attr)
AttributeError: 'Example' object has no attribute 'label'
