In [1]:
import pandas as pd
import numpy as np
import math
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
# from sklearn.model_selection import train_test_split
from transformers import BertTokenizer
from transformers import AutoModel
from datasets import ClassLabel
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertForSequenceClassification
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup
from torch.nn.utils import clip_grad_norm_
from nltk.corpus import stopwords
import nltk

In [2]:
stop_words = stopwords.words('english')

Read in data...

In [3]:
df = pd.read_csv('big_df.csv').drop(columns = ['Unnamed: 0'])

  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
df_images = pd.read_csv('big_df_image_descriptions.csv').drop(columns = ['Unnamed: 0'])

In [5]:
print(set(df_images.Pokemon).difference(set(df.Pokemon)))
df_images = df_images.replace({'Flabe╠übe╠ü': 'Flabébé',
                   'NidoranΓÖÇ': 'Nidoran♀',
                   'NidoranΓÖé':'Nidoran♂' })
print(set(df_images.Pokemon).difference(set(df.Pokemon)))

{'NidoranΓÖÇ', 'NidoranΓÖé', 'Flabe╠übe╠ü'}
set()


Before concatting, trim the image descriptions for phrases resembling 'a cartoon character...'

In [6]:
def clean_phrase(text):
    for phrase in [
    'a cartoon character',
    'character',
    'cartoon',
    'paper cut out',
    'stylized image',
    'image',
    'cute',
    'very cute',
    'drawing',
    'animated',
    'sketch',
    'toon',
    'object',
    'information technology',
    'pikachu',
    'picture']:
        if phrase in text:
            text = text.replace(phrase,"")
    return text

df_images.value = df_images.value.apply(lambda x: clean_phrase(x))

In [7]:
phys = pd.read_csv('big_df_physiology.csv').drop(columns = ['Unnamed: 0'])
behavior = pd.read_csv('big_df_behavior.csv').drop(columns = ['Unnamed: 0'])

phys = phys.replace('Ho-Oh', 'Ho-oh')
behavior = behavior.replace('Ho-Oh', 'Ho-oh')

In [8]:
phys

Unnamed: 0,Pokemon,variable,value
0,Bulbasaur,physiology,"Bulbasaur resembles a small amphibian/frog, bu..."
1,Bulbasaur,physiology,"It also has large, red eyes and small, sharp ..."
2,Bulbasaur,physiology,"Its skin is a light, turquoise color with dar..."
3,Bulbasaur,physiology,It has three claws on all four of its legs
4,Bulbasaur,physiology,"Its most notable feature, however, is the afo..."
...,...,...,...
58713,Miraidon,9,The eyes experience a pixelated display; they ...
58714,Miraidon,9,Inside Miraidon ' s body is what appears to be...
58715,Miraidon,9,This energy be mostly concentrated on its thro...
58716,Miraidon,9,The energy have a blue glow around the edges f...


In [9]:
df = pd.concat([df, df_images,phys,behavior])

In [10]:
df

Unnamed: 0,Pokemon,variable,value
0,Bulbasaur,Description,A strange seed was planted on its back at birt...
1,Bulbasaur,Description,It can go for days without eating a single mor...
2,Bulbasaur,Description,The seed on its back is filled with nutrients....
3,Bulbasaur,Description,It carries a seed on its back right from birth...
4,Bulbasaur,Description,"While it is young, it uses the nutrients that ..."
...,...,...,...
64213,Gimmighoul Chest Form,9,"Best seen when using its key signature move, G..."
64214,Gimmighoul Chest Form,9,It then finishes off its adversary with a indi...
64215,Gholdengo,9,Gholdengo have a friendly disposition and can ...
64216,Chien-Pao,9,Chien - Pao be described as be able to control...


Stopwords version...

In [11]:
df.value = df.value.apply(lambda x: ' '.join([word for word in x.split() if word not in (stop_words)]))
df

Unnamed: 0,Pokemon,variable,value
0,Bulbasaur,Description,A strange seed planted back birth. The plant s...
1,Bulbasaur,Description,It go days without eating single morsel. In bu...
2,Bulbasaur,Description,The seed back filled nutrients. The seed grows...
3,Bulbasaur,Description,It carries seed back right birth. As grows old...
4,Bulbasaur,Description,"While young, uses nutrients stored seeds back ..."
...,...,...,...
64213,Gimmighoul Chest Form,9,"Best seen using key signature move, Glaive Rus..."
64214,Gimmighoul Chest Form,9,It finishes adversary individual strike large ...
64215,Gholdengo,9,Gholdengo friendly disposition befriend anyone...
64216,Chien-Pao,9,Chien - Pao described able control 100 tons fa...


In [12]:
class_converter = ClassLabel(num_classes = 1008, names = list(df.Pokemon.unique()))

df['pokelabel'] = [class_converter.str2int(label) for label in df.Pokemon]

In [13]:
df

Unnamed: 0,Pokemon,variable,value,pokelabel
0,Bulbasaur,Description,A strange seed planted back birth. The plant s...,0
1,Bulbasaur,Description,It go days without eating single morsel. In bu...,0
2,Bulbasaur,Description,The seed back filled nutrients. The seed grows...,0
3,Bulbasaur,Description,It carries seed back right birth. As grows old...,0
4,Bulbasaur,Description,"While young, uses nutrients stored seeds back ...",0
...,...,...,...,...
64213,Gimmighoul Chest Form,9,"Best seen using key signature move, Glaive Rus...",998
64214,Gimmighoul Chest Form,9,It finishes adversary individual strike large ...,998
64215,Gholdengo,9,Gholdengo friendly disposition befriend anyone...,999
64216,Chien-Pao,9,Chien - Pao described able control 100 tons fa...,1001


In [14]:
# X = df['value']
# y = df['Pokemon']
train = df.sample(frac = 0.8, random_state = 323)
test = df.drop(train.index).reset_index(drop = True)

In [15]:
pretrained_model = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(
    pretrained_model)
tokenizer

BertTokenizer(name_or_path='bert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})

In [17]:
def encode(docs):
    '''
    This function takes list of texts and returns input_ids and attention_mask of texts
    '''
    encoded_dict = tokenizer.batch_encode_plus(docs, add_special_tokens=True, max_length=128, padding='max_length',
                            return_attention_mask=True, truncation=True, return_tensors='pt')
    input_ids = encoded_dict['input_ids']
    attention_masks = encoded_dict['attention_mask']
    return input_ids, attention_masks



train_input_ids, train_att_masks = encode(train.value.tolist())
# valid_input_ids, valid_att_masks = encode(valid_df['text'].values.tolist())
test_input_ids, test_att_masks = encode(test.value.tolist())


In [18]:
train_y = torch.LongTensor(train['pokelabel'].values.tolist())
test_y = torch.LongTensor(test['pokelabel'].values.tolist())

In [19]:
train_dataset = TensorDataset(train_input_ids, train_att_masks, train_y)
train_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(train_dataset,
                              sampler=train_sampler,
                              batch_size= 16)


test_dataset = TensorDataset(test_input_ids, test_att_masks, test_y)
test_sampler = RandomSampler(test_dataset)
test_dataloader = DataLoader(test_dataset,
                              sampler=test_sampler,
                              batch_size= 16)


In [20]:
model = BertForSequenceClassification.from_pretrained(
    pretrained_model,
    num_labels = len(train.pokelabel.unique()),
    output_attentions = False,
    output_hidden_states = False
    )

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [21]:
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
print(device)
model = model.cuda()

cuda


In [None]:
epochs = 30

optimizer = AdamW(model.parameters(), lr = 2e-6)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps = 0,
    num_training_steps = len(train_dataloader) * epochs
)

In [20]:
train_loss_per_epoch = []
test_loss_per_epoch = []


epochs = 15

for epoch_num in range(epochs):
    print('Epoch: ', epoch_num + 1)
    '''
    Training
    '''
    model.train()
    train_loss = 0
    for step_num, batch_data in enumerate(tqdm(train_dataloader,desc='Training')):
        input_ids, att_mask, labels = [data.to(device) for data in batch_data]
        output = model(input_ids = input_ids, attention_mask=att_mask, labels= labels)
        
        loss = output.loss
        train_loss += loss.item()

        model.zero_grad()
        loss.backward()
        del loss

        clip_grad_norm_(parameters=model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()

    train_loss_per_epoch.append(train_loss / (step_num + 1))              


    '''
    Testing
    '''
    model.eval()
    test_loss = 0
    test_pred = []
    with torch.no_grad():
        for step_num_e, batch_data in enumerate(tqdm(test_dataloader,desc='Test')):
            input_ids, att_mask, labels = [data.to(device) for data in batch_data]
            output = model(input_ids = input_ids, attention_mask=att_mask, labels= labels)

            loss = output.loss
            test_loss += loss.item()
   
            test_pred.append(np.argmax(output.logits.cpu().detach().numpy(),axis=-1))
        
    test_loss_per_epoch.append(test_loss / (step_num_e + 1))
    test_pred = np.concatenate(test_pred)

    '''
    Loss message
    '''
    print("train loss: {0} ".format(train_loss / (step_num + 1)))
    print("testing loss: {0} ".format(test_loss / (step_num_e + 1)))


Epoch:  1


Training: 100%|██████████| 6928/6928 [14:04<00:00,  8.20it/s]
Test: 100%|██████████| 1732/1732 [01:03<00:00, 27.22it/s]


train loss: 6.473003762492406 
testing loss: 6.125884360859631 
Epoch:  2


Training: 100%|██████████| 6928/6928 [14:06<00:00,  8.18it/s]
Test: 100%|██████████| 1732/1732 [01:03<00:00, 27.28it/s]


train loss: 5.909981066718112 
testing loss: 5.558661881275045 
Epoch:  3


Training: 100%|██████████| 6928/6928 [14:02<00:00,  8.22it/s]
Test: 100%|██████████| 1732/1732 [01:02<00:00, 27.50it/s]


train loss: 5.38516023101212 
testing loss: 5.027145098869024 
Epoch:  4


Training: 100%|██████████| 6928/6928 [13:58<00:00,  8.26it/s]
Test: 100%|██████████| 1732/1732 [01:02<00:00, 27.60it/s]


train loss: 4.892993814950727 
testing loss: 4.535506429903502 
Epoch:  5


Training: 100%|██████████| 6928/6928 [13:58<00:00,  8.26it/s]
Test: 100%|██████████| 1732/1732 [01:02<00:00, 27.51it/s]


train loss: 4.429659286285934 
testing loss: 4.074534267531257 
Epoch:  6


Training: 100%|██████████| 6928/6928 [13:59<00:00,  8.25it/s]
Test: 100%|██████████| 1732/1732 [01:02<00:00, 27.54it/s]


train loss: 3.9931765754145783 
testing loss: 3.6428973086987027 
Epoch:  7


Training: 100%|██████████| 6928/6928 [13:59<00:00,  8.25it/s]
Test: 100%|██████████| 1732/1732 [01:02<00:00, 27.51it/s]


train loss: 3.585075203630445 
testing loss: 3.2415435453210084 
Epoch:  8


Training: 100%|██████████| 6928/6928 [13:58<00:00,  8.26it/s]
Test: 100%|██████████| 1732/1732 [01:02<00:00, 27.55it/s]


train loss: 3.2103205618489423 
testing loss: 2.87775200701751 
Epoch:  9


Training: 100%|██████████| 6928/6928 [13:58<00:00,  8.26it/s]
Test: 100%|██████████| 1732/1732 [01:02<00:00, 27.55it/s]


train loss: 2.866498921367643 
testing loss: 2.548973407054333 
Epoch:  10


Training: 100%|██████████| 6928/6928 [13:58<00:00,  8.26it/s]
Test: 100%|██████████| 1732/1732 [01:02<00:00, 27.52it/s]


train loss: 2.5613774797141415 
testing loss: 2.26327544671008 
Epoch:  11


Training: 100%|██████████| 6928/6928 [14:00<00:00,  8.25it/s]
Test: 100%|██████████| 1732/1732 [01:03<00:00, 27.40it/s]


train loss: 2.2863585742805626 
testing loss: 2.005559433036533 
Epoch:  12


Training: 100%|██████████| 6928/6928 [14:02<00:00,  8.22it/s]
Test: 100%|██████████| 1732/1732 [01:03<00:00, 27.41it/s]


train loss: 2.045648744488974 
testing loss: 1.7845391031982718 
Epoch:  13


Training: 100%|██████████| 6928/6928 [14:02<00:00,  8.22it/s]
Test: 100%|██████████| 1732/1732 [01:03<00:00, 27.35it/s]


train loss: 1.8333540579119125 
testing loss: 1.592889520367629 
Epoch:  14


Training: 100%|██████████| 6928/6928 [14:02<00:00,  8.22it/s]
Test: 100%|██████████| 1732/1732 [01:03<00:00, 27.41it/s]


train loss: 1.6491660323794113 
testing loss: 1.4260402268818289 
Epoch:  15


Training: 100%|██████████| 6928/6928 [14:02<00:00,  8.22it/s]
Test: 100%|██████████| 1732/1732 [01:03<00:00, 27.39it/s]

train loss: 1.4888343672863882 
testing loss: 1.2850339930960544 





Initial test loss

In [132]:
test_loss

9015.656203269958

In [22]:
model.save_pretrained("pokemodel")

Post training test loss

In [21]:
test_loss

2225.678876042366

In [91]:
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [30]:
def predict_pokemon(desc, model):
    pred_inputs = tokenizer(desc, return_tensors="pt").to(device)
 
    with torch.no_grad():

        logits = model(**pred_inputs).logits


        predicted_class_id = logits.argmax().item()
        second_class_id = torch.topk(logits.flatten(), 3).indices[1]
        third_class_id = torch.topk(logits.flatten(), 3).indices[2]

    return(class_converter.int2str(predicted_class_id), 
           class_converter.int2str(second_class_id.item()),
           class_converter.int2str(third_class_id.item()))

In [71]:
predict_pokemon("pokemon that shoots fire from its mouth")

('Raikou', 'Charizard', 'Charmander')

In [40]:
model.save_pretrained('pokemodel_apr5')

Practice Loading in...

In [27]:
model1 = model.from_pretrained("pokemodel_apr5")

In [86]:
model1 = model1.cuda()
pred_inputs = tokenizer('elephant', return_tensors="pt").to(device)
 
with torch.no_grad():

    logits = model1(**pred_inputs).logits
    print(logits)

tensor([[-0.6715, -4.3084, -2.9991,  ..., -0.6740,  0.7072, -0.0621]],
       device='cuda:0')


In [90]:
predict_pokemon("brendan matthys", model1)

('Slowking', 'Togekiss', 'Omastar')

Day 2

In [30]:
epochs = 15

optimizer = AdamW(model1.parameters(), lr = 2e-6)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps = 0,
    num_training_steps = len(train_dataloader) * epochs
)

In [31]:
model1 = model1.cuda()

In [33]:
model = model1

train_loss_per_epoch1 = []
test_loss_per_epoch1 = []


epochs = 10

for epoch_num in range(epochs):
    print('Epoch: ', epoch_num + 1)
    '''
    Training
    '''
    model.train()
    train_loss = 0
    for step_num, batch_data in enumerate(tqdm(train_dataloader,desc='Training')):
        input_ids, att_mask, labels = [data.to(device) for data in batch_data]
        output = model(input_ids = input_ids, attention_mask=att_mask, labels= labels)
        
        loss = output.loss
        train_loss += loss.item()

        model.zero_grad()
        loss.backward()
        del loss

        clip_grad_norm_(parameters=model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()

    train_loss_per_epoch1.append(train_loss / (step_num + 1))              


    '''
    Testing
    '''
    model.eval()
    test_loss = 0
    test_pred = []
    with torch.no_grad():
        for step_num_e, batch_data in enumerate(tqdm(test_dataloader,desc='Test')):
            input_ids, att_mask, labels = [data.to(device) for data in batch_data]
            output = model(input_ids = input_ids, attention_mask=att_mask, labels= labels)

            loss = output.loss
            test_loss += loss.item()
   
            test_pred.append(np.argmax(output.logits.cpu().detach().numpy(),axis=-1))
        
    test_loss_per_epoch1.append(test_loss / (step_num_e + 1))
    test_pred = np.concatenate(test_pred)

    '''
    Loss message
    '''
    print("train loss: {0} ".format(train_loss / (step_num + 1)))
    print("testing loss: {0} ".format(test_loss / (step_num_e + 1)))


Epoch:  1


Training: 100%|██████████| 7483/7483 [15:08<00:00,  8.24it/s]
Test: 100%|██████████| 1651/1651 [01:00<00:00, 27.37it/s]


train loss: 1.7163525736102538 
testing loss: 1.1663524752887793 
Epoch:  2


Training: 100%|██████████| 7483/7483 [15:10<00:00,  8.21it/s]
Test: 100%|██████████| 1651/1651 [01:00<00:00, 27.36it/s]


train loss: 1.4701578348982578 
testing loss: 0.9927206523377992 
Epoch:  3


Training: 100%|██████████| 7483/7483 [15:11<00:00,  8.21it/s]
Test: 100%|██████████| 1651/1651 [01:00<00:00, 27.32it/s]


train loss: 1.2786488959125923 
testing loss: 0.8531649635641017 
Epoch:  4


Training: 100%|██████████| 7483/7483 [15:13<00:00,  8.19it/s]
Test: 100%|██████████| 1651/1651 [01:00<00:00, 27.27it/s]


train loss: 1.122545735155072 
testing loss: 0.7447241481725264 
Epoch:  5


Training: 100%|██████████| 7483/7483 [15:13<00:00,  8.19it/s]
Test: 100%|██████████| 1651/1651 [01:00<00:00, 27.29it/s]


train loss: 0.9959615731635992 
testing loss: 0.6562479104528566 
Epoch:  6


Training: 100%|██████████| 7483/7483 [15:10<00:00,  8.22it/s]
Test: 100%|██████████| 1651/1651 [01:00<00:00, 27.32it/s]


train loss: 0.8938860863996798 
testing loss: 0.5862352950763586 
Epoch:  7


Training: 100%|██████████| 7483/7483 [15:12<00:00,  8.20it/s]
Test: 100%|██████████| 1651/1651 [01:00<00:00, 27.32it/s]


train loss: 0.8111197699588696 
testing loss: 0.5298776383484082 
Epoch:  8


Training: 100%|██████████| 7483/7483 [15:12<00:00,  8.20it/s]
Test: 100%|██████████| 1651/1651 [01:00<00:00, 27.32it/s]


train loss: 0.7442111905649769 
testing loss: 0.4845248000309332 
Epoch:  9


Training: 100%|██████████| 7483/7483 [15:09<00:00,  8.22it/s]
Test: 100%|██████████| 1651/1651 [01:00<00:00, 27.49it/s]


train loss: 0.6880408596179232 
testing loss: 0.4485710434195925 
Epoch:  10


Training: 100%|██████████| 7483/7483 [15:10<00:00,  8.22it/s]
Test: 100%|██████████| 1651/1651 [01:00<00:00, 27.35it/s]

train loss: 0.6432078573586436 
testing loss: 0.4203644958451652 





In [34]:
model.save_pretrained('pokemodel_apr6')

In [43]:
predict_pokemon("cold pokemon", model)

('Vanillish', 'Abomasnow', 'Snover')

Day 3 -- with wiki data! 

In [47]:
model1 = model.from_pretrained("pokemodel_apr6")

In [50]:
model1 = model1.cuda()
pred_inputs = tokenizer('elephant', return_tensors="pt").to(device)
 
with torch.no_grad():

    logits = model1(**pred_inputs).logits
    print(logits)

tensor([[ 1.9821, -2.0495, -1.1257,  ..., -1.4932, -0.9351, -1.6894]],
       device='cuda:0')


In [51]:
epochs = 10

optimizer = AdamW(model1.parameters(), lr = 2e-6)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps = 0,
    num_training_steps = len(train_dataloader) * epochs
)

In [53]:
model = model1

train_loss_per_epoch1 = []
test_loss_per_epoch1 = []


epochs = 7

for epoch_num in range(epochs):
    print('Epoch: ', epoch_num + 1)
    '''
    Training
    '''
    model.train()
    train_loss = 0
    for step_num, batch_data in enumerate(tqdm(train_dataloader,desc='Training')):
        input_ids, att_mask, labels = [data.to(device) for data in batch_data]
        output = model(input_ids = input_ids, attention_mask=att_mask, labels= labels)
        
        loss = output.loss
        train_loss += loss.item()

        model.zero_grad()
        loss.backward()
        del loss

        clip_grad_norm_(parameters=model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()

    train_loss_per_epoch1.append(train_loss / (step_num + 1))              


    '''
    Testing
    '''
    model.eval()
    test_loss = 0
    test_pred = []
    with torch.no_grad():
        for step_num_e, batch_data in enumerate(tqdm(test_dataloader,desc='Test')):
            input_ids, att_mask, labels = [data.to(device) for data in batch_data]
            output = model(input_ids = input_ids, attention_mask=att_mask, labels= labels)

            loss = output.loss
            test_loss += loss.item()
   
            test_pred.append(np.argmax(output.logits.cpu().detach().numpy(),axis=-1))
        
    test_loss_per_epoch1.append(test_loss / (step_num_e + 1))
    test_pred = np.concatenate(test_pred)

    '''
    Loss message
    '''
    print("train loss: {0} ".format(train_loss / (step_num + 1)))
    print("testing loss: {0} ".format(test_loss / (step_num_e + 1)))


Epoch:  1


Training: 100%|██████████| 13629/13629 [27:31<00:00,  8.25it/s]
Test: 100%|██████████| 1042/1042 [00:38<00:00, 27.14it/s]


train loss: 2.174676286792867 
testing loss: 0.5174251904571697 
Epoch:  2


Training: 100%|██████████| 13629/13629 [27:35<00:00,  8.23it/s]
Test: 100%|██████████| 1042/1042 [00:38<00:00, 27.32it/s]


train loss: 1.6534603646398027 
testing loss: 0.4284073086294583 
Epoch:  3


Training: 100%|██████████| 13629/13629 [27:35<00:00,  8.23it/s]
Test: 100%|██████████| 1042/1042 [00:38<00:00, 27.11it/s]


train loss: 1.34389510804891 
testing loss: 0.3605473042892975 
Epoch:  4


Training: 100%|██████████| 13629/13629 [27:34<00:00,  8.24it/s]
Test: 100%|██████████| 1042/1042 [00:38<00:00, 27.19it/s]


train loss: 1.1252477233199654 
testing loss: 0.30932065593561375 
Epoch:  5


Training: 100%|██████████| 13629/13629 [27:31<00:00,  8.25it/s]
Test: 100%|██████████| 1042/1042 [00:38<00:00, 27.34it/s]


train loss: 0.9639045988539666 
testing loss: 0.2711614287081183 
Epoch:  6


Training: 100%|██████████| 13629/13629 [27:28<00:00,  8.27it/s]
Test: 100%|██████████| 1042/1042 [00:38<00:00, 27.39it/s]


train loss: 0.8459040060375886 
testing loss: 0.24338901926354004 
Epoch:  7


Training: 100%|██████████| 13629/13629 [27:28<00:00,  8.27it/s]
Test: 100%|██████████| 1042/1042 [00:38<00:00, 27.31it/s]

train loss: 0.7603962936291224 
testing loss: 0.22315266972940392 





In [54]:
model.save_pretrained('pokemodel_apr17')

In [95]:
predict_pokemon(
    "Purple dinosaur type, has a solo horn on its nose and looks like it has big blueish ears"
    , model)


('Donphan', 'Rhyperior', 'Fraxure')

Try this but as a stop model. Going to do a LOT of training here

In [24]:
model1 = model.from_pretrained("pokemodel_apr17")

epochs = 10

optimizer = AdamW(model1.parameters(), lr = 2e-6)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps = 0,
    num_training_steps = len(train_dataloader) * epochs
)



In [27]:

model = model1.cuda()

train_loss_per_epoch1 = []
test_loss_per_epoch1 = []


epochs = 7

for epoch_num in range(epochs):
    print('Epoch: ', epoch_num + 1)
    '''
    Training
    '''
    model.train()
    train_loss = 0
    for step_num, batch_data in enumerate(tqdm(train_dataloader,desc='Training')):
        input_ids, att_mask, labels = [data.to(device) for data in batch_data]
        output = model(input_ids = input_ids, attention_mask=att_mask, labels= labels)
        
        loss = output.loss
        train_loss += loss.item()

        model.zero_grad()
        loss.backward()
        del loss

        clip_grad_norm_(parameters=model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()

    train_loss_per_epoch1.append(train_loss / (step_num + 1))              


    '''
    Testing
    '''
    model.eval()
    test_loss = 0
    test_pred = []
    with torch.no_grad():
        for step_num_e, batch_data in enumerate(tqdm(test_dataloader,desc='Test')):
            input_ids, att_mask, labels = [data.to(device) for data in batch_data]
            output = model(input_ids = input_ids, attention_mask=att_mask, labels= labels)

            loss = output.loss
            test_loss += loss.item()
   
            test_pred.append(np.argmax(output.logits.cpu().detach().numpy(),axis=-1))
        
    test_loss_per_epoch1.append(test_loss / (step_num_e + 1))
    test_pred = np.concatenate(test_pred)

    '''
    Loss message
    '''
    print("train loss: {0} ".format(train_loss / (step_num + 1)))
    print("testing loss: {0} ".format(test_loss / (step_num_e + 1)))


Epoch:  1


Training: 100%|██████████| 13629/13629 [27:24<00:00,  8.29it/s]
Test: 100%|██████████| 1042/1042 [00:38<00:00, 27.41it/s]


train loss: 1.2198126944687668 
testing loss: 0.3219016258767493 
Epoch:  2


Training: 100%|██████████| 13629/13629 [27:27<00:00,  8.27it/s]
Test: 100%|██████████| 1042/1042 [00:37<00:00, 27.51it/s]


train loss: 0.9190081049513157 
testing loss: 0.2576289624226013 
Epoch:  3


Training: 100%|██████████| 13629/13629 [27:28<00:00,  8.27it/s]
Test: 100%|██████████| 1042/1042 [00:38<00:00, 27.42it/s]


train loss: 0.7475817165328551 
testing loss: 0.21072839914987332 
Epoch:  4


Training: 100%|██████████| 13629/13629 [27:29<00:00,  8.26it/s]
Test: 100%|██████████| 1042/1042 [00:37<00:00, 27.43it/s]


train loss: 0.6295280578338559 
testing loss: 0.1824749679074063 
Epoch:  5


Training: 100%|██████████| 13629/13629 [27:28<00:00,  8.27it/s]
Test: 100%|██████████| 1042/1042 [00:37<00:00, 27.44it/s]


train loss: 0.5438670648666578 
testing loss: 0.15895070722451765 
Epoch:  6


Training: 100%|██████████| 13629/13629 [27:26<00:00,  8.28it/s]
Test: 100%|██████████| 1042/1042 [00:37<00:00, 27.50it/s]


train loss: 0.48215685560818256 
testing loss: 0.14379370710213674 
Epoch:  7


Training: 100%|██████████| 13629/13629 [27:25<00:00,  8.28it/s]
Test: 100%|██████████| 1042/1042 [00:37<00:00, 27.50it/s]

train loss: 0.43654619537756395 
testing loss: 0.13239858217176315 





In [33]:
model.save_pretrained('pokemodel_apr17_stopwords')

In [31]:
predict_pokemon(
    "Purple dinosaur type, has a solo horn on its nose and looks like it has big blueish ears"
    , model)


('Donphan', 'Cranidos', 'Kricketune')

Let's train the stopwords a little bit more, then we can move to the testing file

In [23]:
model1 = model.from_pretrained("pokemodel_apr17_stopwords")

epochs = 10

optimizer = AdamW(model1.parameters(), lr = 2e-6)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps = 0,
    num_training_steps = len(train_dataloader) * epochs
)



In [24]:

model = model1.cuda()

train_loss_per_epoch1 = []
test_loss_per_epoch1 = []


epochs = 7

for epoch_num in range(epochs):
    print('Epoch: ', epoch_num + 1)
    '''
    Training
    '''
    model.train()
    train_loss = 0
    for step_num, batch_data in enumerate(tqdm(train_dataloader,desc='Training')):
        input_ids, att_mask, labels = [data.to(device) for data in batch_data]
        output = model(input_ids = input_ids, attention_mask=att_mask, labels= labels)
        
        loss = output.loss
        train_loss += loss.item()

        model.zero_grad()
        loss.backward()
        del loss

        clip_grad_norm_(parameters=model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()

    train_loss_per_epoch1.append(train_loss / (step_num + 1))              


    '''
    Testing
    '''
    model.eval()
    test_loss = 0
    test_pred = []
    with torch.no_grad():
        for step_num_e, batch_data in enumerate(tqdm(test_dataloader,desc='Test')):
            input_ids, att_mask, labels = [data.to(device) for data in batch_data]
            output = model(input_ids = input_ids, attention_mask=att_mask, labels= labels)

            loss = output.loss
            test_loss += loss.item()
   
            test_pred.append(np.argmax(output.logits.cpu().detach().numpy(),axis=-1))
        
    test_loss_per_epoch1.append(test_loss / (step_num_e + 1))
    test_pred = np.concatenate(test_pred)

    '''
    Loss message
    '''
    print("train loss: {0} ".format(train_loss / (step_num + 1)))
    print("testing loss: {0} ".format(test_loss / (step_num_e + 1)))


Epoch:  1


Training: 100%|██████████| 13629/13629 [27:57<00:00,  8.12it/s]
Test: 100%|██████████| 1042/1042 [00:37<00:00, 27.53it/s]


train loss: 0.40186336648365034 
testing loss: 0.11353446448207502 
Epoch:  2


Training: 100%|██████████| 13629/13629 [27:42<00:00,  8.20it/s]
Test: 100%|██████████| 1042/1042 [00:37<00:00, 27.54it/s]


train loss: 0.3312793173615054 
testing loss: 0.09691325695863469 
Epoch:  3


Training: 100%|██████████| 13629/13629 [27:43<00:00,  8.19it/s]
Test: 100%|██████████| 1042/1042 [00:37<00:00, 27.51it/s]


train loss: 0.2794026941786999 
testing loss: 0.08311590683001188 
Epoch:  4


Training: 100%|██████████| 13629/13629 [27:43<00:00,  8.19it/s]
Test: 100%|██████████| 1042/1042 [00:37<00:00, 27.47it/s]


train loss: 0.24153434211260172 
testing loss: 0.07457745357750846 
Epoch:  5


Training: 100%|██████████| 13629/13629 [27:43<00:00,  8.19it/s]
Test: 100%|██████████| 1042/1042 [00:37<00:00, 27.54it/s]


train loss: 0.21288665428935882 
testing loss: 0.06722204967879203 
Epoch:  6


Training: 100%|██████████| 13629/13629 [27:43<00:00,  8.19it/s]
Test: 100%|██████████| 1042/1042 [00:37<00:00, 27.49it/s]


train loss: 0.1926927720806425 
testing loss: 0.06178272706313477 
Epoch:  7


Training: 100%|██████████| 13629/13629 [27:43<00:00,  8.19it/s]
Test: 100%|██████████| 1042/1042 [00:37<00:00, 27.46it/s]

train loss: 0.1775660244009307 
testing loss: 0.058671759833419074 





In [25]:
model.save_pretrained('pokemodel_apr17_stopwords_1')

In [87]:
def predict_pokemon(desc, model, filter_stopwords = False):
    
    for descriptor in ['Pokémon', 'Pokemon', 'pokemon','pokémon']:
        if descriptor in desc:
            desc = desc.replace(descriptor,'')
    if filter_stopwords:
        desc = ' '.join([word for word in desc.split() if word not in (stop_words)])
    pred_inputs = tokenizer(desc, return_tensors="pt").to(device)
 
    with torch.no_grad():

        logits = model(**pred_inputs).logits


        predicted_class_id = logits.argmax().item()
        second_class_id = torch.topk(logits.flatten(), 3).indices[1]
        third_class_id = torch.topk(logits.flatten(), 3).indices[2]

    return(class_converter.int2str(predicted_class_id), 
           class_converter.int2str(second_class_id.item()),
           class_converter.int2str(third_class_id.item()))

In [92]:
df.sample(1)[['Pokemon','value']]

Unnamed: 0,Pokemon,value
8599,Pachirisu,These fur balls exhibit static


In [95]:
predict_pokemon('sand castle', model, filter_stopwords = True)

('Palossand', 'Sandaconda', 'Silicobra')