In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
# Files
import input_net
import utils

# Packages
from os import path
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

from transformers import AlbertConfig, AlbertForSequenceClassification, AlbertTokenizer
from transformers import BartConfig, BartForSequenceClassification, BartTokenizer
from transformers import BertConfig, BertForSequenceClassification, BertTokenizer
from transformers import CamembertConfig, CamembertForSequenceClassification, CamembertTokenizer
from transformers import DistilBertConfig, DistilBertForSequenceClassification, DistilBertTokenizer
from transformers import FlaubertConfig, FlaubertForSequenceClassification, FlaubertTokenizer
from transformers import RobertaConfig, RobertaForSequenceClassification, RobertaTokenizer
from transformers import XLMConfig, XLMForSequenceClassification, XLMTokenizer
from transformers import XLMRobertaConfig, XLMRobertaForSequenceClassification, XLMRobertaTokenizer
from transformers import XLNetConfig, XLNetForSequenceClassification, XLNetTokenizer
from sklearn.model_selection import train_test_split

# Optimizer -> busca info de com s'utilitza
from transformers import AdamW, get_linear_schedule_with_warmup

Using TensorFlow backend.


In [3]:
TRAIN = "data/train.csv"
TEST = "data/test.csv"
INPUT_NET = 'data/input.csv'

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
max_len = 70

if not path.exists(INPUT_NET):
    df = input_net.create_input(TRAIN, INPUT_NET, tokenizer, max_len)
else:
    df = pd.read_csv(INPUT_NET)

tokens_tensor, segments_tensor, attention_tensor, targets_tensor = utils.ToTensor(df)

In [6]:
args = {
    'model_type': 'xlm',
    'model_name': 'bert-base-cased',
    'do_train': True,
    'do_eval': True,
    'max_seq_length': 70,
    'batch_size': 16, 

    'num_train_epochs': 1,
    'weight_decay': 0,
    'learning_rate': 4e-5,
    'adam_epsilon': 1e-8,
    'warmup_steps': 0,
    'max_grad_norm': 1.0,
}

In [7]:
MODEL_CLASSES = {
    'albert': (AlbertConfig, AlbertForSequenceClassification, AlbertTokenizer),
    'bart': (BartConfig, BartForSequenceClassification, BartTokenizer),
    'bert': (BertConfig, BertForSequenceClassification, BertTokenizer),
    'camembert': (CamembertConfig, CamembertForSequenceClassification, CamembertTokenizer),
    'distilbert': (DistilBertConfig, DistilBertForSequenceClassification, DistilBertTokenizer),
    'flaubert': (FlaubertConfig, FlaubertForSequenceClassification, FlaubertTokenizer),
    'roberta': (RobertaConfig, RobertaForSequenceClassification, RobertaTokenizer),
    'xlm': (XLMConfig, XLMForSequenceClassification, XLMTokenizer),
    'xlm roberta': (XLMRobertaConfig, XLMRobertaForSequenceClassification, XLMRobertaTokenizer),
    'xlnet': (XLNetConfig, XLNetForSequenceClassification, XLNetTokenizer)
}

config_class, model_class, tokenizer_class = MODEL_CLASSES[args['model_type']]

# Mirar aixo:: AutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path)

In [46]:
'''
tokenizer = tokenizer_class.from_pretrained(args['model_name'])

model = model_class.from_pretrained(args['model_name'])

no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': args['weight_decay']},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]
'''

In [None]:
'''
### In Transformers, optimizer and schedules are splitted and instantiated like this:
optimizer = AdamW(model.parameters(), lr=lr, correct_bias=False)  # To reproduce BertAdam specific behavior set correct_bias=False
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps)  # PyTorch scheduler
### and used like this:

for batch in train_data:
    model.train()
    loss = model(batch)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)  # Gradient clipping is not in AdamW anymore (so you can use amp without issue)
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()
'''

In [17]:
criterion = nn.CrossEntropyLoss(reduction='mean')

def batch_generator(data, target, batch_size):
    data = np.array(data)
    target = np.array(target)
    nsamples = len(data)
    perm = np.random.permutation(nsamples)
    for i in range(0, nsamples, batch_size):
        batch_idx = perm[i:i+batch_size]
        if target is not None:
            #print("data:", data.loc[batch_idx,:])
            yield data[batch_idx,:], target[batch_idx]
        #else:
        #    yield data[batch_idx], None

def train(model, train_data, train_target, batch_size, optimizer):
    ncorrect = 0
    num_warmup_steps = 100
    num_training_steps = 1000
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps) 
    i = 0
    for X, y in batch_generator(train_data, train_target, batch_size):
        X = torch.tensor(X, dtype=torch.long) #al codi original X era un numpy.darray, aqui un pd.df
        y = torch.tensor(y, dtype=torch.long)

        X_i = X[:,:70]
        X_s = X[:,70:140]
        X_p = X[:,140:]
        
        model.zero_grad() 
        out= model(input_ids=X_i, token_type_ids=X_s, attention_mask=X_p, labels=y)[1]
        
        loss = criterion(out, y) #no se si aixo ha de ser diferent
        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        out = F.softmax(out, dim=1)

        ncorrect += (torch.max(out, 1)[1] == y).sum().item()
        i += 1
        print("ncorrect", ncorrect)
        print("running:", i)

    acc = ncorrect/len(train_data) * 100
    print("Accuracy", acc)

    return acc, loss
        
def validation(model, X, y, batch_size):
    model.eval()
    

In [8]:
def build(model, learn_data, optimizer, batch_size, epochs):
    X_train, X_val, y_train, y_val = train_test_split(learn_data.iloc[:,:-1], learn_data.iloc[:,-1], test_size=0.2, random_state=42)
    
    train_acc = [None]*epochs
    train_loss = [None]*epochs
    val_acc = [None]*epochs
    val_los = [None]*epochs
    for epoch in range(epochs):
        #acc, loss = train(model, X_train, y_train, batch_size, optimizer)

        out = train(model, X_train, y_train, batch_size, optimizer)
        print(out) 
             
        #train_acc[epoch] = acc
        #train_loss[epoch] = loss
        
        #acc, loss = val(model, val_data)
        #val_acc[epoch] = acc
        #val_loss[epoch] = loss
        

In [9]:
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

In [18]:
optimizer = AdamW(model.parameters(), lr=1e-3, correct_bias=False)

build(model, df, optimizer, batch_size = 5, epochs = 1)

ncorrect 2
running: 1
ncorrect 5
running: 2
ncorrect 7
running: 3
ncorrect 9
running: 4
ncorrect 12
running: 5
ncorrect 15
running: 6
ncorrect 17
running: 7
ncorrect 18
running: 8


KeyboardInterrupt: 

In [25]:
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

#hyp to optimise
batch_size = 16#,32
epochs = 4
lr = 1e-3
max_grad_norm = 1.0
num_training_steps = 1000
num_warmup_steps = 100

optimizer = AdamW(model.parameters(), lr=lr, correct_bias=False)  # To reproduce BertAdam specific behavior set correct_bias=False

scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps)  # PyTorch scheduler

build(model, df, optimizer, batch_size, epochs)

IndexingError: Too many indexers

In [57]:
Predictions = model(input_ids=tokens_tensor[:5],token_type_ids=segments_tensor[:5],attention_mask=attention_tensor[:5],labels=targets_tensor[:5])

In [31]:
F.softmax(model(input_ids=tokens_tensor[:5],token_type_ids=segments_tensor[:5],attention_mask=attention_tensor[:5])[0], dim=1)

tensor([[0.7353, 0.2647],
        [0.7387, 0.2613],
        [0.7226, 0.2774],
        [0.6201, 0.3799],
        [0.7045, 0.2955]], grad_fn=<SoftmaxBackward>)

In [59]:
Predictions[1]

tensor([[-0.3982,  0.6718],
        [-0.4118,  0.6909],
        [-0.3936,  0.6663],
        [-0.4014,  0.6766],
        [-0.4046,  0.6803]], grad_fn=<AddmmBackward>)