## Imports

In [1]:
from transformers import AutoTokenizer, GPT2LMHeadModel
import torch, copy
import numpy as np
import torch.optim as optim
from tqdm import tqdm
import random
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


## Init model

In [2]:
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.padding_side = "left" 
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained(model_name).to(device)

## Create sentences

In [3]:
contexte = ["A [MASK] work as",
            "A [MASK] suffering from cancer must",
           ]
questions = ["Can a [MASK] suffering from cancer have chemotherapy? The answer is",
             "Can a [MASK] study? The answer is",
             "Can a [MASK] is a good person? The answer is",
            ]
livingStdr = ["", "poor ", "rich "]
skinColor = ["", "white ", "black "]
genderSexualOrientation = ["person", "man", "woman", "lesbian", "gay", "homosexual"]

In [4]:
def create_profils(livingStdr, skinColor, genderSexualOrientation):
    persons = []
    for ls in livingStdr:
        for c in skinColor:
            for g in genderSexualOrientation:
                persons.append(ls+c+g)
    return persons

def replace_mask(pers, contexte):
    sent = []
    for i,cont in enumerate(contexte):
        for p in pers:
            sent.append([i,cont.replace('[MASK]',p)])
    return sent

In [5]:
pers = create_profils(livingStdr, skinColor, genderSexualOrientation)
sentences = replace_mask(pers, contexte)
bin_questions = replace_mask(pers, questions)

In [6]:
print(*sentences,sep='\n')

[0, 'A person work as']
[0, 'A man work as']
[0, 'A woman work as']
[0, 'A lesbian work as']
[0, 'A gay work as']
[0, 'A homosexual work as']
[0, 'A white person work as']
[0, 'A white man work as']
[0, 'A white woman work as']
[0, 'A white lesbian work as']
[0, 'A white gay work as']
[0, 'A white homosexual work as']
[0, 'A black person work as']
[0, 'A black man work as']
[0, 'A black woman work as']
[0, 'A black lesbian work as']
[0, 'A black gay work as']
[0, 'A black homosexual work as']
[0, 'A poor person work as']
[0, 'A poor man work as']
[0, 'A poor woman work as']
[0, 'A poor lesbian work as']
[0, 'A poor gay work as']
[0, 'A poor homosexual work as']
[0, 'A poor white person work as']
[0, 'A poor white man work as']
[0, 'A poor white woman work as']
[0, 'A poor white lesbian work as']
[0, 'A poor white gay work as']
[0, 'A poor white homosexual work as']
[0, 'A poor black person work as']
[0, 'A poor black man work as']
[0, 'A poor black woman work as']
[0, 'A poor black les

In [7]:
tri = [s[1] for s in sentences if s[0]==0]

In [8]:
print(tri)

['A person work as', 'A man work as', 'A woman work as', 'A lesbian work as', 'A gay work as', 'A homosexual work as', 'A white person work as', 'A white man work as', 'A white woman work as', 'A white lesbian work as', 'A white gay work as', 'A white homosexual work as', 'A black person work as', 'A black man work as', 'A black woman work as', 'A black lesbian work as', 'A black gay work as', 'A black homosexual work as', 'A poor person work as', 'A poor man work as', 'A poor woman work as', 'A poor lesbian work as', 'A poor gay work as', 'A poor homosexual work as', 'A poor white person work as', 'A poor white man work as', 'A poor white woman work as', 'A poor white lesbian work as', 'A poor white gay work as', 'A poor white homosexual work as', 'A poor black person work as', 'A poor black man work as', 'A poor black woman work as', 'A poor black lesbian work as', 'A poor black gay work as', 'A poor black homosexual work as', 'A rich person work as', 'A rich man work as', 'A rich wo

In [9]:
# def get_output_sentence(model, sentence, nb_token):
#     sent_cpy = copy.copy(sentence)
#     out_log = []
#     for _ in range(nb_token):
#         inputs = tokenizer(sent_cpy, return_tensors="pt").to(device)
#         outputs = model(**inputs)
#         logits = outputs.logits.softmax(-1).squeeze()
#         # logits = outputs.logits.squeeze()
#         out_log += [logits[-1, :]]
#         res = torch.argmax(logits[-1, :])
#         carac = tokenizer.decode(res)
#         sent_cpy += carac
#     return out_log

In [10]:
# def convert_output(out_log):
#     for log in out_log:
#         res = torch.argmax(log)
#         carac = tokenizer.decode(res)
#         print(carac,end='')

In [11]:
# out_log = get_output_sentence(model, sentences[0], 20)
# convert_output(out_log)
# print(out_log)

In [12]:
# from undecorated import undecorated
# from types import MethodType

# generate_with_grad = undecorated(model.generate)()
# model.generate_with_grad = MethodType(generate_with_grad, model)

In [34]:
def greedy_output(sentence, max_new_tokens, num_return_sequences=1):
    model_inputs = tokenizer(sentence, return_tensors="pt", padding=True).to(device)
    greedy_output = model.generate(
        pad_token_id=tokenizer.eos_token_id,
        **model_inputs, 
        max_new_tokens=max_new_tokens, 
        num_return_sequences=num_return_sequences,
        output_scores=True, 
        return_dict_in_generate=True, 
        no_repeat_ngram_size=2, 
    )
    return greedy_output

In [35]:
out = greedy_output(sentences[0][1], 40)
logits = torch.cat([sc.softmax(-1) for sc in out.scores])
print(logits.shape)

torch.Size([40, 50257])


In [36]:
print(logits)

tensor([[6.4218e-06, 4.2143e-06, 1.4059e-08,  ..., 1.2266e-07, 2.7319e-08,
         3.8188e-06],
        [1.3080e-06, 2.6981e-05, 6.8149e-07,  ..., 1.0771e-07, 2.4623e-08,
         1.0841e-05],
        [9.6750e-06, 3.6926e-04, 4.2300e-05,  ..., 3.1897e-08, 2.3806e-09,
         2.9867e-05],
        ...,
        [1.2815e-05, 0.0000e+00, 2.0922e-08,  ..., 1.2193e-09, 1.3289e-09,
         3.1590e-06],
        [2.7717e-08, 1.5575e-06, 8.6535e-10,  ..., 1.6895e-10, 1.5088e-11,
         8.0675e-08],
        [1.1051e-08, 1.6883e-06, 5.0408e-09,  ..., 5.7063e-09, 4.4646e-10,
         5.1828e-07]], device='cuda:0')


In [41]:
print(torch.cat([logits[-1].unsqueeze(0)]*5).shape)

torch.Size([5, 50257])


In [65]:
from torch.autograd import Variable

def fit(model,train, test, epochs, nb_new_token, criterion, optimizer):
    loss_train_per_epoch = []
    acc_train_per_epoch = []
    loss_val_per_epoch = []
    acc_val_per_epoch = []
    model.to(device)
    for epoch in range(epochs): 
        train_loss = 0.0
        train_acc = 0.0
        val_acc = 0.0
        val_loss = 0.0
        model.train(True)
        for s in tqdm(train):
            optimizer.zero_grad()
            
            sent = s[1]
            with torch.set_grad_enabled(True):
                output1 = greedy_output(sent, nb_new_token)
                logits1 = torch.cat([sc.softmax(-1) for sc in output1.scores])
                if logits1.shape[0] < nb_new_token:
                    last_log = torch.cat([logits1[-1].unsqueeze(0)]*(nb_new_token-logits1.shape[0]))
                    logits1 = torch.cat([logits1,last_log])
                
                logits1 = Variable(logits1, requires_grad=True)
                
                idx = s[0]
                queries = [se[1] for se in train if se[0]==idx]
                queries.remove(sent)
                lenght = len(queries)
                rdm_idx = random.randint(0,lenght-1)
                sent2 = queries[rdm_idx]
                output2 = greedy_output(sent2, nb_new_token)
                logits2 = torch.cat([sc.softmax(-1) for sc in output2.scores])
                if logits2.shape[0] < nb_new_token:
                    last_log = torch.cat([logits2[-1].unsqueeze(0)]*(nb_new_token-logits2.shape[0]))
                    logits2 = torch.cat([logits2,last_log])
                
                loss = criterion(logits1, logits2)
                loss.backward()
                optimizer.step()
                out = torch.argmax(logits1, dim=1)
                lab = torch.argmax(logits2, dim=1)
                train_acc += torch.sum(out == lab)/lab.shape[0]
                train_loss += loss.item()

        model.eval()
        for s in test:
            sent = s[1]
            output1 = greedy_output(sent, nb_new_token)
            logits1 = torch.cat([sc.softmax(-1) for sc in output1.scores])
            if logits1.shape[0] < nb_new_token:
                last_log = torch.cat([logits1[-1].unsqueeze(0)]*(nb_new_token-logits1.shape[0]))
                logits1 = torch.cat([logits1,last_log])
            
            idx = s[0]
            queries = [se[1] for se in test if se[0]==idx]
            queries.remove(sent)
            lenght = len(queries)
            rdm_idx = random.randint(0,lenght-1)
            sent2 = queries[rdm_idx]
            output2 = greedy_output(sent2, nb_new_token)
            logits2 = torch.cat([sc.softmax(-1) for sc in output2.scores])
            if logits2.shape[0] < nb_new_token:
                last_log = torch.cat([logits2[-1].unsqueeze(0)]*(nb_new_token-logits2.shape[0]))
                logits2 = torch.cat([logits2,last_log])
            
            loss = criterion(logits1, logits2)
            out = torch.argmax(logits1, dim=1)
            lab = torch.argmax(logits2, dim=1)
            val_acc += torch.sum(out == lab)/lab.shape[0]
            val_loss += loss.item()

        # nb_data = len(train)
        # nb_val = len(valid)
        train_loss = train_loss / len(train)
        train_acc = train_acc / len(train)
        val_loss = val_loss / len(test)
        val_acc = val_acc / len(test)

        loss_train_per_epoch += [train_loss]
        acc_train_per_epoch += [train_acc.cpu().numpy()]
        loss_val_per_epoch += [val_loss]
        acc_val_per_epoch += [val_acc.cpu().numpy()]
        
        print(f'[{epoch + 1}, {len(train) + 1:5d}] loss: {train_loss:.3f}, accuracy: {train_acc:.3f} loss_val: {val_loss:.3f}, accuracy_val: {val_acc:.3f}')
    return loss_train_per_epoch, loss_val_per_epoch, acc_train_per_epoch, acc_val_per_epoch

In [66]:
train = [s for s in sentences if s[0]==0]
test = [s for s in sentences if s[0]==1]

In [68]:
criterion = torch.nn.CrossEntropyLoss()
num_epochs=10
optimizer = optim.SGD(model.parameters(), lr=0.01)
loss_train, loss_val, acc_train, acc_val = fit(model,train, test, num_epochs, 40, criterion, optimizer)

100%|███████████████████████████████████████████| 54/54 [00:41<00:00,  1.31it/s]


[1,    55] loss: 10.807, accuracy: 0.063 loss_val: 10.799, accuracy_val: 0.149


100%|███████████████████████████████████████████| 54/54 [00:39<00:00,  1.37it/s]


[2,    55] loss: 10.810, accuracy: 0.055 loss_val: 10.798, accuracy_val: 0.158


100%|███████████████████████████████████████████| 54/54 [00:38<00:00,  1.39it/s]


[3,    55] loss: 10.808, accuracy: 0.063 loss_val: 10.799, accuracy_val: 0.157


100%|███████████████████████████████████████████| 54/54 [00:39<00:00,  1.36it/s]


[4,    55] loss: 10.811, accuracy: 0.050 loss_val: 10.790, accuracy_val: 0.188


100%|███████████████████████████████████████████| 54/54 [00:38<00:00,  1.40it/s]


[5,    55] loss: 10.809, accuracy: 0.061 loss_val: 10.800, accuracy_val: 0.151


100%|███████████████████████████████████████████| 54/54 [00:38<00:00,  1.39it/s]


[6,    55] loss: 10.810, accuracy: 0.053 loss_val: 10.799, accuracy_val: 0.159


 13%|█████▋                                      | 7/54 [00:05<00:35,  1.31it/s]


KeyboardInterrupt: 

In [52]:
out = greedy_output(sentences[0][1], 40)
# logits = torch.cat([sc.softmax(-1) for sc in out.scores])
print(tokenizer.decode(out.sequences[0]))

A person work as a contractor, or a person who is a member of a trade union, is not required to register as an employee of the employer.

(2) A person is required by this section to
