In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from transformers import VisualBertForPreTraining, BertTokenizer, VisualBertModel
import pickle
import pandas as pd


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Define the training dataset
class MyDataset(Dataset):
    def __init__(self, data):
        self.data = data
        self.indices = list(range(len(data)))  # set indices attribute
        print(self.data.keys())  
        print(f"Number of indices: {len(self.indices)}")


    def __getitem__(self, index):
        index = self.indices[index]  # get the actual index from self.indices
        text = self.data['text'][index]
        label = self.data['label'][index]
        embedded = self.data['embedded'][index]
        
        return text, label, embedded

    def __len__(self):
        return len(self.data)



In [3]:
# Define the pre-trained Visual-Bert model
model = VisualBertModel.from_pretrained('uclanlp/visualbert-vqa-coco-pre', num_labels=2)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Some weights of the model checkpoint at uclanlp/visualbert-vqa-coco-pre were not used when initializing VisualBertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing VisualBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing VisualBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [5]:

# Define the pre-trained Visual-Bert model
model = VisualBertForPreTraining.from_pretrained('uclanlp/visualbert-vqa-coco-pre', num_labels=2)
#tokenizer = VisualBertTokenizer.from_pretrained('uclanlp/visualbert-vqa-coco-pre')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

#possiblement a changer le VQA 

In [6]:
# Define the training loop

def train(model, tokenizer, train_dataset, optimizer, criterion, device, batch_size, epochs):
    model.to(device)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=False)
    for epoch in range(epochs):
        running_loss = 0.0
        for text,label,embedded in train_loader:
            optimizer.zero_grad()
            text_encoded = tokenizer(text, return_tensors='pt', padding=True, truncation=True)
            #image = embedded.to(device)
    
            text_encoded = {k: v.to(device) for k, v in text_encoded.items()}
            label = label.to(device)
            outputs = model(inputs_ids=text_encoded['input_ids'], visual_embeds=torch.from_numpy(embedded), labels=label)
            loss = criterion(outputs.logits, label)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
        print('Epoch [%d] - loss: %.4f' % (epoch+1, running_loss/len(train_loader)))




In [8]:

# Define the optimizer and loss function
optimizer = optim.AdamW(model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()


In [9]:
df=pd.read_json("data/train.jsonl",lines=True)

l = []

with open(r"C:\Users\arman\OneDrive\Bureau\data\file.pkl","rb") as f:
    l = pickle.load(f)
f.close()
df.loc[:, 'embedded'] = l

In [10]:
essai=df.head(20)

In [11]:
train_dataset = MyDataset(essai)
print(len(train_dataset))  # should print the length of your train data


Index(['id', 'img', 'label', 'text', 'embedded'], dtype='object')
Number of indices: 20
20


In [12]:
train(model, tokenizer, train_dataset, optimizer, criterion, device, batch_size=1, epochs=10)

TypeError: expected np.ndarray (got Tensor)