In [None]:
!pip3 install transformers
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [None]:
from torch.utils.data import Dataset
from transformers import LayoutLMTokenizer
import torch
import json
import os

class FUNSDDataset(Dataset):
    """Token-classification dataset over a directory of JSON form annotations.

    Each ``*.json`` file in ``data_dir`` is one example. The file must contain a
    ``"form"`` list whose entries provide ``"text"``, ``"label"`` (a key of
    ``labels_map``) and ``"box"`` (``[left, top, right, bottom]`` in pixels).
    The page size ``(width, height)`` is read from ``form[0]["page_size"]``.
    NOTE(review): stock FUNSD annotation files do not appear to carry a
    ``page_size`` field -- confirm the data has been preprocessed to add it.

    Every item returned by ``__getitem__`` is a dict of ``torch.long`` tensors,
    all padded/truncated to ``max_length`` tokens so that examples can be
    stacked into a batch:

    * ``input_ids``      -- ``[CLS]`` + word-piece ids + ``[SEP]`` + padding
    * ``attention_mask`` -- 1 for real tokens, 0 for padding
    * ``token_type_ids`` -- all zeros (single segment)
    * ``bbox``           -- one ``[x0, y0, x1, y1]`` box per token, scaled to
      the integer 0-1000 grid used by LayoutLM's position embeddings
    * ``labels``         -- one class id per token; special and padding
      positions carry ``IGNORE_LABEL_ID`` so the loss skips them

    Args:
        data_dir: Directory containing the JSON annotation files.
        max_length: Fixed sequence length of every returned example
            (including the two special tokens). Must be at least 2.
        tokenizer: Optional tokenizer exposing ``tokenize``,
            ``convert_tokens_to_ids`` and ``cls_token_id`` / ``sep_token_id`` /
            ``pad_token_id``. Defaults to the pretrained LayoutLM tokenizer.

    Raises:
        ValueError: If ``max_length`` is smaller than 2, or (on item access) if
            a file declares a non-positive page size.
        KeyError: On item access, if an entry uses a label not in ``labels_map``.
    """

    # Label id skipped by torch.nn.CrossEntropyLoss (its default ignore_index).
    IGNORE_LABEL_ID = -100
    # Upper bound of the integer coordinate grid the boxes are scaled to.
    BOX_SCALE = 1000

    def __init__(self, data_dir, max_length=512, tokenizer=None):
        if max_length < 2:
            raise ValueError("max_length must be at least 2 to hold [CLS] and [SEP]")
        self.data_dir = data_dir
        self.max_length = max_length
        if tokenizer is None:
            tokenizer = LayoutLMTokenizer.from_pretrained('microsoft/layoutlm-base-uncased')
        self.tokenizer = tokenizer
        self.labels_map = {"other": 0, "question": 1, "header": 2, "answer": 3}
        # Sort for a reproducible index -> file mapping (os.listdir order is
        # arbitrary) and skip anything that is not a JSON file, e.g. the
        # ``.ipynb_checkpoints`` directory Jupyter may create.
        self.data_files = sorted(
            name for name in os.listdir(data_dir)
            if name.lower().endswith('.json')
            and os.path.isfile(os.path.join(data_dir, name))
        )

    def __len__(self):
        """Return the number of annotation files (one example per file)."""
        return len(self.data_files)

    @classmethod
    def _normalize_box(cls, box, width, height):
        """Scale a pixel ``[left, top, right, bottom]`` box to integers in 0..BOX_SCALE."""
        left, top, right, bottom = box
        scaled = (
            cls.BOX_SCALE * left / width,
            cls.BOX_SCALE * top / height,
            cls.BOX_SCALE * right / width,
            cls.BOX_SCALE * bottom / height,
        )
        # Clamp so a box that slightly overshoots the page cannot index outside
        # the coordinate grid.
        return [min(cls.BOX_SCALE, max(0, int(value))) for value in scaled]

    def __getitem__(self, idx):
        """Load, tokenize and pad the example stored in the ``idx``-th file."""
        path = os.path.join(self.data_dir, self.data_files[idx])
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        form = data['form']
        if form:
            width, height = form[0]['page_size']
            if width <= 0 or height <= 0:
                raise ValueError(f"Invalid page_size {width}x{height} in {path}")
        else:
            # No entries means no boxes to scale; the example is just
            # [CLS] [SEP] followed by padding.
            width = height = 1

        tokenizer = self.tokenizer
        budget = self.max_length - 2  # room left after [CLS] and [SEP]

        token_ids, token_boxes, token_labels = [], [], []
        for entry in form:
            pieces = tokenizer.tokenize(entry['text'])
            if not pieces:
                # Empty / whitespace-only text produces no tokens to label.
                continue
            box = self._normalize_box(entry['box'], width, height)
            label = self.labels_map[entry['label']]
            # Every word piece of an entry inherits the entry's box and label,
            # keeping the three sequences aligned token-for-token.
            token_ids.extend(tokenizer.convert_tokens_to_ids(pieces))
            token_boxes.extend([box] * len(pieces))
            token_labels.extend([label] * len(pieces))
            if len(token_ids) >= budget:
                break

        # Truncate to the budget (the last entry may have overshot it).
        token_ids = token_ids[:budget]
        token_boxes = token_boxes[:budget]
        token_labels = token_labels[:budget]

        # Add the special tokens: [CLS] gets an all-zero box and [SEP] the
        # full-page box; neither contributes to the loss.
        input_ids = [tokenizer.cls_token_id] + token_ids + [tokenizer.sep_token_id]
        bbox = [[0, 0, 0, 0]] + token_boxes + [[self.BOX_SCALE] * 4]
        labels = [self.IGNORE_LABEL_ID] + token_labels + [self.IGNORE_LABEL_ID]
        attention_mask = [1] * len(input_ids)

        # Right-pad everything to the fixed length.
        pad = self.max_length - len(input_ids)
        input_ids += [tokenizer.pad_token_id] * pad
        bbox += [[0, 0, 0, 0]] * pad
        labels += [self.IGNORE_LABEL_ID] * pad
        attention_mask += [0] * pad

        return {
            'input_ids': torch.tensor(input_ids, dtype=torch.long),
            'attention_mask': torch.tensor(attention_mask, dtype=torch.long),
            'bbox': torch.tensor(bbox, dtype=torch.long),
            'labels': torch.tensor(labels, dtype=torch.long),
            'token_type_ids': torch.zeros(self.max_length, dtype=torch.long)
        }

In [None]:
from transformers import LayoutLMForTokenClassification, Trainer, TrainingArguments

# Create the datasets first so the model's label space can be derived from them
# instead of being hard-coded.
train_dataset = FUNSDDataset("path_to_your_train_data")
eval_dataset = FUNSDDataset("path_to_your_eval_data")

label2id = train_dataset.labels_map
id2label = {label_id: name for name, label_id in label2id.items()}

# Load the pre-trained model with a classification head sized to the label set.
# Storing the label names in the config makes the saved model's predictions
# readable without this notebook.
model = LayoutLMForTokenClassification.from_pretrained(
    "microsoft/layoutlm-base-uncased",
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id,
)

# Define the training arguments
# NOTE(review): warmup_steps=500 exceeds the total number of optimizer steps when
# the training set is small (e.g. ~150 documents at batch size 16 for 3 epochs is
# ~30 steps), in which case the learning rate never reaches its peak -- confirm
# against the real dataset size.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
)

# Create the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset
)

# Train the model
trainer.train()

# Evaluate on the held-out set; without an explicit call the eval_dataset passed
# to the Trainer above is never used.
eval_metrics = trainer.evaluate()
print(eval_metrics)

# Save the model together with its tokenizer so "./model" can be reloaded on its own.
trainer.save_model("./model")
train_dataset.tokenizer.save_pretrained("./model")