In [1]:
import sys
sys.path.append('/opt/ml/Dacon/unilm/beit3/')

import os
import pandas as pd

import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import torchvision.models as models # 이미지
from torchvision import transforms
from PIL import Image

from transformers import GPT2Tokenizer, GPT2Model # 텍스트

from tqdm.auto import tqdm
from modeling_finetune import beit3_large_patch16_224_vqav2
from dataset import VQADataset



In [2]:
import importlib

In [4]:
module = importlib.import_module('dataset')

In [7]:
f = getattr(module, 'VQADataset')

In [11]:
f()

TypeError: __init__() missing 4 required positional arguments: 'df', 'tokenizer', 'transform', and 'img_path'

In [2]:
def inference(model, loader):
    model.eval()
    preds = []
    with torch.no_grad():
        for data in tqdm(loader, total=len(loader)):
            images = data['image'].to(device)
            question = data['question'].to(device)

            outputs = model(images, question,None) # [batch, sequence, vocab]

            _, pred = torch.max(outputs, dim=2) # values, indices = _, pred
            preds.extend(pred.cpu().numpy())

    return preds

## Post-Processing

In [3]:
# csv file
test_df = pd.read_csv('../data/test.csv')
test_img_path = '../data/image/test'
sample_submission = pd.read_csv('../data/sample_submission.csv')

#tozenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
vocab_size = len(tokenizer)

#transform
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Dataset & DataLoader
test_dataset = VQADataset(test_df, tokenizer, transform, test_img_path, is_test=True)
test_loader = DataLoader(test_dataset, num_workers=4, batch_size=16, shuffle=False)
assert len(test_df) == len(test_dataset)

In [4]:
#model
device = 'cuda'
MODEL_NAME = 'BEiT3_Fold4'
model = beit3_large_patch16_224_vqav2(pretrained=True).cuda()
CHK_PATH = '/opt/ml/Dacon/weight/BEiT/BEiT_best_model.pt'
model.load_state_dict(torch.load(CHK_PATH)['model_state_dict'])
print('loaded weights')

loaded weights


In [None]:
# inference
preds = inference(model, test_loader)

no_pad_output = []
for pred in preds:
    output = pred[pred != 50257] # [PAD] token 제외
    no_pad_output.append(tokenizer.decode(output).strip()) # 토큰 id -> 토큰

  0%|          | 0/2530 [00:00<?, ?it/s]

## Submission

In [None]:
sample_submission['answer'] = no_pad_output
sample_submission.to_csv(f'./submission/submission_{MODEL_NAME}.csv', index=False)

In [9]:
solution = pd.read_csv(f'./submission/submission_{MODEL_NAME}_epoch5.csv')
solution

Unnamed: 0,ID,answer
0,TEST_00000,yes
1,TEST_00001,mount
2,TEST_00002,red
3,TEST_00003,brown
4,TEST_00004,no
...,...,...
40474,TEST_40474,phone
40475,TEST_40475,sm
40476,TEST_40476,o
40477,TEST_40477,white


In [11]:
test_df

Unnamed: 0,ID,image_id,question
0,TEST_00000,test_00000,Is this a trade show?
1,TEST_00001,test_00001,Where are the people?
2,TEST_00002,test_00002,What color is the ski suit?
3,TEST_00003,test_00003,What color are the cabinets?
4,TEST_00004,test_00004,Is there a person standing in the background?
...,...,...,...
40474,TEST_40474,test_09502,What is the man with the camera using?
40475,TEST_40475,test_09782,What is graffiti on the train?
40476,TEST_40476,test_08648,Is this vehicle traveling through the air or t...
40477,TEST_40477,test_02416,What color is the blanket on the back of the e...
