In [5]:
import os
import torch
import random
import pandas as pd

from utils import *
from model_module import BertClfModel
from data_module import DFDataset, build_context_input


In [6]:
class GlobalConfig:
    """Single container for the notebook's run settings.

    Every value is assigned as a plain instance attribute in ``__init__`` so the
    object can be handed as-is to helpers that read settings from it (the
    notebook passes it to ``DFDataset``).
    """

    def __init__(self):
        # general setting
        self.seed: int = 2022  # random seed value; this class only stores it

        # model setting
        # presumably a Hugging Face model identifier -- TODO confirm against model_module
        self.model_name: str = 'prajjwal1/bert-tiny'
        # location of the serialized model that the notebook passes to torch.load
        self.model_path: str = 'saved_models/test.pth'

        # data setting
        # tab-separated input file read with pandas
        self.tsv_path: str = 'data/us_remote-yes_2021_descr.tsv'
        # forwarded to build_context_input when the 'text' column is built
        self.context_length: int = 8
        # presumably the maximum tokenized sequence length -- TODO confirm in data_module
        self.max_length: int = 40
        # presumably consumed by DFDataset.build_dataloader -- TODO confirm in data_module
        self.batch_size: int = 32


# Shared settings instance used by the cells below.
config = GlobalConfig()

In [9]:

# --- Load and prepare the postings to classify --------------------------------
# Missing cells become '' so every field handed to the context builder is a string.
test_df = pd.read_csv(config.tsv_path, sep='\t').fillna('')
# build_context_input (data_module) turns each row into the text fed to the model;
# config.context_length is passed through to it unchanged.
test_df['text'] = test_df.apply(lambda x: build_context_input(x, config.context_length), axis=1)
# Keep one row per distinct text and renumber the index from 0.
test_df = test_df[['posting_id', 'text']].drop_duplicates(subset=['text']).reset_index(drop=True)

# NOTE(review): the third positional argument presumably marks inference mode
# (no labels) -- confirm in data_module.DFDataset.
test_loader = DFDataset(test_df, config, True).build_dataloader()

# --- Load the trained classifier ----------------------------------------------
# Resolve the target device once instead of querying CUDA for every batch.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# map_location remaps the stored tensors onto `device`; without it a checkpoint
# saved from a GPU cannot be deserialized on a CPU-only machine.
# NOTE(review): this unpickles a complete model object, which can execute
# arbitrary code -- only load checkpoints from a trusted source. Newer PyTorch
# releases default to `weights_only=True`, which rejects such files; if that
# applies here, pass `weights_only=False` for trusted checkpoints.
model = torch.load(config.model_path, map_location=device)
model = model.to(device)
model.eval()

# --- Predict ------------------------------------------------------------------
y_preds = []
with torch.no_grad():
    for batch in test_loader:
        # Each batch is a mapping of tensors; move every tensor to the model's device.
        input_dict = {k: v.to(device) for k, v in batch.items()}
        # The model returns a (logits, pooled_output) pair; only the logits are used.
        logits, pooled_output = model(input_dict)
        # Predicted class = index of the largest logit along dim 1
        # (assumes logits are (batch, num_classes) -- TODO confirm).
        y_preds.extend(logits.argmax(dim=1).cpu().tolist())

# Predictions are attached by position, so there must be exactly one per row.
# NOTE(review): this also relies on the loader yielding rows in frame order
# (no shuffling, no dropped last batch) -- verify in build_dataloader.
if len(y_preds) != len(test_df):
    raise ValueError(
        f'Got {len(y_preds)} predictions for {len(test_df)} rows; '
        'the dataloader must yield every row exactly once.'
    )

test_df['labels'] = y_preds
test_df[['posting_id', 'text', 'labels']]

Unnamed: 0,posting_id,text,labels
0,09ab57719f254aceb4322d4c63575758,"Company: IT Services, Burlington, MA Remote",1
1,625d5a9a2d0d48be8280c0f50f38d14e,of and enjoy the challenges of growing a remot...,1
2,f201e6c280374c23a91010cfc29363ae,these experiences are highly valued. * You can...,1
3,899feb3dcc6e41919465a3029586f5ad,Excel and presentation skills * Ability to eff...,1
4,926b055bf404458fafcc36bac86d8193,have Bachelor's Degree * Must be able to work ...,1
...,...,...,...
3887,13a81781603f4345af3aa48bc2caa8a3,cooperative attitude * Ability to travel 50% a...,1
3888,565078cc141b45ed9d40c442e077c545,Global Business Unit (HSGBU) This is a 100% re...,1
3889,edd40242b3e8470f993779bd1c203922,innovation starts with diversity and inclusion...,1
3890,a76c0fdb9eb049ab966abbbe03e87dcf,company with an existing team in the Philippin...,1
