In [1]:
!pip install ../input/sacremoses/sacremoses-master/ > /dev/null
!pip install ../input/transformers/transformers-master/ > /dev/null

In [2]:
import os
import random
import multiprocessing
import transformers
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
from torch import nn
from torch.nn import BCEWithLogitsLoss
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import AdamW, XLNetTokenizer, XLNetModel, XLNetLMHeadModel, XLNetConfig
from tqdm.notebook import tqdm
from tqdm import trange
from transformers import BertModel
from torch.utils import data
from transformers import (
    BertTokenizer, BertModel, BertForSequenceClassification, 
    WEIGHTS_NAME, CONFIG_NAME
)
from transformers import AdamW
from transformers.optimization import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup



'2.2.0'

In [3]:
train = pd.read_csv("../input/google-quest-challenge/train.csv",)
test = pd.read_csv("../input/google-quest-challenge/test.csv",)

target_cols = ['question_asker_intent_understanding', 'question_body_critical', 
               'question_conversational', 'question_expect_short_answer', 
               'question_fact_seeking', 'question_has_commonly_accepted_answer', 
               'question_interestingness_others', 'question_interestingness_self', 
               'question_multi_intent', 'question_not_really_a_question', 
               'question_opinion_seeking', 'question_type_choice',
               'question_type_compare', 'question_type_consequence',
               'question_type_definition', 'question_type_entity', 
               'question_type_instructions', 'question_type_procedure', 
               'question_type_reason_explanation', 'question_type_spelling', 
               'question_well_written', 'answer_helpful',
               'answer_level_of_information', 'answer_plausible', 
               'answer_relevance', 'answer_satisfaction', 
               'answer_type_instructions', 'answer_type_procedure', 
               'answer_type_reason_explanation', 'answer_well_written']

In [13]:
# From the Ref Kernel's
from math import floor, ceil

def _get_masks(tokens, max_seq_length):
    """Mask for padding"""
    if len(tokens)>max_seq_length:
        raise IndexError("Token length more than max seq length!")
    return [1]*len(tokens) + [0] * (max_seq_length - len(tokens))

def _get_segments(tokens, max_seq_length):
    """Segments: 0 for the first sequence, 1 for the second"""
    
    if len(tokens) > max_seq_length:
        raise IndexError("Token length more than max seq length!")
        
    segments = []
    first_sep = True
    current_segment_id = 0
    
    for token in tokens:
        segments.append(current_segment_id)
        if token == "[SEP]":
            if first_sep:
                first_sep = False 
            else:
                current_segment_id = 1
    return segments + [0] * (max_seq_length - len(tokens))

def _get_ids(tokens, tokenizer, max_seq_length):
    """Token ids from Tokenizer vocab"""
    
    token_ids = tokenizer.convert_tokens_to_ids(tokens)
    input_ids = token_ids + [0] * (max_seq_length-len(token_ids))
    return input_ids

def _trim_input(title, question, answer, max_sequence_length=512, t_max_len=30, q_max_len=120, a_max_len=358):
    
    #293+239+30 = 508 + 4 = 512
    t = tokenizer.tokenize(title)
    q = tokenizer.tokenize(question)
    a = tokenizer.tokenize(answer)
    
    t_len = len(t)
    q_len = len(q)
    a_len = len(a)

    if (t_len+q_len+a_len+4) > max_sequence_length:
        
        if t_max_len > t_len:
            t_new_len = t_len
            a_max_len = a_max_len + floor((t_max_len - t_len)/2)
            q_max_len = q_max_len + ceil((t_max_len - t_len)/2)
        else:
            t_new_len = t_max_len
      
        if a_max_len > a_len:
            a_new_len = a_len 
            q_new_len = q_max_len + (a_max_len - a_len)
        elif q_max_len > q_len:
            a_new_len = a_max_len + (q_max_len - q_len)
            q_new_len = q_len
        else:
            a_new_len = a_max_len
            q_new_len = q_max_len
            
            
        if t_new_len+a_new_len+q_new_len+4 != max_sequence_length:
            raise ValueError("New sequence length should be %d, but is %d"%(max_sequence_length, (t_new_len + a_new_len + q_new_len + 4)))
        
        t = t[:t_new_len]
        q = q[:q_new_len]
        a = a[:a_new_len]
    
    return t, q, a

def _convert_to_bert_inputs(title, question, answer, tokenizer, max_sequence_length):
    """Converts tokenized input to ids, masks and segments for BERT"""
    
    stoken = ["[CLS]"] + title + ["[SEP]"] + question + ["[SEP]"] + answer + ["[SEP]"]

    input_ids = _get_ids(stoken, tokenizer, max_sequence_length)
    input_masks = _get_masks(stoken, max_sequence_length)
    input_segments = _get_segments(stoken, max_sequence_length)

    return [input_ids, input_masks, input_segments]

def compute_input_arays(df, columns, tokenizer, max_sequence_length):
    
    input_ids, input_masks, input_segments = [], [], []
    for _, instance in tqdm(df[columns].iterrows()):
        t, q, a = instance.question_title, instance.question_body, instance.answer
        t, q, a = _trim_input(t, q, a, max_sequence_length)
        ids, masks, segments = _convert_to_bert_inputs(t, q, a, tokenizer, max_sequence_length)
        input_ids.append(ids)
        input_masks.append(masks)
        input_segments.append(segments)
    return [
        torch.from_numpy(np.asarray(input_ids, dtype=np.int32)).long(), 
        torch.from_numpy(np.asarray(input_masks, dtype=np.int32)).long(),
        torch.from_numpy(np.asarray(input_segments, dtype=np.int32)).long(),
    ]

def compute_output_arrays(df, columns):
    return np.asarray(df[columns])

In [14]:
tokenizer = BertTokenizer.from_pretrained("../input/pretrained-bert-models-for-pytorch/bert-base-uncased-vocab.txt")
input_categories = list(train.columns[[1,2,5]]); input_categories

['question_title', 'question_body', 'answer']

In [15]:
outputs = compute_output_arrays(train, columns = target_cols)
inputs = compute_input_arays(train, input_categories, tokenizer, max_sequence_length=512)
test_inputs = compute_input_arays(test, input_categories, tokenizer, max_sequence_length=512)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


CPU times: user 54 s, sys: 408 ms, total: 54.4 s
Wall time: 54.3 s


In [19]:
lengths = np.argmax(inputs[0] == 0, axis=1)
lengths[lengths == 0] = inputs[0].shape[1]
y_train_torch = torch.tensor(train[target_cols].values, dtype=torch.float32)

CPU times: user 8 ms, sys: 0 ns, total: 8 ms
Wall time: 28.4 ms


In [23]:
inputs[0] = torch.tensor(inputs[0]).numpy()
inputs[1] = torch.tensor(inputs[1]).numpy()
inputs[2] = torch.tensor(inputs[2]).numpy()

lengths = torch.tensor(lengths).numpy()
y_train_torch = y_train_torch.numpy()

  """Entry point for launching an IPython kernel.
  
  This is separate from the ipykernel package so we can avoid doing imports until
  """


In [24]:
from sklearn.model_selection import train_test_split
X_tr_inputs_ids, X_val_inputs_ids, X_tr_masks, X_val_masks, X_tr_inputs_segs, X_val_inputs_segs, y_train, y_val, X_tr_lengths, X_val_lengths = train_test_split(inputs[0], inputs[1], inputs[2], y_train_torch,lengths,
                                                                              test_size=0.30, random_state=46)

In [25]:
X_tr_inputs_ids = torch.tensor(X_tr_inputs_ids)
X_val_inputs_ids = torch.tensor(X_val_inputs_ids)

X_tr_masks = torch.tensor(X_tr_masks, dtype=torch.long)
X_val_masks = torch.tensor(X_val_masks, dtype=torch.long)

X_tr_inputs_segs = torch.tensor(X_tr_inputs_segs)
X_val_inputs_segs = torch.tensor(X_val_inputs_segs)

y_train = torch.tensor(y_train, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)

X_tr_lengths = torch.tensor(X_tr_lengths)
X_val_lengths = torch.tensor(X_val_lengths)


In [27]:
# Select a batch size for training
batch_size = 8

train_data = TensorDataset(X_tr_inputs_ids, X_tr_masks, X_tr_inputs_segs, y_train, X_tr_lengths)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data,\
                              sampler=train_sampler,\
                              batch_size=batch_size)

validation_data = TensorDataset(X_val_inputs_ids, X_val_masks, X_val_inputs_segs, y_val, X_val_lengths)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(validation_data,\
                                   sampler=validation_sampler,\
                                   batch_size=batch_size)


In [28]:
next(iter(train_dataloader)) #input_ids, input_masks, input_segments, targets, lengths

[tensor([[  101,  9585,  4487,  ...,     0,     0,     0],
         [  101,  2224,  2448,  ...,     0,     0,     0],
         [  101,  2342,  3477,  ...,     0,     0,     0],
         ...,
         [  101,  3308, 12403,  ...,     0,     0,     0],
         [  101, 10047,  7971,  ...,     0,     0,     0],
         [  101,  2488,  2404,  ...,     0,     0,     0]]),
 tensor([[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         ...,
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0]]),
 tensor([[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]]),
 tensor([[0.6667, 0.3333, 0.0000, 0.0000, 1.0000, 1.0000, 0.3333, 0.3333, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000,
          0.00

In [29]:
from transformers import BertConfig

class BertSequenceClassification(torch.nn.Module):
    def __init__(self, num_labels=2):
        super(BertSequenceClassification, self).__init__()
        self.num_labels = num_labels
        bert_model_config = '../input/pretrained-bert-models-for-pytorch/bert-base-uncased/bert_config.json'
        bert_config = BertConfig.from_json_file(bert_model_config)
        bert_config.num_labels = 30
        self.bert = BertModel.from_pretrained('../input/pretrained-bert-models-for-pytorch/bert-base-uncased/', config=bert_config)
        self.dropout = torch.nn.Dropout(0.25)
        self.classifier = torch.nn.Linear(768, num_labels)
        self.loss_fct = BCEWithLogitsLoss()
        
        torch.nn.init.xavier_normal_(self.classifier.weight)

    def forward(self, input_ids, token_type_ids=None,\
              attention_mask=None, labels=None):
        # last hidden layer
        last_hidden_state = self.bert(input_ids=input_ids,\
                                   attention_mask=attention_mask,\
                                   token_type_ids=token_type_ids)
        # pool the outputs into a mean vector
        mean_last_hidden_state = self.pool_hidden_state(last_hidden_state)
        mean_last_hidden_state = self.dropout(mean_last_hidden_state)
        logits = self.classifier(mean_last_hidden_state)
        
        if labels is not None:
            loss = self.loss_fct(logits.view(-1, self.num_labels),\
                      labels.view(-1, self.num_labels))
            return loss
        else:
            return logits
    
    def freeze_xlnet_decoder(self):
        for param in self.xlnet.parameters():
            param.requires_grad = False
    
    def unfreeze_xlnet_decoder(self):
        for param in self.xlnet.parameters():
            param.requires_grad = True
    
    def pool_hidden_state(self, last_hidden_state):
        last_hidden_state = last_hidden_state[0]
        mean_last_hidden_state = torch.mean(last_hidden_state, 1)
        return mean_last_hidden_state

# len(Y_train[0]) = 6
model = BertSequenceClassification(num_labels=len(y_train[0]))
# model.freeze_xlnet_decoder()        

In [30]:
model.classifier

Linear(in_features=768, out_features=30, bias=True)

In [31]:
from scipy.stats import spearmanr

def train(model, num_epochs,\
          optimizer,\
          train_dataloader, valid_dataloader,\
          train_loss_set=[], valid_loss_set = [],\
          lowest_eval_loss=None, start_epoch=0,\
          device="cpu"
          ):
    """
    Train the model and save the model with the lowest validation loss
    """
    crit_function = nn.BCEWithLogitsLoss()
    model.to(device)

    # trange is a tqdm wrapper around the normal python range
    for i in trange(num_epochs, desc="Epoch"):
        # if continue training from saved model
        actual_epoch = start_epoch + i

        # Training

        # Set our model to training mode (as opposed to evaluation mode)
        model.train()

        # Tracking variables
        tr_loss = 0
        num_train_samples = 0

        t = tqdm(total=len(train_data), desc="Training: ", position=0)
        # Train the data for one epoch
        for step, batch in enumerate(train_dataloader):
            # Add batch to GPU
            batch = tuple(t.to(device) for t in batch)
            # Unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_input_segs, b_labels, b_lengths = batch
            # Clear out the gradients (by default they accumulate)
            optimizer.zero_grad()
            # Forward pass
            loss = model(b_input_ids, attention_mask=b_input_mask, token_type_ids = b_input_segs, labels=b_labels)
            # store train loss
            tr_loss += loss.item()
            num_train_samples += b_labels.size(0)
            # Backward pass
            loss.backward()
            # Update parameters and take a step using the computed gradient
            optimizer.step()
            #scheduler.step()
            t.update(n=b_input_ids.shape[0])
        t.close()
        # Update tracking variables
        epoch_train_loss = tr_loss/num_train_samples
        train_loss_set.append(epoch_train_loss)

        print("Train loss: {}".format(epoch_train_loss))

        # Validation

        # Put model in evaluation mode to evaluate loss on the validation set
        model.eval()

        # Tracking variables 
        eval_loss = 0
        num_eval_samples = 0

        v_preds = []
        v_labels = []

        # Evaluate data for one epoch
        t = tqdm(total=len(validation_data), desc="Validating: ", position=0)
        for batch in valid_dataloader:
            # Add batch to GPU
            batch = tuple(t.to(device) for t in batch)
            # Unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_input_segs, b_labels, b_lengths = batch
            # Telling the model not to compute or store gradients,
            # saving memory and speeding up validation
            with torch.no_grad():
                # Forward pass, calculate validation loss
                preds = model(b_input_ids, attention_mask=b_input_mask, token_type_ids = b_input_segs)
                loss = crit_function(preds, b_labels)
                v_labels.append(b_labels.cpu().numpy())
                v_preds.append(preds.cpu().numpy())
                # store valid loss
                eval_loss += loss.item()
                num_eval_samples += b_labels.size(0)
            t.update(n=b_labels.shape[0])
        t.close()

        v_labels = np.vstack(v_labels)
        v_preds = np.vstack(v_preds)
        print(v_labels.shape)
        print(v_preds.shape)
        rho_val = np.mean([spearmanr(v_labels[:, ind] + np.random.normal(0, 1e-7, v_preds.shape[0]),
                                            v_preds[:, ind] + np.random.normal(0, 1e-7, v_preds.shape[0])).correlation for ind in range(v_preds.shape[1])]
                                )
        epoch_eval_loss = eval_loss/num_eval_samples
        valid_loss_set.append(epoch_eval_loss)

        print("Epoch #{}, training BCE loss: {}, validation BCE loss: ~{}, validation spearmanr: {}"\
                .format(0, epoch_train_loss, epoch_eval_loss, rho_val))

        if lowest_eval_loss == None:
            lowest_eval_loss = epoch_eval_loss
            # save model
        #   save_model(model, model_save_path, actual_epoch,\
        #              lowest_eval_loss, train_loss_set, valid_loss_set)
        else:
            if epoch_eval_loss < lowest_eval_loss:
                lowest_eval_loss = epoch_eval_loss
            # save model
            # save_model(model, model_save_path, actual_epoch,\
            #            lowest_eval_loss, train_loss_set, valid_loss_set)
        print("\n")

    return model, train_loss_set, valid_loss_set


In [32]:
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=1, correct_bias=False)
model, train_loss_set, valid_loss_set = train(model=model,\
                                              num_epochs = 7,
                                              optimizer = optimizer,
                                              train_dataloader = train_dataloader,
                                              valid_dataloader = validation_dataloader,
                                              device='cuda'
                                              )

Epoch:   0%|          | 0/7 [00:00<?, ?it/s]

HBox(children=(IntProgress(value=0, description='Training: ', max=4255, style=ProgressStyle(description_width=…


Train loss: 0.051778042841742096


HBox(children=(IntProgress(value=0, description='Validating: ', max=1824, style=ProgressStyle(description_widt…

Epoch:  14%|█▍        | 1/7 [04:49<28:57, 289.64s/it]


(1824, 30)
(1824, 30)
Epoch #0, training BCE loss: 0.051778042841742096, validation BCE loss: ~0.049813302364527134, validation spearmanr: 0.24108826421926144




HBox(children=(IntProgress(value=0, description='Training: ', max=4255, style=ProgressStyle(description_width=…


Train loss: 0.04910443742462947


HBox(children=(IntProgress(value=0, description='Validating: ', max=1824, style=ProgressStyle(description_widt…

Epoch:  29%|██▊       | 2/7 [09:38<24:07, 289.43s/it]


(1824, 30)
(1824, 30)
Epoch #0, training BCE loss: 0.04910443742462947, validation BCE loss: ~0.04956876885211259, validation spearmanr: 0.2655391221913696




HBox(children=(IntProgress(value=0, description='Training: ', max=4255, style=ProgressStyle(description_width=…


Train loss: 0.0473620011369434


HBox(children=(IntProgress(value=0, description='Validating: ', max=1824, style=ProgressStyle(description_widt…

Epoch:  43%|████▎     | 3/7 [14:27<19:16, 289.24s/it]


(1824, 30)
(1824, 30)
Epoch #0, training BCE loss: 0.0473620011369434, validation BCE loss: ~0.04824141395000512, validation spearmanr: 0.2902069790230128




HBox(children=(IntProgress(value=0, description='Training: ', max=4255, style=ProgressStyle(description_width=…


Train loss: 0.04582048238514453


HBox(children=(IntProgress(value=0, description='Validating: ', max=1824, style=ProgressStyle(description_widt…

Epoch:  57%|█████▋    | 4/7 [19:16<14:27, 289.11s/it]


(1824, 30)
(1824, 30)
Epoch #0, training BCE loss: 0.04582048238514453, validation BCE loss: ~0.04830634785129836, validation spearmanr: 0.28750613914462897




HBox(children=(IntProgress(value=0, description='Training: ', max=4255, style=ProgressStyle(description_width=…


Train loss: 0.04433434134224467


HBox(children=(IntProgress(value=0, description='Validating: ', max=1824, style=ProgressStyle(description_widt…

Epoch:  71%|███████▏  | 5/7 [24:05<09:38, 289.06s/it]


(1824, 30)
(1824, 30)
Epoch #0, training BCE loss: 0.04433434134224467, validation BCE loss: ~0.04876778224123675, validation spearmanr: 0.2801410749557413




HBox(children=(IntProgress(value=0, description='Training: ', max=4255, style=ProgressStyle(description_width=…


Train loss: 0.04294143438899559


HBox(children=(IntProgress(value=0, description='Validating: ', max=1824, style=ProgressStyle(description_widt…

Epoch:  86%|████████▌ | 6/7 [28:54<04:49, 289.19s/it]


(1824, 30)
(1824, 30)
Epoch #0, training BCE loss: 0.04294143438899559, validation BCE loss: ~0.048828133872072944, validation spearmanr: 0.279772880783252




HBox(children=(IntProgress(value=0, description='Training: ', max=4255, style=ProgressStyle(description_width=…


Train loss: 0.04150708825902009


HBox(children=(IntProgress(value=0, description='Validating: ', max=1824, style=ProgressStyle(description_widt…

Epoch: 100%|██████████| 7/7 [33:43<00:00, 289.09s/it]


(1824, 30)
(1824, 30)
Epoch #0, training BCE loss: 0.04150708825902009, validation BCE loss: ~0.04997280578276044, validation spearmanr: 0.27718416880198976







In [33]:
test_inputs

[tensor([[  101,  2975, 18113,  ...,     0,     0,     0],
         [  101, 24471,  2140,  ...,     0,     0,     0],
         [  101, 10640, 27429,  ...,     0,     0,     0],
         ...,
         [  101, 29296,  8241,  ...,     0,     0,     0],
         [  101, 17970, 21907,  ...,     0,     0,     0],
         [  101,  2424,  2814,  ...,     0,     0,     0]]),
 tensor([[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         ...,
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0]]),
 tensor([[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]])]

In [34]:
%%time
sequences = np.array(test_inputs[0])
lengths = np.argmax(sequences == 0, axis=1)
lengths[lengths == 0] = sequences.shape[1]

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 988 µs


In [35]:
lengths

array([113,  97, 119, 512, 103,  67, 314, 158, 357,  54, 265,  60, 242,
       121, 178, 512, 326, 102, 186, 100,  68, 318,  68,  80,  40, 190,
       512, 196, 202, 318, 237, 512, 181,  97, 190, 244,  85, 388, 174,
        81, 187, 453, 226, 360, 433, 162, 145, 165, 320, 104, 106, 152,
       382, 215, 265,  78,  79, 512, 137, 341,  92, 109,  50,  86, 153,
       127, 100, 512,  68, 129, 203, 167, 512, 134, 132,  63, 241, 111,
        67, 128, 130, 164,  77, 125,  63, 353, 307, 293, 118, 104, 143,
        69,  38, 142,  37, 512,  66, 348,  76, 351, 512, 242, 104, 101,
       148,  49, 103,  52, 194, 136, 327, 129, 376, 101,  77, 512, 380,
       169, 197, 113, 195, 152, 369, 231,  54, 287,  79, 213,  67, 230,
        79, 256, 512, 199, 123, 307, 305, 158, 512, 250, 309, 340, 237,
       226,  38, 156, 512, 174,  48, 512, 166, 161, 441,  58, 131, 133,
        68, 342,  79, 129, 510, 118, 206, 182, 350, 131, 214, 178, 180,
       147, 512,  66,  27, 103, 121, 213,  99, 114, 241, 157, 17

In [36]:
dataset = data.TensorDataset(torch.tensor(test_inputs[0]),
                             torch.tensor(test_inputs[1], dtype=torch.long),
                             torch.tensor(test_inputs[2]),
                            )

test_dataloader = data.DataLoader(dataset, 
                               batch_size=8,  
                               shuffle=False, 
                               drop_last=False
                             )

  """Entry point for launching an IPython kernel.
  
  This is separate from the ipykernel package so we can avoid doing imports until


In [37]:
next(iter(test_dataloader))

[tensor([[  101,  2975, 18113,  ...,     0,     0,     0],
         [  101, 24471,  2140,  ...,     0,     0,     0],
         [  101, 10640, 27429,  ...,     0,     0,     0],
         ...,
         [  101,  3599, 24896,  ...,     0,     0,     0],
         [  101,  9530,  8873,  ...,     0,     0,     0],
         [  101,  2360, 24468,  ...,     0,     0,     0]]),
 tensor([[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         ...,
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0]]),
 tensor([[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]])]

In [38]:
for items in test_dataloader:
    print(items)

[tensor([[  101,  2975, 18113,  ...,     0,     0,     0],
        [  101, 24471,  2140,  ...,     0,     0,     0],
        [  101, 10640, 27429,  ...,     0,     0,     0],
        ...,
        [  101,  3599, 24896,  ...,     0,     0,     0],
        [  101,  9530,  8873,  ...,     0,     0,     0],
        [  101,  2360, 24468,  ...,     0,     0,     0]]), tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]]), tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]])]
[tensor([[  101,  1039,  2131,  ...,     0,     0,     0],
        [  101, 17680,  8816,  ...,     0,     0,     0],
        [  101, 19933,  6167,  ...,     0,     0,     0],
        ...,
        [ 

In [39]:
def generate_predictions(model, dataloader, num_labels, device="cpu", batch_size=8):

    pred_probs = np.array([]).reshape(0, num_labels)

    model.to(device)
    model.eval()

    for X, masks, segments in dataloader:
        X = X.to(device)
        masks = masks.to(device)
        segments = segments.to(device)
        with torch.no_grad():
            logits = model(input_ids=X, attention_mask=masks, token_type_ids=segments)
            logits = logits.sigmoid().detach().cpu().numpy()
            pred_probs = np.vstack([pred_probs, logits])
    return pred_probs
num_labels = len(target_cols)
pred_probs = generate_predictions(model, test_dataloader, num_labels=30, device="cuda", batch_size=8)

In [40]:
df_submit = pd.read_csv('/kaggle/input/google-quest-challenge/sample_submission.csv')
df_submit[target_cols] = pred_probs

In [41]:
df_submit.to_csv("submission.csv", index = False)
df_submit

Unnamed: 0,qa_id,question_asker_intent_understanding,question_body_critical,question_conversational,question_expect_short_answer,question_fact_seeking,question_has_commonly_accepted_answer,question_interestingness_others,question_interestingness_self,question_multi_intent,...,question_well_written,answer_helpful,answer_level_of_information,answer_plausible,answer_relevance,answer_satisfaction,answer_type_instructions,answer_type_procedure,answer_type_reason_explanation,answer_well_written
0,39,0.931062,0.696387,0.095076,0.755078,0.397395,0.532561,0.655127,0.653465,0.289167,...,0.921924,0.921750,0.532786,0.976143,0.952282,0.807921,0.044982,0.020425,0.873525,0.929921
1,46,0.865886,0.623261,0.002759,0.855455,0.680685,0.969054,0.571143,0.469156,0.121529,...,0.783437,0.963440,0.609924,0.981067,0.980359,0.856551,0.981178,0.035070,0.011664,0.906428
2,70,0.907716,0.779838,0.017781,0.470002,0.965914,0.959301,0.661537,0.659824,0.723543,...,0.931366,0.854351,0.397639,0.951362,0.932357,0.717864,0.043873,0.092316,0.875612,0.915666
3,132,0.862703,0.438286,0.006860,0.700325,0.743659,0.928124,0.533995,0.432076,0.054313,...,0.735572,0.955285,0.680599,0.975147,0.983691,0.883326,0.858125,0.143607,0.576939,0.913352
4,200,0.935625,0.576240,0.014007,0.750773,0.953409,0.952470,0.587655,0.591944,0.406222,...,0.857246,0.928472,0.640416,0.972696,0.922611,0.809743,0.334768,0.194107,0.905750,0.912765
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,9569,0.833554,0.605282,0.007766,0.616893,0.936130,0.934966,0.561452,0.441242,0.129550,...,0.837092,0.938635,0.680808,0.938514,0.971526,0.903263,0.508141,0.346526,0.435665,0.923079
472,9590,0.882944,0.599539,0.005476,0.805003,0.745474,0.936673,0.528047,0.491742,0.092067,...,0.769381,0.960799,0.659242,0.978317,0.986068,0.900776,0.952529,0.114075,0.068112,0.936390
473,9597,0.750751,0.364743,0.034463,0.478284,0.733934,0.697751,0.471832,0.534594,0.656206,...,0.715260,0.923436,0.609850,0.958549,0.955609,0.773400,0.259858,0.166405,0.847441,0.923323
474,9623,0.940551,0.732285,0.029034,0.859528,0.856275,0.926819,0.616503,0.474860,0.154186,...,0.883736,0.984608,0.738422,0.990320,0.995358,0.971394,0.238995,0.271274,0.772236,0.949011


In [42]:
ls

__notebook__.ipynb  __output__.json  submission.csv
