In [None]:
import os
import gc
gc.enable()
import random
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import (
    Dataset, DataLoader,
    SequentialSampler
)
from transformers import (
    AutoConfig,
    AutoModel,
    AutoTokenizer,
    logging,
)
logging.set_verbosity_warning()
logging.set_verbosity_error()

import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [None]:
class configs:
    """Static inference settings, read directly as class attributes."""

    # Model weights, model config and tokenizer files are all loaded from the
    # same dataset directory, so the three path settings share one value.
    model_name_or_path = config_name = tokenizer_name = "../input/muril-large-squad-v2"

    # Tokenization window: passed to the tokenizer as `max_length` and `stride`
    # when long contexts are split into overlapping features.
    max_seq_length = 384
    doc_stride = 128

    # Batch size of the inference DataLoader.
    eval_batch_size = 128

In [None]:
class DatasetRetriever(Dataset):
    """Map-style dataset over a list of already-tokenized feature dicts.

    Each item exposes `input_ids` and `attention_mask` as `torch.long`
    tensors; the remaining fields are handed through unchanged, with the
    feature's `example_id` published under the key `id`.
    """

    def __init__(self, features):
        super().__init__()
        self.features = features

    def __len__(self):
        return len(self.features)

    def __getitem__(self, item):
        record = self.features[item]

        # Model inputs become tensors first, so the key order stays stable.
        sample = {
            name: torch.tensor(record[name], dtype=torch.long)
            for name in ('input_ids', 'attention_mask')
        }

        # Bookkeeping fields are passed through as plain Python objects.
        sample['offset_mapping'] = record['offset_mapping']
        sample['sequence_ids'] = record['sequence_ids']
        sample['id'] = record['example_id']
        sample['context'] = record['context']
        sample['question'] = record['question']
        return sample

In [None]:
class Model(nn.Module):
    """Pretrained encoder with a linear span-prediction (start/end) head.

    Args:
        modelname_or_path: Name or path handed to `AutoModel.from_pretrained`.
        config: Model config; must provide `hidden_size`,
            `hidden_dropout_prob` and `initializer_range`.
    """

    def __init__(self, modelname_or_path, config):
        super().__init__()
        self.config = config
        self.bert = AutoModel.from_pretrained(modelname_or_path, config=config)
        self.qa_outputs = nn.Linear(config.hidden_size, 2)
        # Registered for parity with the existing module layout; forward()
        # does not apply it.
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self._init_weights(self.qa_outputs)

    def _init_weights(self, module):
        """Initialise a linear layer: normal weights (std from config), zero bias."""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()

    def forward(self, input_ids, attention_mask=None):
        """Return `(start_logits, end_logits)`, one score per input token.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Optional attention mask forwarded to the encoder.

        Returns:
            Two tensors obtained by splitting the 2-unit head output on its
            last axis and squeezing that axis away.
        """
        outputs = self.bert(input_ids, attention_mask=attention_mask)

        # Only the per-token hidden states are needed. The pooled output is
        # deliberately not read: indexing `outputs[1]` fails for encoders that
        # return nothing besides the hidden states.
        sequence_output = outputs[0]
        qa_logits = self.qa_outputs(sequence_output)

        start_logits, end_logits = qa_logits.split(1, dim=-1)
        return start_logits.squeeze(-1), end_logits.squeeze(-1)

In [None]:
def prepare_test_features(args, example, tokenizer):
    """Tokenize one question/context pair into one or more inference features.

    The context is the only part that may be truncated; anything that does not
    fit into `args.max_seq_length` tokens overflows into additional features
    that overlap according to `args.doc_stride`.

    Args:
        args: Object exposing `max_seq_length` and `doc_stride`.
        example: Mapping with `id`, `question` and `context`. It is not
            modified.
        tokenizer: Callable tokenizer whose result supports
            `["input_ids"]`, `["attention_mask"]`, `["offset_mapping"]` and
            `.sequence_ids(i)`.

    Returns:
        A list of feature dicts, one per tokenized window, each carrying
        `example_id`, `context`, `question` (left-stripped), `input_ids`,
        `attention_mask`, `offset_mapping` and `sequence_ids`.
    """
    # Strip into a local instead of writing back into the caller's row.
    question = example["question"].lstrip()
    context = example["context"]

    tokenized_example = tokenizer(
        question,
        context,
        truncation="only_second",
        max_length=args.max_seq_length,
        stride=args.doc_stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    features = []
    for window_index in range(len(tokenized_example["input_ids"])):
        # Positions without a sequence id (None) are stored as 0 so the list
        # contains only integers; context tokens keep their id of 1.
        sequence_ids = [
            0 if sequence_id is None else sequence_id
            for sequence_id in tokenized_example.sequence_ids(window_index)
        ]
        features.append(
            {
                "example_id": example["id"],
                "context": context,
                "question": question,
                "input_ids": tokenized_example["input_ids"][window_index],
                "attention_mask": tokenized_example["attention_mask"][window_index],
                "offset_mapping": tokenized_example["offset_mapping"][window_index],
                "sequence_ids": sequence_ids,
            }
        )
    return features

In [None]:
import collections

def postprocess_qa_predictions(examples, features, raw_predictions, n_best_size = 20, max_answer_length = 30):
    """Convert per-feature start/end logits into one answer string per example.

    Args:
        examples: DataFrame with at least `id` and `context` columns. Any
            index is accepted; rows are matched to features by position.
        features: Sequence of feature dicts with `example_id`,
            `sequence_ids` and `offset_mapping`. They are not modified.
        raw_predictions: Pair `(all_start_logits, all_end_logits)`, each
            indexable by feature position.
        n_best_size: How many top start and top end positions are combined
            per feature.
        max_answer_length: Maximum answer span length in tokens.

    Returns:
        `collections.OrderedDict` mapping example id to the text of the
        highest-scoring valid span, or `""` when no valid span exists.
    """
    all_start_logits, all_end_logits = raw_predictions

    # Group feature positions by the positional index of their example.
    example_id_to_index = {
        example_id: position for position, example_id in enumerate(examples["id"])
    }
    features_per_example = collections.defaultdict(list)
    for feature_index, feature in enumerate(features):
        features_per_example[example_id_to_index[feature["example_id"]]].append(feature_index)

    predictions = collections.OrderedDict()

    print(f"Post-processing {len(examples)} example predictions split into {len(features)} features.")

    # Sequence id that marks context tokens in each feature's `sequence_ids`.
    context_index = 1

    # Enumerate rows by position: the grouping above is positional, so using
    # the DataFrame's index labels would miss every feature whenever the
    # index is not the default 0..n-1 range.
    for example_index, (_, example) in enumerate(examples.iterrows()):
        context = example["context"]
        valid_answers = []

        for feature_index in features_per_example[example_index]:
            start_logits = all_start_logits[feature_index]
            end_logits = all_end_logits[feature_index]
            feature = features[feature_index]
            sequence_ids = feature["sequence_ids"]

            # Local masked copy: offsets of non-context tokens become None so
            # that spans touching them are rejected below.
            offset_mapping = [
                offsets if sequence_ids[token_index] == context_index else None
                for token_index, offsets in enumerate(feature["offset_mapping"])
            ]

            start_indexes = np.argsort(start_logits)[-1 : -n_best_size - 1 : -1].tolist()
            end_indexes = np.argsort(end_logits)[-1 : -n_best_size - 1 : -1].tolist()
            for start_index in start_indexes:
                for end_index in end_indexes:
                    # Skip positions outside the feature or outside the context.
                    if (
                        start_index >= len(offset_mapping)
                        or end_index >= len(offset_mapping)
                        or offset_mapping[start_index] is None
                        or offset_mapping[end_index] is None
                    ):
                        continue
                    # Don't consider answers with a length that is either < 0 or > max_answer_length.
                    if end_index < start_index or end_index - start_index + 1 > max_answer_length:
                        continue

                    start_char = offset_mapping[start_index][0]
                    end_char = offset_mapping[end_index][1]
                    valid_answers.append(
                        {
                            "score": start_logits[start_index] + end_logits[end_index],
                            "text": context[start_char:end_char],
                        }
                    )

        if valid_answers:
            # max() keeps the first of equally scored candidates, which is the
            # same pick a stable descending sort would put in front.
            best_answer = max(valid_answers, key=lambda answer: answer["score"])
        else:
            best_answer = {"text": "", "score": 0.0}

        predictions[example["id"]] = best_answer["text"]

    return predictions

In [None]:
def make_model(args):
    """Build the `(config, tokenizer, model)` triple described by `args`.

    Reads `args.config_name`, `args.tokenizer_name` and
    `args.model_name_or_path`; the loaded config is also passed to `Model`.
    """
    model_config = AutoConfig.from_pretrained(args.config_name)
    return (
        model_config,
        AutoTokenizer.from_pretrained(args.tokenizer_name),
        Model(args.model_name_or_path, config=model_config),
    )

In [None]:
# Tokenizer used to turn the test rows into model features.
tokenizer = AutoTokenizer.from_pretrained(configs.tokenizer_name)

test_df = pd.read_csv("../input/chaii-hindi-and-tamil-question-answering/test.csv")

# One row can yield several features, so they are collected into a flat list
# in row order.
test_features = []
for row_label, test_row in test_df.iterrows():
    test_features.extend(prepare_test_features(configs, test_row, tokenizer))

test_dataset = DatasetRetriever(test_features)

# Sequential, complete iteration: predictions are later matched to
# `test_features` by position.
loader_options = dict(
    batch_size=configs.eval_batch_size,
    sampler=SequentialSampler(test_dataset),
    num_workers=4,
    pin_memory=True,
    drop_last=False,
)
dataloader = DataLoader(test_dataset, **loader_options)

In [None]:
def get_predictions(checkpoint_path):
    """Run one checkpoint over the module-level `dataloader`.

    Args:
        checkpoint_path: Path of a state dict compatible with `Model`.

    Returns:
        `(start_logits, end_logits)`: two float64 arrays with one row per
        feature yielded by `dataloader`, in iteration order.

    Requires a CUDA device.
    """
    config, tokenizer, model = make_model(configs)
    model.cuda()

    # Deserialize onto the CPU; load_state_dict then copies the values into
    # the model's parameters, so a second copy of the weights is not
    # materialised on the device the checkpoint was saved from.
    state_dict = torch.load(checkpoint_path, map_location="cpu")
    model.load_state_dict(state_dict)
    del state_dict

    # Make inference mode explicit instead of depending on how the
    # sub-modules happened to be constructed.
    model.eval()

    start_logits = []
    end_logits = []
    with torch.no_grad():
        for batch in dataloader:
            outputs_start, outputs_end = model(
                batch['input_ids'].cuda(),
                batch['attention_mask'].cuda(),
            )
            # Cast to float64 so the stacked result has the same dtype and
            # values as a round-trip through Python floats would give.
            start_logits.append(outputs_start.cpu().numpy().astype(np.float64))
            end_logits.append(outputs_end.cpu().numpy().astype(np.float64))
            del outputs_start, outputs_end

    # Drop the model before the next checkpoint is loaded.
    del model, tokenizer, config
    gc.collect()
    return np.vstack(start_logits), np.vstack(end_logits)

In [None]:
# Checkpoints in the ensemble, in the same order as the weights below
# (w1 covers the first five, w2 the remaining four).
checkpoint_paths = [
    f"../input/chaii-qa-v2-fit-2epoch-4bs-10fold-1234-0-4fold/output/checkpoint-{fold}/pytorch_model.bin"
    for fold in range(5)
] + [
    f"../input/chaii-qa-v2-fit/output/checkpoint-{fold}/pytorch_model.bin"
    for fold in [5, 6, 7, 9]
]

temp_start_logits = []
temp_end_logits = []
for checkpoint_path in checkpoint_paths:
    start, end = get_predictions(checkpoint_path)
    temp_start_logits.append(start)
    temp_end_logits.append(end)

# NOTE(review): presumably per-fold validation scores where lower is better,
# hence the inverse weighting - confirm before enabling the weighted path.
w1 = [1.25523, 1.25155, 1.69658, 1.28333, 1.23949]
w2 = [1.60539, 2.07192, 1.27841, 1.19798] # fold8 5.95064
weight = 1 / np.array(w1+w2)

# Exactly one averaging scheme is applied. The plain mean is the default
# because it is the result this cell has always produced: the weighted
# average used to be computed and then immediately overwritten by the mean.
USE_WEIGHTED_AVERAGE = False

if USE_WEIGHTED_AVERAGE:
    start_logits = np.average(temp_start_logits, weights=weight, axis=0)
    end_logits = np.average(temp_end_logits, weights=weight, axis=0)
else:
    start_logits = sum(temp_start_logits) / len(temp_start_logits)
    end_logits = sum(temp_end_logits) / len(temp_end_logits)

In [None]:
# Turn the ensembled logits into one answer string per question id.
predictions = postprocess_qa_predictions(
    test_df,
    test_features,
    (start_logits, end_logits),
)
test_df['PredictionString'] = test_df['id'].map(predictions)

# The same two columns are written to disk and echoed for a quick check.
submission_columns = ['id', 'PredictionString']
test_df[submission_columns].to_csv('submission.csv', index=False)

print(test_df[submission_columns])