In [None]:
# Clone the IndoML repository and change into the checkout, so that the
# relative file paths opened by the later cells resolve against it.
# NOTE(review): /content is presumably the Colab working directory -- adjust
# the %cd target when running anywhere else.
!git clone https://github.com/adarshdotexe/IndoML.git
%cd /content/IndoML

In [None]:
# Install the Python packages for this notebook into the running kernel.
# NOTE(review): the import cell below only pulls in torch and transformers
# (plus numpy); datasets, sentencepiece, rouge-score and nltk are presumably
# needed elsewhere -- confirm before trimming the list.
%pip install torch transformers datasets sentencepiece rouge-score nltk

In [None]:
# Import the required libraries:
import json

import numpy as np
import torch
from torch import device
from transformers import AutoModelForSequenceClassification, AutoTokenizer

In [None]:
# Load the intent -> hypothesis mapping (a JSON object keyed by intent name,
# whose values are the natural-language hypotheses used for NLI):
with open('intent_hypothesis_mapping.json', encoding='utf-8') as f:
    intent_hypothesis_mapping = json.load(f)


# Load the test utterances. The file is JSON Lines, one record per line, e.g.:
# {"indoml_id": 1, "utt": "Kindly show me my entire transaction record from the beginning of this year."}
# {"indoml_id": 2, "utt": "Could you help me get this thing delivered today?"}
# {"indoml_id": 3, "utt": "Can you join forces with my mobile?"}
with open('massive_test.data', encoding='utf-8') as f:
    # Skip blank lines (such as a trailing empty line at the end of the file)
    # instead of letting json.loads fail on them.
    data = [json.loads(line) for line in f if line.strip()]

# Parallel lists: premise[n] is the utterance text whose record id is ids[n].
premise = [record['utt'] for record in data]
ids = [record['indoml_id'] for record in data]
hypothesis = list(intent_hypothesis_mapping.values())

# Reverse the mapping in place: from here on `intent_hypothesis_mapping` maps
# hypothesis -> intent, so a winning hypothesis can be translated back to its
# intent name. If two intents share the same hypothesis text, the later one
# wins.
intent_hypothesis_mapping = {v: k for k, v in intent_hypothesis_mapping.items()}



# Zero-shot intent classification: pose each utterance as an NLI premise and
# each intent description as a hypothesis, then pick the intent whose
# hypothesis the model rates as most likely to be entailed.
nli_model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli')
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')

# Use `torch.device` explicitly rather than rebinding the imported `device`
# constructor with its own result, so this cell can be re-run safely.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The inputs are moved to `device` below, so the model has to live there too.
nli_model.to(device)
# Inference only: make sure dropout is disabled.
nli_model.eval()

output = []

# No gradients are needed for scoring; disabling them saves memory and lets
# the probabilities be read out as plain Python floats.
with torch.no_grad():
    for indoml_id, p in zip(ids, premise):
        entailment_probs = []
        for h in hypothesis:
            # Truncate only the premise if the pair is too long, so the
            # hypothesis is always kept intact.
            x = tokenizer.encode(p, h, return_tensors='pt',
                                 truncation='only_first')
            pair_logits = nli_model(x.to(device))[0]
            # bart-large-mnli orders its classes as contradiction (0),
            # neutral (1), entailment (2). Drop "neutral" and renormalise over
            # contradiction vs. entailment; the entailment share is taken as
            # the probability that the hypothesis is true.
            pair_probs = pair_logits[:, [0, 2]].softmax(dim=1)
            entailment_probs.append(pair_probs[0, 1].item())
        k = int(np.argmax(entailment_probs))
        output.append({"indoml_id": indoml_id, "intent": intent_hypothesis_mapping[hypothesis[k]]})

# Persist the predictions as JSON Lines: one {"indoml_id", "intent"} object
# per line, in the same order as they were produced.
with open('massive_test.output', 'w') as out_file:
    out_file.writelines(json.dumps(record) + '\n' for record in output)
