In [1]:
import torch
from torch.utils.data import DataLoader
from dataloader.test_dataset import QADataset_Test
from networks import SiameseNet
from options import get_opts
import numpy as np
from sklearn.metrics import classification_report

from utils import load_vocab, load_checkpoint
from hazm import Normalizer,word_tokenize

In [2]:
import argparse

# Hyper-parameters exposed as command-line style options: (name, type, default).
# NOTE(review): `get_opts` is imported from `options` above but is not used in
# this notebook; these defaults presumably mirror it -- confirm they match the
# configuration the checkpoint was trained with.
_OPTION_SPECS = (
    ('vector_size', int, 300),
    ('max_no_tokens', int, 20),
    ('training_size', int, 10000),
    ('embedding_dim', int, 300),
    ('hidden_dim', int, 200),
    ('epochs', int, 10),
    ('lr_rate', float, 0.0001),
    ('batch_size', int, 64),
)

parser = argparse.ArgumentParser()
for _opt_name, _opt_type, _opt_default in _OPTION_SPECS:
    parser.add_argument('--' + _opt_name, type=_opt_type, default=_opt_default)

# parse_known_args() returns (namespace, leftover_argv); unrecognised argv
# entries are tolerated rather than raising, so only the namespace is kept.
opt, _extra_argv = parser.parse_known_args()

In [3]:
# Special tokens used when turning a sentence into model input.
START_TAG="<START>"  # placed at the front of every sentence by pad_sentence()
STOP_TAG="<STOP>"  # appended after the (possibly truncated) sentence tokens
PAD_TAG="<PAD>"  # filler used by pad_sentence() up to opt.max_no_tokens
OOV_TAG="<OOV>"  # to_tensor() falls back to this token's index for unknown words
# Single hazm Normalizer instance, reused by preprocess() for every sentence.
normalizer = Normalizer()

In [4]:
def preprocess(x):
    """Normalize and tokenize a raw sentence, dropping punctuation tokens.

    Args:
        x: The raw sentence text; it is passed to ``normalizer.normalize``
            and the result to ``word_tokenize``.

    Returns:
        A list with the tokens produced by ``word_tokenize``, in order,
        excluding any token that is exactly one of the punctuation marks
        listed below.
    """
    # Tokens that carry no content for the model and are filtered out.
    punctuation = ("؟", "!", ".", "،", ",", "?", ":", "<", ">", "(", ")", "{", "}")

    kept_tokens = []
    for token in word_tokenize(normalizer.normalize(x)):
        if token in punctuation:
            continue
        kept_tokens.append(token)
    return kept_tokens

def pad_sentence(sentence):
    """Wrap a token list in START/STOP tags and pad it to ``opt.max_no_tokens``.

    The sentence is truncated so that, together with the two tags, it fits in
    ``opt.max_no_tokens`` entries; shorter results are right-padded with
    ``PAD_TAG``.

    Args:
        sentence: Sequence of tokens (e.g. the output of ``preprocess``).

    Returns:
        A new list ``[START_TAG, *tokens, STOP_TAG, PAD_TAG, ...]``. Its
        length is ``opt.max_no_tokens`` whenever that option is at least 2;
        for smaller values only the two tags are returned.
    """
    # Two slots are reserved for the tags. Clamp at zero: a negative bound
    # would make the slice drop tokens from the END of the sentence instead
    # of truncating it, producing an over-long result.
    content_budget = max(opt.max_no_tokens - 2, 0)

    new_sentence = [START_TAG, *sentence[:content_budget], STOP_TAG]

    # Multiplying a list by a non-positive count yields [], so no padding is
    # added once the budget is already used up.
    new_sentence += [PAD_TAG] * (opt.max_no_tokens - len(new_sentence))
    return new_sentence

def to_tensor(seq):
    """Map a token sequence to a tensor of vocabulary indices.

    Args:
        seq: Iterable of tokens (e.g. the output of ``pad_sentence``).

    Returns:
        A 1-D ``torch.long`` tensor holding ``vocab[token]`` for each token,
        with tokens missing from ``vocab`` mapped to ``vocab[OOV_TAG]``.
    """
    indices = []
    for token in seq:
        # Unknown tokens share the out-of-vocabulary entry.
        lookup_key = token if token in vocab else OOV_TAG
        indices.append(vocab[lookup_key])
    return torch.tensor(indices, dtype=torch.long)

In [5]:
# Load the vocabulary that to_tensor() uses to map tokens to indices.
# NOTE(review): the file has a .pickle extension -- presumably load_vocab
# unpickles it, so only point this at trusted files; confirm in utils.
vocab = load_vocab('checkpoints/vocab.pickle')
# Build the network from the options and the vocabulary size, then call
# .eval() on it (presumably an nn.Module, so this selects inference
# behaviour -- TODO confirm against networks.SiameseNet).
model = SiameseNet(opt, len(vocab)).eval()
# Restore the saved weights; the return value is ignored, so load_checkpoint
# is assumed to update `model` in place -- verify in utils.
load_checkpoint(model, 'checkpoints/final_model.pth')

In [22]:
def encode_sentence(text):
    """Run the full text -> model-input pipeline for one sentence.

    Applies ``preprocess``, ``pad_sentence`` and ``to_tensor`` in that order
    and adds a leading dimension of size 1, so a single sentence can be fed
    to the model as a batch of one.
    """
    return to_tensor(pad_sentence(preprocess(text))).unsqueeze(0)


# The sentence pair to compare.
s1_text = "آب"
s2_text = "آتش"

s1 = encode_sentence(s1_text)
s2 = encode_sentence(s2_text)

# Inference only: no_grad() stops autograd from recording the forward pass,
# so the result carries no grad_fn and no graph memory is retained.
with torch.no_grad():
    predication = model(s1, s2)

# `predication` is the raw model score; to turn it into a hard 0/1 label,
# threshold it, e.g. (predication > 0.5).type(torch.float).
print(predication)

tensor([[0.2599]], grad_fn=<SigmoidBackward>)
