In [None]:
# Attach Google Drive to the Colab filesystem; the model checkpoint read
# further down lives under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/gdrive


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch import optim
import random
from copy import deepcopy
import time
import pprint
import nltk
from nltk.tokenize import word_tokenize
# Fetch the NLTK 'punkt' tokenizer data package (network access on first run;
# the recorded output shows it being unpacked under /root/nltk_data).
# NOTE(review): presumably needed by the imported `word_tokenize` -- confirm
# against the cells that actually tokenize text.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
class RNN(nn.Module):
  """Two-branch bidirectional GRU classifier over a (title, sentence) pair.

  Both inputs are looked up in one shared, frozen, pre-trained embedding
  table. Each input is then run through its own 2-layer bidirectional GRU,
  summarised by concatenating the forward state at the last real token with
  the reverse state at the first token, and passed through dropout. The two
  summaries (2 * hidden_dim each) are concatenated and fed to a
  Linear -> ReLU -> Linear head.

  Args:
    embedding_vectors: 2-D sequence/array of pre-trained vectors, one row per
      vocabulary id. Row 0 is treated as the padding id.
    hidden_dim: Hidden size of each GRU direction and of the first linear layer.
    output_dim: Number of output scores produced per example.

  Note:
    Attribute names (`e1`, `gru_title`, `gru_sentence`, `drop`, `l1`, `r1`,
    `l2`) are kept stable because checkpoints saved as whole pickled modules
    refer to them by name.
  """

  def __init__(self,
               embedding_vectors,
               hidden_dim,
               output_dim):
    super().__init__()

    self.embedding_vectors = embedding_vectors
    self.hidden_dim = hidden_dim
    self.output_dim = output_dim

    embedding_dim = len(self.embedding_vectors[0])

    # Shared lookup table for both inputs; `freeze=True` keeps the pre-trained
    # vectors out of the optimiser (same effect as requires_grad_(False)).
    weights = torch.FloatTensor(self.embedding_vectors)
    self.e1 = nn.Embedding.from_pretrained(weights, freeze=True, padding_idx=0)

    # Separate encoders so title and sentence do not share recurrent weights.
    self.gru_title = nn.GRU(input_size=embedding_dim,
                            hidden_size=self.hidden_dim,
                            num_layers=2,
                            batch_first=True,
                            bidirectional=True)

    self.gru_sentence = nn.GRU(input_size=embedding_dim,
                               hidden_size=self.hidden_dim,
                               num_layers=2,
                               batch_first=True,
                               bidirectional=True)

    self.drop = nn.Dropout(p=0.3)

    # 4 * hidden_dim = 2 inputs x (forward + reverse) summaries.
    self.l1 = nn.Linear(4*self.hidden_dim, self.hidden_dim)
    self.r1 = nn.ReLU()
    self.l2 = nn.Linear(self.hidden_dim, self.output_dim)

    nn.init.xavier_uniform_(self.l1.weight)
    nn.init.xavier_uniform_(self.l2.weight)

  def _encode(self, token_ids, gru):
    """Summarise a padded (batch, seq_len) id tensor into (batch, 2*hidden_dim)."""
    # Length = number of non-padding ids per row.
    # NOTE(review): assumes padding (id 0) only appears as a trailing run -- confirm
    # against the code that builds the batches.
    # Clamp to 1 so an all-padding row is encoded from its single padding
    # vector instead of making pack_padded_sequence raise for the whole batch.
    # pack_padded_sequence requires the lengths on the CPU.
    lengths = (token_ids != 0).sum(dim=1).clamp(min=1).cpu()

    embedded = self.e1(token_ids)
    packed = pack_padded_sequence(embedded, lengths, batch_first=True, enforce_sorted=False)
    packed_output, _ = gru(packed)
    output, _ = pad_packed_sequence(packed_output, batch_first=True)

    # The last feature axis holds [forward | reverse] states. The forward
    # direction has seen the whole sequence at its last real step; the
    # reverse direction has seen it all at step 0.
    rows = torch.arange(output.size(0), device=output.device)
    last_step = (lengths - 1).to(output.device)
    forward_state = output[rows, last_step, :self.hidden_dim]
    reverse_state = output[:, 0, self.hidden_dim:]

    return self.drop(torch.cat((forward_state, reverse_state), dim=1))

  def forward(self, title, sentence):
    """Score a batch of (title, sentence) pairs.

    Args:
      title: Integer id tensor of shape (batch, title_len), 0 = padding.
      sentence: Integer id tensor of shape (batch, sentence_len), 0 = padding.

    Returns:
      Tensor of shape (batch, output_dim) holding the raw outputs of the final
      linear layer (no activation is applied to them here).
    """
    text_fea_title = self._encode(title, self.gru_title)
    text_fea_sentence = self._encode(sentence, self.gru_sentence)

    input_fea = torch.cat((text_fea_title, text_fea_sentence), dim=1)

    x = self.l1(input_fea)
    x = self.r1(x)
    x = self.l2(x)

    return x

In [None]:
PATH = '/content/gdrive/MyDrive/models/query_classification/query_classifier.word2vec.economics.300d.pt'

# The checkpoint is read as a complete object, so no RNN(...) instance is built
# beforehand. The earlier `RNN(*args, **kwargs)` line referenced undefined
# names (NameError) and its result was overwritten by torch.load anyway.
# Unpickling a whole module requires the RNN class to be defined in this session.
# NOTE(review): assumes the file holds an entire pickled module rather than a
# state_dict -- if it is a state_dict, build RNN with its original constructor
# arguments and call `load_state_dict` instead.
# NOTE(review): torch.load unpickles arbitrary Python objects; only load
# checkpoints you trust. Recent PyTorch releases default to `weights_only=True`,
# which rejects whole-module pickles -- pass `weights_only=False` there if needed.
model = torch.load(PATH, map_location=torch.device('cpu'))
if not isinstance(model, nn.Module):
  raise TypeError(
      f'Expected {PATH} to contain a pickled nn.Module, got {type(model).__name__}')
model.eval()

NameError: ignored