In [9]:
# Add custom modules in src directory to Jupyter path
import sys
sys.path.append("src")

# Import standard modules
from collections import defaultdict

import pandas as pd
import torch
import torch.optim as optim

# Import custom modules
from setup import read_config
from setup import setup_datasets
from setup import setup_embedding_matrix
from setup import setup_model
from setup import setup_word_vectors
from process import texts_to_features
from utils import load_checkpoint

In [20]:
# Required inputs (change these as necessary)
# NOTE: both values are absolute paths on the original author's machine;
# edit them to match your own checkout before running the notebook.
# CONFIG_PATH is the file handed to read_config() to build `config`.
CONFIG_PATH = "/home/cody/abcnn/configs/quora/abcnn1.yaml"
# CHECKPOINT_PATH is the file handed to load_checkpoint() to restore the
# trained model weights and training history.
CHECKPOINT_PATH = "/home/cody/abcnn/checkpoints/quora/word2vec/google_news/abcnn1/best_checkpoint"

In [11]:
# Read in the config file
# `config` is indexed like a nested dict by the cells below, which read the
# "data_paths", "embeddings", "model" and "optim" entries from it.
config = read_config(CONFIG_PATH)

In [12]:
# Setup the datasets
# Pull the sizes and file locations this cell needs out of the config.
max_length = config["model"]["max_length"]
embeddings_size = config["embeddings"]["size"]
data_paths = config["data_paths"]

# Load every configured CSV into a DataFrame keyed by dataset name, then hand
# the frames to setup_datasets, which returns the prepared datasets together
# with the texts and the word -> index mapping used to build the embeddings.
datasets = dict(
    (name, pd.read_csv(data_path)) for name, data_path in data_paths.items()
)
datasets, texts, word2index = setup_datasets(datasets, embeddings_size, max_length)

quora_train: 100%|██████████| 283003/283003 [01:44<00:00, 2702.51it/s]
quora_val: 100%|██████████| 80049/80049 [00:30<00:00, 2649.83it/s]
quora_test: 100%|██████████| 41238/41238 [00:15<00:00, 2715.52it/s]


In [15]:
# Setup the embedding matrix
# `word_vectors` is whatever pretrained vector set the config selects (the
# logged run below loaded Word2Vec GoogleNews vectors); it is reused later by
# texts_to_features when featurizing new examples.
word_vectors = setup_word_vectors(config)
# Build the matrix from the word vectors and the word2index vocabulary;
# the result is passed to setup_model in the model cell.
embeddings = setup_embedding_matrix(word_vectors, word2index, embeddings_size)

Loading Word2Vec word vectors from: /home/cody/abcnn/embeddings/word2vec/google_news/GoogleNews-vectors-negative300.bin.gz


embedding matrix: 100%|██████████| 85856/85856 [00:00<00:00, 354274.10it/s]


In [21]:
# Setup the model and history dict
# Build a fresh model from the embedding matrix and the config.
model = setup_model(embeddings, config)
# The checkpoint unpacks into exactly four items; only the model weights
# (1st) and the training history (3rd) are kept here.
# NOTE(review): the discarded 2nd and 4th items are presumably optimizer
# state and an epoch counter - confirm against utils.load_checkpoint.
state = load_checkpoint(CHECKPOINT_PATH)
model_dict, _, history, _ = state
# Copy the checkpointed weights into the model. This does not change the
# model's train/eval mode, so set the mode explicitly before inference.
model.load_state_dict(model_dict)

Creating the ABCNN model...


In [22]:
# Setup the optimizer
# Adagrad is given only the parameters that still require gradients, so any
# frozen parameters are left out; the learning rate and weight decay come
# from the "optim" section of the config.
optimizer = optim.Adagrad(
    (param for param in model.parameters() if param.requires_grad),
    lr=config["optim"]["lr"],
    weight_decay=config["optim"]["weight_decay"],
)

In [None]:
# Make predictions
# Each example is a pair of texts that the model scores together.
examples = [
    ("How do I connect to VPN?", "I need connecting to VPN"),
    ("What's the wifi password?", "I want to get online.")
]

# Turn the raw text pairs into model inputs using the same word vectors,
# embedding size and maximum length that were used to set up the datasets.
featurized_examples, processed_texts = \
    texts_to_features(examples, word_vectors, embeddings_size, max_length)

# Inference only: put the model in eval mode so that training-only layers
# (e.g. dropout, if the model has any) are disabled, and turn off gradient
# tracking so no autograd graph is built for the forward pass.
model.eval()
with torch.no_grad():
    scores = model(featurized_examples)

# Display the result as the cell's output.
scores