In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import re
import numpy as np
import time

from torch.utils.data import Dataset, DataLoader, TensorDataset

from tqdm import tqdm
import matplotlib.pyplot as plt
import time
from collections import deque

from sklearn.model_selection import GridSearchCV
import random


In [None]:

class DeepNetwork(nn.Module):
    """Next-token classifier: token embedding -> stacked GRU -> linear layer.

    Args:
        vocab_size: Number of distinct token ids (embedding rows and output logits).
        embedding_dim: Size of each token embedding.
        hidden_size: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        drop_out: Dropout probability handed to the GRU (between stacked layers).
    """

    def __init__(self, vocab_size, embedding_dim=100, hidden_size=256, num_layers=2, drop_out=0.0):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # nn.GRU applies dropout only between stacked layers; with a single
        # layer a non-zero value does nothing except trigger a warning, so it
        # is zeroed here. Parameter names and shapes are unaffected.
        rnn_dropout = drop_out if num_layers > 1 else 0.0
        self.rnn = nn.GRU(
            embedding_dim,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=rnn_dropout,
        )
        self.final = nn.Linear(hidden_size, vocab_size)

    def forward(self, t, hidden=None):
        """Return logits for the token following each sequence in the batch.

        Args:
            t: Integer tensor of token ids shaped (batch, seq_len); the GRU is
                built with ``batch_first=True`` and the output is indexed as 3-D.
            hidden: Optional initial GRU state. ``None`` (the default) lets
                ``nn.GRU`` start from its own zero state.

        Returns:
            ``(logits, hidden)``: logits come from the last time step only and
            have one row per batch element; ``hidden`` is the GRU's final state.
        """
        embeddings = self.embedding(t)
        out, hidden = self.rnn(embeddings, hidden)
        # Only the representation of the final position feeds the classifier.
        out = self.final(out[:, -1, :])
        return out, hidden



In [None]:
class Evaluate:
    """Restore a trained next-word model from a checkpoint and evaluate it.

    The checkpoint supplies the vocabulary mappings, the hyperparameters used
    to rebuild ``DeepNetwork``, the recorded training curves, and the token
    sequence (``eval_text``) that ``evaluate_text`` scores.
    """

    def __init__(self, file_path, device='cpu'):
        """Remember the target device and load the checkpoint at ``file_path``."""
        self.device = device
        self.load_model(file_path)

    def load_model(self, file_path):
        """Rebuild the model and restore vocabulary, metrics and eval data.

        Args:
            file_path: Path of a checkpoint containing the keys read below
                (``w2i``, ``i2w``, ``embedding_dim``, ``hidden_size``,
                ``num_layers``, ``vocab_size``, ``drop_out``,
                ``model_state_dict``, ``losses``, ``accuracies``,
                ``accuracies_words``, ``epoch_acc``, ``sequence_length``,
                ``eval_text``).

        Raises:
            KeyError: If the checkpoint lacks one of those keys.
        """
        # NOTE(review): torch.load unpickles the file, which can execute
        # arbitrary code; only load checkpoints from a trusted source.
        checkpoint = torch.load(file_path, map_location=self.device)

        # Vocabulary lookups: word -> index and index -> word.
        self._w2i = checkpoint['w2i']
        self._i2w = checkpoint['i2w']

        # Hyperparameters needed to rebuild the network.
        embedding_dim = checkpoint['embedding_dim']
        vocab_size = checkpoint['vocab_size']
        drop_out = checkpoint['drop_out']
        self.hidden_size = checkpoint['hidden_size']
        self.num_layers = checkpoint['num_layers']

        # drop_out is forwarded so the rebuilt network matches the stored
        # configuration; it is inactive while the model is in eval() mode.
        self.model = DeepNetwork(
            vocab_size,
            embedding_dim,
            self.hidden_size,
            num_layers=self.num_layers,
            drop_out=drop_out,
        ).to(self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.eval()

        # Metrics recorded in the checkpoint.
        self.losses = checkpoint['losses']
        self.accuracies = checkpoint['accuracies']
        self.accuracies_words = checkpoint['accuracies_words']
        self.epoch_acc = checkpoint['epoch_acc']

        self.sequence_length = checkpoint['sequence_length']
        self.eval_text = checkpoint['eval_text']

        print(f"Model loaded from {file_path}")

    def _next_word_probabilities(self, input_indices, hidden=None):
        """Run the model on one context and return (probabilities, hidden).

        ``input_indices`` may be any iterable of token ids (list, deque, ...);
        it is fed as a batch of size one. ``probabilities`` is a 1-D numpy
        array with one entry per output logit.
        """
        input_tensor = torch.LongTensor([list(input_indices)]).to(self.device)
        with torch.no_grad():
            output, hidden = self.model(input_tensor, hidden)
        # The model returns one row of logits per batch element; drop the batch axis.
        probabilities = F.softmax(output.squeeze(0), dim=0).cpu().numpy()
        return probabilities, hidden

    def generate_text(self, seed_text, next_words=50, top_k=5):
        """Extend ``seed_text`` by sampling ``next_words`` words from the model.

        At each step the last ``sequence_length`` words are fed to the model
        with a fresh hidden state, and the next word is sampled from the
        ``top_k`` most probable candidates, weighted by their renormalised
        probabilities. Words missing from the vocabulary map to index 0.

        Args:
            seed_text: Whitespace-separated starting text (lower-cased first).
            next_words: Number of words to append.
            top_k: Number of candidates to sample from at each step.

        Returns:
            The seed words followed by the generated words, joined by spaces.

        Raises:
            ValueError: If ``seed_text`` contains no words.
        """
        self.model.eval()
        words = seed_text.lower().split()
        if not words:
            raise ValueError("seed_text must contain at least one word")

        for _ in range(next_words):
            context = [
                self._w2i[word] if word in self._w2i else 0
                for word in words[-self.sequence_length:]
            ]
            # The whole context window is re-fed each step, so no state is carried over.
            probabilities, _ = self._next_word_probabilities(context, None)

            top_indices = np.argsort(probabilities)[-top_k:][::-1]
            # float64 keeps the renormalised weights well within the
            # sum-to-one tolerance checked by np.random.choice.
            top_probs = probabilities[top_indices].astype(np.float64)

            predicted_index = np.random.choice(top_indices, p=top_probs / top_probs.sum())
            words.append(self._i2w[predicted_index])

        return ' '.join(words)

    def _generate_words(self, input_indices, neighbours, hidden):
        """Return the ``neighbours`` most probable next words for a context.

        Returns:
            ``(top_indices, top_words, hidden)`` where ``top_indices`` is sorted
            by descending probability, ``top_words`` are the matching
            vocabulary entries and ``hidden`` is the updated GRU state.
        """
        probabilities, hidden = self._next_word_probabilities(input_indices, hidden)

        top_indices = np.argsort(probabilities)[-neighbours:][::-1]
        top_words = [self._i2w[idx] for idx in top_indices]

        return top_indices, top_words, hidden

    def evaluate_text(self, neighbours=5):
        """Measure how often, and how early, the model suggests the next word.

        Walks through ``self.eval_text`` (token ids), asks the model for its
        ``neighbours`` best suggestions before each token, and accumulates the
        found-word count and saved keystrokes via ``evaluate_options``.

        Returns:
            ``(saved_keystroke_ratio, found_word_ratio)``, both as fractions.
            The same values are printed as percentages.

        Raises:
            ValueError: If ``eval_text`` has fewer than two tokens, since no
                prediction can be scored.
        """
        if len(self.eval_text) < 2:
            raise ValueError("eval_text must contain at least two tokens to evaluate")

        start = self.eval_text[0]
        saved_keystrokes = 0
        total_keystrokes = 0

        found_words = 0
        total_words = 0

        # Sliding context window; max(1, ...) stops it from being permanently
        # empty when sequence_length is 1.
        input_indices = deque([start], maxlen=max(1, self.sequence_length - 1))

        # NOTE(review): the hidden state is carried across steps while the
        # sliding window is re-fed each time, so earlier tokens reach the GRU
        # more than once. Confirm this matches how the model was trained.
        hidden = torch.zeros(self.num_layers, 1, self.hidden_size).to(self.device)
        for index in self.eval_text[1:]:
            _, top_words, hidden = self._generate_words(input_indices, neighbours, hidden)

            target_word = self._i2w[index]
            fw, sk = self.evaluate_options(target_word, top_words)

            saved_keystrokes += sk
            found_words += fw

            total_words += 1
            total_keystrokes += len(target_word)
            input_indices.append(index)

        keystroke_ratio = saved_keystrokes / total_keystrokes if total_keystrokes else 0.0
        found_ratio = found_words / total_words

        print(f'Saved keystroke percentage: {100*keystroke_ratio:.2f}%')
        print(f'Found words percentage: {100*found_ratio:.2f}%')
        return keystroke_ratio, found_ratio

    def evaluate_options(self, target_word, options, nr_of_suggestions=5):
        """Score one prediction against the word that actually came next.

        Simulates typing ``target_word`` one character at a time: after each
        typed prefix, ``options`` is narrowed to the words sharing that prefix
        and only the first ``nr_of_suggestions`` of them count as shown.

        Args:
            target_word: The true next word.
            options: Candidate words, most probable first.
            nr_of_suggestions: How many candidates are visible at once.

        Returns:
            ``(found_word, saved_keystrokes)``. ``found_word`` is 1 if the
            target became visible at some prefix length, else 0;
            ``saved_keystrokes`` is the number of characters that did not need
            typing (0 when the word is never shown).
        """
        if target_word not in options:
            return 0, 0

        candidates = list(options)
        for typed in range(len(target_word) + 1):
            prefix = target_word[:typed]
            candidates = [word for word in candidates if word[:typed] == prefix]
            if target_word in candidates[:nr_of_suggestions]:
                return 1, len(target_word) - typed

        # The target shares its full spelling with at least nr_of_suggestions
        # higher-ranked candidates, so it never becomes visible.
        return 0, 0




In [None]:
# Load the trained model and its evaluation data from 'model.pth' (device defaults to 'cpu').
evale = Evaluate('model.pth')

In [None]:
# Score next-word suggestions on the checkpoint's eval_text with the default neighbours=5;
# prints the saved-keystroke and found-word percentages and returns both as fractions.
evale.evaluate_text()