# Setting up

In [1]:
import torch
import json
import spacy
import numpy as np
from pprint import pprint
from spacy.language import Language
from spacy.tokens import Doc
from spacy.tokenizer import Tokenizer
import re
import unicodedata
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from collections import defaultdict
from sklearn.preprocessing import LabelEncoder
device="cuda" if torch.cuda.is_available() else "cpu"
import torch.nn.functional as F
print(torch.__version__)

2.6.0+cu124


In [2]:
# Setting up text preprocessing functions
def unicodeToAscii(s):
    """
    Strip accents from the string 's'.

    The text is decomposed with Unicode 'NFD' normalisation, which splits an
    accented character into its base character followed by combining marks,
    and every nonspacing mark (category 'Mn') is then dropped.
    Characters that have no such decomposition are returned unchanged, so the
    result is not guaranteed to be pure ASCII.
    """
    decomposed = unicodedata.normalize('NFD', s)
    base_chars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(base_chars)

def normalize_whitespace(text):
  """Collapse each run of whitespace in 'text' into one space and trim both ends."""
  collapsed = re.sub(r'\s+', ' ', text)
  return collapsed.strip()

def preprocess_sentence(s:str) -> str:
    """
    Preprocesses sentence text for consistency.

    Steps, in order: trim the ends, collapse internal whitespace runs to a
    single space, strip accents, then trim the ends once more.
    """
    trimmed = s.strip()
    single_spaced = normalize_whitespace(trimmed)
    without_accents = unicodeToAscii(single_spaced)
    return without_accents.strip()

## Preparing Dataset

### Preparing Tokenizer

We are using spacy pre-built NLP pipeline 'en_core_web_lg' with a few changes:

*   Using whitespace tokenisation
*   Using the Named Entity Recognition (NER) component to identify tokens that belong to the Geopolitical Entity (GPE) and Organisation (ORG) entity types, so that the words of such an entity remain a single token, which makes the tagging problem easier to implement

For the Linear model, we use the static embeddings provided by spaCy, which uses floret word vectors by default. For the Feedforward, LSTM and Transformer models, we use the contextual embeddings provided by spaCy, which are generated by a CNN. We do not fine-tune the embeddings.


In [4]:
# Download the 'en_core_web_lg' pipeline package.
# The leading '!' is an IPython/Jupyter shell escape, so this line is only valid inside a notebook.
!python -m spacy download en_core_web_lg

# Load the downloaded pipeline; the custom component and tokenizer defined below are attached to this object.
nlp = spacy.load('en_core_web_lg')


# Create a custom component to merge entities
@Language.component("entity_merger")
def entity_merger(doc):
    """
    Custom spacy pipeline component that collapses every geopolitical (GPE) and
    organisation (ORG) entity into one token.

    Example: 'New York' is normally two tokens ('New', 'York'); after this
    component it is the single token 'New York'. Variable values in the dataset
    can be multi-word names like this, so merging keeps the tokenisation in line
    with the tagging scheme.

    The merged token's lemma is set to the full entity text. The same Doc object
    is returned after being retokenized in place.
    """
    merge_labels = ("GPE", "ORG")

    # Collect the matching entity spans, walking the entities from last to first
    spans_to_merge = [span for span in reversed(list(doc.ents)) if span.label_ in merge_labels]

    with doc.retokenize() as retokenizer:
        for span in spans_to_merge:
            retokenizer.merge(span, attrs={"LEMMA": span.text})

    return doc

# Add the custom component after NER.
# It reads doc.ents, so it has to run once the "ner" pipe has set the entities.
nlp.add_pipe("entity_merger", after="ner")

def whitespace_tokenizer(nlp):
  """
  Build a custom tokenizer that splits only on whitespace.

  The Tokenizer shares the pipeline's vocab and is given no prefix, suffix or
  infix rules; its token_match accepts any run of non-whitespace characters.
  """
  non_space_run = re.compile(r'\S+')
  return Tokenizer(nlp.vocab, token_match=non_space_run.match)

# Swap the pipeline's default tokenizer for the whitespace-only tokenizer built above.
nlp.tokenizer = whitespace_tokenizer(nlp)


Collecting en-core-web-lg==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl (400.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.7/400.7 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.8.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


### Preparing Pytorch Datasets and DataLoaders

We use a custom PyTorch dataset to standardise the data-loading process. The dataset reads the JSON file and decides which examples to keep depending on the split type, i.e. 'question' or 'query', and the split, i.e. 'train', 'dev' or 'test'. For each sample it stores the sentence and SQL queries with variables, together with the variable names and their corresponding values. The variables in the sentences and SQL queries are also replaced by their values, and these versions are stored as well. The shortest SQL query (by length) is identified and used for training. Depending on the embed type, i.e. 'static' or 'context', static token vectors or contextual token embeddings are returned. Distinct SQL query templates and variable names are also identified and converted to ids for classification and tagging. A DataLoader is used for batching; it pads each batch to ensure that all input token sequences and tagging labels have the same length.

In [5]:
class ATISClassificationDataset(Dataset):
    """
    PyTorch dataset for variable tagging and SQL template classification.

    Each item corresponds to one sentence of the JSON dataset. The sentence
    (with variable placeholders replaced by their values) is run through the
    spacy pipeline once, at construction time; the per-token vectors, the
    per-word tag ids and the SQL template id are served by __getitem__.
    """

    def __init__(self, dataset_loc, nlp, split_type='query', split=('train',), tokenizer=None, embed_type='static'):
        """
        Args:
            dataset_loc: Path of the JSON dataset file
            nlp: spacy pipeline used to turn sentences into Docs
            split_type: 'query' filters samples on their 'query-split' field,
                'question' filters sentences on their 'question-split' field.
                Any other value applies no filtering.
            split: Collection of split names to keep (e.g. ['train', 'dev'])
            tokenizer: Unused; kept so existing callers keep working
            embed_type: 'static' for token.vector, 'context' for doc.tensor

        Raises:
            ValueError: If embed_type is not 'static' or 'context'
        """
        self.nlp = nlp

        if embed_type not in ('static', 'context'):
            raise ValueError(f"{embed_type=} is not valid. Please choose from 'static' or 'context'")

        self.embed_type = embed_type
        self.data = []

        self.variable_names = set()
        self.sql_templates = set()

        # Reads Json (JSON text is UTF-8, so do not depend on the platform default encoding)
        with open(dataset_loc, encoding="utf-8") as f:
            dataset_json = json.load(f)

        for sample in dataset_json:

            # All valid sql queries for this example sorted by length
            sql = sorted(sample['sql'], key=len)

            # Adds shortest sql query template to the set of sql templates
            self.sql_templates.add(sql[0])

            # Adds to variables set.
            # Like the sql templates, variable names are gathered from EVERY sample,
            # before the split filter, so that datasets built for different splits
            # share one tag id space (otherwise train/dev/test tag ids could disagree).
            for var in sample["variables"]:
                self.variable_names.add(var.get("name"))

            # Check query split
            query_split = sample['query-split']
            if split_type == "query" and query_split not in split:
                continue

            # Process each sentence
            for sentence in sample['sentences']:

                # Check question split
                if split_type == "question" and sentence['question-split'] not in split:
                    continue

                # variables/placeholder mapping dictionary (placeholder -> value)
                variables = sentence['variables']

                # Sentence text with variables/placeholders
                text_with_vars = sentence['text']

                # Replace placeholders in the sentence and in every sql query with their values
                text_with_vars_replaced = text_with_vars
                sql_with_vars_replaced = list(sql)
                for var, value in variables.items():
                    text_with_vars_replaced = text_with_vars_replaced.replace(var, value)
                    sql_with_vars_replaced = [query.replace(var, value) for query in sql_with_vars_replaced]

                # Tagging expected output: one label per whitespace-separated word of the
                # placeholder text; the placeholder itself is the tag, everything else is 'no_var'
                sentence_var_tagging_labels = [
                    word if word in variables else "no_var"
                    for word in text_with_vars.split()
                ]

                # Appends preprocessed dictionary of current sentence to the dataset
                self.data.append({
                    "text_with_vars": text_with_vars,
                    "text_with_vars_replaced": text_with_vars_replaced,
                    "tagging_labels": sentence_var_tagging_labels,
                    "variables": variables,
                    "sql_with_vars": sql,
                    "shortest_sql_with_vars": sql[0],
                    "sql_with_vars_replaced": sql_with_vars_replaced,
                    "shortest_sql_with_vars_replaced": sql_with_vars_replaced[0]
                })

        # Setup tagging label encoder
        # For tagging task - include all variable names plus "no_var" for non-variables
        all_tags = np.array(["no_var"] + list(self.variable_names))
        self.tag_encoder = LabelEncoder()
        self.tag_encoder.fit(all_tags)

        # Setup SQL template label encoder
        self.sql_encoder = LabelEncoder()
        self.sql_encoder.fit(np.array(list(self.sql_templates)))

        # Process sentences using spacy nlp pipeline to get docs, corresponding label ids and tag ids
        self.docs = []
        self.tag_labels = []
        self.sql_labels = []

        for sample in self.data:
            # Convert sentence with variables replaced into spacy Doc
            doc = self.nlp(preprocess_sentence(sample['text_with_vars_replaced']))
            self.docs.append(doc)

            # Convert variable tags into ids for tagging
            self.tag_labels.append(self.tag_encoder.transform(sample["tagging_labels"]))

            # Convert sql query with variables into an id for classification
            sql_template = sample["shortest_sql_with_vars"]
            self.sql_labels.append(self.sql_encoder.transform([sql_template])[0])

    def __len__(self):
        """Number of sentences kept for the requested split"""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Get a single sample from the dataset

        Returns a dict with the token vectors (float32, one row per token), the
        token texts, the tag ids, the SQL template id, the raw preprocessed
        sample, the number of tokens and the ground-truth SQL text.
        """

        doc = self.docs[idx]
        raw_item = self.data[idx]

        # Create tensor from tokens
        if self.embed_type == 'context':
            # Get contextual embeddings from spacy Doc
            token_vectors = torch.tensor(doc.tensor, dtype=torch.float32)

        elif self.embed_type == 'static':
            # Get static token vectors from spacy Doc.
            # Stack into ONE ndarray first: building a tensor from a list of
            # ndarrays goes through a slow element-wise path.
            vectors = [token.vector for token in doc]
            if vectors:
                token_vectors = torch.as_tensor(np.stack(vectors), dtype=torch.float32)
            else:
                # Keep the tensor 2-D for an empty doc so that padding in collate_fn still works
                token_vectors = torch.zeros((0, self.nlp.vocab.vectors_length), dtype=torch.float32)

        else:
            raise ValueError(f"{self.embed_type=} is not valid. Please choose from 'static' or 'context'")

        # Get token texts (needed for variable replacement during inference)
        tokens = [token.text for token in doc]

        # Get tag labels
        tag_labels = torch.tensor(self.tag_labels[idx], dtype=torch.long)

        # Get SQL label
        sql_label = torch.tensor(self.sql_labels[idx], dtype=torch.long)

        return {
            "token_vectors": token_vectors,
            "tokens": tokens,
            "tag_labels": tag_labels,
            "sql_label": sql_label,
            "raw_item": raw_item,
            "doc_len": len(doc),
            "true_sql_text": raw_item['shortest_sql_with_vars_replaced']  # For inference
        }

    def get_dataloader(self, batch_size=32, shuffle=True, num_workers=0):
        """Helper function to create a DataLoader with custom collate function"""
        return DataLoader(
            self,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
            collate_fn=self.collate_fn,
            # Pinned memory only helps host-to-GPU copies; skip it on CPU-only machines
            pin_memory=torch.cuda.is_available()
        )

    def collate_fn(self, batch):
        """
        Custom collate function that pads sequences to the longest in the batch

        Token vectors, token texts, attention masks and tag labels are all padded
        to one common length: the larger of the longest token sequence and the
        longest tag sequence in the batch. The items of 'batch' are not modified.
        """

        # Find max length for both token vectors and tag labels in this batch
        max_token_len = max(item["doc_len"] for item in batch)
        max_tag_len = max(len(item["tag_labels"]) for item in batch)

        # Ensure both use the same max length for consistent padding
        max_len = max(max_token_len, max_tag_len)

        # Prepare lists to collect tensors and data
        token_vectors_list = []
        tag_labels_list = []
        attention_masks = []
        sql_labels = []
        tokens_list = []
        raw_items = []
        true_sql_text_list = []

        for item in batch:
            token_vecs = item["token_vectors"]
            tags = item["tag_labels"]
            # Work on a copy so padding never leaks back into the caller's item
            tokens = list(item["tokens"])

            # Create attention mask (1 for real tokens, 0 for padding)
            seq_len = len(token_vecs)
            tag_len = len(tags)
            attention_mask = torch.ones(seq_len, dtype=torch.long)

            # If padding is needed for tokens
            token_pad = max_len - seq_len
            if token_pad > 0:
                # Padding shares the dtype/device of the vectors (no reliance on type promotion)
                padding = token_vecs.new_zeros((token_pad, token_vecs.shape[1]))
                token_vecs = torch.cat([token_vecs, padding], dim=0)
                attention_mask = torch.cat([attention_mask, torch.zeros(token_pad, dtype=torch.long)])

                # Pad tokens list
                tokens.extend([""] * token_pad)

            # Pad tag labels separately to ensure they all have the same length.
            # NOTE(review): the pad value 0 is also a real tag id, so padded positions
            # must be masked out of the tagging loss - confirm the training loop does this.
            tag_pad = max_len - tag_len
            if tag_pad > 0:
                padding = torch.zeros(tag_pad, dtype=torch.long)
                tags = torch.cat([tags, padding], dim=0)

            # Add to lists
            token_vectors_list.append(token_vecs)
            tag_labels_list.append(tags)
            attention_masks.append(attention_mask)
            sql_labels.append(item["sql_label"])
            tokens_list.append(tokens)
            raw_items.append(item["raw_item"])
            true_sql_text_list.append(item["true_sql_text"])

        # Stack tensors
        return {
            "token_vectors": torch.stack(token_vectors_list),
            "tag_labels": torch.stack(tag_labels_list),
            "attention_mask": torch.stack(attention_masks),
            "sql_labels": torch.stack(sql_labels),
            "tokens": tokens_list,
            "raw_items": raw_items,
            "true_sql_texts": true_sql_text_list
        }

    def get_tag_vocab_size(self):
        """Returns the size of the tag vocabulary"""
        return len(self.tag_encoder.classes_)

    def get_sql_vocab_size(self):
        """Returns the number of unique SQL templates"""
        return len(self.sql_encoder.classes_)

    def get_vector_dim(self):
        """Returns the dimensionality of token vectors"""
        if self.embed_type == 'context':
            return self.nlp.get_pipe("tok2vec").model.get_dim("nO")
        elif self.embed_type == 'static':
            return self.nlp.vocab.vectors_length

    def decode_tag(self, tag_id):
        """Convert tag ID back to original variable name or 'no_var' """
        try:
            return self.tag_encoder.inverse_transform([tag_id])[0]
        except ValueError:
            # Handle unseen labels
            return "unknown"

    def decode_sql_template(self, sql_id):
        """Convert SQL template ID back to original SQL template"""
        try:
            return self.sql_encoder.inverse_transform([sql_id])[0]
        except ValueError:
            return "Unknown sql template"

# Model Setup

## Linear Model

The Linear Model has a separate model for variable tagging and SQL template classification. The Linear model is a Multi-Layer Perceptron with no non-linear activation to keep the model linear.

In [6]:
import torch
import torch.nn as nn

class LinearTagger(nn.Module):
    def __init__(self, input_dim: int, tag_vocab_size: int, hidden_sizes=None, dropout: float = 0):
        """
        Linear model for token-level tagging.

        A stack of Linear layers, each hidden one followed by Dropout, with no
        activation function in between.

        Args:
            input_dim: Dimensionality of input token vectors
            tag_vocab_size: Number of unique tags
            hidden_sizes: Widths of the hidden layers; an empty or missing value
                selects the default stack of 512 then 256 units
            dropout: Dropout probability
        """
        super().__init__()

        widths = list(hidden_sizes) if hidden_sizes else [512, 256]

        stack = []
        in_features = input_dim
        for width in widths:
            stack += [nn.Linear(in_features, width), nn.Dropout(dropout)]
            in_features = width
        # Output projection onto the tag vocabulary
        stack.append(nn.Linear(in_features, tag_vocab_size))

        self.layers = nn.Sequential(*stack)

    def forward(self, token_vectors):
        """
        Args:
            token_vectors: [batch_size, seq_len, input_dim]

        Returns:
            tag_logits: [batch_size, seq_len, tag_vocab_size]
        """
        return self.layers(token_vectors)

class LinearClassifier(nn.Module):
    def __init__(self, input_dim: int, sql_vocab_size: int, hidden_sizes=None, dropout: float = 0):
        """
        Linear model for SQL template classification.

        Token vectors are max-pooled over the sequence and the pooled vector is
        passed through a stack of Linear layers (Dropout after each hidden one,
        no activation function).

        Args:
            input_dim: Dimensionality of input token vectors
            sql_vocab_size: Number of SQL templates
            hidden_sizes: Widths of the hidden layers; an empty or missing value
                selects the default stack of 512 then 256 units
            dropout: Dropout probability
        """
        super().__init__()

        widths = list(hidden_sizes) if hidden_sizes else [512, 256]
        dims = [input_dim] + widths

        modules = []
        for fan_in, fan_out in zip(dims[:-1], dims[1:]):
            modules.append(nn.Linear(fan_in, fan_out))
            modules.append(nn.Dropout(dropout))
        modules.append(nn.Linear(dims[-1], sql_vocab_size))

        self.layers = nn.Sequential(*modules)

    def forward(self, token_vectors, attention_mask=None):
        """
        Args:
            token_vectors: [batch_size, seq_len, input_dim]
            attention_mask: [batch_size, seq_len] (1 = real token, 0 = padding)

        Returns:
            sql_logits: [batch_size, sql_vocab_size]
        """
        if attention_mask is None:
            pooled = token_vectors.max(dim=1).values
        else:
            # Padding positions are pushed to -inf so they can never win the max
            is_padding = (attention_mask == 0).unsqueeze(-1)
            pooled = token_vectors.masked_fill(is_padding, float('-inf')).max(dim=1).values

        return self.layers(pooled)



## Feedforward Model

The feed-forward model has a separate model for variable tagging and SQL template classification. Both models are Multi-Layer Perceptrons with GELU activation. Dropout and layer normalisation are used for better generalisation.

In [7]:
import torch
import torch.nn as nn

class FFTagger(nn.Module):
    def __init__(self, input_dim: int, tag_vocab_size: int, hidden_sizes=None, dropout: float = 0.1, use_layernorm: bool = True):
        """
        Feedforward model for token-level tagging.

        Every hidden layer is Linear -> GELU -> (optional LayerNorm) -> Dropout,
        followed by one final Linear projection onto the tag vocabulary.

        Args:
            input_dim: Dimensionality of input token vectors
            tag_vocab_size: Number of unique tags
            hidden_sizes: Widths of the hidden layers; an empty or missing value
                selects the default stack of 512 then 256 units
            dropout: Dropout probability
            use_layernorm: If true, uses Layer Normalisation after each hidden layer
        """
        super().__init__()

        widths = list(hidden_sizes) if hidden_sizes else [512, 256]

        modules = []
        fan_in = input_dim
        for width in widths:
            modules.append(nn.Linear(fan_in, width))
            modules.append(nn.GELU())
            if use_layernorm:
                # Normalize features of this layer
                modules.append(nn.LayerNorm(width))
            modules.append(nn.Dropout(dropout))
            fan_in = width
        modules.append(nn.Linear(fan_in, tag_vocab_size))

        self.layers = nn.Sequential(*modules)

    def forward(self, token_vectors):
        """
        Args:
            token_vectors: [batch_size, seq_len, input_dim]

        Returns:
            tag_logits: [batch_size, seq_len, tag_vocab_size]
        """
        return self.layers(token_vectors)

class FFClassifier(nn.Module):
    def __init__(self, input_dim: int, sql_vocab_size: int, hidden_sizes=None, dropout: float = 0.25, use_layernorm: bool = True):
        """
        Feedforward model for SQL template classification.

        Token vectors are max-pooled over the sequence; the pooled vector then
        goes through hidden layers of Linear -> GELU -> (optional LayerNorm) ->
        Dropout and a final Linear projection onto the SQL templates.

        Args:
            input_dim: Dimensionality of input token vectors
            sql_vocab_size: Number of SQL templates
            hidden_sizes: Widths of the hidden layers; an empty or missing value
                selects the default stack of 512 then 256 units
            dropout: Dropout probability
            use_layernorm: If true, uses Layer Normalisation after each hidden layer
        """
        super().__init__()

        def hidden_block(fan_in, fan_out):
            """Modules of one hidden layer, in application order."""
            block = [nn.Linear(fan_in, fan_out), nn.GELU()]
            if use_layernorm:
                # Normalize features of this layer
                block.append(nn.LayerNorm(fan_out))
            block.append(nn.Dropout(dropout))
            return block

        widths = list(hidden_sizes) if hidden_sizes else [512, 256]

        modules = []
        previous = input_dim
        for width in widths:
            modules.extend(hidden_block(previous, width))
            previous = width
        modules.append(nn.Linear(previous, sql_vocab_size))

        self.layers = nn.Sequential(*modules)

    def forward(self, token_vectors, attention_mask=None):
        """
        Args:
            token_vectors: [batch_size, seq_len, input_dim]
            attention_mask: [batch_size, seq_len] (1 = real token, 0 = padding)

        Returns:
            sql_logits: [batch_size, sql_vocab_size]
        """
        if attention_mask is None:
            pooled = token_vectors.max(dim=1).values
        else:
            # Padding positions are pushed to -inf so they can never win the max
            is_padding = (attention_mask == 0).unsqueeze(-1)
            pooled = token_vectors.masked_fill(is_padding, float('-inf')).max(dim=1).values

        return self.layers(pooled)



## LSTM Model

A multi-layered, bidirectional LSTM is used for tagging and classification. The output (concatenated from both directions) of each cell of the last LSTM layer is passed to a linear layer for tagging. The hidden states of the last cell of the last LSTM layer in both directions are concatenated and passed to a linear layer for SQL template classification.

In [8]:
class LSTMTaggerClassifer(nn.Module):
    def __init__(self,
                 input_dim:int,
                 hidden_dim:int,
                 tag_vocab_size:int,
                 sql_vocab_size:int,
                 num_layers:int = 1,
                 dropout:float = 0.25):
        """
        LSTM model for both token tagging and SQL template classification

        Args:
            input_dim: Dimensionality of input vectors
            hidden_dim: Hidden dimension of LSTM
            tag_vocab_size: Size of tag vocabulary for tagging task
            sql_vocab_size: Number of unique SQL templates
            num_layers: Number of LSTM layers
            dropout: Dropout probability
        """
        super(LSTMTaggerClassifer, self).__init__()

        # LSTM layer
        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers=num_layers,
            bidirectional=True,
            # nn.LSTM applies dropout between stacked layers only, so it is disabled for a single layer
            dropout=dropout if num_layers > 1 else 0,
            batch_first=True
        )

        # Dropout
        self.dropout = nn.Dropout(dropout)

        # Tagging layer (token classification)
        # hidden * 2 is for bidirectionality
        self.tag_classifier = nn.Linear(hidden_dim * 2, tag_vocab_size)

        # SQL Template classification layer
        # hidden * 2 is for bidirectionality
        self.sql_classifier = nn.Linear(hidden_dim * 2, sql_vocab_size)

    def forward(self, token_vectors, attention_mask=None):
        """
        Forward pass

        Args:
            token_vectors: Token vectors from SpaCy [batch_size, seq_len, input_dim]
            attention_mask: Attention mask indicating valid tokens [batch_size, seq_len].
                When omitted, every position is treated as a real token.

        Returns:
            tag_logits: Token classification logits [batch_size, seq_len, tag_vocab_size]
            sql_logits: SQL template classification logits [batch_size, sql_vocab_size]
        """
        batch_size, seq_len = token_vectors.shape[:2]

        # Number of real tokens per sequence; pack_padded_sequence needs these on the CPU
        if attention_mask is None:
            lengths = torch.full((batch_size,), seq_len, dtype=torch.long, device="cpu")
        else:
            lengths = attention_mask.sum(dim=1).cpu().long()

        # Ignore padded vectors in batch
        packed = nn.utils.rnn.pack_padded_sequence(token_vectors, lengths, batch_first=True, enforce_sorted=False)
        # LSTM
        output, (hidden, _) = self.lstm(packed)
        # Unpack output back to padded sequences of the original length
        output, _ = nn.utils.rnn.pad_packed_sequence(output, batch_first=True, total_length=seq_len)
        # Apply dropout to output (tagging branch only)
        output = self.dropout(output)
        # Feedforward layer to get tagging logits
        tag_logits = self.tag_classifier(output)  # shape: (B, T, num_tags)

        # Classification from the last layer's final hidden states (concat of both directions)
        final_hidden = torch.cat((hidden[-2], hidden[-1]), dim=1)
        # Feedforward layer to get SQL classification (no dropout on this branch)
        sql_logits = self.sql_classifier(final_hidden)

        return tag_logits, sql_logits

## Transformer Model

The Transformer model is an encoder-only model. For tagging, the encoder output of each token is passed to a linear layer. For classification, the output of a special [CLS] token is passed to a linear layer, similar to BERT. The [CLS] token is prepended to the input sentence.

In [9]:
class TransformerTaggerClassifier(nn.Module):
    """
    Transformer Encoder for Tagging and Classification

    A learned [CLS] vector is placed in front of the (projected) token vectors.
    After encoding, the [CLS] position feeds the SQL template classifier and the
    remaining positions feed the tag classifier.
    """

    def __init__(self, input_dim, hidden_dim, tag_vocab_size, sql_vocab_size, num_layers=2, num_heads=6, dropout=0.2):
        super().__init__()

        # Project only when the input width differs from the encoder width
        if input_dim != hidden_dim:
            self.project = nn.Linear(input_dim, hidden_dim)
        else:
            self.project = nn.Identity()

        layer = nn.TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=num_heads,
            dropout=dropout,
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(layer, num_layers)

        self.tag_classifier = nn.Linear(hidden_dim, tag_vocab_size)
        self.sql_classifier = nn.Linear(hidden_dim, sql_vocab_size)

        # BERT style [CLS] token for classification
        self.cls_token = nn.Parameter(torch.randn(1, 1, hidden_dim))
        self.dropout = nn.Dropout(dropout)

    def forward(self, token_vectors, attention_mask=None):
        """
        Args:
            token_vectors: [batch, seq_len, input_dim]
            attention_mask: [batch, seq_len], zero marks padding (optional)

        Returns:
            tag_logits: [batch, seq_len, tag_vocab_size]
            sql_logits: [batch, sql_vocab_size]
        """
        hidden = self.dropout(self.project(token_vectors))
        n_rows = hidden.size(0)

        # Put one [CLS] vector in front of every sequence -> [batch, seq_len+1, hidden_dim]
        cls_rows = self.cls_token.expand(n_rows, -1, -1)
        hidden = torch.cat([cls_rows, hidden], dim=1)

        # Extend the mask with an always-visible [CLS] column; True = position is ignored
        key_padding_mask = None
        if attention_mask is not None:
            cls_column = attention_mask.new_ones((n_rows, 1))
            key_padding_mask = torch.cat([cls_column, attention_mask], dim=1) == 0

        encoded = self.encoder(hidden, src_key_padding_mask=key_padding_mask)

        # Position 0 is [CLS] (classification); the rest are the tokens (tagging)
        cls_state = encoded[:, 0]
        token_states = encoded[:, 1:]

        tag_logits = self.tag_classifier(token_states)
        sql_logits = self.sql_classifier(cls_state)
        return tag_logits, sql_logits



In [218]:
import math

class PositionalEncoding(nn.Module):
    r"""Inject some information about the relative or absolute position of the tokens in the sequence.
        The positional encodings have the same dimension as the embeddings, so that the two can be summed.
        Here, we use sine and cosine functions of different frequencies.
    .. math:
        \text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model))
        \text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model))
        \text{where pos is the word position and i is the embed idx)
    Args:
        d_model: the embed dim (required).
        dropout: the dropout value (default=0.1).
        max_len: the max. length of the incoming sequence (default=5000).
    Examples:
        >>> pos_encoder = PositionalEncoding(d_model)
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        r"""Inputs of forward function
        Args:
            x: the sequence fed to the positional encoder model (required).
        Shape:
            x: [sequence length, batch size, embed dim]
            output: [sequence length, batch size, embed dim]
        Examples:
            >>> output = pos_encoder(x)
        """

        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

class TransformerTaggerClassifierStatic(nn.Module):
  """Transformer encoder for tagging and classification using static embeddings.

  Token vectors are projected to ``hidden_dim``, sinusoidal positional
  encodings are added, a learned BERT-style [CLS] vector is prepended and the
  sequence is passed through a batch-first ``nn.TransformerEncoder``. The
  [CLS] output feeds the SQL-template classifier; the remaining outputs feed
  the per-token tag classifier.

  Args:
    input_dim: dimension of the incoming token vectors.
    hidden_dim: model dimension of the encoder (must be divisible by ``num_heads``).
    tag_vocab_size: number of tag classes.
    sql_vocab_size: number of SQL template classes.
    num_layers: number of encoder layers (default=2).
    num_heads: number of attention heads (default=6).
    dropout: dropout used on the projected input and inside the encoder layers (default=0.2).
  """

  def __init__(self, input_dim, hidden_dim, tag_vocab_size, sql_vocab_size, num_layers=2, num_heads=6, dropout=0.2):
    super().__init__()

    self.project = nn.Linear(input_dim, hidden_dim) if input_dim != hidden_dim else nn.Identity()
    self.pos_encoder = PositionalEncoding(hidden_dim)
    encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=num_heads, dropout=dropout, batch_first=True,)
    self.encoder = nn.TransformerEncoder(encoder_layer, num_layers)

    self.tag_classifier = nn.Linear(hidden_dim, tag_vocab_size)
    self.sql_classifier = nn.Linear(hidden_dim, sql_vocab_size)

    # BERT style [CLS] token for classification
    self.cls_token = nn.Parameter(torch.randn(1, 1, hidden_dim))
    self.dropout = nn.Dropout(dropout)

  def forward(self, token_vectors, attention_mask=None):
    """Return ``(tag_logits, sql_logits)`` for a batch of token vectors.

    Args:
      token_vectors: [batch, seq_len, input_dim]
      attention_mask: optional [batch, seq_len]; entries equal to 0 are
        treated as padding and masked out of attention.

    Returns:
      tag_logits: [batch, seq_len, tag_vocab_size]
      sql_logits: [batch, sql_vocab_size]
    """
    # x: [batch, seq_len, hidden_dim]
    x = self.dropout(self.project(token_vectors))

    # PositionalEncoding works on [seq_len, batch, dim], while everything else
    # here is batch-first. Transpose in and out so the encoding is indexed by
    # token position rather than by position in the batch.
    x = self.pos_encoder(x.transpose(0, 1)).transpose(0, 1)
    batch_size = x.size(0)

    # Add [CLS] token
    cls = self.cls_token.expand(batch_size, -1, -1)  # [batch, 1, hidden_dim]
    x = torch.cat([cls, x], dim=1)  # [batch, seq_len+1, hidden_dim]

    # Extend the attention mask so the [CLS] position is always attended to
    if attention_mask is not None:
        cls_mask = torch.ones(batch_size, 1, device=attention_mask.device, dtype=attention_mask.dtype)
        attention_mask = torch.cat([cls_mask, attention_mask], dim=1)
        key_padding_mask = attention_mask == 0  # True marks padding
    else:
        key_padding_mask = None

    x = self.encoder(x, src_key_padding_mask=key_padding_mask)

    # exclude CLS for token tagging
    tag_logits = self.tag_classifier(x[:, 1:])

    # use CLS for classification
    sql_logits = self.sql_classifier(x[:, 0])

    return tag_logits, sql_logits

# Training models

Training function for the LSTM and Transformer models. The cross-entropy losses from tagging and classification are added together to get the overall loss, which is then minimised.

In [10]:
def train_model(model, train_loader, val_loader=None, epochs=1, lr=1e-3, weight_decay=0.01, gamma=0.1, steps=20,
                warmup_epochs=10, amsgrad=False, device="cuda" if torch.cuda.is_available() else "cpu"):
    """
    Train a joint tagging + classification model (LSTM and Transformer models).

    The model is called as ``model(token_vectors, attention_mask)`` and must
    return ``(tag_logits, sql_logits)``. The tagging and classification
    cross-entropy losses are summed and minimised with AdamW. The learning
    rate is held constant for ``warmup_epochs`` epochs and afterwards
    multiplied by ``gamma`` after every epoch (ExponentialLR).

    Args:
        model: the model to train.
        train_loader: iterable of batches; each batch is a dict with the keys
            "token_vectors", "attention_mask", "tag_labels" and "sql_labels".
        val_loader: currently unused.
        epochs: number of training epochs.
            NOTE(review): the default was originally written ``1-0``, which
            evaluates to 1; 10 may have been intended. The value is kept at 1
            so existing callers behave the same.
        lr: initial learning rate.
        weight_decay: AdamW weight decay.
        gamma: multiplicative learning-rate decay applied per epoch after warm-up.
        steps: currently unused.
        warmup_epochs: number of epochs before learning-rate decay starts.
        amsgrad: whether AdamW uses the AMSGrad variant.
        device: device the model and batches are moved to.

    Returns:
        The trained model (on ``device``).
    """
    model = model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay, amsgrad=amsgrad)
    # ExponentialLR multiplies the learning rate by gamma on every scheduler.step()
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma)
    # Targets equal to 0 do not contribute to the tagging loss
    # (presumably the padding label -- confirm against the collate function).
    tag_criterion = nn.CrossEntropyLoss(ignore_index=0)
    sql_criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        # Set model to training mode
        model.train()
        total_loss = 0

        # Training loop
        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            # Get batch data
            token_vectors = batch["token_vectors"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            tag_labels = batch["tag_labels"].to(device)
            sql_labels = batch["sql_labels"].to(device)

            # Forward pass
            tag_logits, sql_logits = model(token_vectors, attention_mask)

            # Handle sequence-length mismatches by truncating both to the shorter one
            common_len = min(tag_logits.size(1), tag_labels.size(1))
            tag_logits = tag_logits[:, :common_len, :]
            tag_labels = tag_labels[:, :common_len]

            # reshape (not view): a truncated slice is non-contiguous and
            # .view() would raise a RuntimeError on it.
            tag_loss = tag_criterion(tag_logits.reshape(-1, tag_logits.size(-1)), tag_labels.reshape(-1))
            sql_loss = sql_criterion(sql_logits, sql_labels)
            loss = tag_loss + sql_loss

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # Keep the learning rate constant during warm-up, decay afterwards
        if epoch+1 > warmup_epochs:
            scheduler.step()

        avg_loss = total_loss/len(train_loader)
        print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_loss:.4f}, Learning Rate: {optimizer.param_groups[0]['lr']}")

    return model

Training function for the Linear and Feedforward models. The tagging and classification models are trained at the same time. The losses from tagging and classification are added to compute the overall loss, which is to be minimised.

In [11]:
def train_dual_model(tagger_model, classifier_model, train_loader, val_loader=None,
                     epochs=10, lr=1e-3,weight_decay=0.01,gamma=0.1,steps=20, warmup_epochs=10,
                     device="cuda" if torch.cuda.is_available() else "cpu"):
    """
    Train separate tagger and classifier models jointly.

    Both models share a single AdamW optimizer; the tagging and
    classification cross-entropy losses are summed into one objective. The
    learning rate is held constant for ``warmup_epochs`` epochs and afterwards
    multiplied by ``gamma`` after every epoch (ExponentialLR).

    Args:
        tagger_model: called as ``tagger_model(token_vectors)``; returns tag logits [B, T, tag_vocab].
        classifier_model: called as ``classifier_model(token_vectors, attention_mask)``;
            returns SQL-template logits [B, sql_vocab].
        train_loader: iterable of batches; each batch is a dict with the keys
            "token_vectors", "attention_mask", "tag_labels" and "sql_labels".
        val_loader: currently unused.
        epochs: number of training epochs.
        lr: initial learning rate.
        weight_decay: AdamW weight decay.
        gamma: multiplicative learning-rate decay applied per epoch after warm-up.
        steps: currently unused.
        warmup_epochs: number of epochs before learning-rate decay starts.
        device: device the models and batches are moved to.

    Returns:
        Tuple ``(tagger_model, classifier_model)`` of the trained models.
    """
    tagger_model = tagger_model.to(device)
    classifier_model = classifier_model.to(device)

    optimizer = optim.AdamW(
        list(tagger_model.parameters()) + list(classifier_model.parameters()), lr=lr, weight_decay=weight_decay, amsgrad=False
    )
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma)

    # NOTE(review): unlike train_model, no ignore_index is set here, so every
    # label position (padding included, if any) contributes to the tag loss -- confirm this is intended.
    tag_criterion = nn.CrossEntropyLoss()
    sql_criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        tagger_model.train()
        classifier_model.train()
        total_loss = 0

        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            token_vectors = batch["token_vectors"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            tag_labels = batch["tag_labels"].to(device)
            sql_labels = batch["sql_labels"].to(device)

            # Forward pass through both models
            tag_logits = tagger_model(token_vectors)  # [B, T, tag_vocab]
            sql_logits = classifier_model(token_vectors, attention_mask)  # [B, sql_vocab]

            # Align tag_logits and tag_labels by truncating both to the shorter length
            common_len = min(tag_logits.size(1), tag_labels.size(1))
            tag_logits = tag_logits[:, :common_len, :]
            tag_labels = tag_labels[:, :common_len]

            # Compute losses. reshape (not view): a truncated slice is
            # non-contiguous and .view() would raise a RuntimeError on it.
            tag_loss = tag_criterion(tag_logits.reshape(-1, tag_logits.size(-1)), tag_labels.reshape(-1))
            sql_loss = sql_criterion(sql_logits, sql_labels)
            loss = tag_loss + sql_loss

            # Calculating gradients and updating weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # Keep the learning rate constant during warm-up, decay afterwards
        if epoch+1 > warmup_epochs:
            scheduler.step()
        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_loss:.4f}, Learning Rate: {optimizer.param_groups[0]['lr']}")

    return tagger_model, classifier_model


# Evaluating Models

Class for evaluation and inference. It can be used to generate SQL for a single input sentence or for a batch. When batching is used via the DataLoader, accuracy is also computed by comparing the combined output of tagging and classification with the list of valid SQL queries for that input sentence.



In [53]:
class SQLPipeline:
    """
    End-to-end evaluation and inference pipeline.

    Handles either a single model that produces both tag and SQL-template
    logits, or a pair ``[tagger, classifier]`` of separate models. Can be
    used for inference on a single sentence (``predict``) and for evaluation
    over a DataLoader (``batch_predict``).
    """

    # Decoded tag values that do not denote an SQL variable
    _NON_VARIABLE_TAGS = ('no_var', 'unknown')

    def __init__(self,model, dataset,embed_type="static", device="cuda" if torch.cuda.is_available() else "cpu"):
        """
        Args:
            model: an ``nn.Module`` returning ``(tag_logits, sql_logits)``, or a
                list/tuple whose first two items are the tagger and the classifier.
            dataset: object providing ``decode_tag``, ``decode_sql_template`` and ``nlp``.
            embed_type: 'context', 'static' or 'none' (only used by ``predict``).
            device: device the models and inputs are moved to.

        Raises:
            TypeError: if ``model`` is neither an ``nn.Module`` nor a list/tuple.
        """
        if isinstance(model, nn.Module):
            self.model = model.to(device)
            self.model.eval()
        elif isinstance(model, (list, tuple)):
            # Tuples are accepted too, so the return value of a dual-model
            # training function can be passed in directly.
            self.tagger = model[0].to(device)
            self.tagger.eval()
            self.classifier = model[1].to(device)
            self.classifier.eval()
        else:
            raise TypeError(
                f"model must be an nn.Module or a [tagger, classifier] pair, got {type(model).__name__}"
            )

        self.dataset = dataset
        self.device = device
        self.embed_type = embed_type

    def _forward(self, token_vectors, attention_mask):
        """Run the configured model(s) without gradients; return (tag_logits, sql_logits)."""
        with torch.no_grad():
            if hasattr(self, "model"):
                return self.model(token_vectors, attention_mask)
            tag_logits = self.tagger(token_vectors)
            sql_logits = self.classifier(token_vectors, attention_mask)
            return tag_logits, sql_logits

    def _decode_example(self, tokens, tag_ids, sql_id):
        """Turn raw predictions for one sentence into tags, variables and the final SQL string."""
        predicted_tags = [self.dataset.decode_tag(tag) for tag in tag_ids[:len(tokens)]]
        predicted_template = self.dataset.decode_sql_template(sql_id)

        # Every tag other than the non-variable markers is a variable name;
        # if a tag occurs more than once, the last token carrying it wins.
        identified_variables = {
            tag: token
            for token, tag in zip(tokens, predicted_tags)
            if tag not in self._NON_VARIABLE_TAGS
        }

        # Replace variables in the SQL template
        final_sql = predicted_template
        for var_name, var_value in identified_variables.items():
            final_sql = final_sql.replace(var_name, var_value)

        return {
            "tokens": tokens,
            "predicted_tags": predicted_tags,
            "identified_variables": identified_variables,
            "predicted_template": predicted_template,
            "final_sql": final_sql
        }

    def predict(self, text, nlp=None):
        """
        Generate SQL query for input text

        Args:
            text: Input text query
            nlp: Optional SpaCy pipeline (if not provided, uses the dataset's)

        Returns:
            Dict containing the tokens, predicted tags, identified variables,
            predicted template and the final SQL query

        Raises:
            NotImplementedError: if ``embed_type`` is 'none'.
            ValueError: if ``embed_type`` is not one of 'context', 'static', 'none'.
        """
        nlp = nlp or self.dataset.nlp

        # Process text with SpaCy
        doc = nlp(text)

        # Extract token vectors
        if self.embed_type == "context":
            token_vectors = torch.tensor(doc.tensor, dtype=torch.float32)
        elif self.embed_type == "static":
            # Stack into one ndarray first: building a tensor from a list of
            # ndarrays is very slow and triggers a UserWarning.
            token_vectors = torch.tensor(np.array([token.vector for token in doc]), dtype=torch.float32)
        elif self.embed_type == "none":
            raise NotImplementedError("Using token ids not implemented")
        else:
            raise ValueError(f"{self.embed_type} is not valid. Please choose from 'context', 'static', or 'none'")

        # unsqueeze(0) adds the batch dimension: the model expects a batch but
        # a single sample is being processed
        token_vectors = token_vectors.unsqueeze(0).to(self.device)

        tokens = [token.text for token in doc]

        # Create attention mask (every token is real, there is no padding)
        attention_mask = torch.ones(1, len(doc), dtype=torch.float32).to(self.device)

        # Forward pass
        tag_logits, sql_logits = self._forward(token_vectors, attention_mask)

        # Predicted tag for each token and predicted SQL template of the single sample
        tag_preds = torch.argmax(tag_logits, dim=-1)[0].cpu().numpy()
        sql_pred = torch.argmax(sql_logits, dim=-1)[0].item()

        # Convert predictions back to human-readable form
        return self._decode_example(tokens, tag_preds, sql_pred)

    def batch_predict(self, dataloader):
        """
        Generate SQL queries for a batch of inputs and get accuracy

        Args:
            dataloader: DataLoader with test data; each batch is a dict with the
                keys "token_vectors", "attention_mask", "tokens" and "raw_items"

        Returns:
            List(dict): List of prediction results
            (float): Fraction of evaluated samples whose generated SQL is among
                the reference queries (0.0 if the dataloader yields no samples)
        """
        results = []
        total_samples = 0
        correct = 0
        for batch in tqdm(dataloader, desc="Generating SQL queries"):

            # Move batch to device
            token_vectors = batch["token_vectors"].to(self.device)
            attention_mask = batch["attention_mask"].to(self.device)
            tokens = batch["tokens"]
            raw_items = batch["raw_items"]

            # Forward pass
            tag_logits, sql_logits = self._forward(token_vectors, attention_mask)

            # Get predictions
            tag_preds = torch.argmax(tag_logits, dim=-1).cpu().numpy()
            sql_preds = torch.argmax(sql_logits, dim=-1).cpu().numpy()

            # Process each item in batch
            for i, item_tokens in enumerate(tokens):
                # Filter out padding (falsy token entries)
                valid_tokens = [t for t in item_tokens if t]

                result = self._decode_example(valid_tokens, tag_preds[i], sql_preds[i])

                # Check if the produced SQL query is one of the reference queries
                reference_sql = raw_items[i]["sql_with_vars_replaced"]
                correct_example = result["final_sql"] in reference_sql
                if correct_example:
                    correct += 1
                total_samples += 1

                # Add to results
                result["raw_item"] = raw_items[i]
                result["correct_sql"] = reference_sql
                result["is_correct"] = correct_example
                results.append(result)

        # Accuracy over the samples that were actually evaluated
        accuracy = correct/total_samples if total_samples else 0.0
        return results,accuracy

# Training and evaluating all 4 models

## Creating datasets and dataloaders for both questions and query split

### Question split using static embeddings

In [13]:
# Question split, static embeddings: train+dev data for fitting
question_static_train_val_dataset = ATISClassificationDataset(
    nlp=nlp,
    dataset_loc="atis.json",
    embed_type='static',
    split_type="question",
    split=["train", "dev"],
)

# Question split, static embeddings: held-out test data
question_static_test_dataset = ATISClassificationDataset(
    nlp=nlp,
    dataset_loc="atis.json",
    embed_type='static',
    split_type="question",
    split=["test"],
)

# Batched loaders over the two datasets
question_static_train_val_loader = question_static_train_val_dataset.get_dataloader(batch_size=32)
question_static_test_loader = question_static_test_dataset.get_dataloader(batch_size=32)

### Question split using contextual embeddings

In [14]:
# Question split, contextual embeddings: train+dev data for fitting
question_context_train_val_dataset = ATISClassificationDataset(
    nlp=nlp,
    dataset_loc="atis.json",
    embed_type='context',
    split_type="question",
    split=["train", "dev"],
)

# Question split, contextual embeddings: held-out test data
question_context_test_dataset = ATISClassificationDataset(
    nlp=nlp,
    dataset_loc="atis.json",
    embed_type='context',
    split_type="question",
    split=["test"],
)

# Batched loaders over the two datasets
question_context_train_val_loader = question_context_train_val_dataset.get_dataloader(batch_size=32)
question_context_test_loader = question_context_test_dataset.get_dataloader(batch_size=32)

### Query split using static embeddings

In [15]:
# Query split, static embeddings: train+dev data for fitting
query_static_train_val_dataset = ATISClassificationDataset(
    nlp=nlp,
    dataset_loc="atis.json",
    embed_type='static',
    split_type="query",
    split=["train", "dev"],
)

# Query split, static embeddings: held-out test data
query_static_test_dataset = ATISClassificationDataset(
    nlp=nlp,
    dataset_loc="atis.json",
    embed_type='static',
    split_type="query",
    split=["test"],
)

# Batched loaders over the two datasets
query_static_train_val_loader = query_static_train_val_dataset.get_dataloader(batch_size=32)
query_static_test_loader = query_static_test_dataset.get_dataloader(batch_size=32)

### Query split using contextual embeddings

In [16]:
# Query split, contextual embeddings: train+dev data for fitting
query_context_train_val_dataset = ATISClassificationDataset(
    nlp=nlp,
    dataset_loc="atis.json",
    embed_type='context',
    split_type="query",
    split=["train", "dev"],
)

# Query split, contextual embeddings: held-out test data
query_context_test_dataset = ATISClassificationDataset(
    nlp=nlp,
    dataset_loc="atis.json",
    embed_type='context',
    split_type="query",
    split=["test"],
)

# Batched loaders over the two datasets
query_context_train_val_loader = query_context_train_val_dataset.get_dataloader(batch_size=32)
query_context_test_loader = query_context_test_dataset.get_dataloader(batch_size=32)

## Training and Evaluating Tagging and Classification models

### Training and Evaluating Linear models using question and query split

In [17]:
# Linear models on the question split (static embeddings)

# Tagger: one tag prediction per token
question_linear_tag_model = LinearTagger(
    input_dim=question_static_train_val_dataset.get_vector_dim(),
    tag_vocab_size=question_static_train_val_dataset.get_tag_vocab_size(),
    hidden_sizes=[256, 128],
    dropout=0,
)

# Classifier: one SQL-template prediction per sentence
question_linear_classifier_model = LinearClassifier(
    input_dim=question_static_train_val_dataset.get_vector_dim(),
    sql_vocab_size=question_static_train_val_dataset.get_sql_vocab_size(),
    hidden_sizes=[256, 128],
    dropout=0,
)

# Train both models together: constant learning rate for 15 epochs,
# then decay by a factor of 0.90 per epoch
question_trained_linear_models = train_dual_model(
    tagger_model=question_linear_tag_model,
    classifier_model=question_linear_classifier_model,
    train_loader=question_static_train_val_loader,
    epochs=50,
    weight_decay=0.06,
    gamma=0.90,
    warmup_epochs=15,
)

  token_vectors = torch.tensor([token.vector for token in doc], dtype=torch.float32)
Epoch 1/50: 100%|██████████| 152/152 [00:05<00:00, 28.02it/s]


Epoch 1/50, Training Loss: 5.7158, Learning Rate: 0.001


Epoch 2/50: 100%|██████████| 152/152 [00:03<00:00, 41.06it/s]


Epoch 2/50, Training Loss: 4.1558, Learning Rate: 0.001


Epoch 3/50: 100%|██████████| 152/152 [00:03<00:00, 41.06it/s]


Epoch 3/50, Training Loss: 3.3605, Learning Rate: 0.001


Epoch 4/50: 100%|██████████| 152/152 [00:04<00:00, 32.48it/s]


Epoch 4/50, Training Loss: 2.7956, Learning Rate: 0.001


Epoch 5/50: 100%|██████████| 152/152 [00:03<00:00, 41.08it/s]


Epoch 5/50, Training Loss: 2.4284, Learning Rate: 0.001


Epoch 6/50: 100%|██████████| 152/152 [00:03<00:00, 41.43it/s]


Epoch 6/50, Training Loss: 2.1497, Learning Rate: 0.001


Epoch 7/50: 100%|██████████| 152/152 [00:04<00:00, 32.54it/s]


Epoch 7/50, Training Loss: 1.8888, Learning Rate: 0.001


Epoch 8/50: 100%|██████████| 152/152 [00:03<00:00, 41.40it/s]


Epoch 8/50, Training Loss: 1.6378, Learning Rate: 0.001


Epoch 9/50: 100%|██████████| 152/152 [00:03<00:00, 41.35it/s]


Epoch 9/50, Training Loss: 1.4900, Learning Rate: 0.001


Epoch 10/50: 100%|██████████| 152/152 [00:04<00:00, 32.31it/s]


Epoch 10/50, Training Loss: 1.3355, Learning Rate: 0.001


Epoch 11/50: 100%|██████████| 152/152 [00:03<00:00, 41.39it/s]


Epoch 11/50, Training Loss: 1.1856, Learning Rate: 0.001


Epoch 12/50: 100%|██████████| 152/152 [00:03<00:00, 39.88it/s]


Epoch 12/50, Training Loss: 1.0608, Learning Rate: 0.001


Epoch 13/50: 100%|██████████| 152/152 [00:04<00:00, 32.18it/s]


Epoch 13/50, Training Loss: 0.9774, Learning Rate: 0.001


Epoch 14/50: 100%|██████████| 152/152 [00:03<00:00, 41.59it/s]


Epoch 14/50, Training Loss: 0.9026, Learning Rate: 0.001


Epoch 15/50: 100%|██████████| 152/152 [00:03<00:00, 41.27it/s]


Epoch 15/50, Training Loss: 0.8365, Learning Rate: 0.001


Epoch 16/50: 100%|██████████| 152/152 [00:04<00:00, 32.17it/s]


Epoch 16/50, Training Loss: 0.7959, Learning Rate: 0.0009000000000000001


Epoch 17/50: 100%|██████████| 152/152 [00:03<00:00, 40.47it/s]


Epoch 17/50, Training Loss: 0.7558, Learning Rate: 0.0008100000000000001


Epoch 18/50: 100%|██████████| 152/152 [00:03<00:00, 41.30it/s]


Epoch 18/50, Training Loss: 0.6522, Learning Rate: 0.000729


Epoch 19/50: 100%|██████████| 152/152 [00:04<00:00, 32.08it/s]


Epoch 19/50, Training Loss: 0.6181, Learning Rate: 0.0006561000000000001


Epoch 20/50: 100%|██████████| 152/152 [00:03<00:00, 40.88it/s]


Epoch 20/50, Training Loss: 0.5733, Learning Rate: 0.00059049


Epoch 21/50: 100%|██████████| 152/152 [00:03<00:00, 41.82it/s]


Epoch 21/50, Training Loss: 0.5110, Learning Rate: 0.000531441


Epoch 22/50: 100%|██████████| 152/152 [00:08<00:00, 18.51it/s]


Epoch 22/50, Training Loss: 0.4889, Learning Rate: 0.0004782969


Epoch 23/50: 100%|██████████| 152/152 [00:06<00:00, 22.77it/s]


Epoch 23/50, Training Loss: 0.4585, Learning Rate: 0.00043046721


Epoch 24/50: 100%|██████████| 152/152 [00:04<00:00, 31.90it/s]


Epoch 24/50, Training Loss: 0.4355, Learning Rate: 0.000387420489


Epoch 25/50: 100%|██████████| 152/152 [00:03<00:00, 41.14it/s]


Epoch 25/50, Training Loss: 0.4223, Learning Rate: 0.0003486784401


Epoch 26/50: 100%|██████████| 152/152 [00:03<00:00, 41.10it/s]


Epoch 26/50, Training Loss: 0.4378, Learning Rate: 0.00031381059609000004


Epoch 27/50: 100%|██████████| 152/152 [00:04<00:00, 32.50it/s]


Epoch 27/50, Training Loss: 0.3965, Learning Rate: 0.00028242953648100003


Epoch 28/50: 100%|██████████| 152/152 [00:03<00:00, 41.02it/s]


Epoch 28/50, Training Loss: 0.3957, Learning Rate: 0.00025418658283290005


Epoch 29/50: 100%|██████████| 152/152 [00:03<00:00, 41.78it/s]


Epoch 29/50, Training Loss: 0.3693, Learning Rate: 0.00022876792454961005


Epoch 30/50: 100%|██████████| 152/152 [00:04<00:00, 32.41it/s]


Epoch 30/50, Training Loss: 0.3648, Learning Rate: 0.00020589113209464906


Epoch 31/50: 100%|██████████| 152/152 [00:03<00:00, 41.80it/s]


Epoch 31/50, Training Loss: 0.3541, Learning Rate: 0.00018530201888518417


Epoch 32/50: 100%|██████████| 152/152 [00:03<00:00, 40.96it/s]


Epoch 32/50, Training Loss: 0.3473, Learning Rate: 0.00016677181699666576


Epoch 33/50: 100%|██████████| 152/152 [00:04<00:00, 31.60it/s]


Epoch 33/50, Training Loss: 0.3380, Learning Rate: 0.0001500946352969992


Epoch 34/50: 100%|██████████| 152/152 [00:03<00:00, 41.13it/s]


Epoch 34/50, Training Loss: 0.3293, Learning Rate: 0.0001350851717672993


Epoch 35/50: 100%|██████████| 152/152 [00:03<00:00, 40.56it/s]


Epoch 35/50, Training Loss: 0.3306, Learning Rate: 0.00012157665459056936


Epoch 36/50: 100%|██████████| 152/152 [00:04<00:00, 32.06it/s]


Epoch 36/50, Training Loss: 0.3319, Learning Rate: 0.00010941898913151243


Epoch 37/50: 100%|██████████| 152/152 [00:03<00:00, 41.48it/s]


Epoch 37/50, Training Loss: 0.3187, Learning Rate: 9.847709021836118e-05


Epoch 38/50: 100%|██████████| 152/152 [00:03<00:00, 41.06it/s]


Epoch 38/50, Training Loss: 0.3131, Learning Rate: 8.862938119652506e-05


Epoch 39/50: 100%|██████████| 152/152 [00:05<00:00, 29.88it/s]


Epoch 39/50, Training Loss: 0.3165, Learning Rate: 7.976644307687256e-05


Epoch 40/50: 100%|██████████| 152/152 [00:03<00:00, 40.95it/s]


Epoch 40/50, Training Loss: 0.3099, Learning Rate: 7.17897987691853e-05


Epoch 41/50: 100%|██████████| 152/152 [00:04<00:00, 35.41it/s]


Epoch 41/50, Training Loss: 0.3155, Learning Rate: 6.461081889226677e-05


Epoch 42/50: 100%|██████████| 152/152 [00:04<00:00, 32.33it/s]


Epoch 42/50, Training Loss: 0.3186, Learning Rate: 5.81497370030401e-05


Epoch 43/50: 100%|██████████| 152/152 [00:03<00:00, 41.31it/s]


Epoch 43/50, Training Loss: 0.3020, Learning Rate: 5.233476330273609e-05


Epoch 44/50: 100%|██████████| 152/152 [00:03<00:00, 41.31it/s]


Epoch 44/50, Training Loss: 0.2997, Learning Rate: 4.7101286972462485e-05


Epoch 45/50: 100%|██████████| 152/152 [00:04<00:00, 32.16it/s]


Epoch 45/50, Training Loss: 0.2993, Learning Rate: 4.239115827521624e-05


Epoch 46/50: 100%|██████████| 152/152 [00:03<00:00, 40.59it/s]


Epoch 46/50, Training Loss: 0.2978, Learning Rate: 3.8152042447694614e-05


Epoch 47/50: 100%|██████████| 152/152 [00:03<00:00, 41.50it/s]


Epoch 47/50, Training Loss: 0.2950, Learning Rate: 3.433683820292515e-05


Epoch 48/50: 100%|██████████| 152/152 [00:04<00:00, 32.43it/s]


Epoch 48/50, Training Loss: 0.2921, Learning Rate: 3.090315438263264e-05


Epoch 49/50: 100%|██████████| 152/152 [00:03<00:00, 40.35it/s]


Epoch 49/50, Training Loss: 0.3016, Learning Rate: 2.7812838944369376e-05


Epoch 50/50: 100%|██████████| 152/152 [00:03<00:00, 41.04it/s]

Epoch 50/50, Training Loss: 0.2941, Learning Rate: 2.503155504993244e-05





In [54]:
# Evaluate the trained linear tagger/classifier pair on the question-split test set.
# NOTE(review): predictions are decoded with the test dataset's label mappings;
# verify they are identical to the mappings of the dataset used for training.
question_linear_sql_pipe = SQLPipeline(
    model=list(question_trained_linear_models),
    dataset=question_static_test_dataset,
    embed_type="static",
)
question_linear_results, question_linear_acc = question_linear_sql_pipe.batch_predict(
    dataloader=question_static_test_loader
)
print(f"\nAccuracy of Linear model on question split test set: {question_linear_acc:.2%}")

Generating SQL queries: 100%|██████████| 14/14 [00:01<00:00,  9.67it/s]


Accuracy of Linear model on question split test set: 11.19%





In [19]:
# Linear models on the query split (static embeddings)

# Tagger: one tag prediction per token
query_linear_tag_model = LinearTagger(
    input_dim=query_static_train_val_dataset.get_vector_dim(),
    tag_vocab_size=query_static_train_val_dataset.get_tag_vocab_size(),
    hidden_sizes=[256, 128],
)

# Classifier: one SQL-template prediction per sentence
query_linear_classifier_model = LinearClassifier(
    input_dim=query_static_train_val_dataset.get_vector_dim(),
    sql_vocab_size=query_static_train_val_dataset.get_sql_vocab_size(),
    hidden_sizes=[256, 128],
)

# Train both models together: constant learning rate for 15 epochs,
# then decay by a factor of 0.90 per epoch
query_trained_linear_models = train_dual_model(
    tagger_model=query_linear_tag_model,
    classifier_model=query_linear_classifier_model,
    train_loader=query_static_train_val_loader,
    epochs=50,
    weight_decay=0.06,
    gamma=0.90,
    warmup_epochs=15,
)

Epoch 1/50: 100%|██████████| 155/155 [00:04<00:00, 32.66it/s]


Epoch 1/50, Training Loss: 5.5867, Learning Rate: 0.001


Epoch 2/50: 100%|██████████| 155/155 [00:03<00:00, 42.20it/s]


Epoch 2/50, Training Loss: 3.9004, Learning Rate: 0.001


Epoch 3/50: 100%|██████████| 155/155 [00:03<00:00, 41.60it/s]


Epoch 3/50, Training Loss: 3.1378, Learning Rate: 0.001


Epoch 4/50: 100%|██████████| 155/155 [00:04<00:00, 33.30it/s]


Epoch 4/50, Training Loss: 2.6230, Learning Rate: 0.001


Epoch 5/50: 100%|██████████| 155/155 [00:03<00:00, 41.86it/s]


Epoch 5/50, Training Loss: 2.2444, Learning Rate: 0.001


Epoch 6/50: 100%|██████████| 155/155 [00:03<00:00, 42.17it/s]


Epoch 6/50, Training Loss: 1.9717, Learning Rate: 0.001


Epoch 7/50: 100%|██████████| 155/155 [00:04<00:00, 32.97it/s]


Epoch 7/50, Training Loss: 1.7309, Learning Rate: 0.001


Epoch 8/50: 100%|██████████| 155/155 [00:03<00:00, 42.42it/s]


Epoch 8/50, Training Loss: 1.5718, Learning Rate: 0.001


Epoch 9/50: 100%|██████████| 155/155 [00:03<00:00, 41.53it/s]


Epoch 9/50, Training Loss: 1.3889, Learning Rate: 0.001


Epoch 10/50: 100%|██████████| 155/155 [00:04<00:00, 32.69it/s]


Epoch 10/50, Training Loss: 1.2809, Learning Rate: 0.001


Epoch 11/50: 100%|██████████| 155/155 [00:03<00:00, 41.97it/s]


Epoch 11/50, Training Loss: 1.1367, Learning Rate: 0.001


Epoch 12/50: 100%|██████████| 155/155 [00:03<00:00, 42.06it/s]


Epoch 12/50, Training Loss: 1.0326, Learning Rate: 0.001


Epoch 13/50: 100%|██████████| 155/155 [00:04<00:00, 32.74it/s]


Epoch 13/50, Training Loss: 0.9616, Learning Rate: 0.001


Epoch 14/50: 100%|██████████| 155/155 [00:03<00:00, 42.18it/s]


Epoch 14/50, Training Loss: 0.8618, Learning Rate: 0.001


Epoch 15/50: 100%|██████████| 155/155 [00:03<00:00, 42.27it/s]


Epoch 15/50, Training Loss: 0.8195, Learning Rate: 0.001


Epoch 16/50: 100%|██████████| 155/155 [00:04<00:00, 32.61it/s]


Epoch 16/50, Training Loss: 0.7609, Learning Rate: 0.0009000000000000001


Epoch 17/50: 100%|██████████| 155/155 [00:03<00:00, 42.16it/s]


Epoch 17/50, Training Loss: 0.6932, Learning Rate: 0.0008100000000000001


Epoch 18/50: 100%|██████████| 155/155 [00:03<00:00, 41.11it/s]


Epoch 18/50, Training Loss: 0.6179, Learning Rate: 0.000729


Epoch 19/50: 100%|██████████| 155/155 [00:04<00:00, 32.65it/s]


Epoch 19/50, Training Loss: 0.5612, Learning Rate: 0.0006561000000000001


Epoch 20/50: 100%|██████████| 155/155 [00:03<00:00, 41.31it/s]


Epoch 20/50, Training Loss: 0.5237, Learning Rate: 0.00059049


Epoch 21/50: 100%|██████████| 155/155 [00:03<00:00, 41.81it/s]


Epoch 21/50, Training Loss: 0.4910, Learning Rate: 0.000531441


Epoch 22/50: 100%|██████████| 155/155 [00:04<00:00, 33.29it/s]


Epoch 22/50, Training Loss: 0.4641, Learning Rate: 0.0004782969


Epoch 23/50: 100%|██████████| 155/155 [00:03<00:00, 40.93it/s]


Epoch 23/50, Training Loss: 0.4428, Learning Rate: 0.00043046721


Epoch 24/50: 100%|██████████| 155/155 [00:04<00:00, 33.55it/s]


Epoch 24/50, Training Loss: 0.4169, Learning Rate: 0.000387420489


Epoch 25/50: 100%|██████████| 155/155 [00:04<00:00, 32.37it/s]


Epoch 25/50, Training Loss: 0.4014, Learning Rate: 0.0003486784401


Epoch 26/50: 100%|██████████| 155/155 [00:03<00:00, 42.11it/s]


Epoch 26/50, Training Loss: 0.3904, Learning Rate: 0.00031381059609000004


Epoch 27/50: 100%|██████████| 155/155 [00:03<00:00, 42.00it/s]


Epoch 27/50, Training Loss: 0.3775, Learning Rate: 0.00028242953648100003


Epoch 28/50: 100%|██████████| 155/155 [00:04<00:00, 33.05it/s]


Epoch 28/50, Training Loss: 0.3595, Learning Rate: 0.00025418658283290005


Epoch 29/50: 100%|██████████| 155/155 [00:03<00:00, 42.07it/s]


Epoch 29/50, Training Loss: 0.3472, Learning Rate: 0.00022876792454961005


Epoch 30/50: 100%|██████████| 155/155 [00:03<00:00, 41.52it/s]


Epoch 30/50, Training Loss: 0.3423, Learning Rate: 0.00020589113209464906


Epoch 31/50: 100%|██████████| 155/155 [00:04<00:00, 32.64it/s]


Epoch 31/50, Training Loss: 0.3372, Learning Rate: 0.00018530201888518417


Epoch 32/50: 100%|██████████| 155/155 [00:03<00:00, 42.11it/s]


Epoch 32/50, Training Loss: 0.3290, Learning Rate: 0.00016677181699666576


Epoch 33/50: 100%|██████████| 155/155 [00:03<00:00, 41.31it/s]


Epoch 33/50, Training Loss: 0.3204, Learning Rate: 0.0001500946352969992


Epoch 34/50: 100%|██████████| 155/155 [00:04<00:00, 32.69it/s]


Epoch 34/50, Training Loss: 0.3185, Learning Rate: 0.0001350851717672993


Epoch 35/50: 100%|██████████| 155/155 [00:03<00:00, 41.69it/s]


Epoch 35/50, Training Loss: 0.3135, Learning Rate: 0.00012157665459056936


Epoch 36/50: 100%|██████████| 155/155 [00:03<00:00, 41.96it/s]


Epoch 36/50, Training Loss: 0.3097, Learning Rate: 0.00010941898913151243


Epoch 37/50: 100%|██████████| 155/155 [00:04<00:00, 32.84it/s]


Epoch 37/50, Training Loss: 0.3090, Learning Rate: 9.847709021836118e-05


Epoch 38/50: 100%|██████████| 155/155 [00:03<00:00, 41.21it/s]


Epoch 38/50, Training Loss: 0.3044, Learning Rate: 8.862938119652506e-05


Epoch 39/50: 100%|██████████| 155/155 [00:03<00:00, 42.33it/s]


Epoch 39/50, Training Loss: 0.3008, Learning Rate: 7.976644307687256e-05


Epoch 40/50: 100%|██████████| 155/155 [00:04<00:00, 32.57it/s]


Epoch 40/50, Training Loss: 0.2952, Learning Rate: 7.17897987691853e-05


Epoch 41/50: 100%|██████████| 155/155 [00:03<00:00, 41.90it/s]


Epoch 41/50, Training Loss: 0.2947, Learning Rate: 6.461081889226677e-05


Epoch 42/50: 100%|██████████| 155/155 [00:03<00:00, 42.22it/s]


Epoch 42/50, Training Loss: 0.2894, Learning Rate: 5.81497370030401e-05


Epoch 43/50: 100%|██████████| 155/155 [00:04<00:00, 32.54it/s]


Epoch 43/50, Training Loss: 0.2874, Learning Rate: 5.233476330273609e-05


Epoch 44/50: 100%|██████████| 155/155 [00:03<00:00, 41.62it/s]


Epoch 44/50, Training Loss: 0.2875, Learning Rate: 4.7101286972462485e-05


Epoch 45/50: 100%|██████████| 155/155 [00:03<00:00, 41.51it/s]


Epoch 45/50, Training Loss: 0.2856, Learning Rate: 4.239115827521624e-05


Epoch 46/50: 100%|██████████| 155/155 [00:04<00:00, 32.83it/s]


Epoch 46/50, Training Loss: 0.2828, Learning Rate: 3.8152042447694614e-05


Epoch 47/50: 100%|██████████| 155/155 [00:03<00:00, 42.02it/s]


Epoch 47/50, Training Loss: 0.2821, Learning Rate: 3.433683820292515e-05


Epoch 48/50: 100%|██████████| 155/155 [00:03<00:00, 41.25it/s]


Epoch 48/50, Training Loss: 0.2810, Learning Rate: 3.090315438263264e-05


Epoch 49/50: 100%|██████████| 155/155 [00:04<00:00, 32.91it/s]


Epoch 49/50, Training Loss: 0.2774, Learning Rate: 2.7812838944369376e-05


Epoch 50/50: 100%|██████████| 155/155 [00:03<00:00, 41.27it/s]

Epoch 50/50, Training Loss: 0.2798, Learning Rate: 2.503155504993244e-05





In [20]:
# Score the trained linear models on the query-split test data.
# The static-embedding test dataset/loader pair is used (embed_type="static").
# NOTE(review): the recorded run of this cell printed 0.00% accuracy -- confirm
# whether that is expected for the query split before relying on it.
query_linear_sql_pipe = SQLPipeline(
    embed_type="static",
    model=list(query_trained_linear_models),
    dataset=query_static_test_dataset,
)
(query_linear_results, query_linear_acc) = query_linear_sql_pipe.batch_predict(
    dataloader=query_static_test_loader
)
print(f"\nAccuracy of Linear model on query split test set: {query_linear_acc:.2%}")

Generating SQL queries: 100%|██████████| 11/11 [00:01<00:00,  9.63it/s]


Accuracy of Linear model on query split test set: 0.00%





### Training Feedforward models using question and query split

In [21]:
# Question split: build the feedforward tagger/classifier pair and train them.
# Both networks read their input width and output vocabulary size from the
# contextual-embedding train+val dataset and use the same hidden sizes/dropout.
question_ff_tag_model = FFTagger(
    input_dim=question_context_train_val_dataset.get_vector_dim(),
    tag_vocab_size=question_context_train_val_dataset.get_tag_vocab_size(),
    dropout=0.2,
    hidden_sizes=[512, 256, 128],
)
question_ff_classifier_model = FFClassifier(
    input_dim=question_context_train_val_dataset.get_vector_dim(),
    sql_vocab_size=question_context_train_val_dataset.get_sql_vocab_size(),
    dropout=0.2,
    hidden_sizes=[512, 256, 128],
)

# Joint training run. Per the recorded log, the learning rate stays at 0.001
# for the first 15 epochs and is then multiplied by 0.95 after each epoch.
question_trained_ff_model = train_dual_model(
    tagger_model=question_ff_tag_model,
    classifier_model=question_ff_classifier_model,
    train_loader=question_context_train_val_loader,
    gamma=0.95,
    warmup_epochs=15,
    epochs=50,
)

Epoch 1/50: 100%|██████████| 152/152 [00:01<00:00, 114.22it/s]


Epoch 1/50, Training Loss: 5.3318, Learning Rate: 0.001


Epoch 2/50: 100%|██████████| 152/152 [00:01<00:00, 122.48it/s]


Epoch 2/50, Training Loss: 4.1862, Learning Rate: 0.001


Epoch 3/50: 100%|██████████| 152/152 [00:01<00:00, 128.88it/s]


Epoch 3/50, Training Loss: 3.6233, Learning Rate: 0.001


Epoch 4/50: 100%|██████████| 152/152 [00:01<00:00, 117.22it/s]


Epoch 4/50, Training Loss: 3.2546, Learning Rate: 0.001


Epoch 5/50: 100%|██████████| 152/152 [00:01<00:00, 88.35it/s]


Epoch 5/50, Training Loss: 2.8786, Learning Rate: 0.001


Epoch 6/50: 100%|██████████| 152/152 [00:01<00:00, 125.39it/s]


Epoch 6/50, Training Loss: 2.5936, Learning Rate: 0.001


Epoch 7/50: 100%|██████████| 152/152 [00:01<00:00, 127.26it/s]


Epoch 7/50, Training Loss: 2.3862, Learning Rate: 0.001


Epoch 8/50: 100%|██████████| 152/152 [00:01<00:00, 127.04it/s]


Epoch 8/50, Training Loss: 2.2397, Learning Rate: 0.001


Epoch 9/50: 100%|██████████| 152/152 [00:01<00:00, 127.88it/s]


Epoch 9/50, Training Loss: 2.0319, Learning Rate: 0.001


Epoch 10/50: 100%|██████████| 152/152 [00:01<00:00, 128.13it/s]


Epoch 10/50, Training Loss: 1.7857, Learning Rate: 0.001


Epoch 11/50: 100%|██████████| 152/152 [00:01<00:00, 126.40it/s]


Epoch 11/50, Training Loss: 1.5876, Learning Rate: 0.001


Epoch 12/50: 100%|██████████| 152/152 [00:01<00:00, 127.45it/s]


Epoch 12/50, Training Loss: 1.6007, Learning Rate: 0.001


Epoch 13/50: 100%|██████████| 152/152 [00:01<00:00, 127.94it/s]


Epoch 13/50, Training Loss: 1.3261, Learning Rate: 0.001


Epoch 14/50: 100%|██████████| 152/152 [00:01<00:00, 102.04it/s]


Epoch 14/50, Training Loss: 1.3415, Learning Rate: 0.001


Epoch 15/50: 100%|██████████| 152/152 [00:01<00:00, 91.62it/s] 


Epoch 15/50, Training Loss: 1.1497, Learning Rate: 0.001


Epoch 16/50: 100%|██████████| 152/152 [00:01<00:00, 128.56it/s]


Epoch 16/50, Training Loss: 1.0896, Learning Rate: 0.00095


Epoch 17/50: 100%|██████████| 152/152 [00:01<00:00, 130.72it/s]


Epoch 17/50, Training Loss: 0.9218, Learning Rate: 0.0009025


Epoch 18/50: 100%|██████████| 152/152 [00:01<00:00, 129.41it/s]


Epoch 18/50, Training Loss: 0.8808, Learning Rate: 0.000857375


Epoch 19/50: 100%|██████████| 152/152 [00:01<00:00, 129.71it/s]


Epoch 19/50, Training Loss: 0.8029, Learning Rate: 0.0008145062499999999


Epoch 20/50: 100%|██████████| 152/152 [00:01<00:00, 127.82it/s]


Epoch 20/50, Training Loss: 0.7328, Learning Rate: 0.0007737809374999998


Epoch 21/50: 100%|██████████| 152/152 [00:01<00:00, 129.15it/s]


Epoch 21/50, Training Loss: 0.7455, Learning Rate: 0.0007350918906249997


Epoch 22/50: 100%|██████████| 152/152 [00:01<00:00, 129.51it/s]


Epoch 22/50, Training Loss: 0.6283, Learning Rate: 0.0006983372960937497


Epoch 23/50: 100%|██████████| 152/152 [00:01<00:00, 125.77it/s]


Epoch 23/50, Training Loss: 0.5581, Learning Rate: 0.0006634204312890621


Epoch 24/50: 100%|██████████| 152/152 [00:01<00:00, 94.12it/s]


Epoch 24/50, Training Loss: 0.5138, Learning Rate: 0.000630249409724609


Epoch 25/50: 100%|██████████| 152/152 [00:01<00:00, 101.61it/s]


Epoch 25/50, Training Loss: 0.4663, Learning Rate: 0.0005987369392383785


Epoch 26/50: 100%|██████████| 152/152 [00:01<00:00, 125.83it/s]


Epoch 26/50, Training Loss: 0.4706, Learning Rate: 0.0005688000922764595


Epoch 27/50: 100%|██████████| 152/152 [00:01<00:00, 126.34it/s]


Epoch 27/50, Training Loss: 0.5406, Learning Rate: 0.0005403600876626365


Epoch 28/50: 100%|██████████| 152/152 [00:01<00:00, 126.06it/s]


Epoch 28/50, Training Loss: 0.4185, Learning Rate: 0.0005133420832795047


Epoch 29/50: 100%|██████████| 152/152 [00:01<00:00, 121.36it/s]


Epoch 29/50, Training Loss: 0.4068, Learning Rate: 0.00048767497911552944


Epoch 30/50: 100%|██████████| 152/152 [00:01<00:00, 122.07it/s]


Epoch 30/50, Training Loss: 0.3707, Learning Rate: 0.00046329123015975297


Epoch 31/50: 100%|██████████| 152/152 [00:01<00:00, 121.08it/s]


Epoch 31/50, Training Loss: 0.3344, Learning Rate: 0.0004401266686517653


Epoch 32/50: 100%|██████████| 152/152 [00:01<00:00, 124.16it/s]


Epoch 32/50, Training Loss: 0.3237, Learning Rate: 0.00041812033521917703


Epoch 33/50: 100%|██████████| 152/152 [00:01<00:00, 106.46it/s]


Epoch 33/50, Training Loss: 0.3143, Learning Rate: 0.00039721431845821814


Epoch 34/50: 100%|██████████| 152/152 [00:01<00:00, 89.76it/s]


Epoch 34/50, Training Loss: 0.2877, Learning Rate: 0.0003773536025353072


Epoch 35/50: 100%|██████████| 152/152 [00:01<00:00, 127.05it/s]


Epoch 35/50, Training Loss: 0.2825, Learning Rate: 0.0003584859224085418


Epoch 36/50: 100%|██████████| 152/152 [00:01<00:00, 125.42it/s]


Epoch 36/50, Training Loss: 0.2770, Learning Rate: 0.0003405616262881147


Epoch 37/50: 100%|██████████| 152/152 [00:01<00:00, 128.64it/s]


Epoch 37/50, Training Loss: 0.2772, Learning Rate: 0.00032353354497370894


Epoch 38/50: 100%|██████████| 152/152 [00:01<00:00, 120.76it/s]


Epoch 38/50, Training Loss: 0.2740, Learning Rate: 0.00030735686772502346


Epoch 39/50: 100%|██████████| 152/152 [00:01<00:00, 126.61it/s]


Epoch 39/50, Training Loss: 0.2929, Learning Rate: 0.00029198902433877225


Epoch 40/50: 100%|██████████| 152/152 [00:01<00:00, 128.09it/s]


Epoch 40/50, Training Loss: 0.2554, Learning Rate: 0.00027738957312183364


Epoch 41/50: 100%|██████████| 152/152 [00:01<00:00, 130.35it/s]


Epoch 41/50, Training Loss: 0.2265, Learning Rate: 0.0002635200944657419


Epoch 42/50: 100%|██████████| 152/152 [00:01<00:00, 126.30it/s]


Epoch 42/50, Training Loss: 0.2191, Learning Rate: 0.0002503440897424548


Epoch 43/50: 100%|██████████| 152/152 [00:01<00:00, 95.53it/s]


Epoch 43/50, Training Loss: 0.2124, Learning Rate: 0.00023782688525533205


Epoch 44/50: 100%|██████████| 152/152 [00:01<00:00, 101.68it/s]


Epoch 44/50, Training Loss: 0.2035, Learning Rate: 0.00022593554099256544


Epoch 45/50: 100%|██████████| 152/152 [00:01<00:00, 127.11it/s]


Epoch 45/50, Training Loss: 0.2212, Learning Rate: 0.00021463876394293716


Epoch 46/50: 100%|██████████| 152/152 [00:01<00:00, 127.59it/s]


Epoch 46/50, Training Loss: 0.2237, Learning Rate: 0.0002039068257457903


Epoch 47/50: 100%|██████████| 152/152 [00:01<00:00, 125.57it/s]


Epoch 47/50, Training Loss: 0.1863, Learning Rate: 0.00019371148445850077


Epoch 48/50: 100%|██████████| 152/152 [00:01<00:00, 127.67it/s]


Epoch 48/50, Training Loss: 0.1956, Learning Rate: 0.00018402591023557573


Epoch 49/50: 100%|██████████| 152/152 [00:01<00:00, 127.90it/s]


Epoch 49/50, Training Loss: 0.1904, Learning Rate: 0.00017482461472379692


Epoch 50/50: 100%|██████████| 152/152 [00:01<00:00, 126.84it/s]

Epoch 50/50, Training Loss: 0.1799, Learning Rate: 0.00016608338398760707





In [22]:
# Score the trained feedforward models on the question-split test data,
# using the contextual-embedding test dataset/loader (embed_type="context").
question_ff_sql_pipe = SQLPipeline(
    embed_type="context",
    model=list(question_trained_ff_model),
    dataset=question_context_test_dataset,
)
(question_ff_results, question_ff_acc) = question_ff_sql_pipe.batch_predict(
    dataloader=question_context_test_loader
)
print(f"\nAccuracy of Feedforward model on question split test set: {question_ff_acc:.2%}")

Generating SQL queries: 100%|██████████| 14/14 [00:00<00:00, 17.93it/s]


Accuracy of Feedforward model on question split test set: 19.02%





In [23]:
# Query split: build the feedforward tagger/classifier pair and train them.
# Same architecture settings as the question-split run; only the dataset and
# loader differ.
query_ff_tag_model = FFTagger(
    input_dim=query_context_train_val_dataset.get_vector_dim(),
    tag_vocab_size=query_context_train_val_dataset.get_tag_vocab_size(),
    dropout=0.2,
    hidden_sizes=[512, 256, 128],
)
query_ff_classifier_model = FFClassifier(
    input_dim=query_context_train_val_dataset.get_vector_dim(),
    sql_vocab_size=query_context_train_val_dataset.get_sql_vocab_size(),
    dropout=0.2,
    hidden_sizes=[512, 256, 128],
)

# Joint training run. Per the recorded log, the learning rate stays at 0.001
# for the first 15 epochs and is then multiplied by 0.95 after each epoch.
query_trained_ff_model = train_dual_model(
    tagger_model=query_ff_tag_model,
    classifier_model=query_ff_classifier_model,
    train_loader=query_context_train_val_loader,
    gamma=0.95,
    warmup_epochs=15,
    epochs=50,
)

Epoch 1/50: 100%|██████████| 155/155 [00:01<00:00, 129.32it/s]


Epoch 1/50, Training Loss: 5.2312, Learning Rate: 0.001


Epoch 2/50: 100%|██████████| 155/155 [00:01<00:00, 93.26it/s]


Epoch 2/50, Training Loss: 4.0993, Learning Rate: 0.001


Epoch 3/50: 100%|██████████| 155/155 [00:01<00:00, 106.12it/s]


Epoch 3/50, Training Loss: 3.5747, Learning Rate: 0.001


Epoch 4/50: 100%|██████████| 155/155 [00:01<00:00, 124.48it/s]


Epoch 4/50, Training Loss: 3.1633, Learning Rate: 0.001


Epoch 5/50: 100%|██████████| 155/155 [00:01<00:00, 127.48it/s]


Epoch 5/50, Training Loss: 2.8374, Learning Rate: 0.001


Epoch 6/50: 100%|██████████| 155/155 [00:01<00:00, 129.48it/s]


Epoch 6/50, Training Loss: 2.5575, Learning Rate: 0.001


Epoch 7/50: 100%|██████████| 155/155 [00:01<00:00, 129.24it/s]


Epoch 7/50, Training Loss: 2.3032, Learning Rate: 0.001


Epoch 8/50: 100%|██████████| 155/155 [00:01<00:00, 128.41it/s]


Epoch 8/50, Training Loss: 2.0985, Learning Rate: 0.001


Epoch 9/50: 100%|██████████| 155/155 [00:01<00:00, 127.72it/s]


Epoch 9/50, Training Loss: 1.8702, Learning Rate: 0.001


Epoch 10/50: 100%|██████████| 155/155 [00:01<00:00, 127.90it/s]


Epoch 10/50, Training Loss: 1.7056, Learning Rate: 0.001


Epoch 11/50: 100%|██████████| 155/155 [00:01<00:00, 109.12it/s]


Epoch 11/50, Training Loss: 1.5575, Learning Rate: 0.001


Epoch 12/50: 100%|██████████| 155/155 [00:01<00:00, 90.38it/s]


Epoch 12/50, Training Loss: 1.4228, Learning Rate: 0.001


Epoch 13/50: 100%|██████████| 155/155 [00:01<00:00, 130.20it/s]


Epoch 13/50, Training Loss: 1.3213, Learning Rate: 0.001


Epoch 14/50: 100%|██████████| 155/155 [00:01<00:00, 128.98it/s]


Epoch 14/50, Training Loss: 1.1656, Learning Rate: 0.001


Epoch 15/50: 100%|██████████| 155/155 [00:01<00:00, 127.61it/s]


Epoch 15/50, Training Loss: 1.0837, Learning Rate: 0.001


Epoch 16/50: 100%|██████████| 155/155 [00:01<00:00, 128.57it/s]


Epoch 16/50, Training Loss: 0.9927, Learning Rate: 0.00095


Epoch 17/50: 100%|██████████| 155/155 [00:01<00:00, 128.80it/s]


Epoch 17/50, Training Loss: 0.9182, Learning Rate: 0.0009025


Epoch 18/50: 100%|██████████| 155/155 [00:01<00:00, 127.39it/s]


Epoch 18/50, Training Loss: 0.8141, Learning Rate: 0.000857375


Epoch 19/50: 100%|██████████| 155/155 [00:01<00:00, 128.60it/s]


Epoch 19/50, Training Loss: 0.7513, Learning Rate: 0.0008145062499999999


Epoch 20/50: 100%|██████████| 155/155 [00:01<00:00, 128.62it/s]


Epoch 20/50, Training Loss: 0.6803, Learning Rate: 0.0007737809374999998


Epoch 21/50: 100%|██████████| 155/155 [00:01<00:00, 97.23it/s]


Epoch 21/50, Training Loss: 0.6165, Learning Rate: 0.0007350918906249997


Epoch 22/50: 100%|██████████| 155/155 [00:01<00:00, 103.03it/s]


Epoch 22/50, Training Loss: 0.5700, Learning Rate: 0.0006983372960937497


Epoch 23/50: 100%|██████████| 155/155 [00:01<00:00, 130.30it/s]


Epoch 23/50, Training Loss: 0.5456, Learning Rate: 0.0006634204312890621


Epoch 24/50: 100%|██████████| 155/155 [00:01<00:00, 129.15it/s]


Epoch 24/50, Training Loss: 0.4978, Learning Rate: 0.000630249409724609


Epoch 25/50: 100%|██████████| 155/155 [00:01<00:00, 129.06it/s]


Epoch 25/50, Training Loss: 0.4575, Learning Rate: 0.0005987369392383785


Epoch 26/50: 100%|██████████| 155/155 [00:01<00:00, 126.90it/s]


Epoch 26/50, Training Loss: 0.4424, Learning Rate: 0.0005688000922764595


Epoch 27/50: 100%|██████████| 155/155 [00:01<00:00, 127.66it/s]


Epoch 27/50, Training Loss: 0.4247, Learning Rate: 0.0005403600876626365


Epoch 28/50: 100%|██████████| 155/155 [00:01<00:00, 127.07it/s]


Epoch 28/50, Training Loss: 0.3665, Learning Rate: 0.0005133420832795047


Epoch 29/50: 100%|██████████| 155/155 [00:01<00:00, 127.55it/s]


Epoch 29/50, Training Loss: 0.3419, Learning Rate: 0.00048767497911552944


Epoch 30/50: 100%|██████████| 155/155 [00:01<00:00, 115.08it/s]


Epoch 30/50, Training Loss: 0.3376, Learning Rate: 0.00046329123015975297


Epoch 31/50: 100%|██████████| 155/155 [00:01<00:00, 88.61it/s]


Epoch 31/50, Training Loss: 0.3219, Learning Rate: 0.0004401266686517653


Epoch 32/50: 100%|██████████| 155/155 [00:01<00:00, 102.48it/s]


Epoch 32/50, Training Loss: 0.3048, Learning Rate: 0.00041812033521917703


Epoch 33/50: 100%|██████████| 155/155 [00:01<00:00, 96.43it/s] 


Epoch 33/50, Training Loss: 0.3067, Learning Rate: 0.00039721431845821814


Epoch 34/50: 100%|██████████| 155/155 [00:01<00:00, 126.75it/s]


Epoch 34/50, Training Loss: 0.2749, Learning Rate: 0.0003773536025353072


Epoch 35/50: 100%|██████████| 155/155 [00:01<00:00, 125.26it/s]


Epoch 35/50, Training Loss: 0.2576, Learning Rate: 0.0003584859224085418


Epoch 36/50: 100%|██████████| 155/155 [00:01<00:00, 126.34it/s]


Epoch 36/50, Training Loss: 0.2485, Learning Rate: 0.0003405616262881147


Epoch 37/50: 100%|██████████| 155/155 [00:01<00:00, 128.82it/s]


Epoch 37/50, Training Loss: 0.2293, Learning Rate: 0.00032353354497370894


Epoch 38/50: 100%|██████████| 155/155 [00:01<00:00, 127.81it/s]


Epoch 38/50, Training Loss: 0.2315, Learning Rate: 0.00030735686772502346


Epoch 39/50: 100%|██████████| 155/155 [00:01<00:00, 109.96it/s]


Epoch 39/50, Training Loss: 0.2312, Learning Rate: 0.00029198902433877225


Epoch 40/50: 100%|██████████| 155/155 [00:01<00:00, 89.96it/s]


Epoch 40/50, Training Loss: 0.2180, Learning Rate: 0.00027738957312183364


Epoch 41/50: 100%|██████████| 155/155 [00:01<00:00, 129.61it/s]


Epoch 41/50, Training Loss: 0.1957, Learning Rate: 0.0002635200944657419


Epoch 42/50: 100%|██████████| 155/155 [00:01<00:00, 128.05it/s]


Epoch 42/50, Training Loss: 0.1988, Learning Rate: 0.0002503440897424548


Epoch 43/50: 100%|██████████| 155/155 [00:01<00:00, 129.58it/s]


Epoch 43/50, Training Loss: 0.1832, Learning Rate: 0.00023782688525533205


Epoch 44/50: 100%|██████████| 155/155 [00:01<00:00, 128.34it/s]


Epoch 44/50, Training Loss: 0.1788, Learning Rate: 0.00022593554099256544


Epoch 45/50: 100%|██████████| 155/155 [00:01<00:00, 129.04it/s]


Epoch 45/50, Training Loss: 0.1743, Learning Rate: 0.00021463876394293716


Epoch 46/50: 100%|██████████| 155/155 [00:01<00:00, 128.79it/s]


Epoch 46/50, Training Loss: 0.1703, Learning Rate: 0.0002039068257457903


Epoch 47/50: 100%|██████████| 155/155 [00:01<00:00, 125.53it/s]


Epoch 47/50, Training Loss: 0.1636, Learning Rate: 0.00019371148445850077


Epoch 48/50: 100%|██████████| 155/155 [00:01<00:00, 123.59it/s]


Epoch 48/50, Training Loss: 0.1753, Learning Rate: 0.00018402591023557573


Epoch 49/50: 100%|██████████| 155/155 [00:01<00:00, 90.42it/s]


Epoch 49/50, Training Loss: 0.1650, Learning Rate: 0.00017482461472379692


Epoch 50/50: 100%|██████████| 155/155 [00:01<00:00, 106.33it/s]

Epoch 50/50, Training Loss: 0.1535, Learning Rate: 0.00016608338398760707





In [24]:
# Score the trained feedforward models on the query-split test data,
# using the contextual-embedding test dataset/loader (embed_type="context").
# NOTE(review): the recorded run of this cell printed 0.00% accuracy -- confirm
# whether that is expected for the query split before relying on it.
query_ff_sql_pipe = SQLPipeline(
    embed_type="context",
    model=list(query_trained_ff_model),
    dataset=query_context_test_dataset,
)
(query_ff_results, query_ff_acc) = query_ff_sql_pipe.batch_predict(
    dataloader=query_context_test_loader
)
print(f"\nAccuracy of Feedforward model on query split test set: {query_ff_acc:.2%}")

Generating SQL queries: 100%|██████████| 11/11 [00:00<00:00, 11.79it/s]


Accuracy of Feedforward model on query split test set: 0.00%





### Training LSTM model using question and query split

In [25]:
# Question split: a single 3-layer LSTM model that takes both the tag and SQL
# vocabulary sizes, so one network covers both outputs.
question_lstm_model = LSTMTaggerClassifer(
    input_dim=question_context_train_val_dataset.get_vector_dim(),
    tag_vocab_size=question_context_train_val_dataset.get_tag_vocab_size(),
    sql_vocab_size=question_context_train_val_dataset.get_sql_vocab_size(),
    num_layers=3,
    hidden_dim=256,
)

# Training run. Per the recorded log, the learning rate stays at 0.001 for the
# first 25 epochs and is then multiplied by 0.90 after each epoch.
# weight_decay / amsgrad are presumably forwarded to the optimizer inside
# train_model -- TODO confirm against its definition.
question_trained_lstm_model = train_model(
    model=question_lstm_model,
    train_loader=question_context_train_val_loader,
    epochs=80,
    warmup_epochs=25,
    gamma=0.90,
    lr=1e-3,
    weight_decay=0.1,
    amsgrad=True,
)


Epoch 1/80: 100%|██████████| 152/152 [00:02<00:00, 51.34it/s]


Epoch 1/80, Training Loss: 5.9574, Learning Rate: 0.001


Epoch 2/80: 100%|██████████| 152/152 [00:02<00:00, 57.83it/s]


Epoch 2/80, Training Loss: 4.4973, Learning Rate: 0.001


Epoch 3/80: 100%|██████████| 152/152 [00:02<00:00, 53.40it/s]


Epoch 3/80, Training Loss: 3.6300, Learning Rate: 0.001


Epoch 4/80: 100%|██████████| 152/152 [00:03<00:00, 46.51it/s]


Epoch 4/80, Training Loss: 2.9324, Learning Rate: 0.001


Epoch 5/80: 100%|██████████| 152/152 [00:02<00:00, 57.73it/s]


Epoch 5/80, Training Loss: 2.3151, Learning Rate: 0.001


Epoch 6/80: 100%|██████████| 152/152 [00:02<00:00, 56.77it/s]


Epoch 6/80, Training Loss: 1.8025, Learning Rate: 0.001


Epoch 7/80: 100%|██████████| 152/152 [00:02<00:00, 58.81it/s]


Epoch 7/80, Training Loss: 1.4203, Learning Rate: 0.001


Epoch 8/80: 100%|██████████| 152/152 [00:03<00:00, 44.97it/s]


Epoch 8/80, Training Loss: 1.1164, Learning Rate: 0.001


Epoch 9/80: 100%|██████████| 152/152 [00:02<00:00, 56.96it/s]


Epoch 9/80, Training Loss: 0.8992, Learning Rate: 0.001


Epoch 10/80: 100%|██████████| 152/152 [00:02<00:00, 56.91it/s]


Epoch 10/80, Training Loss: 0.7408, Learning Rate: 0.001


Epoch 11/80: 100%|██████████| 152/152 [00:02<00:00, 56.98it/s]


Epoch 11/80, Training Loss: 0.5164, Learning Rate: 0.001


Epoch 12/80: 100%|██████████| 152/152 [00:03<00:00, 50.06it/s]


Epoch 12/80, Training Loss: 0.4112, Learning Rate: 0.001


Epoch 13/80: 100%|██████████| 152/152 [00:02<00:00, 51.22it/s]


Epoch 13/80, Training Loss: 0.3374, Learning Rate: 0.001


Epoch 14/80: 100%|██████████| 152/152 [00:02<00:00, 57.00it/s]


Epoch 14/80, Training Loss: 0.2701, Learning Rate: 0.001


Epoch 15/80: 100%|██████████| 152/152 [00:02<00:00, 57.99it/s]


Epoch 15/80, Training Loss: 0.2333, Learning Rate: 0.001


Epoch 16/80: 100%|██████████| 152/152 [00:02<00:00, 57.77it/s]


Epoch 16/80, Training Loss: 0.2017, Learning Rate: 0.001


Epoch 17/80: 100%|██████████| 152/152 [00:03<00:00, 45.54it/s]


Epoch 17/80, Training Loss: 0.1711, Learning Rate: 0.001


Epoch 18/80: 100%|██████████| 152/152 [00:02<00:00, 58.27it/s]


Epoch 18/80, Training Loss: 0.1577, Learning Rate: 0.001


Epoch 19/80: 100%|██████████| 152/152 [00:02<00:00, 57.11it/s]


Epoch 19/80, Training Loss: 0.1592, Learning Rate: 0.001


Epoch 20/80: 100%|██████████| 152/152 [00:02<00:00, 57.80it/s]


Epoch 20/80, Training Loss: 0.1417, Learning Rate: 0.001


Epoch 21/80: 100%|██████████| 152/152 [00:03<00:00, 44.09it/s]


Epoch 21/80, Training Loss: 0.1193, Learning Rate: 0.001


Epoch 22/80: 100%|██████████| 152/152 [00:02<00:00, 58.14it/s]


Epoch 22/80, Training Loss: 0.1285, Learning Rate: 0.001


Epoch 23/80: 100%|██████████| 152/152 [00:02<00:00, 57.18it/s]


Epoch 23/80, Training Loss: 0.1259, Learning Rate: 0.001


Epoch 24/80: 100%|██████████| 152/152 [00:02<00:00, 56.26it/s]


Epoch 24/80, Training Loss: 0.1047, Learning Rate: 0.001


Epoch 25/80: 100%|██████████| 152/152 [00:02<00:00, 51.40it/s]


Epoch 25/80, Training Loss: 0.0908, Learning Rate: 0.001


Epoch 26/80: 100%|██████████| 152/152 [00:03<00:00, 49.66it/s]


Epoch 26/80, Training Loss: 0.1066, Learning Rate: 0.0009000000000000001


Epoch 27/80: 100%|██████████| 152/152 [00:02<00:00, 58.74it/s]


Epoch 27/80, Training Loss: 0.0975, Learning Rate: 0.0008100000000000001


Epoch 28/80: 100%|██████████| 152/152 [00:02<00:00, 58.04it/s]


Epoch 28/80, Training Loss: 0.0813, Learning Rate: 0.000729


Epoch 29/80: 100%|██████████| 152/152 [00:02<00:00, 57.10it/s]


Epoch 29/80, Training Loss: 0.0626, Learning Rate: 0.0006561000000000001


Epoch 30/80: 100%|██████████| 152/152 [00:03<00:00, 45.55it/s]


Epoch 30/80, Training Loss: 0.0491, Learning Rate: 0.00059049


Epoch 31/80: 100%|██████████| 152/152 [00:02<00:00, 57.73it/s]


Epoch 31/80, Training Loss: 0.0445, Learning Rate: 0.000531441


Epoch 32/80: 100%|██████████| 152/152 [00:02<00:00, 57.91it/s]


Epoch 32/80, Training Loss: 0.0435, Learning Rate: 0.0004782969


Epoch 33/80: 100%|██████████| 152/152 [00:02<00:00, 58.46it/s]


Epoch 33/80, Training Loss: 0.0420, Learning Rate: 0.00043046721


Epoch 34/80: 100%|██████████| 152/152 [00:03<00:00, 46.01it/s]


Epoch 34/80, Training Loss: 0.0377, Learning Rate: 0.000387420489


Epoch 35/80: 100%|██████████| 152/152 [00:02<00:00, 55.21it/s]


Epoch 35/80, Training Loss: 0.0358, Learning Rate: 0.0003486784401


Epoch 36/80: 100%|██████████| 152/152 [00:02<00:00, 56.81it/s]


Epoch 36/80, Training Loss: 0.0336, Learning Rate: 0.00031381059609000004


Epoch 37/80: 100%|██████████| 152/152 [00:02<00:00, 57.84it/s]


Epoch 37/80, Training Loss: 0.0328, Learning Rate: 0.00028242953648100003


Epoch 38/80: 100%|██████████| 152/152 [00:02<00:00, 54.17it/s]


Epoch 38/80, Training Loss: 0.0316, Learning Rate: 0.00025418658283290005


Epoch 39/80: 100%|██████████| 152/152 [00:03<00:00, 48.30it/s]


Epoch 39/80, Training Loss: 0.0314, Learning Rate: 0.00022876792454961005


Epoch 40/80: 100%|██████████| 152/152 [00:02<00:00, 57.21it/s]


Epoch 40/80, Training Loss: 0.0307, Learning Rate: 0.00020589113209464906


Epoch 41/80: 100%|██████████| 152/152 [00:02<00:00, 57.24it/s]


Epoch 41/80, Training Loss: 0.0297, Learning Rate: 0.00018530201888518417


Epoch 42/80: 100%|██████████| 152/152 [00:02<00:00, 57.81it/s]


Epoch 42/80, Training Loss: 0.0297, Learning Rate: 0.00016677181699666576


Epoch 43/80: 100%|██████████| 152/152 [00:03<00:00, 45.24it/s]


Epoch 43/80, Training Loss: 0.0296, Learning Rate: 0.0001500946352969992


Epoch 44/80: 100%|██████████| 152/152 [00:02<00:00, 57.80it/s]


Epoch 44/80, Training Loss: 0.0290, Learning Rate: 0.0001350851717672993


Epoch 45/80: 100%|██████████| 152/152 [00:02<00:00, 57.59it/s]


Epoch 45/80, Training Loss: 0.0291, Learning Rate: 0.00012157665459056936


Epoch 46/80: 100%|██████████| 152/152 [00:02<00:00, 56.31it/s]


Epoch 46/80, Training Loss: 0.0282, Learning Rate: 0.00010941898913151243


Epoch 47/80: 100%|██████████| 152/152 [00:03<00:00, 47.88it/s]


Epoch 47/80, Training Loss: 0.0281, Learning Rate: 9.847709021836118e-05


Epoch 48/80: 100%|██████████| 152/152 [00:02<00:00, 54.91it/s]


Epoch 48/80, Training Loss: 0.0280, Learning Rate: 8.862938119652506e-05


Epoch 49/80: 100%|██████████| 152/152 [00:02<00:00, 58.26it/s]


Epoch 49/80, Training Loss: 0.0270, Learning Rate: 7.976644307687256e-05


Epoch 50/80: 100%|██████████| 152/152 [00:02<00:00, 57.27it/s]


Epoch 50/80, Training Loss: 0.0268, Learning Rate: 7.17897987691853e-05


Epoch 51/80: 100%|██████████| 152/152 [00:02<00:00, 54.18it/s]


Epoch 51/80, Training Loss: 0.0268, Learning Rate: 6.461081889226677e-05


Epoch 52/80: 100%|██████████| 152/152 [00:03<00:00, 47.93it/s]


Epoch 52/80, Training Loss: 0.0272, Learning Rate: 5.81497370030401e-05


Epoch 53/80: 100%|██████████| 152/152 [00:02<00:00, 57.58it/s]


Epoch 53/80, Training Loss: 0.0265, Learning Rate: 5.233476330273609e-05


Epoch 54/80: 100%|██████████| 152/152 [00:02<00:00, 56.68it/s]


Epoch 54/80, Training Loss: 0.0273, Learning Rate: 4.7101286972462485e-05


Epoch 55/80: 100%|██████████| 152/152 [00:02<00:00, 56.36it/s]


Epoch 55/80, Training Loss: 0.0284, Learning Rate: 4.239115827521624e-05


Epoch 56/80: 100%|██████████| 152/152 [00:03<00:00, 44.90it/s]


Epoch 56/80, Training Loss: 0.0269, Learning Rate: 3.8152042447694614e-05


Epoch 57/80: 100%|██████████| 152/152 [00:02<00:00, 58.37it/s]


Epoch 57/80, Training Loss: 0.0265, Learning Rate: 3.433683820292515e-05


Epoch 58/80: 100%|██████████| 152/152 [00:02<00:00, 57.54it/s]


Epoch 58/80, Training Loss: 0.0262, Learning Rate: 3.090315438263264e-05


Epoch 59/80: 100%|██████████| 152/152 [00:02<00:00, 57.44it/s]


Epoch 59/80, Training Loss: 0.0268, Learning Rate: 2.7812838944369376e-05


Epoch 60/80: 100%|██████████| 152/152 [00:03<00:00, 48.71it/s]


Epoch 60/80, Training Loss: 0.0261, Learning Rate: 2.503155504993244e-05


Epoch 61/80: 100%|██████████| 152/152 [00:02<00:00, 52.96it/s]


Epoch 61/80, Training Loss: 0.0262, Learning Rate: 2.2528399544939195e-05


Epoch 62/80: 100%|██████████| 152/152 [00:02<00:00, 58.01it/s]


Epoch 62/80, Training Loss: 0.0263, Learning Rate: 2.0275559590445276e-05


Epoch 63/80: 100%|██████████| 152/152 [00:02<00:00, 58.07it/s]


Epoch 63/80, Training Loss: 0.0260, Learning Rate: 1.8248003631400748e-05


Epoch 64/80: 100%|██████████| 152/152 [00:02<00:00, 55.03it/s]


Epoch 64/80, Training Loss: 0.0261, Learning Rate: 1.6423203268260675e-05


Epoch 65/80: 100%|██████████| 152/152 [00:03<00:00, 46.19it/s]


Epoch 65/80, Training Loss: 0.0256, Learning Rate: 1.4780882941434607e-05


Epoch 66/80: 100%|██████████| 152/152 [00:02<00:00, 57.41it/s]


Epoch 66/80, Training Loss: 0.0261, Learning Rate: 1.3302794647291146e-05


Epoch 67/80: 100%|██████████| 152/152 [00:02<00:00, 57.65it/s]


Epoch 67/80, Training Loss: 0.0257, Learning Rate: 1.1972515182562031e-05


Epoch 68/80: 100%|██████████| 152/152 [00:03<00:00, 50.52it/s]


Epoch 68/80, Training Loss: 0.0262, Learning Rate: 1.0775263664305828e-05


Epoch 69/80: 100%|██████████| 152/152 [00:03<00:00, 41.22it/s]


Epoch 69/80, Training Loss: 0.0258, Learning Rate: 9.697737297875246e-06


Epoch 70/80: 100%|██████████| 152/152 [00:02<00:00, 57.14it/s]


Epoch 70/80, Training Loss: 0.0259, Learning Rate: 8.727963568087722e-06


Epoch 71/80: 100%|██████████| 152/152 [00:02<00:00, 57.65it/s]


Epoch 71/80, Training Loss: 0.0260, Learning Rate: 7.85516721127895e-06


Epoch 72/80: 100%|██████████| 152/152 [00:02<00:00, 56.92it/s]


Epoch 72/80, Training Loss: 0.0259, Learning Rate: 7.069650490151056e-06


Epoch 73/80: 100%|██████████| 152/152 [00:03<00:00, 48.21it/s]


Epoch 73/80, Training Loss: 0.0259, Learning Rate: 6.362685441135951e-06


Epoch 74/80: 100%|██████████| 152/152 [00:02<00:00, 53.32it/s]


Epoch 74/80, Training Loss: 0.0259, Learning Rate: 5.7264168970223554e-06


Epoch 75/80: 100%|██████████| 152/152 [00:02<00:00, 56.75it/s]


Epoch 75/80, Training Loss: 0.0258, Learning Rate: 5.15377520732012e-06


Epoch 76/80: 100%|██████████| 152/152 [00:02<00:00, 56.29it/s]


Epoch 76/80, Training Loss: 0.0262, Learning Rate: 4.638397686588108e-06


Epoch 77/80: 100%|██████████| 152/152 [00:02<00:00, 52.68it/s]


Epoch 77/80, Training Loss: 0.0255, Learning Rate: 4.174557917929298e-06


Epoch 78/80: 100%|██████████| 152/152 [00:03<00:00, 47.76it/s]


Epoch 78/80, Training Loss: 0.0262, Learning Rate: 3.7571021261363684e-06


Epoch 79/80: 100%|██████████| 152/152 [00:02<00:00, 57.17it/s]


Epoch 79/80, Training Loss: 0.0260, Learning Rate: 3.3813919135227317e-06


Epoch 80/80: 100%|██████████| 152/152 [00:02<00:00, 56.86it/s]

Epoch 80/80, Training Loss: 0.0258, Learning Rate: 3.0432527221704586e-06





In [26]:
# Score the trained LSTM on the question-split test data. Unlike the
# feedforward evaluation, the trained model object is passed in directly
# rather than wrapped in list(...).
question_lstm_sql_pipe = SQLPipeline(
    embed_type="context",
    model=question_trained_lstm_model,
    dataset=question_context_test_dataset,
)
(question_lstm_results, question_lstm_acc) = question_lstm_sql_pipe.batch_predict(
    dataloader=question_context_test_loader
)
print(f"\nAccuracy of LSTM model on question split test set: {question_lstm_acc:.2%}")

Generating SQL queries: 100%|██████████| 14/14 [00:00<00:00, 17.36it/s]


Accuracy of LSTM model on question split test set: 33.33%





In [27]:
# Query split: same 3-layer LSTM configuration as the question-split run,
# built from the query-split train+val dataset.
query_lstm_model = LSTMTaggerClassifer(
    input_dim=query_context_train_val_dataset.get_vector_dim(),
    tag_vocab_size=query_context_train_val_dataset.get_tag_vocab_size(),
    sql_vocab_size=query_context_train_val_dataset.get_sql_vocab_size(),
    num_layers=3,
    hidden_dim=256,
)

# Training run: 150 epochs here versus 80 for the question split; the other
# hyperparameters are unchanged. Per the recorded log, the learning rate stays
# at 0.001 for the first 25 epochs and is then multiplied by 0.90 each epoch.
query_trained_lstm_model = train_model(
    model=query_lstm_model,
    train_loader=query_context_train_val_loader,
    epochs=150,
    warmup_epochs=25,
    gamma=0.90,
    lr=1e-3,
    weight_decay=0.1,
    amsgrad=True,
)

Epoch 1/150: 100%|██████████| 155/155 [00:02<00:00, 54.32it/s]


Epoch 1/150, Training Loss: 5.9136, Learning Rate: 0.001


Epoch 2/150: 100%|██████████| 155/155 [00:03<00:00, 49.08it/s]


Epoch 2/150, Training Loss: 4.3145, Learning Rate: 0.001


Epoch 3/150: 100%|██████████| 155/155 [00:02<00:00, 59.51it/s]


Epoch 3/150, Training Loss: 3.4434, Learning Rate: 0.001


Epoch 4/150: 100%|██████████| 155/155 [00:02<00:00, 58.13it/s]


Epoch 4/150, Training Loss: 2.7522, Learning Rate: 0.001


Epoch 5/150: 100%|██████████| 155/155 [00:02<00:00, 58.46it/s]


Epoch 5/150, Training Loss: 2.1891, Learning Rate: 0.001


Epoch 6/150: 100%|██████████| 155/155 [00:03<00:00, 45.85it/s]


Epoch 6/150, Training Loss: 1.7400, Learning Rate: 0.001


Epoch 7/150: 100%|██████████| 155/155 [00:02<00:00, 58.23it/s]


Epoch 7/150, Training Loss: 1.3732, Learning Rate: 0.001


Epoch 8/150: 100%|██████████| 155/155 [00:02<00:00, 58.53it/s]


Epoch 8/150, Training Loss: 1.0557, Learning Rate: 0.001


Epoch 9/150: 100%|██████████| 155/155 [00:02<00:00, 57.80it/s]


Epoch 9/150, Training Loss: 0.8158, Learning Rate: 0.001


Epoch 10/150: 100%|██████████| 155/155 [00:03<00:00, 48.77it/s]


Epoch 10/150, Training Loss: 0.6260, Learning Rate: 0.001


Epoch 11/150: 100%|██████████| 155/155 [00:02<00:00, 54.03it/s]


Epoch 11/150, Training Loss: 0.4915, Learning Rate: 0.001


Epoch 12/150: 100%|██████████| 155/155 [00:02<00:00, 58.09it/s]


Epoch 12/150, Training Loss: 0.3974, Learning Rate: 0.001


Epoch 13/150: 100%|██████████| 155/155 [00:02<00:00, 59.11it/s]


Epoch 13/150, Training Loss: 0.3092, Learning Rate: 0.001


Epoch 14/150: 100%|██████████| 155/155 [00:02<00:00, 54.78it/s]


Epoch 14/150, Training Loss: 0.2667, Learning Rate: 0.001


Epoch 15/150: 100%|██████████| 155/155 [00:03<00:00, 47.75it/s]


Epoch 15/150, Training Loss: 0.2586, Learning Rate: 0.001


Epoch 16/150: 100%|██████████| 155/155 [00:02<00:00, 57.84it/s]


Epoch 16/150, Training Loss: 0.2720, Learning Rate: 0.001


Epoch 17/150: 100%|██████████| 155/155 [00:02<00:00, 57.59it/s]


Epoch 17/150, Training Loss: 0.1858, Learning Rate: 0.001


Epoch 18/150: 100%|██████████| 155/155 [00:02<00:00, 58.70it/s]


Epoch 18/150, Training Loss: 0.1668, Learning Rate: 0.001


Epoch 19/150: 100%|██████████| 155/155 [00:03<00:00, 45.40it/s]


Epoch 19/150, Training Loss: 0.1335, Learning Rate: 0.001


Epoch 20/150: 100%|██████████| 155/155 [00:02<00:00, 58.38it/s]


Epoch 20/150, Training Loss: 0.1279, Learning Rate: 0.001


Epoch 21/150: 100%|██████████| 155/155 [00:02<00:00, 58.28it/s]


Epoch 21/150, Training Loss: 0.1224, Learning Rate: 0.001


Epoch 22/150: 100%|██████████| 155/155 [00:02<00:00, 57.48it/s]


Epoch 22/150, Training Loss: 0.1130, Learning Rate: 0.001


Epoch 23/150: 100%|██████████| 155/155 [00:03<00:00, 48.04it/s]


Epoch 23/150, Training Loss: 0.0974, Learning Rate: 0.001


Epoch 24/150: 100%|██████████| 155/155 [00:02<00:00, 55.88it/s]


Epoch 24/150, Training Loss: 0.0887, Learning Rate: 0.001


Epoch 25/150: 100%|██████████| 155/155 [00:02<00:00, 57.83it/s]


Epoch 25/150, Training Loss: 0.1002, Learning Rate: 0.001


Epoch 26/150: 100%|██████████| 155/155 [00:02<00:00, 57.23it/s]


Epoch 26/150, Training Loss: 0.0994, Learning Rate: 0.0009000000000000001


Epoch 27/150: 100%|██████████| 155/155 [00:02<00:00, 53.96it/s]


Epoch 27/150, Training Loss: 0.0792, Learning Rate: 0.0008100000000000001


Epoch 28/150: 100%|██████████| 155/155 [00:03<00:00, 49.51it/s]


Epoch 28/150, Training Loss: 0.0649, Learning Rate: 0.000729


Epoch 29/150: 100%|██████████| 155/155 [00:02<00:00, 57.19it/s]


Epoch 29/150, Training Loss: 0.0543, Learning Rate: 0.0006561000000000001


Epoch 30/150: 100%|██████████| 155/155 [00:02<00:00, 57.57it/s]


Epoch 30/150, Training Loss: 0.0485, Learning Rate: 0.00059049


Epoch 31/150: 100%|██████████| 155/155 [00:02<00:00, 58.55it/s]


Epoch 31/150, Training Loss: 0.0450, Learning Rate: 0.000531441


Epoch 32/150: 100%|██████████| 155/155 [00:03<00:00, 46.08it/s]


Epoch 32/150, Training Loss: 0.0396, Learning Rate: 0.0004782969


Epoch 33/150: 100%|██████████| 155/155 [00:02<00:00, 58.71it/s]


Epoch 33/150, Training Loss: 0.0369, Learning Rate: 0.00043046721


Epoch 34/150: 100%|██████████| 155/155 [00:02<00:00, 57.54it/s]


Epoch 34/150, Training Loss: 0.0352, Learning Rate: 0.000387420489


Epoch 35/150: 100%|██████████| 155/155 [00:02<00:00, 58.86it/s]


Epoch 35/150, Training Loss: 0.0351, Learning Rate: 0.0003486784401


Epoch 36/150: 100%|██████████| 155/155 [00:03<00:00, 46.18it/s]


Epoch 36/150, Training Loss: 0.0325, Learning Rate: 0.00031381059609000004


Epoch 37/150: 100%|██████████| 155/155 [00:02<00:00, 56.06it/s]


Epoch 37/150, Training Loss: 0.0313, Learning Rate: 0.00028242953648100003


Epoch 38/150: 100%|██████████| 155/155 [00:02<00:00, 58.78it/s]


Epoch 38/150, Training Loss: 0.0306, Learning Rate: 0.00025418658283290005


Epoch 39/150: 100%|██████████| 155/155 [00:02<00:00, 58.44it/s]


Epoch 39/150, Training Loss: 0.0295, Learning Rate: 0.00022876792454961005


Epoch 40/150: 100%|██████████| 155/155 [00:03<00:00, 51.65it/s]


Epoch 40/150, Training Loss: 0.0289, Learning Rate: 0.00020589113209464906


Epoch 41/150: 100%|██████████| 155/155 [00:03<00:00, 49.85it/s]


Epoch 41/150, Training Loss: 0.0288, Learning Rate: 0.00018530201888518417


Epoch 42/150: 100%|██████████| 155/155 [00:02<00:00, 57.82it/s]


Epoch 42/150, Training Loss: 0.0287, Learning Rate: 0.00016677181699666576


Epoch 43/150: 100%|██████████| 155/155 [00:02<00:00, 58.69it/s]


Epoch 43/150, Training Loss: 0.0277, Learning Rate: 0.0001500946352969992


Epoch 44/150: 100%|██████████| 155/155 [00:02<00:00, 57.44it/s]


Epoch 44/150, Training Loss: 0.0292, Learning Rate: 0.0001350851717672993


Epoch 45/150: 100%|██████████| 155/155 [00:03<00:00, 45.91it/s]


Epoch 45/150, Training Loss: 0.0270, Learning Rate: 0.00012157665459056936


Epoch 46/150: 100%|██████████| 155/155 [00:02<00:00, 57.60it/s]


Epoch 46/150, Training Loss: 0.0267, Learning Rate: 0.00010941898913151243


Epoch 47/150: 100%|██████████| 155/155 [00:02<00:00, 58.06it/s]


Epoch 47/150, Training Loss: 0.0265, Learning Rate: 9.847709021836118e-05


Epoch 48/150: 100%|██████████| 155/155 [00:02<00:00, 58.41it/s]


Epoch 48/150, Training Loss: 0.0264, Learning Rate: 8.862938119652506e-05


Epoch 49/150: 100%|██████████| 155/155 [00:03<00:00, 45.70it/s]


Epoch 49/150, Training Loss: 0.0270, Learning Rate: 7.976644307687256e-05


Epoch 50/150: 100%|██████████| 155/155 [00:02<00:00, 56.99it/s]


Epoch 50/150, Training Loss: 0.0260, Learning Rate: 7.17897987691853e-05


Epoch 51/150: 100%|██████████| 155/155 [00:02<00:00, 57.79it/s]


Epoch 51/150, Training Loss: 0.0258, Learning Rate: 6.461081889226677e-05


Epoch 52/150: 100%|██████████| 155/155 [00:02<00:00, 57.70it/s]


Epoch 52/150, Training Loss: 0.0259, Learning Rate: 5.81497370030401e-05


Epoch 53/150: 100%|██████████| 155/155 [00:03<00:00, 51.14it/s]


Epoch 53/150, Training Loss: 0.0258, Learning Rate: 5.233476330273609e-05


Epoch 54/150: 100%|██████████| 155/155 [00:03<00:00, 51.34it/s]


Epoch 54/150, Training Loss: 0.0250, Learning Rate: 4.7101286972462485e-05


Epoch 55/150: 100%|██████████| 155/155 [00:02<00:00, 57.66it/s]


Epoch 55/150, Training Loss: 0.0256, Learning Rate: 4.239115827521624e-05


Epoch 56/150: 100%|██████████| 155/155 [00:02<00:00, 57.83it/s]


Epoch 56/150, Training Loss: 0.0254, Learning Rate: 3.8152042447694614e-05


Epoch 57/150: 100%|██████████| 155/155 [00:02<00:00, 57.56it/s]


Epoch 57/150, Training Loss: 0.0259, Learning Rate: 3.433683820292515e-05


Epoch 58/150: 100%|██████████| 155/155 [00:03<00:00, 46.83it/s]


Epoch 58/150, Training Loss: 0.0253, Learning Rate: 3.090315438263264e-05


Epoch 59/150: 100%|██████████| 155/155 [00:02<00:00, 57.70it/s]


Epoch 59/150, Training Loss: 0.0252, Learning Rate: 2.7812838944369376e-05


Epoch 60/150: 100%|██████████| 155/155 [00:02<00:00, 57.36it/s]


Epoch 60/150, Training Loss: 0.0252, Learning Rate: 2.503155504993244e-05


Epoch 61/150: 100%|██████████| 155/155 [00:02<00:00, 57.18it/s]


Epoch 61/150, Training Loss: 0.0254, Learning Rate: 2.2528399544939195e-05


Epoch 62/150: 100%|██████████| 155/155 [00:03<00:00, 46.31it/s]


Epoch 62/150, Training Loss: 0.0252, Learning Rate: 2.0275559590445276e-05


Epoch 63/150: 100%|██████████| 155/155 [00:03<00:00, 45.72it/s]


Epoch 63/150, Training Loss: 0.0256, Learning Rate: 1.8248003631400748e-05


Epoch 64/150: 100%|██████████| 155/155 [00:02<00:00, 58.04it/s]


Epoch 64/150, Training Loss: 0.0249, Learning Rate: 1.6423203268260675e-05


Epoch 65/150: 100%|██████████| 155/155 [00:02<00:00, 57.94it/s]


Epoch 65/150, Training Loss: 0.0251, Learning Rate: 1.4780882941434607e-05


Epoch 66/150: 100%|██████████| 155/155 [00:03<00:00, 45.88it/s]


Epoch 66/150, Training Loss: 0.0253, Learning Rate: 1.3302794647291146e-05


Epoch 67/150: 100%|██████████| 155/155 [00:02<00:00, 57.51it/s]


Epoch 67/150, Training Loss: 0.0248, Learning Rate: 1.1972515182562031e-05


Epoch 68/150: 100%|██████████| 155/155 [00:02<00:00, 58.90it/s]


Epoch 68/150, Training Loss: 0.0252, Learning Rate: 1.0775263664305828e-05


Epoch 69/150: 100%|██████████| 155/155 [00:02<00:00, 58.19it/s]


Epoch 69/150, Training Loss: 0.0251, Learning Rate: 9.697737297875246e-06


Epoch 70/150: 100%|██████████| 155/155 [00:03<00:00, 50.73it/s]


Epoch 70/150, Training Loss: 0.0257, Learning Rate: 8.727963568087722e-06


Epoch 71/150: 100%|██████████| 155/155 [00:03<00:00, 51.66it/s]


Epoch 71/150, Training Loss: 0.0251, Learning Rate: 7.85516721127895e-06


Epoch 72/150: 100%|██████████| 155/155 [00:02<00:00, 58.02it/s]


Epoch 72/150, Training Loss: 0.0255, Learning Rate: 7.069650490151056e-06


Epoch 73/150: 100%|██████████| 155/155 [00:02<00:00, 58.69it/s]


Epoch 73/150, Training Loss: 0.0248, Learning Rate: 6.362685441135951e-06


Epoch 74/150: 100%|██████████| 155/155 [00:02<00:00, 56.69it/s]


Epoch 74/150, Training Loss: 0.0252, Learning Rate: 5.7264168970223554e-06


Epoch 75/150: 100%|██████████| 155/155 [00:03<00:00, 46.25it/s]


Epoch 75/150, Training Loss: 0.0248, Learning Rate: 5.15377520732012e-06


Epoch 76/150: 100%|██████████| 155/155 [00:02<00:00, 58.09it/s]


Epoch 76/150, Training Loss: 0.0250, Learning Rate: 4.638397686588108e-06


Epoch 77/150: 100%|██████████| 155/155 [00:02<00:00, 57.71it/s]


Epoch 77/150, Training Loss: 0.0247, Learning Rate: 4.174557917929298e-06


Epoch 78/150: 100%|██████████| 155/155 [00:02<00:00, 57.98it/s]


Epoch 78/150, Training Loss: 0.0256, Learning Rate: 3.7571021261363684e-06


Epoch 79/150: 100%|██████████| 155/155 [00:03<00:00, 45.78it/s]


Epoch 79/150, Training Loss: 0.0254, Learning Rate: 3.3813919135227317e-06


Epoch 80/150: 100%|██████████| 155/155 [00:02<00:00, 57.93it/s]


Epoch 80/150, Training Loss: 0.0250, Learning Rate: 3.0432527221704586e-06


Epoch 81/150: 100%|██████████| 155/155 [00:02<00:00, 57.91it/s]


Epoch 81/150, Training Loss: 0.0253, Learning Rate: 2.7389274499534128e-06


Epoch 82/150: 100%|██████████| 155/155 [00:02<00:00, 56.74it/s]


Epoch 82/150, Training Loss: 0.0250, Learning Rate: 2.4650347049580716e-06


Epoch 83/150: 100%|██████████| 155/155 [00:03<00:00, 48.65it/s]


Epoch 83/150, Training Loss: 0.0247, Learning Rate: 2.2185312344622644e-06


Epoch 84/150: 100%|██████████| 155/155 [00:02<00:00, 54.68it/s]


Epoch 84/150, Training Loss: 0.0249, Learning Rate: 1.996678111016038e-06


Epoch 85/150: 100%|██████████| 155/155 [00:02<00:00, 57.67it/s]


Epoch 85/150, Training Loss: 0.0251, Learning Rate: 1.7970102999144342e-06


Epoch 86/150: 100%|██████████| 155/155 [00:02<00:00, 57.44it/s]


Epoch 86/150, Training Loss: 0.0251, Learning Rate: 1.6173092699229909e-06


Epoch 87/150: 100%|██████████| 155/155 [00:02<00:00, 54.07it/s]


Epoch 87/150, Training Loss: 0.0248, Learning Rate: 1.4555783429306917e-06


Epoch 88/150: 100%|██████████| 155/155 [00:03<00:00, 49.46it/s]


Epoch 88/150, Training Loss: 0.0247, Learning Rate: 1.3100205086376225e-06


Epoch 89/150: 100%|██████████| 155/155 [00:02<00:00, 57.54it/s]


Epoch 89/150, Training Loss: 0.0249, Learning Rate: 1.1790184577738603e-06


Epoch 90/150: 100%|██████████| 155/155 [00:02<00:00, 57.57it/s]


Epoch 90/150, Training Loss: 0.0254, Learning Rate: 1.0611166119964742e-06


Epoch 91/150: 100%|██████████| 155/155 [00:02<00:00, 57.31it/s]


Epoch 91/150, Training Loss: 0.0249, Learning Rate: 9.550049507968269e-07


Epoch 92/150: 100%|██████████| 155/155 [00:03<00:00, 45.43it/s]


Epoch 92/150, Training Loss: 0.0250, Learning Rate: 8.595044557171442e-07


Epoch 93/150: 100%|██████████| 155/155 [00:02<00:00, 57.81it/s]


Epoch 93/150, Training Loss: 0.0256, Learning Rate: 7.735540101454298e-07


Epoch 94/150: 100%|██████████| 155/155 [00:02<00:00, 57.87it/s]


Epoch 94/150, Training Loss: 0.0249, Learning Rate: 6.961986091308869e-07


Epoch 95/150: 100%|██████████| 155/155 [00:02<00:00, 57.90it/s]


Epoch 95/150, Training Loss: 0.0247, Learning Rate: 6.265787482177982e-07


Epoch 96/150: 100%|██████████| 155/155 [00:03<00:00, 45.76it/s]


Epoch 96/150, Training Loss: 0.0249, Learning Rate: 5.639208733960183e-07


Epoch 97/150: 100%|██████████| 155/155 [00:02<00:00, 57.26it/s]


Epoch 97/150, Training Loss: 0.0246, Learning Rate: 5.075287860564165e-07


Epoch 98/150: 100%|██████████| 155/155 [00:02<00:00, 58.77it/s]


Epoch 98/150, Training Loss: 0.0251, Learning Rate: 4.567759074507748e-07


Epoch 99/150: 100%|██████████| 155/155 [00:02<00:00, 58.05it/s]


Epoch 99/150, Training Loss: 0.0251, Learning Rate: 4.1109831670569735e-07


Epoch 100/150: 100%|██████████| 155/155 [00:03<00:00, 51.30it/s]


Epoch 100/150, Training Loss: 0.0250, Learning Rate: 3.699884850351276e-07


Epoch 101/150: 100%|██████████| 155/155 [00:03<00:00, 50.46it/s]


Epoch 101/150, Training Loss: 0.0250, Learning Rate: 3.329896365316149e-07


Epoch 102/150: 100%|██████████| 155/155 [00:02<00:00, 58.01it/s]


Epoch 102/150, Training Loss: 0.0250, Learning Rate: 2.9969067287845343e-07


Epoch 103/150: 100%|██████████| 155/155 [00:02<00:00, 58.26it/s]


Epoch 103/150, Training Loss: 0.0251, Learning Rate: 2.697216055906081e-07


Epoch 104/150: 100%|██████████| 155/155 [00:02<00:00, 57.16it/s]


Epoch 104/150, Training Loss: 0.0250, Learning Rate: 2.427494450315473e-07


Epoch 105/150: 100%|██████████| 155/155 [00:03<00:00, 45.39it/s]


Epoch 105/150, Training Loss: 0.0248, Learning Rate: 2.1847450052839256e-07


Epoch 106/150: 100%|██████████| 155/155 [00:02<00:00, 56.79it/s]


Epoch 106/150, Training Loss: 0.0251, Learning Rate: 1.966270504755533e-07


Epoch 107/150: 100%|██████████| 155/155 [00:02<00:00, 57.22it/s]


Epoch 107/150, Training Loss: 0.0250, Learning Rate: 1.7696434542799797e-07


Epoch 108/150: 100%|██████████| 155/155 [00:02<00:00, 58.08it/s]


Epoch 108/150, Training Loss: 0.0249, Learning Rate: 1.5926791088519817e-07


Epoch 109/150: 100%|██████████| 155/155 [00:03<00:00, 45.66it/s]


Epoch 109/150, Training Loss: 0.0251, Learning Rate: 1.4334111979667837e-07


Epoch 110/150: 100%|██████████| 155/155 [00:02<00:00, 57.01it/s]


Epoch 110/150, Training Loss: 0.0250, Learning Rate: 1.2900700781701054e-07


Epoch 111/150: 100%|██████████| 155/155 [00:02<00:00, 57.55it/s]


Epoch 111/150, Training Loss: 0.0250, Learning Rate: 1.1610630703530949e-07


Epoch 112/150: 100%|██████████| 155/155 [00:02<00:00, 57.99it/s]


Epoch 112/150, Training Loss: 0.0248, Learning Rate: 1.0449567633177854e-07


Epoch 113/150: 100%|██████████| 155/155 [00:03<00:00, 49.61it/s]


Epoch 113/150, Training Loss: 0.0254, Learning Rate: 9.40461086986007e-08


Epoch 114/150: 100%|██████████| 155/155 [00:02<00:00, 52.27it/s]


Epoch 114/150, Training Loss: 0.0251, Learning Rate: 8.464149782874062e-08


Epoch 115/150: 100%|██████████| 155/155 [00:02<00:00, 57.16it/s]


Epoch 115/150, Training Loss: 0.0254, Learning Rate: 7.617734804586656e-08


Epoch 116/150: 100%|██████████| 155/155 [00:02<00:00, 57.34it/s]


Epoch 116/150, Training Loss: 0.0253, Learning Rate: 6.85596132412799e-08


Epoch 117/150: 100%|██████████| 155/155 [00:02<00:00, 54.15it/s]


Epoch 117/150, Training Loss: 0.0253, Learning Rate: 6.170365191715191e-08


Epoch 118/150: 100%|██████████| 155/155 [00:03<00:00, 47.54it/s]


Epoch 118/150, Training Loss: 0.0248, Learning Rate: 5.553328672543672e-08


Epoch 119/150: 100%|██████████| 155/155 [00:02<00:00, 57.54it/s]


Epoch 119/150, Training Loss: 0.0246, Learning Rate: 4.9979958052893047e-08


Epoch 120/150: 100%|██████████| 155/155 [00:02<00:00, 56.97it/s]


Epoch 120/150, Training Loss: 0.0251, Learning Rate: 4.4981962247603745e-08


Epoch 121/150: 100%|██████████| 155/155 [00:02<00:00, 57.28it/s]


Epoch 121/150, Training Loss: 0.0252, Learning Rate: 4.048376602284337e-08


Epoch 122/150: 100%|██████████| 155/155 [00:03<00:00, 45.50it/s]


Epoch 122/150, Training Loss: 0.0248, Learning Rate: 3.6435389420559035e-08


Epoch 123/150: 100%|██████████| 155/155 [00:02<00:00, 57.94it/s]


Epoch 123/150, Training Loss: 0.0247, Learning Rate: 3.279185047850313e-08


Epoch 124/150: 100%|██████████| 155/155 [00:02<00:00, 57.11it/s]


Epoch 124/150, Training Loss: 0.0257, Learning Rate: 2.951266543065282e-08


Epoch 125/150: 100%|██████████| 155/155 [00:02<00:00, 57.68it/s]


Epoch 125/150, Training Loss: 0.0250, Learning Rate: 2.6561398887587538e-08


Epoch 126/150: 100%|██████████| 155/155 [00:03<00:00, 45.80it/s]


Epoch 126/150, Training Loss: 0.0250, Learning Rate: 2.3905258998828786e-08


Epoch 127/150: 100%|██████████| 155/155 [00:02<00:00, 57.55it/s]


Epoch 127/150, Training Loss: 0.0251, Learning Rate: 2.1514733098945908e-08


Epoch 128/150: 100%|██████████| 155/155 [00:02<00:00, 58.07it/s]


Epoch 128/150, Training Loss: 0.0251, Learning Rate: 1.9363259789051318e-08


Epoch 129/150: 100%|██████████| 155/155 [00:02<00:00, 57.56it/s]


Epoch 129/150, Training Loss: 0.0248, Learning Rate: 1.7426933810146187e-08


Epoch 130/150: 100%|██████████| 155/155 [00:02<00:00, 52.38it/s]


Epoch 130/150, Training Loss: 0.0251, Learning Rate: 1.5684240429131568e-08


Epoch 131/150: 100%|██████████| 155/155 [00:03<00:00, 50.20it/s]


Epoch 131/150, Training Loss: 0.0247, Learning Rate: 1.4115816386218411e-08


Epoch 132/150: 100%|██████████| 155/155 [00:02<00:00, 58.81it/s]


Epoch 132/150, Training Loss: 0.0249, Learning Rate: 1.2704234747596571e-08


Epoch 133/150: 100%|██████████| 155/155 [00:02<00:00, 57.60it/s]


Epoch 133/150, Training Loss: 0.0250, Learning Rate: 1.1433811272836913e-08


Epoch 134/150: 100%|██████████| 155/155 [00:02<00:00, 57.89it/s]


Epoch 134/150, Training Loss: 0.0250, Learning Rate: 1.0290430145553223e-08


Epoch 135/150: 100%|██████████| 155/155 [00:03<00:00, 45.53it/s]


Epoch 135/150, Training Loss: 0.0247, Learning Rate: 9.2613871309979e-09


Epoch 136/150: 100%|██████████| 155/155 [00:02<00:00, 57.84it/s]


Epoch 136/150, Training Loss: 0.0250, Learning Rate: 8.33524841789811e-09


Epoch 137/150: 100%|██████████| 155/155 [00:02<00:00, 58.55it/s]


Epoch 137/150, Training Loss: 0.0247, Learning Rate: 7.501723576108299e-09


Epoch 138/150: 100%|██████████| 155/155 [00:02<00:00, 59.19it/s]


Epoch 138/150, Training Loss: 0.0249, Learning Rate: 6.751551218497469e-09


Epoch 139/150: 100%|██████████| 155/155 [00:03<00:00, 43.77it/s]


Epoch 139/150, Training Loss: 0.0254, Learning Rate: 6.076396096647722e-09


Epoch 140/150: 100%|██████████| 155/155 [00:03<00:00, 44.44it/s]


Epoch 140/150, Training Loss: 0.0251, Learning Rate: 5.46875648698295e-09


Epoch 141/150: 100%|██████████| 155/155 [00:02<00:00, 57.29it/s]


Epoch 141/150, Training Loss: 0.0248, Learning Rate: 4.921880838284655e-09


Epoch 142/150: 100%|██████████| 155/155 [00:02<00:00, 57.82it/s]


Epoch 142/150, Training Loss: 0.0251, Learning Rate: 4.42969275445619e-09


Epoch 143/150: 100%|██████████| 155/155 [00:02<00:00, 52.10it/s]


Epoch 143/150, Training Loss: 0.0251, Learning Rate: 3.9867234790105715e-09


Epoch 144/150: 100%|██████████| 155/155 [00:03<00:00, 49.79it/s]


Epoch 144/150, Training Loss: 0.0251, Learning Rate: 3.5880511311095144e-09


Epoch 145/150: 100%|██████████| 155/155 [00:02<00:00, 56.99it/s]


Epoch 145/150, Training Loss: 0.0250, Learning Rate: 3.229246017998563e-09


Epoch 146/150: 100%|██████████| 155/155 [00:02<00:00, 57.82it/s]


Epoch 146/150, Training Loss: 0.0253, Learning Rate: 2.906321416198707e-09


Epoch 147/150: 100%|██████████| 155/155 [00:02<00:00, 57.14it/s]


Epoch 147/150, Training Loss: 0.0250, Learning Rate: 2.6156892745788363e-09


Epoch 148/150: 100%|██████████| 155/155 [00:03<00:00, 45.84it/s]


Epoch 148/150, Training Loss: 0.0250, Learning Rate: 2.3541203471209526e-09


Epoch 149/150: 100%|██████████| 155/155 [00:02<00:00, 57.74it/s]


Epoch 149/150, Training Loss: 0.0247, Learning Rate: 2.1187083124088576e-09


Epoch 150/150: 100%|██████████| 155/155 [00:02<00:00, 57.50it/s]

Epoch 150/150, Training Loss: 0.0248, Learning Rate: 1.906837481167972e-09





In [187]:
# Score the LSTM trained on the query split against its held-out test set.
# The pipeline wraps the test dataset and the trained model, and is told to
# use the "context" embedding type.
query_lstm_sql_pipe = SQLPipeline(
    dataset=query_context_test_dataset,
    model=query_trained_lstm_model,
    embed_type="context",
)

# batch_predict hands back a pair: the per-example results and a single
# accuracy value, which is rendered as a percentage in the report line below.
query_lstm_results, query_lstm_acc = query_lstm_sql_pipe.batch_predict(
    dataloader=query_context_test_loader,
)
print(f"\nAccuracy of LSTM model on query split test set: {query_lstm_acc:.2%}")

Generating SQL queries: 100%|██████████| 11/11 [00:01<00:00,  8.99it/s]


Accuracy of LSTM model on query split test set: 0.00%





### Training Transformer model using question and query split

In [29]:
# Question split: Transformer tagger/classifier.
# The input width and both output vocabulary sizes are read from the
# train+val dataset instead of being hard-coded; the remaining entries are
# fixed architecture settings.
question_transformer_config = dict(
    input_dim=question_context_train_val_dataset.get_vector_dim(),
    hidden_dim=256,
    num_heads=8,
    num_layers=2,
    tag_vocab_size=question_context_train_val_dataset.get_tag_vocab_size(),
    sql_vocab_size=question_context_train_val_dataset.get_sql_vocab_size(),
)
question_transformer_model = TransformerTaggerClassifier(**question_transformer_config)

# Training hyperparameters, kept together so the run is easy to audit.
# NOTE(review): gamma presumably is the learning-rate decay factor (the logged
# learning rate shrinks by 0.95x per epoch once decay starts) - confirm in
# train_model.
question_transformer_train_args = {
    "weight_decay": 0.1,
    "epochs": 150,
    "amsgrad": True,
    "gamma": 0.95,
    "lr": 1e-3,
}

# Train on the combined train+val loader and keep the model train_model returns.
question_trained_transformer_model = train_model(
    model=question_transformer_model,
    train_loader=question_context_train_val_loader,
    **question_transformer_train_args,
)

Epoch 1/150: 100%|██████████| 152/152 [00:02<00:00, 70.60it/s]


Epoch 1/150, Training Loss: 4.9607, Learning Rate: 0.001


Epoch 2/150: 100%|██████████| 152/152 [00:02<00:00, 64.99it/s]


Epoch 2/150, Training Loss: 3.3980, Learning Rate: 0.001


Epoch 3/150: 100%|██████████| 152/152 [00:01<00:00, 78.32it/s]


Epoch 3/150, Training Loss: 2.6928, Learning Rate: 0.001


Epoch 4/150: 100%|██████████| 152/152 [00:01<00:00, 84.19it/s]


Epoch 4/150, Training Loss: 2.2788, Learning Rate: 0.001


Epoch 5/150: 100%|██████████| 152/152 [00:01<00:00, 83.47it/s]


Epoch 5/150, Training Loss: 1.9487, Learning Rate: 0.001


Epoch 6/150: 100%|██████████| 152/152 [00:01<00:00, 83.95it/s]


Epoch 6/150, Training Loss: 1.6468, Learning Rate: 0.001


Epoch 7/150: 100%|██████████| 152/152 [00:01<00:00, 82.69it/s]


Epoch 7/150, Training Loss: 1.4763, Learning Rate: 0.001


Epoch 8/150: 100%|██████████| 152/152 [00:02<00:00, 73.13it/s]


Epoch 8/150, Training Loss: 1.2858, Learning Rate: 0.001


Epoch 9/150: 100%|██████████| 152/152 [00:02<00:00, 67.41it/s]


Epoch 9/150, Training Loss: 1.2585, Learning Rate: 0.001


Epoch 10/150: 100%|██████████| 152/152 [00:01<00:00, 84.30it/s]


Epoch 10/150, Training Loss: 1.0558, Learning Rate: 0.001


Epoch 11/150: 100%|██████████| 152/152 [00:01<00:00, 83.58it/s]


Epoch 11/150, Training Loss: 0.8664, Learning Rate: 0.00095


Epoch 12/150: 100%|██████████| 152/152 [00:01<00:00, 83.06it/s]


Epoch 12/150, Training Loss: 0.7316, Learning Rate: 0.0009025


Epoch 13/150: 100%|██████████| 152/152 [00:01<00:00, 83.66it/s]


Epoch 13/150, Training Loss: 0.7160, Learning Rate: 0.000857375


Epoch 14/150: 100%|██████████| 152/152 [00:01<00:00, 84.09it/s]


Epoch 14/150, Training Loss: 0.5208, Learning Rate: 0.0008145062499999999


Epoch 15/150: 100%|██████████| 152/152 [00:02<00:00, 62.56it/s]


Epoch 15/150, Training Loss: 0.4187, Learning Rate: 0.0007737809374999998


Epoch 16/150: 100%|██████████| 152/152 [00:01<00:00, 83.89it/s]


Epoch 16/150, Training Loss: 0.3620, Learning Rate: 0.0007350918906249997


Epoch 17/150: 100%|██████████| 152/152 [00:01<00:00, 85.36it/s]


Epoch 17/150, Training Loss: 0.3505, Learning Rate: 0.0006983372960937497


Epoch 18/150: 100%|██████████| 152/152 [00:01<00:00, 84.99it/s]


Epoch 18/150, Training Loss: 0.3873, Learning Rate: 0.0006634204312890621


Epoch 19/150: 100%|██████████| 152/152 [00:01<00:00, 84.36it/s]


Epoch 19/150, Training Loss: 0.3025, Learning Rate: 0.000630249409724609


Epoch 20/150: 100%|██████████| 152/152 [00:01<00:00, 84.24it/s]


Epoch 20/150, Training Loss: 0.2638, Learning Rate: 0.0005987369392383785


Epoch 21/150: 100%|██████████| 152/152 [00:02<00:00, 69.28it/s]


Epoch 21/150, Training Loss: 0.2641, Learning Rate: 0.0005688000922764595


Epoch 22/150: 100%|██████████| 152/152 [00:02<00:00, 73.27it/s]


Epoch 22/150, Training Loss: 0.2206, Learning Rate: 0.0005403600876626365


Epoch 23/150: 100%|██████████| 152/152 [00:01<00:00, 80.43it/s]


Epoch 23/150, Training Loss: 0.1869, Learning Rate: 0.0005133420832795047


Epoch 24/150: 100%|██████████| 152/152 [00:01<00:00, 84.63it/s]


Epoch 24/150, Training Loss: 0.1706, Learning Rate: 0.00048767497911552944


Epoch 25/150: 100%|██████████| 152/152 [00:01<00:00, 82.78it/s]


Epoch 25/150, Training Loss: 0.1667, Learning Rate: 0.00046329123015975297


Epoch 26/150: 100%|██████████| 152/152 [00:01<00:00, 83.92it/s]


Epoch 26/150, Training Loss: 0.1557, Learning Rate: 0.0004401266686517653


Epoch 27/150: 100%|██████████| 152/152 [00:02<00:00, 75.96it/s]


Epoch 27/150, Training Loss: 0.1418, Learning Rate: 0.00041812033521917703


Epoch 28/150: 100%|██████████| 152/152 [00:02<00:00, 68.06it/s]


Epoch 28/150, Training Loss: 0.1312, Learning Rate: 0.00039721431845821814


Epoch 29/150: 100%|██████████| 152/152 [00:01<00:00, 83.38it/s]


Epoch 29/150, Training Loss: 0.1234, Learning Rate: 0.0003773536025353072


Epoch 30/150: 100%|██████████| 152/152 [00:01<00:00, 83.88it/s]


Epoch 30/150, Training Loss: 0.1272, Learning Rate: 0.0003584859224085418


Epoch 31/150: 100%|██████████| 152/152 [00:01<00:00, 84.24it/s]


Epoch 31/150, Training Loss: 0.1123, Learning Rate: 0.0003405616262881147


Epoch 32/150: 100%|██████████| 152/152 [00:01<00:00, 84.16it/s]


Epoch 32/150, Training Loss: 0.1167, Learning Rate: 0.00032353354497370894


Epoch 33/150: 100%|██████████| 152/152 [00:01<00:00, 83.98it/s]


Epoch 33/150, Training Loss: 0.1142, Learning Rate: 0.00030735686772502346


Epoch 34/150: 100%|██████████| 152/152 [00:02<00:00, 63.56it/s]


Epoch 34/150, Training Loss: 0.1091, Learning Rate: 0.00029198902433877225


Epoch 35/150: 100%|██████████| 152/152 [00:01<00:00, 82.20it/s]


Epoch 35/150, Training Loss: 0.1062, Learning Rate: 0.00027738957312183364


Epoch 36/150: 100%|██████████| 152/152 [00:01<00:00, 82.51it/s]


Epoch 36/150, Training Loss: 0.1057, Learning Rate: 0.0002635200944657419


Epoch 37/150: 100%|██████████| 152/152 [00:01<00:00, 82.26it/s]


Epoch 37/150, Training Loss: 0.0947, Learning Rate: 0.0002503440897424548


Epoch 38/150: 100%|██████████| 152/152 [00:01<00:00, 81.48it/s]


Epoch 38/150, Training Loss: 0.0922, Learning Rate: 0.00023782688525533205


Epoch 39/150: 100%|██████████| 152/152 [00:01<00:00, 83.65it/s]


Epoch 39/150, Training Loss: 0.0912, Learning Rate: 0.00022593554099256544


Epoch 40/150: 100%|██████████| 152/152 [00:02<00:00, 72.28it/s]


Epoch 40/150, Training Loss: 0.0904, Learning Rate: 0.00021463876394293716


Epoch 41/150: 100%|██████████| 152/152 [00:02<00:00, 69.10it/s]


Epoch 41/150, Training Loss: 0.0856, Learning Rate: 0.0002039068257457903


Epoch 42/150: 100%|██████████| 152/152 [00:01<00:00, 82.84it/s]


Epoch 42/150, Training Loss: 0.0895, Learning Rate: 0.00019371148445850077


Epoch 43/150: 100%|██████████| 152/152 [00:01<00:00, 84.13it/s]


Epoch 43/150, Training Loss: 0.1002, Learning Rate: 0.00018402591023557573


Epoch 44/150: 100%|██████████| 152/152 [00:01<00:00, 80.89it/s]


Epoch 44/150, Training Loss: 0.0876, Learning Rate: 0.00017482461472379692


Epoch 45/150: 100%|██████████| 152/152 [00:01<00:00, 83.31it/s]


Epoch 45/150, Training Loss: 0.0828, Learning Rate: 0.00016608338398760707


Epoch 46/150: 100%|██████████| 152/152 [00:01<00:00, 79.06it/s]


Epoch 46/150, Training Loss: 0.0856, Learning Rate: 0.0001577792147882267


Epoch 47/150: 100%|██████████| 152/152 [00:02<00:00, 66.34it/s]


Epoch 47/150, Training Loss: 0.0832, Learning Rate: 0.00014989025404881537


Epoch 48/150: 100%|██████████| 152/152 [00:01<00:00, 83.56it/s]


Epoch 48/150, Training Loss: 0.0795, Learning Rate: 0.00014239574134637458


Epoch 49/150: 100%|██████████| 152/152 [00:01<00:00, 83.77it/s]


Epoch 49/150, Training Loss: 0.0821, Learning Rate: 0.00013527595427905584


Epoch 50/150: 100%|██████████| 152/152 [00:01<00:00, 83.58it/s]


Epoch 50/150, Training Loss: 0.0804, Learning Rate: 0.00012851215656510304


Epoch 51/150: 100%|██████████| 152/152 [00:01<00:00, 83.62it/s]


Epoch 51/150, Training Loss: 0.0822, Learning Rate: 0.00012208654873684788


Epoch 52/150: 100%|██████████| 152/152 [00:01<00:00, 83.46it/s]


Epoch 52/150, Training Loss: 0.0807, Learning Rate: 0.00011598222130000548


Epoch 53/150: 100%|██████████| 152/152 [00:02<00:00, 64.35it/s]


Epoch 53/150, Training Loss: 0.0748, Learning Rate: 0.00011018311023500519


Epoch 54/150: 100%|██████████| 152/152 [00:01<00:00, 79.96it/s]


Epoch 54/150, Training Loss: 0.0744, Learning Rate: 0.00010467395472325493


Epoch 55/150: 100%|██████████| 152/152 [00:01<00:00, 83.67it/s]


Epoch 55/150, Training Loss: 0.0756, Learning Rate: 9.944025698709218e-05


Epoch 56/150: 100%|██████████| 152/152 [00:01<00:00, 83.65it/s]


Epoch 56/150, Training Loss: 0.0724, Learning Rate: 9.446824413773756e-05


Epoch 57/150: 100%|██████████| 152/152 [00:01<00:00, 83.37it/s]


Epoch 57/150, Training Loss: 0.0755, Learning Rate: 8.974483193085068e-05


Epoch 58/150: 100%|██████████| 152/152 [00:01<00:00, 83.62it/s]


Epoch 58/150, Training Loss: 0.0722, Learning Rate: 8.525759033430814e-05


Epoch 59/150: 100%|██████████| 152/152 [00:02<00:00, 70.51it/s]


Epoch 59/150, Training Loss: 0.0716, Learning Rate: 8.099471081759274e-05


Epoch 60/150: 100%|██████████| 152/152 [00:02<00:00, 70.89it/s]


Epoch 60/150, Training Loss: 0.0718, Learning Rate: 7.69449752767131e-05


Epoch 61/150: 100%|██████████| 152/152 [00:01<00:00, 84.61it/s]


Epoch 61/150, Training Loss: 0.0709, Learning Rate: 7.309772651287744e-05


Epoch 62/150: 100%|██████████| 152/152 [00:01<00:00, 83.36it/s]


Epoch 62/150, Training Loss: 0.0680, Learning Rate: 6.944284018723356e-05


Epoch 63/150: 100%|██████████| 152/152 [00:01<00:00, 82.53it/s]


Epoch 63/150, Training Loss: 0.0703, Learning Rate: 6.597069817787189e-05


Epoch 64/150: 100%|██████████| 152/152 [00:01<00:00, 81.22it/s]


Epoch 64/150, Training Loss: 0.0653, Learning Rate: 6.267216326897829e-05


Epoch 65/150: 100%|██████████| 152/152 [00:01<00:00, 80.39it/s]


Epoch 65/150, Training Loss: 0.0689, Learning Rate: 5.953855510552937e-05


Epoch 66/150: 100%|██████████| 152/152 [00:02<00:00, 64.56it/s]


Epoch 66/150, Training Loss: 0.0768, Learning Rate: 5.65616273502529e-05


Epoch 67/150: 100%|██████████| 152/152 [00:01<00:00, 83.24it/s]


Epoch 67/150, Training Loss: 0.0756, Learning Rate: 5.373354598274025e-05


Epoch 68/150: 100%|██████████| 152/152 [00:01<00:00, 83.96it/s]


Epoch 68/150, Training Loss: 0.0673, Learning Rate: 5.104686868360323e-05


Epoch 69/150: 100%|██████████| 152/152 [00:01<00:00, 83.63it/s]


Epoch 69/150, Training Loss: 0.0657, Learning Rate: 4.849452524942307e-05


Epoch 70/150: 100%|██████████| 152/152 [00:01<00:00, 82.46it/s]


Epoch 70/150, Training Loss: 0.0677, Learning Rate: 4.606979898695191e-05


Epoch 71/150: 100%|██████████| 152/152 [00:01<00:00, 83.05it/s]


Epoch 71/150, Training Loss: 0.0645, Learning Rate: 4.376630903760431e-05


Epoch 72/150: 100%|██████████| 152/152 [00:02<00:00, 66.02it/s]


Epoch 72/150, Training Loss: 0.0637, Learning Rate: 4.157799358572409e-05


Epoch 73/150: 100%|██████████| 152/152 [00:01<00:00, 77.94it/s]


Epoch 73/150, Training Loss: 0.0651, Learning Rate: 3.9499093906437885e-05


Epoch 74/150: 100%|██████████| 152/152 [00:01<00:00, 84.12it/s]


Epoch 74/150, Training Loss: 0.0681, Learning Rate: 3.752413921111599e-05


Epoch 75/150: 100%|██████████| 152/152 [00:01<00:00, 83.93it/s]


Epoch 75/150, Training Loss: 0.0677, Learning Rate: 3.564793225056019e-05


Epoch 76/150: 100%|██████████| 152/152 [00:01<00:00, 84.68it/s]


Epoch 76/150, Training Loss: 0.0667, Learning Rate: 3.3865535638032174e-05


Epoch 77/150: 100%|██████████| 152/152 [00:01<00:00, 83.82it/s]


Epoch 77/150, Training Loss: 0.0655, Learning Rate: 3.2172258856130564e-05


Epoch 78/150: 100%|██████████| 152/152 [00:02<00:00, 72.89it/s]


Epoch 78/150, Training Loss: 0.0647, Learning Rate: 3.056364591332403e-05


Epoch 79/150: 100%|██████████| 152/152 [00:02<00:00, 68.39it/s]


Epoch 79/150, Training Loss: 0.0661, Learning Rate: 2.903546361765783e-05


Epoch 80/150: 100%|██████████| 152/152 [00:01<00:00, 82.64it/s]


Epoch 80/150, Training Loss: 0.0637, Learning Rate: 2.758369043677494e-05


Epoch 81/150: 100%|██████████| 152/152 [00:02<00:00, 71.95it/s]


Epoch 81/150, Training Loss: 0.0624, Learning Rate: 2.620450591493619e-05


Epoch 82/150: 100%|██████████| 152/152 [00:01<00:00, 84.08it/s]


Epoch 82/150, Training Loss: 0.0656, Learning Rate: 2.489428061918938e-05


Epoch 83/150: 100%|██████████| 152/152 [00:01<00:00, 83.31it/s]


Epoch 83/150, Training Loss: 0.0679, Learning Rate: 2.364956658822991e-05


Epoch 84/150: 100%|██████████| 152/152 [00:01<00:00, 77.55it/s]


Epoch 84/150, Training Loss: 0.0644, Learning Rate: 2.2467088258818413e-05


Epoch 85/150: 100%|██████████| 152/152 [00:02<00:00, 64.40it/s]


Epoch 85/150, Training Loss: 0.0629, Learning Rate: 2.134373384587749e-05


Epoch 86/150: 100%|██████████| 152/152 [00:01<00:00, 82.48it/s]


Epoch 86/150, Training Loss: 0.0663, Learning Rate: 2.0276547153583614e-05


Epoch 87/150: 100%|██████████| 152/152 [00:01<00:00, 83.13it/s]


Epoch 87/150, Training Loss: 0.0648, Learning Rate: 1.9262719795904432e-05


Epoch 88/150: 100%|██████████| 152/152 [00:01<00:00, 84.42it/s]


Epoch 88/150, Training Loss: 0.0625, Learning Rate: 1.829958380610921e-05


Epoch 89/150: 100%|██████████| 152/152 [00:01<00:00, 82.50it/s]


Epoch 89/150, Training Loss: 0.0644, Learning Rate: 1.738460461580375e-05


Epoch 90/150: 100%|██████████| 152/152 [00:01<00:00, 82.92it/s]


Epoch 90/150, Training Loss: 0.0647, Learning Rate: 1.6515374385013564e-05


Epoch 91/150: 100%|██████████| 152/152 [00:02<00:00, 63.99it/s]


Epoch 91/150, Training Loss: 0.0632, Learning Rate: 1.5689605665762886e-05


Epoch 92/150: 100%|██████████| 152/152 [00:01<00:00, 79.59it/s]


Epoch 92/150, Training Loss: 0.0637, Learning Rate: 1.490512538247474e-05


Epoch 93/150: 100%|██████████| 152/152 [00:01<00:00, 83.21it/s]


Epoch 93/150, Training Loss: 0.0659, Learning Rate: 1.4159869113351003e-05


Epoch 94/150: 100%|██████████| 152/152 [00:01<00:00, 81.55it/s]


Epoch 94/150, Training Loss: 0.0614, Learning Rate: 1.3451875657683452e-05


Epoch 95/150: 100%|██████████| 152/152 [00:01<00:00, 83.07it/s]


Epoch 95/150, Training Loss: 0.0644, Learning Rate: 1.277928187479928e-05


Epoch 96/150: 100%|██████████| 152/152 [00:01<00:00, 83.75it/s]


Epoch 96/150, Training Loss: 0.0643, Learning Rate: 1.2140317781059316e-05


Epoch 97/150: 100%|██████████| 152/152 [00:02<00:00, 62.17it/s]


Epoch 97/150, Training Loss: 0.0629, Learning Rate: 1.153330189200635e-05


Epoch 98/150: 100%|██████████| 152/152 [00:02<00:00, 54.56it/s]


Epoch 98/150, Training Loss: 0.0639, Learning Rate: 1.0956636797406032e-05


Epoch 99/150: 100%|██████████| 152/152 [00:01<00:00, 83.67it/s]


Epoch 99/150, Training Loss: 0.0644, Learning Rate: 1.0408804957535729e-05


Epoch 100/150: 100%|██████████| 152/152 [00:01<00:00, 81.08it/s]


Epoch 100/150, Training Loss: 0.0623, Learning Rate: 9.888364709658941e-06


Epoch 101/150: 100%|██████████| 152/152 [00:01<00:00, 83.49it/s]


Epoch 101/150, Training Loss: 0.0607, Learning Rate: 9.393946474175994e-06


Epoch 102/150: 100%|██████████| 152/152 [00:01<00:00, 83.27it/s]


Epoch 102/150, Training Loss: 0.0619, Learning Rate: 8.924249150467194e-06


Epoch 103/150: 100%|██████████| 152/152 [00:01<00:00, 82.88it/s]


Epoch 103/150, Training Loss: 0.0611, Learning Rate: 8.478036692943835e-06


Epoch 104/150: 100%|██████████| 152/152 [00:02<00:00, 63.31it/s]


Epoch 104/150, Training Loss: 0.0626, Learning Rate: 8.054134858296643e-06


Epoch 105/150: 100%|██████████| 152/152 [00:01<00:00, 83.46it/s]


Epoch 105/150, Training Loss: 0.0623, Learning Rate: 7.65142811538181e-06


Epoch 106/150: 100%|██████████| 152/152 [00:01<00:00, 82.69it/s]


Epoch 106/150, Training Loss: 0.0663, Learning Rate: 7.26885670961272e-06


Epoch 107/150: 100%|██████████| 152/152 [00:01<00:00, 83.53it/s]


Epoch 107/150, Training Loss: 0.0631, Learning Rate: 6.905413874132084e-06


Epoch 108/150: 100%|██████████| 152/152 [00:01<00:00, 82.77it/s]


Epoch 108/150, Training Loss: 0.0654, Learning Rate: 6.5601431804254795e-06


Epoch 109/150: 100%|██████████| 152/152 [00:01<00:00, 82.55it/s]


Epoch 109/150, Training Loss: 0.0614, Learning Rate: 6.232136021404205e-06


Epoch 110/150: 100%|██████████| 152/152 [00:02<00:00, 69.70it/s]


Epoch 110/150, Training Loss: 0.0621, Learning Rate: 5.920529220333994e-06


Epoch 111/150: 100%|██████████| 152/152 [00:02<00:00, 72.78it/s]


Epoch 111/150, Training Loss: 0.0660, Learning Rate: 5.624502759317295e-06


Epoch 112/150: 100%|██████████| 152/152 [00:01<00:00, 83.76it/s]


Epoch 112/150, Training Loss: 0.0615, Learning Rate: 5.34327762135143e-06


Epoch 113/150: 100%|██████████| 152/152 [00:01<00:00, 82.86it/s]


Epoch 113/150, Training Loss: 0.0608, Learning Rate: 5.076113740283858e-06


Epoch 114/150: 100%|██████████| 152/152 [00:01<00:00, 83.77it/s]


Epoch 114/150, Training Loss: 0.0660, Learning Rate: 4.8223080532696655e-06


Epoch 115/150: 100%|██████████| 152/152 [00:01<00:00, 83.28it/s]


Epoch 115/150, Training Loss: 0.0601, Learning Rate: 4.581192650606182e-06


Epoch 116/150: 100%|██████████| 152/152 [00:02<00:00, 75.44it/s]


Epoch 116/150, Training Loss: 0.0612, Learning Rate: 4.3521330180758725e-06


Epoch 117/150: 100%|██████████| 152/152 [00:02<00:00, 65.58it/s]


Epoch 117/150, Training Loss: 0.0627, Learning Rate: 4.1345263671720786e-06


Epoch 118/150: 100%|██████████| 152/152 [00:01<00:00, 83.32it/s]


Epoch 118/150, Training Loss: 0.0597, Learning Rate: 3.927800048813474e-06


Epoch 119/150: 100%|██████████| 152/152 [00:01<00:00, 83.99it/s]


Epoch 119/150, Training Loss: 0.0618, Learning Rate: 3.7314100463728006e-06


Epoch 120/150: 100%|██████████| 152/152 [00:01<00:00, 84.10it/s]


Epoch 120/150, Training Loss: 0.0625, Learning Rate: 3.5448395440541604e-06


Epoch 121/150: 100%|██████████| 152/152 [00:01<00:00, 82.30it/s]


Epoch 121/150, Training Loss: 0.0612, Learning Rate: 3.3675975668514524e-06


Epoch 122/150: 100%|██████████| 152/152 [00:01<00:00, 82.72it/s]


Epoch 122/150, Training Loss: 0.0621, Learning Rate: 3.1992176885088796e-06


Epoch 123/150: 100%|██████████| 152/152 [00:02<00:00, 63.20it/s]


Epoch 123/150, Training Loss: 0.0609, Learning Rate: 3.0392568040834356e-06


Epoch 124/150: 100%|██████████| 152/152 [00:01<00:00, 81.47it/s]


Epoch 124/150, Training Loss: 0.0611, Learning Rate: 2.8872939638792635e-06


Epoch 125/150: 100%|██████████| 152/152 [00:01<00:00, 80.72it/s]


Epoch 125/150, Training Loss: 0.0642, Learning Rate: 2.7429292656853003e-06


Epoch 126/150: 100%|██████████| 152/152 [00:01<00:00, 82.61it/s]


Epoch 126/150, Training Loss: 0.0608, Learning Rate: 2.605782802401035e-06


Epoch 127/150: 100%|██████████| 152/152 [00:01<00:00, 83.59it/s]


Epoch 127/150, Training Loss: 0.0607, Learning Rate: 2.475493662280983e-06


Epoch 128/150: 100%|██████████| 152/152 [00:01<00:00, 82.72it/s]


Epoch 128/150, Training Loss: 0.0650, Learning Rate: 2.351718979166934e-06


Epoch 129/150: 100%|██████████| 152/152 [00:02<00:00, 69.43it/s]


Epoch 129/150, Training Loss: 0.0645, Learning Rate: 2.234133030208587e-06


Epoch 130/150: 100%|██████████| 152/152 [00:02<00:00, 71.45it/s]


Epoch 130/150, Training Loss: 0.0615, Learning Rate: 2.1224263786981576e-06


Epoch 131/150: 100%|██████████| 152/152 [00:01<00:00, 81.36it/s]


Epoch 131/150, Training Loss: 0.0609, Learning Rate: 2.0163050597632494e-06


Epoch 132/150: 100%|██████████| 152/152 [00:01<00:00, 82.13it/s]


Epoch 132/150, Training Loss: 0.0604, Learning Rate: 1.915489806775087e-06


Epoch 133/150: 100%|██████████| 152/152 [00:01<00:00, 77.17it/s]


Epoch 133/150, Training Loss: 0.0630, Learning Rate: 1.8197153164363325e-06


Epoch 134/150: 100%|██████████| 152/152 [00:01<00:00, 83.98it/s]


Epoch 134/150, Training Loss: 0.0645, Learning Rate: 1.7287295506145157e-06


Epoch 135/150: 100%|██████████| 152/152 [00:02<00:00, 73.54it/s]


Epoch 135/150, Training Loss: 0.0603, Learning Rate: 1.6422930730837899e-06


Epoch 136/150: 100%|██████████| 152/152 [00:02<00:00, 66.69it/s]


Epoch 136/150, Training Loss: 0.0615, Learning Rate: 1.5601784194296004e-06


Epoch 137/150: 100%|██████████| 152/152 [00:01<00:00, 82.02it/s]


Epoch 137/150, Training Loss: 0.0637, Learning Rate: 1.4821694984581202e-06


Epoch 138/150: 100%|██████████| 152/152 [00:01<00:00, 82.87it/s]


Epoch 138/150, Training Loss: 0.0641, Learning Rate: 1.4080610235352142e-06


Epoch 139/150: 100%|██████████| 152/152 [00:01<00:00, 82.40it/s]


Epoch 139/150, Training Loss: 0.0644, Learning Rate: 1.3376579723584535e-06


Epoch 140/150: 100%|██████████| 152/152 [00:01<00:00, 81.59it/s]


Epoch 140/150, Training Loss: 0.0654, Learning Rate: 1.2707750737405307e-06


Epoch 141/150: 100%|██████████| 152/152 [00:01<00:00, 81.89it/s]


Epoch 141/150, Training Loss: 0.0600, Learning Rate: 1.2072363200535042e-06


Epoch 142/150: 100%|██████████| 152/152 [00:02<00:00, 63.18it/s]


Epoch 142/150, Training Loss: 0.0647, Learning Rate: 1.146874504050829e-06


Epoch 143/150: 100%|██████████| 152/152 [00:01<00:00, 82.20it/s]


Epoch 143/150, Training Loss: 0.0654, Learning Rate: 1.0895307788482876e-06


Epoch 144/150: 100%|██████████| 152/152 [00:01<00:00, 82.54it/s]


Epoch 144/150, Training Loss: 0.0633, Learning Rate: 1.0350542399058731e-06


Epoch 145/150: 100%|██████████| 152/152 [00:01<00:00, 82.87it/s]


Epoch 145/150, Training Loss: 0.0636, Learning Rate: 9.833015279105794e-07


Epoch 146/150: 100%|██████████| 152/152 [00:01<00:00, 81.96it/s]


Epoch 146/150, Training Loss: 0.0616, Learning Rate: 9.341364515150503e-07


Epoch 147/150: 100%|██████████| 152/152 [00:01<00:00, 81.14it/s]


Epoch 147/150, Training Loss: 0.0638, Learning Rate: 8.874296289392978e-07


Epoch 148/150: 100%|██████████| 152/152 [00:02<00:00, 65.62it/s]


Epoch 148/150, Training Loss: 0.0595, Learning Rate: 8.430581474923329e-07


Epoch 149/150: 100%|██████████| 152/152 [00:01<00:00, 76.50it/s]


Epoch 149/150, Training Loss: 0.0621, Learning Rate: 8.009052401177162e-07


Epoch 150/150: 100%|██████████| 152/152 [00:01<00:00, 83.22it/s]

Epoch 150/150, Training Loss: 0.0599, Learning Rate: 7.608599781118303e-07





In [30]:
# Score the trained Transformer on the question-split test set.
# The pipeline is built over the contextual-embedding test dataset, and
# batch_predict hands back the per-example results plus the overall accuracy.
question_transformer_sql_pipe = SQLPipeline(
    dataset=question_context_test_dataset,
    model=question_trained_transformer_model,
    embed_type="context",
)
(
    question_transformer_results,
    question_transformer_acc,
) = question_transformer_sql_pipe.batch_predict(
    dataloader=question_context_test_loader
)
print(f"\nAccuracy of Transformer model on question split test set: {question_transformer_acc:.2%}")

  output = torch._nested_tensor_from_mask(
Generating SQL queries: 100%|██████████| 14/14 [00:00<00:00, 14.65it/s]


Accuracy of Transformer model on question split test set: 36.24%





In [31]:
# Query split: build the Transformer tagger/classifier. Its input width and
# the two output vocabulary sizes are read from
# query_context_train_val_dataset; the architecture sizes are fixed here.
query_transformer_model = TransformerTaggerClassifier(
    input_dim=query_context_train_val_dataset.get_vector_dim(),
    tag_vocab_size=query_context_train_val_dataset.get_tag_vocab_size(),
    sql_vocab_size=query_context_train_val_dataset.get_sql_vocab_size(),
    hidden_dim=256,
    num_heads=8,
    num_layers=2,
)

# Train for 150 epochs starting at lr=1e-3. Judging by the logged learning
# rates, gamma=0.95 is the per-epoch decay factor applied from epoch 11 on.
query_trained_transformer_model = train_model(
    model=query_transformer_model,
    train_loader=query_context_train_val_loader,
    epochs=150,
    lr=1e-3,
    gamma=0.95,
    weight_decay=0.1,
    amsgrad=True,
)

Epoch 1/150: 100%|██████████| 155/155 [00:01<00:00, 82.41it/s]


Epoch 1/150, Training Loss: 4.8744, Learning Rate: 0.001


Epoch 2/150: 100%|██████████| 155/155 [00:01<00:00, 83.02it/s]


Epoch 2/150, Training Loss: 3.2214, Learning Rate: 0.001


Epoch 3/150: 100%|██████████| 155/155 [00:01<00:00, 83.69it/s]


Epoch 3/150, Training Loss: 2.6058, Learning Rate: 0.001


Epoch 4/150: 100%|██████████| 155/155 [00:02<00:00, 62.84it/s]


Epoch 4/150, Training Loss: 2.2006, Learning Rate: 0.001


Epoch 5/150: 100%|██████████| 155/155 [00:01<00:00, 82.77it/s]


Epoch 5/150, Training Loss: 1.8574, Learning Rate: 0.001


Epoch 6/150: 100%|██████████| 155/155 [00:01<00:00, 84.85it/s]


Epoch 6/150, Training Loss: 1.6506, Learning Rate: 0.001


Epoch 7/150: 100%|██████████| 155/155 [00:01<00:00, 83.89it/s]


Epoch 7/150, Training Loss: 1.3916, Learning Rate: 0.001


Epoch 8/150: 100%|██████████| 155/155 [00:01<00:00, 82.96it/s]


Epoch 8/150, Training Loss: 1.2388, Learning Rate: 0.001


Epoch 9/150: 100%|██████████| 155/155 [00:01<00:00, 84.64it/s]


Epoch 9/150, Training Loss: 1.0843, Learning Rate: 0.001


Epoch 10/150: 100%|██████████| 155/155 [00:02<00:00, 63.35it/s]


Epoch 10/150, Training Loss: 0.9514, Learning Rate: 0.001


Epoch 11/150: 100%|██████████| 155/155 [00:01<00:00, 79.09it/s]


Epoch 11/150, Training Loss: 0.8619, Learning Rate: 0.00095


Epoch 12/150: 100%|██████████| 155/155 [00:01<00:00, 84.89it/s]


Epoch 12/150, Training Loss: 0.7469, Learning Rate: 0.0009025


Epoch 13/150: 100%|██████████| 155/155 [00:01<00:00, 84.53it/s]


Epoch 13/150, Training Loss: 0.6112, Learning Rate: 0.000857375


Epoch 14/150: 100%|██████████| 155/155 [00:01<00:00, 84.54it/s]


Epoch 14/150, Training Loss: 0.4821, Learning Rate: 0.0008145062499999999


Epoch 15/150: 100%|██████████| 155/155 [00:01<00:00, 83.90it/s]


Epoch 15/150, Training Loss: 0.4460, Learning Rate: 0.0007737809374999998


Epoch 16/150: 100%|██████████| 155/155 [00:02<00:00, 72.54it/s]


Epoch 16/150, Training Loss: 0.3721, Learning Rate: 0.0007350918906249997


Epoch 17/150: 100%|██████████| 155/155 [00:02<00:00, 72.48it/s]


Epoch 17/150, Training Loss: 0.3362, Learning Rate: 0.0006983372960937497


Epoch 18/150: 100%|██████████| 155/155 [00:01<00:00, 83.14it/s]


Epoch 18/150, Training Loss: 0.2979, Learning Rate: 0.0006634204312890621


Epoch 19/150: 100%|██████████| 155/155 [00:01<00:00, 85.81it/s]


Epoch 19/150, Training Loss: 0.2492, Learning Rate: 0.000630249409724609


Epoch 20/150: 100%|██████████| 155/155 [00:01<00:00, 82.94it/s]


Epoch 20/150, Training Loss: 0.2218, Learning Rate: 0.0005987369392383785


Epoch 21/150: 100%|██████████| 155/155 [00:01<00:00, 83.73it/s]


Epoch 21/150, Training Loss: 0.2043, Learning Rate: 0.0005688000922764595


Epoch 22/150: 100%|██████████| 155/155 [00:02<00:00, 76.62it/s]


Epoch 22/150, Training Loss: 0.1951, Learning Rate: 0.0005403600876626365


Epoch 23/150: 100%|██████████| 155/155 [00:02<00:00, 66.54it/s]


Epoch 23/150, Training Loss: 0.1865, Learning Rate: 0.0005133420832795047


Epoch 24/150: 100%|██████████| 155/155 [00:01<00:00, 83.35it/s]


Epoch 24/150, Training Loss: 0.1606, Learning Rate: 0.00048767497911552944


Epoch 25/150: 100%|██████████| 155/155 [00:01<00:00, 83.61it/s]


Epoch 25/150, Training Loss: 0.1562, Learning Rate: 0.00046329123015975297


Epoch 26/150: 100%|██████████| 155/155 [00:01<00:00, 84.96it/s]


Epoch 26/150, Training Loss: 0.1490, Learning Rate: 0.0004401266686517653


Epoch 27/150: 100%|██████████| 155/155 [00:01<00:00, 83.33it/s]


Epoch 27/150, Training Loss: 0.1343, Learning Rate: 0.00041812033521917703


Epoch 28/150: 100%|██████████| 155/155 [00:01<00:00, 83.80it/s]


Epoch 28/150, Training Loss: 0.1311, Learning Rate: 0.00039721431845821814


Epoch 29/150: 100%|██████████| 155/155 [00:02<00:00, 61.64it/s]


Epoch 29/150, Training Loss: 0.1241, Learning Rate: 0.0003773536025353072


Epoch 30/150: 100%|██████████| 155/155 [00:01<00:00, 83.91it/s]


Epoch 30/150, Training Loss: 0.1186, Learning Rate: 0.0003584859224085418


Epoch 31/150: 100%|██████████| 155/155 [00:01<00:00, 84.32it/s]


Epoch 31/150, Training Loss: 0.1078, Learning Rate: 0.0003405616262881147


Epoch 32/150: 100%|██████████| 155/155 [00:01<00:00, 83.66it/s]


Epoch 32/150, Training Loss: 0.1109, Learning Rate: 0.00032353354497370894


Epoch 33/150: 100%|██████████| 155/155 [00:01<00:00, 84.57it/s]


Epoch 33/150, Training Loss: 0.1029, Learning Rate: 0.00030735686772502346


Epoch 34/150: 100%|██████████| 155/155 [00:01<00:00, 84.21it/s]


Epoch 34/150, Training Loss: 0.0974, Learning Rate: 0.00029198902433877225


Epoch 35/150: 100%|██████████| 155/155 [00:02<00:00, 66.55it/s]


Epoch 35/150, Training Loss: 0.0967, Learning Rate: 0.00027738957312183364


Epoch 36/150: 100%|██████████| 155/155 [00:01<00:00, 77.79it/s]


Epoch 36/150, Training Loss: 0.0902, Learning Rate: 0.0002635200944657419


Epoch 37/150: 100%|██████████| 155/155 [00:01<00:00, 83.65it/s]


Epoch 37/150, Training Loss: 0.0881, Learning Rate: 0.0002503440897424548


Epoch 38/150: 100%|██████████| 155/155 [00:01<00:00, 83.75it/s]


Epoch 38/150, Training Loss: 0.0858, Learning Rate: 0.00023782688525533205


Epoch 39/150: 100%|██████████| 155/155 [00:01<00:00, 83.30it/s]


Epoch 39/150, Training Loss: 0.0845, Learning Rate: 0.00022593554099256544


Epoch 40/150: 100%|██████████| 155/155 [00:01<00:00, 83.93it/s]


Epoch 40/150, Training Loss: 0.0796, Learning Rate: 0.00021463876394293716


Epoch 41/150: 100%|██████████| 155/155 [00:02<00:00, 73.80it/s]


Epoch 41/150, Training Loss: 0.0819, Learning Rate: 0.0002039068257457903


Epoch 42/150: 100%|██████████| 155/155 [00:02<00:00, 72.66it/s]


Epoch 42/150, Training Loss: 0.0790, Learning Rate: 0.00019371148445850077


Epoch 43/150: 100%|██████████| 155/155 [00:01<00:00, 82.77it/s]


Epoch 43/150, Training Loss: 0.0781, Learning Rate: 0.00018402591023557573


Epoch 44/150: 100%|██████████| 155/155 [00:01<00:00, 84.88it/s]


Epoch 44/150, Training Loss: 0.0763, Learning Rate: 0.00017482461472379692


Epoch 45/150: 100%|██████████| 155/155 [00:01<00:00, 83.64it/s]


Epoch 45/150, Training Loss: 0.0764, Learning Rate: 0.00016608338398760707


Epoch 46/150: 100%|██████████| 155/155 [00:01<00:00, 83.49it/s]


Epoch 46/150, Training Loss: 0.0730, Learning Rate: 0.0001577792147882267


Epoch 47/150: 100%|██████████| 155/155 [00:02<00:00, 77.11it/s]


Epoch 47/150, Training Loss: 0.0657, Learning Rate: 0.00014989025404881537


Epoch 48/150: 100%|██████████| 155/155 [00:02<00:00, 67.30it/s]


Epoch 48/150, Training Loss: 0.0717, Learning Rate: 0.00014239574134637458


Epoch 49/150: 100%|██████████| 155/155 [00:01<00:00, 84.13it/s]


Epoch 49/150, Training Loss: 0.0696, Learning Rate: 0.00013527595427905584


Epoch 50/150: 100%|██████████| 155/155 [00:01<00:00, 84.43it/s]


Epoch 50/150, Training Loss: 0.0697, Learning Rate: 0.00012851215656510304


Epoch 51/150: 100%|██████████| 155/155 [00:01<00:00, 84.46it/s]


Epoch 51/150, Training Loss: 0.0697, Learning Rate: 0.00012208654873684788


Epoch 52/150: 100%|██████████| 155/155 [00:01<00:00, 83.05it/s]


Epoch 52/150, Training Loss: 0.0688, Learning Rate: 0.00011598222130000548


Epoch 53/150: 100%|██████████| 155/155 [00:01<00:00, 83.19it/s]


Epoch 53/150, Training Loss: 0.0700, Learning Rate: 0.00011018311023500519


Epoch 54/150: 100%|██████████| 155/155 [00:02<00:00, 62.34it/s]


Epoch 54/150, Training Loss: 0.0667, Learning Rate: 0.00010467395472325493


Epoch 55/150: 100%|██████████| 155/155 [00:01<00:00, 83.93it/s]


Epoch 55/150, Training Loss: 0.0643, Learning Rate: 9.944025698709218e-05


Epoch 56/150: 100%|██████████| 155/155 [00:01<00:00, 84.81it/s]


Epoch 56/150, Training Loss: 0.0648, Learning Rate: 9.446824413773756e-05


Epoch 57/150: 100%|██████████| 155/155 [00:01<00:00, 84.25it/s]


Epoch 57/150, Training Loss: 0.0634, Learning Rate: 8.974483193085068e-05


Epoch 58/150: 100%|██████████| 155/155 [00:01<00:00, 82.74it/s]


Epoch 58/150, Training Loss: 0.0636, Learning Rate: 8.525759033430814e-05


Epoch 59/150: 100%|██████████| 155/155 [00:01<00:00, 83.71it/s]


Epoch 59/150, Training Loss: 0.0626, Learning Rate: 8.099471081759274e-05


Epoch 60/150: 100%|██████████| 155/155 [00:02<00:00, 55.17it/s]


Epoch 60/150, Training Loss: 0.0606, Learning Rate: 7.69449752767131e-05


Epoch 61/150: 100%|██████████| 155/155 [00:02<00:00, 64.48it/s]


Epoch 61/150, Training Loss: 0.0607, Learning Rate: 7.309772651287744e-05


Epoch 62/150: 100%|██████████| 155/155 [00:01<00:00, 84.89it/s]


Epoch 62/150, Training Loss: 0.0634, Learning Rate: 6.944284018723356e-05


Epoch 63/150: 100%|██████████| 155/155 [00:01<00:00, 83.68it/s]


Epoch 63/150, Training Loss: 0.0638, Learning Rate: 6.597069817787189e-05


Epoch 64/150: 100%|██████████| 155/155 [00:01<00:00, 84.53it/s]


Epoch 64/150, Training Loss: 0.0637, Learning Rate: 6.267216326897829e-05


Epoch 65/150: 100%|██████████| 155/155 [00:01<00:00, 83.50it/s]


Epoch 65/150, Training Loss: 0.0592, Learning Rate: 5.953855510552937e-05


Epoch 66/150: 100%|██████████| 155/155 [00:01<00:00, 79.61it/s]


Epoch 66/150, Training Loss: 0.0586, Learning Rate: 5.65616273502529e-05


Epoch 67/150: 100%|██████████| 155/155 [00:02<00:00, 65.57it/s]


Epoch 67/150, Training Loss: 0.0633, Learning Rate: 5.373354598274025e-05


Epoch 68/150: 100%|██████████| 155/155 [00:01<00:00, 83.39it/s]


Epoch 68/150, Training Loss: 0.0593, Learning Rate: 5.104686868360323e-05


Epoch 69/150: 100%|██████████| 155/155 [00:01<00:00, 84.62it/s]


Epoch 69/150, Training Loss: 0.0604, Learning Rate: 4.849452524942307e-05


Epoch 70/150: 100%|██████████| 155/155 [00:01<00:00, 84.81it/s]


Epoch 70/150, Training Loss: 0.0592, Learning Rate: 4.606979898695191e-05


Epoch 71/150: 100%|██████████| 155/155 [00:01<00:00, 83.51it/s]


Epoch 71/150, Training Loss: 0.0584, Learning Rate: 4.376630903760431e-05


Epoch 72/150: 100%|██████████| 155/155 [00:01<00:00, 83.32it/s]


Epoch 72/150, Training Loss: 0.0594, Learning Rate: 4.157799358572409e-05


Epoch 73/150: 100%|██████████| 155/155 [00:02<00:00, 63.56it/s]


Epoch 73/150, Training Loss: 0.0593, Learning Rate: 3.9499093906437885e-05


Epoch 74/150: 100%|██████████| 155/155 [00:01<00:00, 80.90it/s]


Epoch 74/150, Training Loss: 0.0551, Learning Rate: 3.752413921111599e-05


Epoch 75/150: 100%|██████████| 155/155 [00:01<00:00, 83.99it/s]


Epoch 75/150, Training Loss: 0.0582, Learning Rate: 3.564793225056019e-05


Epoch 76/150: 100%|██████████| 155/155 [00:01<00:00, 82.88it/s]


Epoch 76/150, Training Loss: 0.0571, Learning Rate: 3.3865535638032174e-05


Epoch 77/150: 100%|██████████| 155/155 [00:01<00:00, 83.48it/s]


Epoch 77/150, Training Loss: 0.0589, Learning Rate: 3.2172258856130564e-05


Epoch 78/150: 100%|██████████| 155/155 [00:01<00:00, 83.47it/s]


Epoch 78/150, Training Loss: 0.0565, Learning Rate: 3.056364591332403e-05


Epoch 79/150: 100%|██████████| 155/155 [00:02<00:00, 71.13it/s]


Epoch 79/150, Training Loss: 0.0596, Learning Rate: 2.903546361765783e-05


Epoch 80/150: 100%|██████████| 155/155 [00:02<00:00, 71.93it/s]


Epoch 80/150, Training Loss: 0.0577, Learning Rate: 2.758369043677494e-05


Epoch 81/150: 100%|██████████| 155/155 [00:01<00:00, 82.50it/s]


Epoch 81/150, Training Loss: 0.0592, Learning Rate: 2.620450591493619e-05


Epoch 82/150: 100%|██████████| 155/155 [00:01<00:00, 81.55it/s]


Epoch 82/150, Training Loss: 0.0591, Learning Rate: 2.489428061918938e-05


Epoch 83/150: 100%|██████████| 155/155 [00:01<00:00, 83.67it/s]


Epoch 83/150, Training Loss: 0.0568, Learning Rate: 2.364956658822991e-05


Epoch 84/150: 100%|██████████| 155/155 [00:01<00:00, 84.30it/s]


Epoch 84/150, Training Loss: 0.0546, Learning Rate: 2.2467088258818413e-05


Epoch 85/150: 100%|██████████| 155/155 [00:02<00:00, 74.85it/s]


Epoch 85/150, Training Loss: 0.0602, Learning Rate: 2.134373384587749e-05


Epoch 86/150: 100%|██████████| 155/155 [00:02<00:00, 65.59it/s]


Epoch 86/150, Training Loss: 0.0582, Learning Rate: 2.0276547153583614e-05


Epoch 87/150: 100%|██████████| 155/155 [00:01<00:00, 82.37it/s]


Epoch 87/150, Training Loss: 0.0541, Learning Rate: 1.9262719795904432e-05


Epoch 88/150: 100%|██████████| 155/155 [00:01<00:00, 81.45it/s]


Epoch 88/150, Training Loss: 0.0564, Learning Rate: 1.829958380610921e-05


Epoch 89/150: 100%|██████████| 155/155 [00:01<00:00, 82.76it/s]


Epoch 89/150, Training Loss: 0.0570, Learning Rate: 1.738460461580375e-05


Epoch 90/150: 100%|██████████| 155/155 [00:01<00:00, 83.96it/s]


Epoch 90/150, Training Loss: 0.0592, Learning Rate: 1.6515374385013564e-05


Epoch 91/150: 100%|██████████| 155/155 [00:04<00:00, 35.03it/s]


Epoch 91/150, Training Loss: 0.0555, Learning Rate: 1.5689605665762886e-05


Epoch 92/150: 100%|██████████| 155/155 [00:01<00:00, 81.81it/s]


Epoch 92/150, Training Loss: 0.0571, Learning Rate: 1.490512538247474e-05


Epoch 93/150: 100%|██████████| 155/155 [00:01<00:00, 82.62it/s]


Epoch 93/150, Training Loss: 0.0564, Learning Rate: 1.4159869113351003e-05


Epoch 94/150: 100%|██████████| 155/155 [00:01<00:00, 82.55it/s]


Epoch 94/150, Training Loss: 0.0550, Learning Rate: 1.3451875657683452e-05


Epoch 95/150: 100%|██████████| 155/155 [00:01<00:00, 82.91it/s]


Epoch 95/150, Training Loss: 0.0549, Learning Rate: 1.277928187479928e-05


Epoch 96/150: 100%|██████████| 155/155 [00:01<00:00, 82.55it/s]


Epoch 96/150, Training Loss: 0.0568, Learning Rate: 1.2140317781059316e-05


Epoch 97/150: 100%|██████████| 155/155 [00:02<00:00, 67.49it/s]


Epoch 97/150, Training Loss: 0.0554, Learning Rate: 1.153330189200635e-05


Epoch 98/150: 100%|██████████| 155/155 [00:02<00:00, 76.29it/s]


Epoch 98/150, Training Loss: 0.0548, Learning Rate: 1.0956636797406032e-05


Epoch 99/150: 100%|██████████| 155/155 [00:01<00:00, 82.34it/s]


Epoch 99/150, Training Loss: 0.0566, Learning Rate: 1.0408804957535729e-05


Epoch 100/150: 100%|██████████| 155/155 [00:01<00:00, 82.76it/s]


Epoch 100/150, Training Loss: 0.0561, Learning Rate: 9.888364709658941e-06


Epoch 101/150: 100%|██████████| 155/155 [00:01<00:00, 81.64it/s]


Epoch 101/150, Training Loss: 0.0553, Learning Rate: 9.393946474175994e-06


Epoch 102/150: 100%|██████████| 155/155 [00:01<00:00, 81.52it/s]


Epoch 102/150, Training Loss: 0.0577, Learning Rate: 8.924249150467194e-06


Epoch 103/150: 100%|██████████| 155/155 [00:02<00:00, 66.16it/s]


Epoch 103/150, Training Loss: 0.0544, Learning Rate: 8.478036692943835e-06


Epoch 104/150: 100%|██████████| 155/155 [00:02<00:00, 72.43it/s]


Epoch 104/150, Training Loss: 0.0526, Learning Rate: 8.054134858296643e-06


Epoch 105/150: 100%|██████████| 155/155 [00:01<00:00, 82.47it/s]


Epoch 105/150, Training Loss: 0.0553, Learning Rate: 7.65142811538181e-06


Epoch 106/150: 100%|██████████| 155/155 [00:01<00:00, 82.36it/s]


Epoch 106/150, Training Loss: 0.0589, Learning Rate: 7.26885670961272e-06


Epoch 107/150: 100%|██████████| 155/155 [00:01<00:00, 83.05it/s]


Epoch 107/150, Training Loss: 0.0583, Learning Rate: 6.905413874132084e-06


Epoch 108/150: 100%|██████████| 155/155 [00:01<00:00, 81.78it/s]


Epoch 108/150, Training Loss: 0.0552, Learning Rate: 6.5601431804254795e-06


Epoch 109/150: 100%|██████████| 155/155 [00:02<00:00, 73.12it/s]


Epoch 109/150, Training Loss: 0.0542, Learning Rate: 6.232136021404205e-06


Epoch 110/150: 100%|██████████| 155/155 [00:02<00:00, 68.16it/s]


Epoch 110/150, Training Loss: 0.0536, Learning Rate: 5.920529220333994e-06


Epoch 111/150: 100%|██████████| 155/155 [00:01<00:00, 83.47it/s]


Epoch 111/150, Training Loss: 0.0561, Learning Rate: 5.624502759317295e-06


Epoch 112/150: 100%|██████████| 155/155 [00:01<00:00, 83.96it/s]


Epoch 112/150, Training Loss: 0.0547, Learning Rate: 5.34327762135143e-06


Epoch 113/150: 100%|██████████| 155/155 [00:01<00:00, 83.46it/s]


Epoch 113/150, Training Loss: 0.0558, Learning Rate: 5.076113740283858e-06


Epoch 114/150: 100%|██████████| 155/155 [00:01<00:00, 82.60it/s]


Epoch 114/150, Training Loss: 0.0557, Learning Rate: 4.8223080532696655e-06


Epoch 115/150: 100%|██████████| 155/155 [00:01<00:00, 78.79it/s]


Epoch 115/150, Training Loss: 0.0536, Learning Rate: 4.581192650606182e-06


Epoch 116/150: 100%|██████████| 155/155 [00:02<00:00, 65.06it/s]


Epoch 116/150, Training Loss: 0.0555, Learning Rate: 4.3521330180758725e-06


Epoch 117/150: 100%|██████████| 155/155 [00:01<00:00, 83.25it/s]


Epoch 117/150, Training Loss: 0.0544, Learning Rate: 4.1345263671720786e-06


Epoch 118/150: 100%|██████████| 155/155 [00:01<00:00, 82.29it/s]


Epoch 118/150, Training Loss: 0.0533, Learning Rate: 3.927800048813474e-06


Epoch 119/150: 100%|██████████| 155/155 [00:01<00:00, 82.74it/s]


Epoch 119/150, Training Loss: 0.0538, Learning Rate: 3.7314100463728006e-06


Epoch 120/150: 100%|██████████| 155/155 [00:01<00:00, 83.05it/s]


Epoch 120/150, Training Loss: 0.0526, Learning Rate: 3.5448395440541604e-06


Epoch 121/150: 100%|██████████| 155/155 [00:01<00:00, 82.46it/s]


Epoch 121/150, Training Loss: 0.0523, Learning Rate: 3.3675975668514524e-06


Epoch 122/150: 100%|██████████| 155/155 [00:02<00:00, 61.27it/s]


Epoch 122/150, Training Loss: 0.0553, Learning Rate: 3.1992176885088796e-06


Epoch 123/150: 100%|██████████| 155/155 [00:01<00:00, 83.32it/s]


Epoch 123/150, Training Loss: 0.0571, Learning Rate: 3.0392568040834356e-06


Epoch 124/150: 100%|██████████| 155/155 [00:01<00:00, 80.63it/s]


Epoch 124/150, Training Loss: 0.0555, Learning Rate: 2.8872939638792635e-06


Epoch 125/150: 100%|██████████| 155/155 [00:01<00:00, 83.20it/s]


Epoch 125/150, Training Loss: 0.0559, Learning Rate: 2.7429292656853003e-06


Epoch 126/150: 100%|██████████| 155/155 [00:01<00:00, 82.85it/s]


Epoch 126/150, Training Loss: 0.0528, Learning Rate: 2.605782802401035e-06


Epoch 127/150: 100%|██████████| 155/155 [00:01<00:00, 82.46it/s]


Epoch 127/150, Training Loss: 0.0551, Learning Rate: 2.475493662280983e-06


Epoch 128/150: 100%|██████████| 155/155 [00:02<00:00, 64.93it/s]


Epoch 128/150, Training Loss: 0.0540, Learning Rate: 2.351718979166934e-06


Epoch 129/150: 100%|██████████| 155/155 [00:01<00:00, 78.58it/s]


Epoch 129/150, Training Loss: 0.0556, Learning Rate: 2.234133030208587e-06


Epoch 130/150: 100%|██████████| 155/155 [00:01<00:00, 83.38it/s]


Epoch 130/150, Training Loss: 0.0546, Learning Rate: 2.1224263786981576e-06


Epoch 131/150: 100%|██████████| 155/155 [00:01<00:00, 83.43it/s]


Epoch 131/150, Training Loss: 0.0545, Learning Rate: 2.0163050597632494e-06


Epoch 132/150: 100%|██████████| 155/155 [00:01<00:00, 83.59it/s]


Epoch 132/150, Training Loss: 0.0554, Learning Rate: 1.915489806775087e-06


Epoch 133/150: 100%|██████████| 155/155 [00:01<00:00, 82.07it/s]


Epoch 133/150, Training Loss: 0.0551, Learning Rate: 1.8197153164363325e-06


Epoch 134/150: 100%|██████████| 155/155 [00:02<00:00, 70.13it/s]


Epoch 134/150, Training Loss: 0.0531, Learning Rate: 1.7287295506145157e-06


Epoch 135/150: 100%|██████████| 155/155 [00:02<00:00, 72.58it/s]


Epoch 135/150, Training Loss: 0.0546, Learning Rate: 1.6422930730837899e-06


Epoch 136/150: 100%|██████████| 155/155 [00:01<00:00, 82.95it/s]


Epoch 136/150, Training Loss: 0.0575, Learning Rate: 1.5601784194296004e-06


Epoch 137/150: 100%|██████████| 155/155 [00:01<00:00, 82.91it/s]


Epoch 137/150, Training Loss: 0.0547, Learning Rate: 1.4821694984581202e-06


Epoch 138/150: 100%|██████████| 155/155 [00:01<00:00, 82.65it/s]


Epoch 138/150, Training Loss: 0.0529, Learning Rate: 1.4080610235352142e-06


Epoch 139/150: 100%|██████████| 155/155 [00:01<00:00, 84.11it/s]


Epoch 139/150, Training Loss: 0.0565, Learning Rate: 1.3376579723584535e-06


Epoch 140/150: 100%|██████████| 155/155 [00:02<00:00, 74.55it/s]


Epoch 140/150, Training Loss: 0.0512, Learning Rate: 1.2707750737405307e-06


Epoch 141/150: 100%|██████████| 155/155 [00:02<00:00, 67.73it/s]


Epoch 141/150, Training Loss: 0.0548, Learning Rate: 1.2072363200535042e-06


Epoch 142/150: 100%|██████████| 155/155 [00:01<00:00, 84.03it/s]


Epoch 142/150, Training Loss: 0.0547, Learning Rate: 1.146874504050829e-06


Epoch 143/150: 100%|██████████| 155/155 [00:01<00:00, 83.55it/s]


Epoch 143/150, Training Loss: 0.0560, Learning Rate: 1.0895307788482876e-06


Epoch 144/150: 100%|██████████| 155/155 [00:01<00:00, 82.09it/s]


Epoch 144/150, Training Loss: 0.0528, Learning Rate: 1.0350542399058731e-06


Epoch 145/150: 100%|██████████| 155/155 [00:01<00:00, 82.56it/s]


Epoch 145/150, Training Loss: 0.0532, Learning Rate: 9.833015279105794e-07


Epoch 146/150: 100%|██████████| 155/155 [00:01<00:00, 79.76it/s]


Epoch 146/150, Training Loss: 0.0556, Learning Rate: 9.341364515150503e-07


Epoch 147/150: 100%|██████████| 155/155 [00:02<00:00, 64.68it/s]


Epoch 147/150, Training Loss: 0.0544, Learning Rate: 8.874296289392978e-07


Epoch 148/150: 100%|██████████| 155/155 [00:01<00:00, 82.20it/s]


Epoch 148/150, Training Loss: 0.0556, Learning Rate: 8.430581474923329e-07


Epoch 149/150: 100%|██████████| 155/155 [00:01<00:00, 82.80it/s]


Epoch 149/150, Training Loss: 0.0553, Learning Rate: 8.009052401177162e-07


Epoch 150/150: 100%|██████████| 155/155 [00:01<00:00, 81.21it/s]

Epoch 150/150, Training Loss: 0.0518, Learning Rate: 7.608599781118303e-07





In [188]:
# Evaluating the trained Transformer on the query-split test set to get accuracy.
# batch_predict returns the per-example results together with the accuracy.
query_transformer_sql_pipe = SQLPipeline(dataset=query_context_test_dataset,model=query_trained_transformer_model,embed_type="context")
query_transformer_results, query_transformer_acc = query_transformer_sql_pipe.batch_predict(dataloader=query_context_test_loader)
# Report the Transformer's own accuracy. This line previously formatted
# query_lstm_acc, so the printed figure did not belong to this model;
# re-run the cell to refresh the recorded output.
print(f"\nAccuracy of Transformer model on query split test set: {query_transformer_acc:.2%}")

Generating SQL queries: 100%|██████████| 11/11 [00:00<00:00, 11.89it/s]


Accuracy of Transformer model on query split test set: 0.00%





# Experiment

For our experiment, we wanted to look at how each of our 4 tagging and classification models performs when noise is introduced into the input text in the form of typos or rearranged words in the input sentence.

**Methodology**
* To do this, we took all 4 models and trained them using the static embeddings we used for our Linear models. The Linear models did not need re-training as they were already trained on the static embeddings. The reason for using static embeddings was to keep the training data consistent across all 4 models, and also to ensure that the models do not have access to contextual information in the input.
* We then evaluated all 4 models on the question split test set to record the queries which each model tagged correctly and the queries which each model classified correctly
* These queries were then incrementally altered by introducing typos and changing the order of the words
* All models were evaluated again on these modified queries and the results were compared against the unmodified version of these queries

In [162]:
# Score the Linear models on the question split test set (static embeddings);
# the per-example results are kept for the noise experiment below.
question_static_linear_sql_pipe = SQLPipeline(
    dataset=question_static_test_dataset,
    model=list(question_trained_linear_models),
    embed_type="static",
)
(
    question_static_linear_results,
    question_static_linear_acc,
) = question_static_linear_sql_pipe.batch_predict(
    dataloader=question_static_test_loader,
)

print(f"\nAccuracy of Linear model on question split test set: {question_static_linear_acc:.2%}")

Generating SQL queries: 100%|██████████| 14/14 [00:01<00:00, 13.87it/s]


Accuracy of Linear model on question split test set: 11.19%





In [109]:
# Training feedforward models using static embeddings from the question split training and validation set
# Two separate networks are built here (a tagger and a classifier) and handed together to train_dual_model.
# Tagger: input width is the dataset's vector dimension, configured with the dataset's tag vocabulary size.
question_static_ff_tag_model = FFTagger(input_dim=question_static_train_val_dataset.get_vector_dim(),
    tag_vocab_size=question_static_train_val_dataset.get_tag_vocab_size(),
    hidden_sizes=[512,256,128],
    dropout=0.2
  )
# Classifier: same input width, hidden sizes and dropout, configured with the dataset's SQL vocabulary size.
question_static_ff_classifier_model = FFClassifier(
    input_dim=question_static_train_val_dataset.get_vector_dim(),
    sql_vocab_size=question_static_train_val_dataset.get_sql_vocab_size(),
    hidden_sizes=[512,256,128],
    dropout=0.2
    )
# Per the training log printed below, the learning rate stays at 0.001 for the first
# 15 epochs (warmup_epochs) and is then multiplied by gamma=0.95 after each epoch.
# The returned value is later wrapped in list() when passed to SQLPipeline.
question_static_trained_ff_model = train_dual_model(
    tagger_model=question_static_ff_tag_model,
    classifier_model=question_static_ff_classifier_model,
    train_loader=question_static_train_val_loader,
    epochs=50,
    warmup_epochs=15,
    gamma=0.95
)

Epoch 1/50: 100%|██████████| 152/152 [00:03<00:00, 39.66it/s]


Epoch 1/50, Training Loss: 5.4730, Learning Rate: 0.001


Epoch 2/50: 100%|██████████| 152/152 [00:04<00:00, 34.57it/s]


Epoch 2/50, Training Loss: 4.4822, Learning Rate: 0.001


Epoch 3/50: 100%|██████████| 152/152 [00:04<00:00, 32.82it/s]


Epoch 3/50, Training Loss: 3.8196, Learning Rate: 0.001


Epoch 4/50: 100%|██████████| 152/152 [00:03<00:00, 40.49it/s]


Epoch 4/50, Training Loss: 3.4348, Learning Rate: 0.001


Epoch 5/50: 100%|██████████| 152/152 [00:04<00:00, 32.28it/s]


Epoch 5/50, Training Loss: 2.9856, Learning Rate: 0.001


Epoch 6/50: 100%|██████████| 152/152 [00:03<00:00, 40.60it/s]


Epoch 6/50, Training Loss: 2.7014, Learning Rate: 0.001


Epoch 7/50: 100%|██████████| 152/152 [00:03<00:00, 40.57it/s]


Epoch 7/50, Training Loss: 2.6351, Learning Rate: 0.001


Epoch 8/50: 100%|██████████| 152/152 [00:04<00:00, 32.46it/s]


Epoch 8/50, Training Loss: 2.3778, Learning Rate: 0.001


Epoch 9/50: 100%|██████████| 152/152 [00:03<00:00, 39.95it/s]


Epoch 9/50, Training Loss: 2.2089, Learning Rate: 0.001


Epoch 10/50: 100%|██████████| 152/152 [00:03<00:00, 40.04it/s]


Epoch 10/50, Training Loss: 1.9546, Learning Rate: 0.001


Epoch 11/50: 100%|██████████| 152/152 [00:04<00:00, 32.34it/s]


Epoch 11/50, Training Loss: 1.8202, Learning Rate: 0.001


Epoch 12/50: 100%|██████████| 152/152 [00:03<00:00, 40.33it/s]


Epoch 12/50, Training Loss: 1.6739, Learning Rate: 0.001


Epoch 13/50: 100%|██████████| 152/152 [00:03<00:00, 40.84it/s]


Epoch 13/50, Training Loss: 1.5908, Learning Rate: 0.001


Epoch 14/50: 100%|██████████| 152/152 [00:04<00:00, 32.03it/s]


Epoch 14/50, Training Loss: 1.5115, Learning Rate: 0.001


Epoch 15/50: 100%|██████████| 152/152 [00:03<00:00, 40.54it/s]


Epoch 15/50, Training Loss: 1.3902, Learning Rate: 0.001


Epoch 16/50: 100%|██████████| 152/152 [00:03<00:00, 40.52it/s]


Epoch 16/50, Training Loss: 1.2999, Learning Rate: 0.00095


Epoch 17/50: 100%|██████████| 152/152 [00:04<00:00, 31.72it/s]


Epoch 17/50, Training Loss: 1.2301, Learning Rate: 0.0009025


Epoch 18/50: 100%|██████████| 152/152 [00:03<00:00, 40.40it/s]


Epoch 18/50, Training Loss: 1.1483, Learning Rate: 0.000857375


Epoch 19/50: 100%|██████████| 152/152 [00:03<00:00, 40.38it/s]


Epoch 19/50, Training Loss: 1.1292, Learning Rate: 0.0008145062499999999


Epoch 20/50: 100%|██████████| 152/152 [00:04<00:00, 30.97it/s]


Epoch 20/50, Training Loss: 0.9808, Learning Rate: 0.0007737809374999998


Epoch 21/50: 100%|██████████| 152/152 [00:03<00:00, 40.34it/s]


Epoch 21/50, Training Loss: 0.9567, Learning Rate: 0.0007350918906249997


Epoch 22/50: 100%|██████████| 152/152 [00:03<00:00, 40.07it/s]


Epoch 22/50, Training Loss: 0.9629, Learning Rate: 0.0006983372960937497


Epoch 23/50: 100%|██████████| 152/152 [00:05<00:00, 25.69it/s]


Epoch 23/50, Training Loss: 0.8173, Learning Rate: 0.0006634204312890621


Epoch 24/50: 100%|██████████| 152/152 [00:03<00:00, 39.75it/s]


Epoch 24/50, Training Loss: 0.8015, Learning Rate: 0.000630249409724609


Epoch 25/50: 100%|██████████| 152/152 [00:03<00:00, 39.49it/s]


Epoch 25/50, Training Loss: 0.7297, Learning Rate: 0.0005987369392383785


Epoch 26/50: 100%|██████████| 152/152 [00:04<00:00, 31.45it/s]


Epoch 26/50, Training Loss: 0.7217, Learning Rate: 0.0005688000922764595


Epoch 27/50: 100%|██████████| 152/152 [00:03<00:00, 40.33it/s]


Epoch 27/50, Training Loss: 0.6726, Learning Rate: 0.0005403600876626365


Epoch 28/50: 100%|██████████| 152/152 [00:03<00:00, 40.04it/s]


Epoch 28/50, Training Loss: 0.6445, Learning Rate: 0.0005133420832795047


Epoch 29/50: 100%|██████████| 152/152 [00:04<00:00, 31.39it/s]


Epoch 29/50, Training Loss: 0.6445, Learning Rate: 0.00048767497911552944


Epoch 30/50: 100%|██████████| 152/152 [00:03<00:00, 39.19it/s]


Epoch 30/50, Training Loss: 0.5998, Learning Rate: 0.00046329123015975297


Epoch 31/50: 100%|██████████| 152/152 [00:03<00:00, 38.31it/s]


Epoch 31/50, Training Loss: 0.5822, Learning Rate: 0.0004401266686517653


Epoch 32/50: 100%|██████████| 152/152 [00:04<00:00, 30.60it/s]


Epoch 32/50, Training Loss: 0.5363, Learning Rate: 0.00041812033521917703


Epoch 33/50: 100%|██████████| 152/152 [00:03<00:00, 38.69it/s]


Epoch 33/50, Training Loss: 0.5242, Learning Rate: 0.00039721431845821814


Epoch 34/50: 100%|██████████| 152/152 [00:03<00:00, 38.59it/s]


Epoch 34/50, Training Loss: 0.5317, Learning Rate: 0.0003773536025353072


Epoch 35/50: 100%|██████████| 152/152 [00:04<00:00, 30.95it/s]


Epoch 35/50, Training Loss: 0.5158, Learning Rate: 0.0003584859224085418


Epoch 36/50: 100%|██████████| 152/152 [00:03<00:00, 38.48it/s]


Epoch 36/50, Training Loss: 0.4781, Learning Rate: 0.0003405616262881147


Epoch 37/50: 100%|██████████| 152/152 [00:03<00:00, 38.04it/s]


Epoch 37/50, Training Loss: 0.4867, Learning Rate: 0.00032353354497370894


Epoch 38/50: 100%|██████████| 152/152 [00:04<00:00, 31.65it/s]


Epoch 38/50, Training Loss: 0.4690, Learning Rate: 0.00030735686772502346


Epoch 39/50: 100%|██████████| 152/152 [00:03<00:00, 39.13it/s]


Epoch 39/50, Training Loss: 0.4587, Learning Rate: 0.00029198902433877225


Epoch 40/50: 100%|██████████| 152/152 [00:03<00:00, 38.45it/s]


Epoch 40/50, Training Loss: 0.4243, Learning Rate: 0.00027738957312183364


Epoch 41/50: 100%|██████████| 152/152 [00:04<00:00, 32.88it/s]


Epoch 41/50, Training Loss: 0.4230, Learning Rate: 0.0002635200944657419


Epoch 42/50: 100%|██████████| 152/152 [00:03<00:00, 39.26it/s]


Epoch 42/50, Training Loss: 0.4194, Learning Rate: 0.0002503440897424548


Epoch 43/50: 100%|██████████| 152/152 [00:04<00:00, 35.70it/s]


Epoch 43/50, Training Loss: 0.3907, Learning Rate: 0.00023782688525533205


Epoch 44/50: 100%|██████████| 152/152 [00:04<00:00, 33.78it/s]


Epoch 44/50, Training Loss: 0.3929, Learning Rate: 0.00022593554099256544


Epoch 45/50: 100%|██████████| 152/152 [00:03<00:00, 38.46it/s]


Epoch 45/50, Training Loss: 0.4105, Learning Rate: 0.00021463876394293716


Epoch 46/50: 100%|██████████| 152/152 [00:04<00:00, 32.37it/s]


Epoch 46/50, Training Loss: 0.4379, Learning Rate: 0.0002039068257457903


Epoch 47/50: 100%|██████████| 152/152 [00:04<00:00, 36.66it/s]


Epoch 47/50, Training Loss: 0.3735, Learning Rate: 0.00019371148445850077


Epoch 48/50: 100%|██████████| 152/152 [00:03<00:00, 38.65it/s]


Epoch 48/50, Training Loss: 0.3670, Learning Rate: 0.00018402591023557573


Epoch 49/50: 100%|██████████| 152/152 [00:04<00:00, 31.69it/s]


Epoch 49/50, Training Loss: 0.3589, Learning Rate: 0.00017482461472379692


Epoch 50/50: 100%|██████████| 152/152 [00:03<00:00, 39.69it/s]

Epoch 50/50, Training Loss: 0.3357, Learning Rate: 0.00016608338398760707





In [163]:
# Score the static-embedding Feedforward models on the question split test set.
# (To inspect the fit on the training data instead, swap in
# question_static_train_val_dataset and question_static_train_val_loader below.)
question_static_ff_sql_pipe = SQLPipeline(
    dataset=question_static_test_dataset,
    model=list(question_static_trained_ff_model),
    embed_type="static",
)
(
    question_static_ff_results,
    question_static_ff_acc,
) = question_static_ff_sql_pipe.batch_predict(
    dataloader=question_static_test_loader,
)
print(f"\nAccuracy of Feedforward model on question split test set: {question_static_ff_acc:.2%}")

Generating SQL queries: 100%|██████████| 14/14 [00:01<00:00,  9.84it/s]


Accuracy of Feedforward model on question split test set: 11.63%





In [111]:
# Training LSTM model using static embeddings from the question split training and validation set
# A single model is built with both a tag vocabulary size and a SQL vocabulary size,
# so one network serves the tagging and the classification task.
question_static_lstm_model = LSTMTaggerClassifer(
    input_dim=question_static_train_val_dataset.get_vector_dim(),
    hidden_dim=256,
    tag_vocab_size=question_static_train_val_dataset.get_tag_vocab_size(),
    sql_vocab_size=question_static_train_val_dataset.get_sql_vocab_size(),
    num_layers=3
)
# Training
# Per the training log printed below, the learning rate stays at 0.001 for the first
# 25 epochs (warmup_epochs) and is then multiplied by gamma=0.99 after each epoch.
question_static_trained_lstm_model = train_model(
    model=question_static_lstm_model,
    train_loader=question_static_train_val_loader,
    epochs=50,
    lr=1e-3,
    weight_decay=0.1,
    amsgrad=True,
    gamma=0.99,
    warmup_epochs=25
)

Epoch 1/50: 100%|██████████| 152/152 [00:06<00:00, 25.30it/s]


Epoch 1/50, Training Loss: 5.8671, Learning Rate: 0.001


Epoch 2/50: 100%|██████████| 152/152 [00:06<00:00, 23.84it/s]


Epoch 2/50, Training Loss: 4.3318, Learning Rate: 0.001


Epoch 3/50: 100%|██████████| 152/152 [00:06<00:00, 24.81it/s]


Epoch 3/50, Training Loss: 3.4032, Learning Rate: 0.001


Epoch 4/50: 100%|██████████| 152/152 [00:06<00:00, 24.25it/s]


Epoch 4/50, Training Loss: 2.6017, Learning Rate: 0.001


Epoch 5/50: 100%|██████████| 152/152 [00:06<00:00, 24.36it/s]


Epoch 5/50, Training Loss: 1.9425, Learning Rate: 0.001


Epoch 6/50: 100%|██████████| 152/152 [00:06<00:00, 25.00it/s]


Epoch 6/50, Training Loss: 1.4956, Learning Rate: 0.001


Epoch 7/50: 100%|██████████| 152/152 [00:06<00:00, 24.36it/s]


Epoch 7/50, Training Loss: 1.1529, Learning Rate: 0.001


Epoch 8/50: 100%|██████████| 152/152 [00:06<00:00, 24.58it/s]


Epoch 8/50, Training Loss: 0.8898, Learning Rate: 0.001


Epoch 9/50: 100%|██████████| 152/152 [00:06<00:00, 23.83it/s]


Epoch 9/50, Training Loss: 0.6556, Learning Rate: 0.001


Epoch 10/50: 100%|██████████| 152/152 [00:06<00:00, 24.78it/s]


Epoch 10/50, Training Loss: 0.5166, Learning Rate: 0.001


Epoch 11/50: 100%|██████████| 152/152 [00:06<00:00, 23.12it/s]


Epoch 11/50, Training Loss: 0.4236, Learning Rate: 0.001


Epoch 12/50: 100%|██████████| 152/152 [00:05<00:00, 25.88it/s]


Epoch 12/50, Training Loss: 0.4384, Learning Rate: 0.001


Epoch 13/50: 100%|██████████| 152/152 [00:06<00:00, 22.88it/s]


Epoch 13/50, Training Loss: 0.2997, Learning Rate: 0.001


Epoch 14/50: 100%|██████████| 152/152 [00:05<00:00, 25.96it/s]


Epoch 14/50, Training Loss: 0.2321, Learning Rate: 0.001


Epoch 15/50: 100%|██████████| 152/152 [00:06<00:00, 22.89it/s]


Epoch 15/50, Training Loss: 0.2057, Learning Rate: 0.001


Epoch 16/50: 100%|██████████| 152/152 [00:05<00:00, 26.13it/s]


Epoch 16/50, Training Loss: 0.1842, Learning Rate: 0.001


Epoch 17/50: 100%|██████████| 152/152 [00:06<00:00, 23.00it/s]


Epoch 17/50, Training Loss: 0.1632, Learning Rate: 0.001


Epoch 18/50: 100%|██████████| 152/152 [00:05<00:00, 26.32it/s]


Epoch 18/50, Training Loss: 0.1554, Learning Rate: 0.001


Epoch 19/50: 100%|██████████| 152/152 [00:06<00:00, 22.93it/s]


Epoch 19/50, Training Loss: 0.1484, Learning Rate: 0.001


Epoch 20/50: 100%|██████████| 152/152 [00:05<00:00, 26.07it/s]


Epoch 20/50, Training Loss: 0.1408, Learning Rate: 0.001


Epoch 21/50: 100%|██████████| 152/152 [00:06<00:00, 22.89it/s]


Epoch 21/50, Training Loss: 0.1235, Learning Rate: 0.001


Epoch 22/50: 100%|██████████| 152/152 [00:05<00:00, 26.29it/s]


Epoch 22/50, Training Loss: 0.1111, Learning Rate: 0.001


Epoch 23/50: 100%|██████████| 152/152 [00:06<00:00, 21.88it/s]


Epoch 23/50, Training Loss: 0.0973, Learning Rate: 0.001


Epoch 24/50: 100%|██████████| 152/152 [00:06<00:00, 23.99it/s]


Epoch 24/50, Training Loss: 0.0893, Learning Rate: 0.001


Epoch 25/50: 100%|██████████| 152/152 [00:06<00:00, 22.78it/s]


Epoch 25/50, Training Loss: 0.0824, Learning Rate: 0.001


Epoch 26/50: 100%|██████████| 152/152 [00:05<00:00, 26.30it/s]


Epoch 26/50, Training Loss: 0.0919, Learning Rate: 0.00099


Epoch 27/50: 100%|██████████| 152/152 [00:06<00:00, 22.98it/s]


Epoch 27/50, Training Loss: 0.0915, Learning Rate: 0.0009801


Epoch 28/50: 100%|██████████| 152/152 [00:05<00:00, 26.53it/s]


Epoch 28/50, Training Loss: 0.0724, Learning Rate: 0.000970299


Epoch 29/50: 100%|██████████| 152/152 [00:06<00:00, 22.91it/s]


Epoch 29/50, Training Loss: 0.0657, Learning Rate: 0.0009605960099999999


Epoch 30/50: 100%|██████████| 152/152 [00:05<00:00, 26.19it/s]


Epoch 30/50, Training Loss: 0.0596, Learning Rate: 0.0009509900498999999


Epoch 31/50: 100%|██████████| 152/152 [00:06<00:00, 22.90it/s]


Epoch 31/50, Training Loss: 0.0620, Learning Rate: 0.0009414801494009999


Epoch 32/50: 100%|██████████| 152/152 [00:05<00:00, 26.27it/s]


Epoch 32/50, Training Loss: 0.0536, Learning Rate: 0.0009320653479069899


Epoch 33/50: 100%|██████████| 152/152 [00:06<00:00, 22.87it/s]


Epoch 33/50, Training Loss: 0.0473, Learning Rate: 0.00092274469442792


Epoch 34/50: 100%|██████████| 152/152 [00:05<00:00, 26.33it/s]


Epoch 34/50, Training Loss: 0.0594, Learning Rate: 0.0009135172474836408


Epoch 35/50: 100%|██████████| 152/152 [00:06<00:00, 22.82it/s]


Epoch 35/50, Training Loss: 0.0973, Learning Rate: 0.0009043820750088043


Epoch 36/50: 100%|██████████| 152/152 [00:05<00:00, 26.25it/s]


Epoch 36/50, Training Loss: 0.0658, Learning Rate: 0.0008953382542587163


Epoch 37/50: 100%|██████████| 152/152 [00:06<00:00, 22.83it/s]


Epoch 37/50, Training Loss: 0.0514, Learning Rate: 0.0008863848717161291


Epoch 38/50: 100%|██████████| 152/152 [00:05<00:00, 26.14it/s]


Epoch 38/50, Training Loss: 0.0475, Learning Rate: 0.0008775210229989678


Epoch 39/50: 100%|██████████| 152/152 [00:06<00:00, 22.68it/s]


Epoch 39/50, Training Loss: 0.0409, Learning Rate: 0.0008687458127689781


Epoch 40/50: 100%|██████████| 152/152 [00:05<00:00, 26.13it/s]


Epoch 40/50, Training Loss: 0.0326, Learning Rate: 0.0008600583546412883


Epoch 41/50: 100%|██████████| 152/152 [00:06<00:00, 22.80it/s]


Epoch 41/50, Training Loss: 0.0314, Learning Rate: 0.0008514577710948754


Epoch 42/50: 100%|██████████| 152/152 [00:05<00:00, 26.40it/s]


Epoch 42/50, Training Loss: 0.0286, Learning Rate: 0.0008429431933839266


Epoch 43/50: 100%|██████████| 152/152 [00:06<00:00, 23.12it/s]


Epoch 43/50, Training Loss: 0.0319, Learning Rate: 0.0008345137614500873


Epoch 44/50: 100%|██████████| 152/152 [00:05<00:00, 26.47it/s]


Epoch 44/50, Training Loss: 0.0285, Learning Rate: 0.0008261686238355864


Epoch 45/50: 100%|██████████| 152/152 [00:06<00:00, 23.11it/s]


Epoch 45/50, Training Loss: 0.0311, Learning Rate: 0.0008179069375972306


Epoch 46/50: 100%|██████████| 152/152 [00:05<00:00, 26.43it/s]


Epoch 46/50, Training Loss: 0.0286, Learning Rate: 0.0008097278682212583


Epoch 47/50: 100%|██████████| 152/152 [00:06<00:00, 22.65it/s]


Epoch 47/50, Training Loss: 0.0252, Learning Rate: 0.0008016305895390457


Epoch 48/50: 100%|██████████| 152/152 [00:05<00:00, 25.78it/s]


Epoch 48/50, Training Loss: 0.0266, Learning Rate: 0.0007936142836436553


Epoch 49/50: 100%|██████████| 152/152 [00:06<00:00, 22.98it/s]


Epoch 49/50, Training Loss: 0.0271, Learning Rate: 0.0007856781408072188


Epoch 50/50: 100%|██████████| 152/152 [00:05<00:00, 26.40it/s]

Epoch 50/50, Training Loss: 0.0283, Learning Rate: 0.0007778213593991466





In [164]:
# Score the static-embedding LSTM model on the question split test set.
# (To inspect the fit on the training data instead, swap in
# question_static_train_val_dataset and question_static_train_val_loader below.)
question_static_lstm_sql_pipe = SQLPipeline(
    dataset=question_static_test_dataset,
    model=question_static_trained_lstm_model,
    embed_type="static",
)
(
    question_static_lstm_results,
    question_static_lstm_acc,
) = question_static_lstm_sql_pipe.batch_predict(
    dataloader=question_static_test_loader,
)
print(f"\nAccuracy of LSTM model on question split test set: {question_static_lstm_acc:.2%}")

Generating SQL queries: 100%|██████████| 14/14 [00:01<00:00,  7.03it/s]


Accuracy of LSTM model on question split test set: 33.33%





In [240]:
# Training Transformer model using static embeddings from the question split training and validation set
# A single model is built with both a tag vocabulary size and a SQL vocabulary size,
# so one network serves the tagging and the classification task.
question_static_transformer_model = TransformerTaggerClassifier(
    input_dim=question_static_train_val_dataset.get_vector_dim(),
    hidden_dim=256,
    num_heads=8,
    num_layers=2,
    tag_vocab_size=question_static_train_val_dataset.get_tag_vocab_size(),
    sql_vocab_size=question_static_train_val_dataset.get_sql_vocab_size()
)
# Training
# Per the training log printed below, the learning rate stays at 0.001 for the first
# 50 epochs (warmup_epochs) and is then multiplied by gamma=0.90 after each epoch.
question_static_trained_transformer_model = train_model(
    model=question_static_transformer_model,
    train_loader=question_static_train_val_loader,
    weight_decay=0.01,
    epochs=100,
    amsgrad=True,
    gamma=0.90,
    lr=1e-3,
    warmup_epochs=50
)

Epoch 1/100: 100%|██████████| 152/152 [00:05<00:00, 25.78it/s]


Epoch 1/100, Training Loss: 5.1272, Learning Rate: 0.001


Epoch 2/100: 100%|██████████| 152/152 [00:05<00:00, 27.28it/s]


Epoch 2/100, Training Loss: 3.7137, Learning Rate: 0.001


Epoch 3/100: 100%|██████████| 152/152 [00:05<00:00, 25.89it/s]


Epoch 3/100, Training Loss: 2.9545, Learning Rate: 0.001


Epoch 4/100: 100%|██████████| 152/152 [00:05<00:00, 28.06it/s]


Epoch 4/100, Training Loss: 2.5270, Learning Rate: 0.001


Epoch 5/100: 100%|██████████| 152/152 [00:05<00:00, 28.93it/s]


Epoch 5/100, Training Loss: 2.2148, Learning Rate: 0.001


Epoch 6/100: 100%|██████████| 152/152 [00:05<00:00, 27.55it/s]


Epoch 6/100, Training Loss: 1.9734, Learning Rate: 0.001


Epoch 7/100: 100%|██████████| 152/152 [00:04<00:00, 30.64it/s]


Epoch 7/100, Training Loss: 1.8249, Learning Rate: 0.001


Epoch 8/100: 100%|██████████| 152/152 [00:05<00:00, 25.80it/s]


Epoch 8/100, Training Loss: 1.6429, Learning Rate: 0.001


Epoch 9/100: 100%|██████████| 152/152 [00:05<00:00, 30.35it/s]


Epoch 9/100, Training Loss: 1.6564, Learning Rate: 0.001


Epoch 10/100: 100%|██████████| 152/152 [00:05<00:00, 25.95it/s]


Epoch 10/100, Training Loss: 1.5948, Learning Rate: 0.001


Epoch 11/100: 100%|██████████| 152/152 [00:04<00:00, 30.44it/s]


Epoch 11/100, Training Loss: 1.4236, Learning Rate: 0.001


Epoch 12/100: 100%|██████████| 152/152 [00:05<00:00, 26.99it/s]


Epoch 12/100, Training Loss: 1.3970, Learning Rate: 0.001


Epoch 13/100: 100%|██████████| 152/152 [00:05<00:00, 29.28it/s]


Epoch 13/100, Training Loss: 1.4338, Learning Rate: 0.001


Epoch 14/100: 100%|██████████| 152/152 [00:05<00:00, 30.18it/s]


Epoch 14/100, Training Loss: 1.3223, Learning Rate: 0.001


Epoch 15/100: 100%|██████████| 152/152 [00:05<00:00, 25.83it/s]


Epoch 15/100, Training Loss: 1.1883, Learning Rate: 0.001


Epoch 16/100: 100%|██████████| 152/152 [00:04<00:00, 30.43it/s]


Epoch 16/100, Training Loss: 1.1322, Learning Rate: 0.001


Epoch 17/100: 100%|██████████| 152/152 [00:06<00:00, 24.04it/s]


Epoch 17/100, Training Loss: 1.0971, Learning Rate: 0.001


Epoch 18/100: 100%|██████████| 152/152 [00:04<00:00, 30.55it/s]


Epoch 18/100, Training Loss: 1.1223, Learning Rate: 0.001


Epoch 19/100: 100%|██████████| 152/152 [00:05<00:00, 25.72it/s]


Epoch 19/100, Training Loss: 1.0965, Learning Rate: 0.001


Epoch 20/100: 100%|██████████| 152/152 [00:04<00:00, 30.45it/s]


Epoch 20/100, Training Loss: 1.1073, Learning Rate: 0.001


Epoch 21/100: 100%|██████████| 152/152 [00:05<00:00, 28.17it/s]


Epoch 21/100, Training Loss: 0.9764, Learning Rate: 0.001


Epoch 22/100: 100%|██████████| 152/152 [00:05<00:00, 25.98it/s]


Epoch 22/100, Training Loss: 0.9207, Learning Rate: 0.001


Epoch 23/100: 100%|██████████| 152/152 [00:04<00:00, 30.49it/s]


Epoch 23/100, Training Loss: 0.9353, Learning Rate: 0.001


Epoch 24/100: 100%|██████████| 152/152 [00:05<00:00, 25.85it/s]


Epoch 24/100, Training Loss: 0.8796, Learning Rate: 0.001


Epoch 25/100: 100%|██████████| 152/152 [00:05<00:00, 30.39it/s]


Epoch 25/100, Training Loss: 0.9323, Learning Rate: 0.001


Epoch 26/100: 100%|██████████| 152/152 [00:05<00:00, 26.08it/s]


Epoch 26/100, Training Loss: 0.9239, Learning Rate: 0.001


Epoch 27/100: 100%|██████████| 152/152 [00:04<00:00, 30.51it/s]


Epoch 27/100, Training Loss: 0.8454, Learning Rate: 0.001


Epoch 28/100: 100%|██████████| 152/152 [00:05<00:00, 26.26it/s]


Epoch 28/100, Training Loss: 0.9013, Learning Rate: 0.001


Epoch 29/100: 100%|██████████| 152/152 [00:04<00:00, 30.75it/s]


Epoch 29/100, Training Loss: 0.8396, Learning Rate: 0.001


Epoch 30/100: 100%|██████████| 152/152 [00:05<00:00, 30.13it/s]


Epoch 30/100, Training Loss: 0.9337, Learning Rate: 0.001


Epoch 31/100: 100%|██████████| 152/152 [00:05<00:00, 26.56it/s]


Epoch 31/100, Training Loss: 0.9980, Learning Rate: 0.001


Epoch 32/100: 100%|██████████| 152/152 [00:04<00:00, 30.57it/s]


Epoch 32/100, Training Loss: 0.8075, Learning Rate: 0.001


Epoch 33/100: 100%|██████████| 152/152 [00:05<00:00, 26.16it/s]


Epoch 33/100, Training Loss: 0.7574, Learning Rate: 0.001


Epoch 34/100: 100%|██████████| 152/152 [00:04<00:00, 30.55it/s]


Epoch 34/100, Training Loss: 0.7300, Learning Rate: 0.001


Epoch 35/100: 100%|██████████| 152/152 [00:05<00:00, 26.03it/s]


Epoch 35/100, Training Loss: 0.6720, Learning Rate: 0.001


Epoch 36/100: 100%|██████████| 152/152 [00:04<00:00, 30.47it/s]


Epoch 36/100, Training Loss: 0.6612, Learning Rate: 0.001


Epoch 37/100: 100%|██████████| 152/152 [00:05<00:00, 28.02it/s]


Epoch 37/100, Training Loss: 0.6388, Learning Rate: 0.001


Epoch 38/100: 100%|██████████| 152/152 [00:05<00:00, 27.47it/s]


Epoch 38/100, Training Loss: 0.6543, Learning Rate: 0.001


Epoch 39/100: 100%|██████████| 152/152 [00:05<00:00, 30.10it/s]


Epoch 39/100, Training Loss: 0.7253, Learning Rate: 0.001


Epoch 40/100: 100%|██████████| 152/152 [00:05<00:00, 25.71it/s]


Epoch 40/100, Training Loss: 0.6989, Learning Rate: 0.001


Epoch 41/100: 100%|██████████| 152/152 [00:05<00:00, 30.30it/s]


Epoch 41/100, Training Loss: 0.6420, Learning Rate: 0.001


Epoch 42/100: 100%|██████████| 152/152 [00:05<00:00, 26.06it/s]


Epoch 42/100, Training Loss: 0.5956, Learning Rate: 0.001


Epoch 43/100: 100%|██████████| 152/152 [00:04<00:00, 30.40it/s]


Epoch 43/100, Training Loss: 0.5559, Learning Rate: 0.001


Epoch 44/100: 100%|██████████| 152/152 [00:05<00:00, 26.39it/s]


Epoch 44/100, Training Loss: 0.5532, Learning Rate: 0.001


Epoch 45/100: 100%|██████████| 152/152 [00:05<00:00, 30.08it/s]


Epoch 45/100, Training Loss: 0.5624, Learning Rate: 0.001


Epoch 46/100: 100%|██████████| 152/152 [00:04<00:00, 30.60it/s]


Epoch 46/100, Training Loss: 0.5752, Learning Rate: 0.001


Epoch 47/100: 100%|██████████| 152/152 [00:05<00:00, 26.03it/s]


Epoch 47/100, Training Loss: 0.5467, Learning Rate: 0.001


Epoch 48/100: 100%|██████████| 152/152 [00:04<00:00, 30.67it/s]


Epoch 48/100, Training Loss: 0.5277, Learning Rate: 0.001


Epoch 49/100: 100%|██████████| 152/152 [00:05<00:00, 25.87it/s]


Epoch 49/100, Training Loss: 0.5519, Learning Rate: 0.001


Epoch 50/100: 100%|██████████| 152/152 [00:05<00:00, 28.90it/s]


Epoch 50/100, Training Loss: 0.5222, Learning Rate: 0.001


Epoch 51/100: 100%|██████████| 152/152 [00:06<00:00, 23.70it/s]


Epoch 51/100, Training Loss: 0.5198, Learning Rate: 0.0009000000000000001


Epoch 52/100: 100%|██████████| 152/152 [00:04<00:00, 30.71it/s]


Epoch 52/100, Training Loss: 0.6399, Learning Rate: 0.0008100000000000001


Epoch 53/100: 100%|██████████| 152/152 [00:05<00:00, 26.53it/s]


Epoch 53/100, Training Loss: 0.5054, Learning Rate: 0.000729


Epoch 54/100: 100%|██████████| 152/152 [00:05<00:00, 29.62it/s]


Epoch 54/100, Training Loss: 0.4732, Learning Rate: 0.0006561000000000001


Epoch 55/100: 100%|██████████| 152/152 [00:04<00:00, 30.68it/s]


Epoch 55/100, Training Loss: 0.4242, Learning Rate: 0.00059049


Epoch 56/100: 100%|██████████| 152/152 [00:05<00:00, 26.22it/s]


Epoch 56/100, Training Loss: 0.3768, Learning Rate: 0.000531441


Epoch 57/100: 100%|██████████| 152/152 [00:04<00:00, 30.59it/s]


Epoch 57/100, Training Loss: 0.3621, Learning Rate: 0.0004782969


Epoch 58/100: 100%|██████████| 152/152 [00:05<00:00, 26.29it/s]


Epoch 58/100, Training Loss: 0.3472, Learning Rate: 0.00043046721


Epoch 59/100: 100%|██████████| 152/152 [00:05<00:00, 30.32it/s]


Epoch 59/100, Training Loss: 0.3224, Learning Rate: 0.000387420489


Epoch 60/100: 100%|██████████| 152/152 [00:05<00:00, 26.16it/s]


Epoch 60/100, Training Loss: 0.3058, Learning Rate: 0.0003486784401


Epoch 61/100: 100%|██████████| 152/152 [00:05<00:00, 30.19it/s]


Epoch 61/100, Training Loss: 0.3000, Learning Rate: 0.00031381059609000004


Epoch 62/100: 100%|██████████| 152/152 [00:05<00:00, 29.57it/s]


Epoch 62/100, Training Loss: 0.3046, Learning Rate: 0.00028242953648100003


Epoch 63/100: 100%|██████████| 152/152 [00:05<00:00, 26.87it/s]


Epoch 63/100, Training Loss: 0.2858, Learning Rate: 0.00025418658283290005


Epoch 64/100: 100%|██████████| 152/152 [00:05<00:00, 29.94it/s]


Epoch 64/100, Training Loss: 0.2845, Learning Rate: 0.00022876792454961005


Epoch 65/100: 100%|██████████| 152/152 [00:05<00:00, 25.98it/s]


Epoch 65/100, Training Loss: 0.2772, Learning Rate: 0.00020589113209464906


Epoch 66/100: 100%|██████████| 152/152 [00:04<00:00, 30.64it/s]


Epoch 66/100, Training Loss: 0.2717, Learning Rate: 0.00018530201888518417


Epoch 67/100: 100%|██████████| 152/152 [00:05<00:00, 26.13it/s]


Epoch 67/100, Training Loss: 0.2690, Learning Rate: 0.00016677181699666576


Epoch 68/100: 100%|██████████| 152/152 [00:04<00:00, 30.42it/s]


Epoch 68/100, Training Loss: 0.2634, Learning Rate: 0.0001500946352969992


Epoch 69/100: 100%|██████████| 152/152 [00:05<00:00, 28.09it/s]


Epoch 69/100, Training Loss: 0.2675, Learning Rate: 0.0001350851717672993


Epoch 70/100: 100%|██████████| 152/152 [00:05<00:00, 28.06it/s]


Epoch 70/100, Training Loss: 0.2576, Learning Rate: 0.00012157665459056936


Epoch 71/100: 100%|██████████| 152/152 [00:04<00:00, 30.46it/s]


Epoch 71/100, Training Loss: 0.2618, Learning Rate: 0.00010941898913151243


Epoch 72/100: 100%|██████████| 152/152 [00:05<00:00, 26.23it/s]


Epoch 72/100, Training Loss: 0.2562, Learning Rate: 9.847709021836118e-05


Epoch 73/100: 100%|██████████| 152/152 [00:04<00:00, 30.68it/s]


Epoch 73/100, Training Loss: 0.2564, Learning Rate: 8.862938119652506e-05


Epoch 74/100: 100%|██████████| 152/152 [00:05<00:00, 26.20it/s]


Epoch 74/100, Training Loss: 0.2609, Learning Rate: 7.976644307687256e-05


Epoch 75/100: 100%|██████████| 152/152 [00:05<00:00, 30.40it/s]


Epoch 75/100, Training Loss: 0.2546, Learning Rate: 7.17897987691853e-05


Epoch 76/100: 100%|██████████| 152/152 [00:05<00:00, 26.74it/s]


Epoch 76/100, Training Loss: 0.2604, Learning Rate: 6.461081889226677e-05


Epoch 77/100: 100%|██████████| 152/152 [00:05<00:00, 29.85it/s]


Epoch 77/100, Training Loss: 0.2496, Learning Rate: 5.81497370030401e-05


Epoch 78/100: 100%|██████████| 152/152 [00:04<00:00, 30.92it/s]


Epoch 78/100, Training Loss: 0.2563, Learning Rate: 5.233476330273609e-05


Epoch 79/100: 100%|██████████| 152/152 [00:05<00:00, 25.96it/s]


Epoch 79/100, Training Loss: 0.2509, Learning Rate: 4.7101286972462485e-05


Epoch 80/100: 100%|██████████| 152/152 [00:04<00:00, 30.65it/s]


Epoch 80/100, Training Loss: 0.2466, Learning Rate: 4.239115827521624e-05


Epoch 81/100: 100%|██████████| 152/152 [00:05<00:00, 25.84it/s]


Epoch 81/100, Training Loss: 0.2538, Learning Rate: 3.8152042447694614e-05


Epoch 82/100: 100%|██████████| 152/152 [00:04<00:00, 30.44it/s]


Epoch 82/100, Training Loss: 0.2463, Learning Rate: 3.433683820292515e-05


Epoch 83/100: 100%|██████████| 152/152 [00:05<00:00, 26.03it/s]


Epoch 83/100, Training Loss: 0.2445, Learning Rate: 3.090315438263264e-05


Epoch 84/100: 100%|██████████| 152/152 [00:04<00:00, 30.55it/s]


Epoch 84/100, Training Loss: 0.2483, Learning Rate: 2.7812838944369376e-05


Epoch 85/100: 100%|██████████| 152/152 [00:05<00:00, 30.07it/s]


Epoch 85/100, Training Loss: 0.2483, Learning Rate: 2.503155504993244e-05


Epoch 86/100: 100%|██████████| 152/152 [00:05<00:00, 26.35it/s]


Epoch 86/100, Training Loss: 0.2476, Learning Rate: 2.2528399544939195e-05


Epoch 87/100: 100%|██████████| 152/152 [00:04<00:00, 30.45it/s]


Epoch 87/100, Training Loss: 0.2437, Learning Rate: 2.0275559590445276e-05


Epoch 88/100: 100%|██████████| 152/152 [00:05<00:00, 26.16it/s]


Epoch 88/100, Training Loss: 0.2464, Learning Rate: 1.8248003631400748e-05


Epoch 89/100: 100%|██████████| 152/152 [00:04<00:00, 30.59it/s]


Epoch 89/100, Training Loss: 0.2398, Learning Rate: 1.6423203268260675e-05


Epoch 90/100: 100%|██████████| 152/152 [00:05<00:00, 25.81it/s]


Epoch 90/100, Training Loss: 0.2404, Learning Rate: 1.4780882941434607e-05


Epoch 91/100: 100%|██████████| 152/152 [00:04<00:00, 30.52it/s]


Epoch 91/100, Training Loss: 0.2427, Learning Rate: 1.3302794647291146e-05


Epoch 92/100: 100%|██████████| 152/152 [00:05<00:00, 28.02it/s]


Epoch 92/100, Training Loss: 0.2457, Learning Rate: 1.1972515182562031e-05


Epoch 93/100: 100%|██████████| 152/152 [00:05<00:00, 27.85it/s]


Epoch 93/100, Training Loss: 0.2387, Learning Rate: 1.0775263664305828e-05


Epoch 94/100: 100%|██████████| 152/152 [00:05<00:00, 30.37it/s]


Epoch 94/100, Training Loss: 0.2434, Learning Rate: 9.697737297875246e-06


Epoch 95/100: 100%|██████████| 152/152 [00:05<00:00, 26.09it/s]


Epoch 95/100, Training Loss: 0.2378, Learning Rate: 8.727963568087722e-06


Epoch 96/100: 100%|██████████| 152/152 [00:05<00:00, 30.33it/s]


Epoch 96/100, Training Loss: 0.2425, Learning Rate: 7.85516721127895e-06


Epoch 97/100: 100%|██████████| 152/152 [00:05<00:00, 25.93it/s]


Epoch 97/100, Training Loss: 0.2430, Learning Rate: 7.069650490151056e-06


Epoch 98/100: 100%|██████████| 152/152 [00:05<00:00, 30.28it/s]


Epoch 98/100, Training Loss: 0.2426, Learning Rate: 6.362685441135951e-06


Epoch 99/100: 100%|██████████| 152/152 [00:05<00:00, 26.06it/s]


Epoch 99/100, Training Loss: 0.2414, Learning Rate: 5.7264168970223554e-06


Epoch 100/100: 100%|██████████| 152/152 [00:05<00:00, 30.33it/s]

Epoch 100/100, Training Loss: 0.2507, Learning Rate: 5.15377520732012e-06





In [330]:
# Score the static-embedding Transformer model on the question split test set.
# (To inspect the fit on the training data instead, swap in
# question_static_train_val_dataset and question_static_train_val_loader below.)
question_static_transformer_sql_pipe = SQLPipeline(
    dataset=question_static_test_dataset,
    model=question_static_trained_transformer_model,
    embed_type="static",
)
(
    question_static_transformer_results,
    question_static_transformer_acc,
) = question_static_transformer_sql_pipe.batch_predict(
    dataloader=question_static_test_loader,
)
print(f"\nAccuracy of Transformer model on question split test set: {question_static_transformer_acc:.2%}")

Generating SQL queries: 100%|██████████| 14/14 [00:01<00:00, 13.68it/s]


Accuracy of Transformer model on question split test set: 14.32%





In [242]:
# For each model, keep the question split test examples whose predicted tag sequence
# exactly matches the gold tagging labels.
# The Linear list is built from `question_static_linear_results`, i.e. the evaluation run
# in this experiment section, so all four models are compared on results produced here.
correct_linear_tagging_examples = [item for item in question_static_linear_results if item['predicted_tags'] == item['raw_item']['tagging_labels']]
correct_ff_tagging_examples = [item for item in question_static_ff_results if item['predicted_tags'] == item['raw_item']['tagging_labels']]
correct_lstm_tagging_examples = [item for item in question_static_lstm_results if item['predicted_tags'] == item['raw_item']['tagging_labels']]
correct_transformer_tagging_examples = [item for item in question_static_transformer_results if item['predicted_tags'] == item['raw_item']['tagging_labels']]

In [243]:
# Rebuild the input sentence (tokens joined by single spaces) of every correctly
# tagged example, one set of sentences per model.
linear_correct_tagging_queries = set(" ".join(ex['tokens']) for ex in correct_linear_tagging_examples)
ff_correct_tagging_queries = set(" ".join(ex['tokens']) for ex in correct_ff_tagging_examples)
lstm_correct_tagging_queries = set(" ".join(ex['tokens']) for ex in correct_lstm_tagging_examples)
transformer_correct_tagging_queries = set(" ".join(ex['tokens']) for ex in correct_transformer_tagging_examples)
# Sentences that every one of the four models tagged correctly
correct_queries = set.intersection(
    transformer_correct_tagging_queries,
    lstm_correct_tagging_queries,
    ff_correct_tagging_queries,
    linear_correct_tagging_queries,
)

In [352]:
# Functions to introduce noise in each query by adding typos, deleting characters or rearranging words
import random
import re

def is_uppercase_word(word):
    """Return True when `word` counts as an upper-case token.

    A word qualifies when ``str.isupper()`` holds for it, or when it is made up
    only of the characters ``A``-``Z`` and ``.`` (so a lone ``"."`` qualifies too).

    The result is always a real ``bool``. The regex match is compared against
    ``None`` so that callers never receive a ``re.Match`` object or ``None``.
    """
    return word.isupper() or re.fullmatch(r'[A-Z.]+', word) is not None

def is_to_from(word):
    """Match `word` against the exact words ``to`` and ``from`` (case-sensitive).

    Returns the ``re.Match`` of the first pattern that matches the whole word,
    or ``None`` when neither does.
    """
    found = None
    for pattern in (r'to', r'from'):
        found = re.fullmatch(pattern, word)
        if found:
            break
    return found

def typo(word):
    """Swap two neighbouring characters of `word` at a random position.

    Words of two characters or fewer are returned unchanged. When the two
    characters picked are equal, the result is identical to the input.
    """
    if len(word) <= 2:
        return word
    left = random.randint(0, len(word) - 2)
    right = left + 1
    chars = list(word)
    chars[left], chars[right] = chars[right], chars[left]
    return ''.join(chars)

def delete_char(word):
    """Drop one randomly chosen character from `word`.

    Words of one character or fewer are returned unchanged.
    """
    if len(word) <= 1:
        return word
    drop_at = random.randint(0, len(word) - 1)
    kept = [ch for pos, ch in enumerate(word) if pos != drop_at]
    return ''.join(kept)


def change_sentence(sentence, change_type):
    """Apply one random perturbation to one random word of `sentence`.

    The sentence is split on whitespace and a word is drawn at random among
    those that `is_uppercase_word` does not flag. An operation name is then
    drawn at random from `change_type`:

    * ``'typo'``   - the word is passed through `typo`;
    * ``'delete'`` - the word is passed through `delete_char`;
    * ``'swap'``   - the word trades places with the next word, but only when a
      next word exists and is not flagged by `is_uppercase_word`; otherwise
      nothing is changed.

    Any other operation name leaves the words untouched. The words are
    returned joined by single spaces. When every word is flagged as upper-case
    (or there are no words), the original `sentence` is returned as is.
    """
    tokens = sentence.split()

    candidates = []
    for idx, token in enumerate(tokens):
        if not is_uppercase_word(token):
            candidates.append(idx)
    if len(candidates) == 0:
        return sentence

    # Draw the position first and the operation second, one draw each.
    pos = random.choice(candidates)
    operation = random.choice(change_type)

    if operation == 'typo':
        tokens[pos] = typo(tokens[pos])
    elif operation == 'delete':
        tokens[pos] = delete_char(tokens[pos])
    elif operation == 'swap':
        neighbour = pos + 1
        if neighbour < len(tokens) and not is_uppercase_word(tokens[neighbour]):
            tokens[pos], tokens[neighbour] = tokens[neighbour], tokens[pos]

    return ' '.join(tokens)

def modify_sentences(sentences, change_type=('typo', 'swap', 'delete')):
    """Perturb every sentence once and return the results as a list.

    Parameters:
        sentences: iterable of sentence strings; the output follows its
            iteration order.
        change_type: sequence of operation names handed to `change_sentence`,
            which draws one of them at random for each sentence. The default is
            an immutable tuple so it cannot be altered between calls.

    Returns:
        A list holding the result of `change_sentence` for each input sentence.
    """
    return [change_sentence(s, change_type) for s in sentences]


In [353]:
# Adding noise to inputs by swapping the order of words in the sentence and evaluating all models
from copy import deepcopy
# Word-swap robustness experiment: every round perturbs each query once more and
# counts, per model, how many queries keep the tags predicted for their clean version.

# `correct_queries` is a set. Freeze it into a sorted list so that the k-th clean
# query is always paired with the k-th noisy query; zipping a set with a list built
# from a copy of that set only works if both happen to iterate in the same order.
clean_queries = sorted(correct_queries)
if not clean_queries:
    raise ValueError("correct_queries is empty: there is nothing to evaluate")
incorrect_queries = list(clean_queries)

# Pipelines under test, keyed by the column name used in the result records.
noise_eval_pipes = {
    "linear": question_linear_sql_pipe,
    "ff": question_static_ff_sql_pipe,
    "lstm": question_static_lstm_sql_pipe,
    "transformer": question_static_transformer_sql_pipe,
}
# Model names as they appear in the printed report.
noise_eval_labels = {
    "linear": "Linear",
    "ff": "Feedforward",
    "lstm": "LSTM",
    "transformer": "Transformer",
}

change_results = []
for i in range(25):
    # Noise is cumulative: this round perturbs the sentences left by the previous one.
    incorrect_queries = modify_sentences(incorrect_queries, change_type=['swap'])

    # (clean, noisy) pairs per model, split by whether the predicted tags stayed equal.
    same_tag_pairs = {model_key: [] for model_key in noise_eval_pipes}
    diff_tag_pairs = {model_key: [] for model_key in noise_eval_pipes}
    tag_result = []
    for correct_query, incorrect_query in zip(clean_queries, incorrect_queries):
        result_item = {"correct_query":correct_query, "incorrect_query":incorrect_query, "linear":0, "ff":0, "lstm":0, "transformer":0}
        for model_key, model_pipe in noise_eval_pipes.items():
            clean_prediction = model_pipe.predict(correct_query)
            noisy_prediction = model_pipe.predict(incorrect_query)
            if clean_prediction["predicted_tags"] == noisy_prediction["predicted_tags"]:
                same_tag_pairs[model_key].append((correct_query, incorrect_query))
                result_item[model_key] = 1
            else:
                diff_tag_pairs[model_key].append((correct_query, incorrect_query))
        tag_result.append(result_item)

    # Keep the per-model names this cell has always defined, for later inspection.
    linear_correct_result, linear_incorrect_result = same_tag_pairs["linear"], diff_tag_pairs["linear"]
    ff_correct_result, ff_incorrect_result = same_tag_pairs["ff"], diff_tag_pairs["ff"]
    lstm_correct_result, lstm_incorrect_result = same_tag_pairs["lstm"], diff_tag_pairs["lstm"]
    transformer_correct_result, transformer_incorrect_result = same_tag_pairs["transformer"], diff_tag_pairs["transformer"]
    linear_equal = len(linear_correct_result)
    ff_equal = len(ff_correct_result)
    lstm_equal = len(lstm_correct_result)
    transformer_equal = len(transformer_correct_result)

    total_queries = len(clean_queries)
    round_scores = {"changes":i+1}
    print(f"For {i=}:")
    for model_key, model_label in noise_eval_labels.items():
        unchanged_count = len(same_tag_pairs[model_key])
        print(f"{model_label} model tagged {unchanged_count} queries correctly out of {total_queries}")
        print(f"{model_label} Accuracy = {unchanged_count/total_queries:.2%}")
        round_scores[model_key] = unchanged_count/total_queries*100
    print("*"*50)
    change_results.append(round_scores)


For i=0:
Linear model tagged 48 queries correctly out of 48
Linear Accuracy = 100.00%
Feedforward model tagged 48 queries correctly out of 48
Feedforward Accuracy = 100.00%
LSTM model tagged 47 queries correctly out of 48
LSTM Accuracy = 97.92%
Transformer model tagged 48 queries correctly out of 48
Transformer Accuracy = 100.00%
**************************************************
For i=1:
Linear model tagged 48 queries correctly out of 48
Linear Accuracy = 100.00%
Feedforward model tagged 48 queries correctly out of 48
Feedforward Accuracy = 100.00%
LSTM model tagged 47 queries correctly out of 48
LSTM Accuracy = 97.92%
Transformer model tagged 48 queries correctly out of 48
Transformer Accuracy = 100.00%
**************************************************
For i=2:
Linear model tagged 48 queries correctly out of 48
Linear Accuracy = 100.00%
Feedforward model tagged 48 queries correctly out of 48
Feedforward Accuracy = 100.00%
LSTM model tagged 48 queries correctly out of 48
LSTM Accurac

In [354]:
# Tabulate the per-round scores gathered in `change_results` by the word-swap run:
# one row per record, one column per record key.
import pandas as pd
order_changes_df = pd.DataFrame(data=change_results)
order_changes_df  # bare expression so the notebook displays the table

Unnamed: 0,changes,linear,ff,lstm,transformer
0,1,100.0,100.0,97.916667,100.0
1,2,100.0,100.0,97.916667,100.0
2,3,100.0,100.0,100.0,100.0
3,4,100.0,100.0,100.0,100.0
4,5,100.0,100.0,97.916667,100.0
5,6,100.0,100.0,97.916667,100.0
6,7,100.0,100.0,97.916667,100.0
7,8,100.0,100.0,97.916667,100.0
8,9,100.0,100.0,97.916667,100.0
9,10,100.0,100.0,97.916667,100.0


In [355]:
# Adding noise to inputs by rearranging letters within words and evaluating all models
from copy import deepcopy
# Typo robustness experiment: every round perturbs each query once more and counts,
# per model, how many queries keep the tags predicted for their clean version.

# `correct_queries` is a set. Freeze it into a sorted list so that the k-th clean
# query is always paired with the k-th noisy query; zipping a set with a list built
# from a copy of that set only works if both happen to iterate in the same order.
clean_queries = sorted(correct_queries)
if not clean_queries:
    raise ValueError("correct_queries is empty: there is nothing to evaluate")
incorrect_queries = list(clean_queries)

# Pipelines under test, keyed by the column name used in the result records.
noise_eval_pipes = {
    "linear": question_linear_sql_pipe,
    "ff": question_static_ff_sql_pipe,
    "lstm": question_static_lstm_sql_pipe,
    "transformer": question_static_transformer_sql_pipe,
}
# Model names as they appear in the printed report.
noise_eval_labels = {
    "linear": "Linear",
    "ff": "Feedforward",
    "lstm": "LSTM",
    "transformer": "Transformer",
}

change_results = []
for i in range(25):
    # Noise is cumulative: this round perturbs the sentences left by the previous one.
    incorrect_queries = modify_sentences(incorrect_queries, change_type=['typo'])

    # (clean, noisy) pairs per model, split by whether the predicted tags stayed equal.
    same_tag_pairs = {model_key: [] for model_key in noise_eval_pipes}
    diff_tag_pairs = {model_key: [] for model_key in noise_eval_pipes}
    tag_result = []
    for correct_query, incorrect_query in zip(clean_queries, incorrect_queries):
        result_item = {"correct_query":correct_query, "incorrect_query":incorrect_query, "linear":0, "ff":0, "lstm":0, "transformer":0}
        for model_key, model_pipe in noise_eval_pipes.items():
            clean_prediction = model_pipe.predict(correct_query)
            noisy_prediction = model_pipe.predict(incorrect_query)
            if clean_prediction["predicted_tags"] == noisy_prediction["predicted_tags"]:
                same_tag_pairs[model_key].append((correct_query, incorrect_query))
                result_item[model_key] = 1
            else:
                diff_tag_pairs[model_key].append((correct_query, incorrect_query))
        tag_result.append(result_item)

    # Keep the per-model names this cell has always defined, for later inspection.
    linear_correct_result, linear_incorrect_result = same_tag_pairs["linear"], diff_tag_pairs["linear"]
    ff_correct_result, ff_incorrect_result = same_tag_pairs["ff"], diff_tag_pairs["ff"]
    lstm_correct_result, lstm_incorrect_result = same_tag_pairs["lstm"], diff_tag_pairs["lstm"]
    transformer_correct_result, transformer_incorrect_result = same_tag_pairs["transformer"], diff_tag_pairs["transformer"]
    linear_equal = len(linear_correct_result)
    ff_equal = len(ff_correct_result)
    lstm_equal = len(lstm_correct_result)
    transformer_equal = len(transformer_correct_result)

    total_queries = len(clean_queries)
    round_scores = {"changes":i+1}
    print(f"For {i=}:")
    for model_key, model_label in noise_eval_labels.items():
        unchanged_count = len(same_tag_pairs[model_key])
        print(f"{model_label} model tagged {unchanged_count} queries correctly out of {total_queries}")
        print(f"{model_label} Accuracy = {unchanged_count/total_queries:.2%}")
        round_scores[model_key] = unchanged_count/total_queries*100
    print("*"*50)
    change_results.append(round_scores)

For i=0:
Linear model tagged 21 queries correctly out of 48
Linear Accuracy = 43.75%
Feedforward model tagged 22 queries correctly out of 48
Feedforward Accuracy = 45.83%
LSTM model tagged 42 queries correctly out of 48
LSTM Accuracy = 87.50%
Transformer model tagged 36 queries correctly out of 48
Transformer Accuracy = 75.00%
**************************************************
For i=1:
Linear model tagged 11 queries correctly out of 48
Linear Accuracy = 22.92%
Feedforward model tagged 13 queries correctly out of 48
Feedforward Accuracy = 27.08%
LSTM model tagged 39 queries correctly out of 48
LSTM Accuracy = 81.25%
Transformer model tagged 32 queries correctly out of 48
Transformer Accuracy = 66.67%
**************************************************
For i=2:
Linear model tagged 6 queries correctly out of 48
Linear Accuracy = 12.50%
Feedforward model tagged 10 queries correctly out of 48
Feedforward Accuracy = 20.83%
LSTM model tagged 39 queries correctly out of 48
LSTM Accuracy = 81.25

In [356]:
# Tabulate the per-round scores gathered in `change_results` by the typo run:
# one row per record, one column per record key.
import pandas as pd
typo_changes_df = pd.DataFrame(data=change_results)
typo_changes_df  # bare expression so the notebook displays the table

Unnamed: 0,changes,linear,ff,lstm,transformer
0,1,43.75,45.833333,87.5,75.0
1,2,22.916667,27.083333,81.25,66.666667
2,3,12.5,20.833333,81.25,62.5
3,4,10.416667,12.5,68.75,62.5
4,5,2.083333,6.25,66.666667,50.0
5,6,2.083333,2.083333,66.666667,56.25
6,7,2.083333,2.083333,66.666667,56.25
7,8,0.0,2.083333,64.583333,54.166667
8,9,2.083333,4.166667,70.833333,54.166667
9,10,2.083333,4.166667,66.666667,47.916667
