<a href="https://colab.research.google.com/github/dsubham/shubh/blob/master/Model-Tokenizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
from transformers import TapasTokenizer, TapasForQuestionAnswering
from transformers import PreTrainedTokenizerFast
import pandas as pd

# Example table in column-oriented form: one key per column, one list entry
# per row. Every cell value is written as a string.
data = {
    "Year": ["2023", "2022", "2023", "2022"],
    "Entity": ["C", "A", "D", "B"],
    "Current Value": ["87", "53", "61", "10"],
}

# Natural-language questions asked against the table above, in the order
# their answers are printed.
queries = [
    "Total number of Years for Entity B",
    "The Years for Entity A are",
    "The year with lowest Current Value is?",
    "Total of Current Values for Entity B",
    "Entities with Current Value greater than 10 are",
    "Entity with highest Current Value is",
    "Sum of Current Value",
]

def load_model_and_tokenizer(
    model_name="google/tapas-base-finetuned-wtq",
    tokenizer_dir="WTQ_Tokenizer",
    model_dir="WTQ_Model",
):
  """
    Load a TAPAS tokenizer and question-answering model and save local copies.

    Args:
      model_name: Identifier passed to both ``from_pretrained`` calls. The
        default is the TAPAS base checkpoint fine-tuned on WikiTable
        Questions. A directory previously written by ``save_pretrained``
        can be passed instead to load a saved copy.
      tokenizer_dir: Directory the tokenizer is saved to.
      model_dir: Directory the model is saved to.

    Returns:
      A ``(tokenizer, model)`` tuple.
  """
  # Load the tokenizer and keep a copy on disk.
  tokenizer = TapasTokenizer.from_pretrained(model_name)
  tokenizer.save_pretrained(tokenizer_dir)

  # Load the model and keep a copy on disk.
  model = TapasForQuestionAnswering.from_pretrained(model_name)
  model.save_pretrained(model_dir)

  return tokenizer, model


def prepare_inputs(data, queries, tokenizer):
  """
    Convert a column dictionary into a data frame and tokenize it with the queries.

    Args:
      data: Mapping of column name to a list of cell values.
      queries: List of question strings to ask against the table.
      tokenizer: Callable tokenizer accepting ``table``, ``queries``,
        ``padding`` and ``return_tensors`` keyword arguments.

    Returns:
      A ``(table, inputs)`` tuple: the data frame that was tokenized and the
      tokenizer's output.
  """
  # TAPAS tokenizers require every table cell to be a string, so cast the
  # whole frame up front; this is a no-op for data that is already text.
  table = pd.DataFrame.from_dict(data).astype(str)
  inputs = tokenizer(table=table, queries=queries, padding='max_length', return_tensors="pt")

  return table, inputs


def generate_predictions(inputs, model, tokenizer):
  """
    Run the model on tokenized inputs and decode its logits into predictions.

    Args:
      inputs: Tokenized inputs; unpacked as keyword arguments into ``model``.
      model: Callable whose result exposes ``logits`` and ``logits_aggregation``.
      tokenizer: Object providing ``convert_logits_to_predictions``.

    Returns:
      A ``(predicted_table_cell_coords, predicted_aggregation_operators)`` tuple
      as produced by the tokenizer.
  """
  # Forward pass
  model_outputs = model(**inputs)

  # Both logit outputs are detached before being handed to the tokenizer
  cell_logits = model_outputs.logits.detach()
  aggregation_logits = model_outputs.logits_aggregation.detach()

  # Decode into table cell coordinates and aggregation operator ids
  cell_coords, aggregation_ids = tokenizer.convert_logits_to_predictions(
      inputs, cell_logits, aggregation_logits
  )
  return cell_coords, aggregation_ids


def postprocess_predictions(predicted_aggregation_operators, predicted_table_cell_coords, table):
  """
    Name the predicted aggregation operators and collect the selected cell values.

    The aggregation is only labelled here, not applied: each answer is the
    text of the selected cells, not the result of SUM/AVERAGE/COUNT.

    Args:
      predicted_aggregation_operators: Iterable of operator ids (0-3), one per query.
      predicted_table_cell_coords: One list of ``(row, column)`` positions per query.
      table: Data frame the positions refer to; cells are read with ``table.iat``.

    Returns:
      A ``(aggregation_predictions_string, answers)`` tuple. ``answers`` holds
      one string per query: the selected cell values joined by ``", "``
      (an empty string when no cell was selected).

    Raises:
      KeyError: If an operator id is not one of 0, 1, 2, 3.
  """
  # Map operator ids to their names
  aggregation_operators = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3: "COUNT"}
  aggregation_predictions_string = [aggregation_operators[x] for x in predicted_aggregation_operators]

  # Every cell is converted with str() so that tables holding non-string
  # values (e.g. integers) do not break the join, and single-cell and
  # multi-cell answers are always the same type.
  answers = [
      ", ".join(str(table.iat[coordinate]) for coordinate in coordinates)
      for coordinates in predicted_table_cell_coords
  ]

  return aggregation_predictions_string, answers


def show_answers(queries, answers, aggregation_predictions_string):
  """
    Print each query followed by its predicted answer.

    When the predicted aggregation is not "NONE", its name is printed before
    the query. The three sequences are walked in lockstep with ``zip``, so
    output stops at the shortest one.

    Args:
      queries: Question strings.
      answers: Answer per query; formatted with an f-string, so non-string
        values are printed rather than raising ``TypeError``.
      aggregation_predictions_string: Aggregation operator name per query.
  """
  for query, answer, predicted_agg in zip(queries, answers, aggregation_predictions_string):
    print(query)
    if predicted_agg == "NONE":
      print(f"Predicted answer: {query}: {answer}")
    else:
      print(f"Predicted answer: {predicted_agg} > {query}: {answer}")


def run_tapas():
  """
    Run the whole TAPAS pipeline on the module-level ``data`` and ``queries``.

    Loads the tokenizer and model, tokenizes the table with the queries,
    generates and post-processes the predictions, prints the answers and
    finally shows the table.
  """
  tokenizer, model = load_model_and_tokenizer()
  table, inputs = prepare_inputs(data, queries, tokenizer)
  predicted_table_cell_coords, predicted_aggregation_operators = generate_predictions(inputs, model, tokenizer)
  aggregation_predictions_string, answers = postprocess_predictions(predicted_aggregation_operators, predicted_table_cell_coords, table)
  show_answers(queries, answers, aggregation_predictions_string)

  # `display` only exists when running under IPython/Jupyter; fall back to a
  # plain print so the script entry point does not end in a NameError.
  try:
    display(table)
  except NameError:
    print(table)


# Script entry point: run the pipeline only when executed directly.
if __name__ == "__main__":
  run_tapas()

Non-default generation parameters: {'temperature': 0.0352513}


Total number of Years for Entity B
Predicted answer: COUNT > Total number of Years for Entity B: 2022
The Years for Entity A are
Predicted answer: AVERAGE > The Years for Entity A are: 2022
The year with lowest Current Value is?
Predicted answer: The year with lowest Current Value is?: 2022
Total of Current Values for Entity B
Predicted answer: COUNT > Total of Current Values for Entity B: 10
Entities with Current Value greater than 10 are
Predicted answer: Entities with Current Value greater than 10 are: C, A, D
Entity with highest Current Value is
Predicted answer: Entity with highest Current Value is: C
Sum of Current Value
Predicted answer: SUM > Sum of Current Value: 87, 53, 61, 10


Unnamed: 0,Year,Entity,Current Value
0,2023,C,87
1,2022,A,53
2,2023,D,61
3,2022,B,10
