In [1]:
! pip install transformers
! pip install datasets
! pip install torch-scatter -f https://data.pyg.org/whl/torch-1.12.0+cu113.html # From torch.version.__version__

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-1.12.0+cu113.html


In [2]:
from transformers import TapasForQuestionAnswering, TapasTokenizer

# Both the tokenizer and the question-answering head are loaded from the same
# pretrained checkpoint so that vocabulary and weights agree.
model_name = "google/tapas-small-finetuned-wtq"
tokenizer = TapasTokenizer.from_pretrained(model_name)
model = TapasForQuestionAnswering.from_pretrained(model_name)

Downloading:   0%|          | 0.00/112M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/256k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/154 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/490 [00:00<?, ?B/s]

In [3]:
from prettytable import PrettyTable

def count_parameters(model):
    """Print a per-parameter size table for `model` and return the trainable total.

    Only entries of `model.named_parameters()` whose `requires_grad` flag is
    set are listed and counted; the others are skipped.

    Args:
        model: Object exposing `named_parameters()`, yielding `(name, parameter)`
            pairs where each parameter provides `requires_grad` and `numel()`.

    Returns:
        The summed element count of all listed parameters.
    """
    # Gather (name, element count) for every trainable parameter, in model order.
    trainable = [
        (label, tensor.numel())
        for label, tensor in model.named_parameters()
        if tensor.requires_grad
    ]

    summary = PrettyTable(["Modules", "Parameters"])
    for label, size in trainable:
        summary.add_row([label, size])

    grand_total = sum(size for _, size in trainable)
    print(summary)
    print(f"Total Trainable Params: {grand_total}")
    return grand_total

In [4]:
# Print the per-parameter table for the loaded model; the returned total is
# shown as this cell's output value.
count_parameters(model)

+---------------------------------------------------------+------------+
|                         Modules                         | Parameters |
+---------------------------------------------------------+------------+
|                      output_weights                     |    512     |
|                  column_output_weights                  |    512     |
|                       output_bias                       |     1      |
|                    column_output_bias                   |     1      |
|         tapas.embeddings.word_embeddings.weight         |  15627264  |
|       tapas.embeddings.position_embeddings.weight       |   262144   |
|     tapas.embeddings.token_type_embeddings_0.weight     |    1536    |
|     tapas.embeddings.token_type_embeddings_1.weight     |   131072   |
|     tapas.embeddings.token_type_embeddings_2.weight     |   131072   |
|     tapas.embeddings.token_type_embeddings_3.weight     |    1024    |
|     tapas.embeddings.token_type_embeddings_4.weig

29297670

In [5]:
import pandas as pd
import torch

# Demo table; all cell values (including the counts) are given as strings.
data = {"Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"], "Number of movies": ["87", "53", "69"]}
queries = [
    "What is the name of the first actor?",
    "How many movies has George Clooney played in?",
    "What is the total number of movies?",
]
table = pd.DataFrame.from_dict(data)
inputs = tokenizer(table=table, queries=queries, padding="max_length", return_tensors="pt")

# Inference only: no gradients are needed, so skip building the autograd graph.
with torch.no_grad():
    outputs = model(**inputs)

# Turn the raw logits into, per query, a list of selected cell coordinates and
# an aggregation-operator index.
predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
    inputs, outputs.logits.detach(), outputs.logits_aggregation.detach()
)

# let's print out the results:
id2aggregation = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3: "COUNT"}
aggregation_predictions_string = [id2aggregation[x] for x in predicted_aggregation_indices]

answers = []
for coordinates in predicted_answer_coordinates:
    if len(coordinates) == 1:
        # only a single cell:
        answers.append(table.iat[coordinates[0]])
    else:
        # multiple cells (or none): join the selected cell texts
        cell_values = [table.iat[coordinate] for coordinate in coordinates]
        answers.append(", ".join(cell_values))

display(table)
print("")
for query, answer, predicted_agg in zip(queries, answers, aggregation_predictions_string):
    print(query)
    # The aggregation operator is only reported next to the selected cells;
    # it is not applied to their values here.
    prefix = "" if predicted_agg == "NONE" else predicted_agg + " > "
    print("Predicted answer: " + prefix + answer)

Unnamed: 0,Actors,Number of movies
0,Brad Pitt,87
1,Leonardo Di Caprio,53
2,George Clooney,69



What is the name of the first actor?
Predicted answer: Brad Pitt
How many movies has George Clooney played in?
Predicted answer: SUM > 69
What is the total number of movies?
Predicted answer: SUM > 87, 53, 69


In [6]:
# Inspect the raw prediction: one list per query of (row, column) positions
# into `table` for the cells the model selected.
predicted_answer_coordinates

[[(0, 0)], [(2, 1)], [(0, 1), (1, 1), (2, 1)]]

In [7]:
# Inspect the raw prediction: one aggregation-operator index per query, which
# `id2aggregation` maps to a name (0 is "NONE", 1 is "SUM", ...).
predicted_aggregation_indices

[0, 1, 1]