In [140]:
# Scratch cell: emits a fixed greeting (presumably a kernel smoke test).
print("hello, world!")

hello, world!


In [141]:
import os

import datasets
from transformers import T5ForConditionalGeneration, T5Tokenizer
from tqdm import tqdm

In [142]:
# --- Locations on disk -------------------------------------------------------
k_model_dir = "model/"            # parent directory; every sub-directory is treated as a checkpoint
k_test_file = "data/test.source"  # one input example per line
k_test_path = "test/"             # directory that receives one prediction file per model

# --- Model / tokenizer identifiers -------------------------------------------
k_base_model = "mrm8488/t5-base-finetuned-wikiSQL"
k_base_model_name = "t5-base-finetuned-wikiSQL"
k_base_tokenizer = "mrm8488/t5-base-finetuned-wikiSQL"
k_tune_tokenizer = "tokenizer/spider_original-01"

# Read the inputs with an explicit encoding so the result does not depend on the
# platform's default locale. Each entry keeps its trailing newline (readlines()).
with open(k_test_file, "r", encoding="utf-8") as file:
    k_test = file.readlines()

# os.listdir() returns entries in arbitrary order; sort so that checkpoints are
# always evaluated (and logged) in the same, reproducible order.
k_model_dirs = sorted(
    f for f in os.listdir(k_model_dir) if os.path.isdir(os.path.join(k_model_dir, f))
)
k_model_paths = [os.path.join(k_model_dir, model_dir) for model_dir in k_model_dirs]

In [161]:
def process_in_batches(model, tokenizer, test_data, batch_size):
    """Generate one decoded prediction per input string, processing in batches.

    Args:
        model: Object exposing ``generate(input_ids, attention_mask=..., ...)``.
            If it has a ``device`` attribute, the encoded inputs are moved to
            that device before generation.
        tokenizer: Callable that encodes a batch of strings and exposes
            ``decode(ids, skip_special_tokens=...)``.
        test_data: Sliceable sequence of input strings.
        batch_size: Number of inputs passed to each ``generate`` call; must be > 0.

    Returns:
        A list with one decoded string per input, in input order. Empty when
        ``test_data`` is empty.

    Raises:
        ValueError: If ``batch_size`` is zero or negative.
    """
    # A negative size would otherwise yield an empty range and silently return
    # no predictions; zero would fail with an unrelated ZeroDivisionError.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # The encoded tensors must live on the same device as the model weights.
    device = getattr(model, "device", None)

    predictions = []
    # Stepping by batch_size covers the trailing partial batch without any
    # manual ceil-division bookkeeping; range has a length, so tqdm still
    # shows the total number of batches.
    batch_starts = range(0, len(test_data), batch_size)
    for start in tqdm(batch_starts, desc="Processing", unit="batch"):
        batch = test_data[start:start + batch_size]
        inputs = tokenizer(batch, padding=True, truncation=True, return_tensors="pt", max_length=512)
        if device is not None:
            inputs = inputs.to(device)
        output_ids = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_length=512,
            no_repeat_ngram_size=2,
        )
        predictions.extend(tokenizer.decode(g, skip_special_tokens=True) for g in output_ids)
    return predictions

def test_model(test_data, model_path, tokenizer_path, test_path, model_name, batch_size, mock=False): 

    if mock: 
        write_path = f"{test_path}{model_name}.txt"
        print(f"model: {model_path}, tokenizer: {tokenizer_path}, test_path: {test_path}, model_name: {model_name}, write_path: {write_path}")
    else: 
        print(f"loading model from path: {model_path}")
        model = T5ForConditionalGeneration.from_pretrained(model_path)
        tokenizer = T5Tokenizer.from_pretrained(tokenizer_path)
        predictions = process_in_batches(model, tokenizer, test_data, batch_size)
    
        with open(f"{test_path}{model_name}.txt", "w") as file: 
            for pred in predictions:
                file.write(pred + '\n')

In [164]:
# When true, test_model only prints what it would do instead of running inference.
mock = False

# Baseline: the published checkpoint evaluated with its own tokenizer.
test_model(
    test_data=k_test,
    model_path=k_base_model,
    tokenizer_path=k_base_tokenizer,
    test_path=k_test_path,
    model_name=k_base_model_name,
    batch_size=8,
    mock=mock,
)

# Every checkpoint directory found under k_model_dir, all evaluated with the
# shared k_tune_tokenizer. k_model_dirs and k_model_paths are index-aligned.
for model_name, model_path in zip(k_model_dirs, k_model_paths):
    test_model(
        test_data=k_test,
        model_path=model_path,
        tokenizer_path=k_tune_tokenizer,
        test_path=k_test_path,
        model_name=model_name,
        batch_size=8,
        mock=mock,
    )

The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


loading model from path: mrm8488/t5-base-finetuned-wikiSQL


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Processing: 100%|██████████| 109/109 [09:27<00:00,  5.20s/batch]


loading model from path: model/spider_original-01


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:17<00:00,  2.92s/batch]


loading model from path: model/spider_original-03


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:34<00:00,  3.07s/batch]


loading model from path: model/spider_original-05


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [06:06<00:00,  3.36s/batch]


loading model from path: model/spider_original-07


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:31<00:00,  3.04s/batch]


loading model from path: model/spider_original-09


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:40<00:00,  3.13s/batch]


loading model from path: model/spider_original-11


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [06:15<00:00,  3.45s/batch]


loading model from path: model/spider_original-13


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [06:25<00:00,  3.54s/batch]


loading model from path: model/spider_original-15


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [06:15<00:00,  3.44s/batch]


loading model from path: model/spider_original-17


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:51<00:00,  3.22s/batch]


loading model from path: model/spider_original-19


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [06:06<00:00,  3.37s/batch]


loading model from path: model/spider_original-21


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:44<00:00,  3.16s/batch]


loading model from path: model/spider_original-23


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:38<00:00,  3.11s/batch]


loading model from path: model/spider_original-25


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [06:02<00:00,  3.32s/batch]


loading model from path: model/spider_original-27


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:32<00:00,  3.05s/batch]


loading model from path: model/spider_original-29


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:36<00:00,  3.09s/batch]


loading model from path: model/spider_original-31


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:47<00:00,  3.19s/batch]


loading model from path: model/spider_original-33


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [06:21<00:00,  3.50s/batch]


loading model from path: model/spider_original-35


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:42<00:00,  3.14s/batch]


loading model from path: model/spider_original-37


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:53<00:00,  3.25s/batch]


loading model from path: model/spider_original-39


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:48<00:00,  3.20s/batch]


loading model from path: model/spider_original-41


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:55<00:00,  3.26s/batch]


loading model from path: model/spider_original-43


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:43<00:00,  3.15s/batch]


loading model from path: model/spider_original-45


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:47<00:00,  3.19s/batch]


loading model from path: model/spider_original-47


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:54<00:00,  3.25s/batch]


loading model from path: model/spider_original-49


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:56<00:00,  3.27s/batch]


loading model from path: model/synthetic_joint-01


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:25<00:00,  2.99s/batch]


loading model from path: model/synthetic_joint-03


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:31<00:00,  3.04s/batch]


loading model from path: model/synthetic_joint-05


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:28<00:00,  3.02s/batch]


loading model from path: model/synthetic_joint-07


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:08<00:00,  2.83s/batch]


loading model from path: model/synthetic_joint-09


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:41<00:00,  3.13s/batch]


loading model from path: model/synthetic_joint-11


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:46<00:00,  3.18s/batch]


loading model from path: model/synthetic_joint-13


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:30<00:00,  3.03s/batch]


loading model from path: model/synthetic_joint-15


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:25<00:00,  2.99s/batch]


loading model from path: model/synthetic_joint-17


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:05<00:00,  2.80s/batch]


loading model from path: model/synthetic_joint-19


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:25<00:00,  2.99s/batch]


loading model from path: model/synthetic_joint-21


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:52<00:00,  3.23s/batch]


loading model from path: model/synthetic_joint-23


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [04:59<00:00,  2.75s/batch]


loading model from path: model/synthetic_joint-25


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:09<00:00,  2.84s/batch]


loading model from path: model/synthetic_joint-27


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:01<00:00,  2.77s/batch]


loading model from path: model/synthetic_joint-29


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:12<00:00,  2.86s/batch]


loading model from path: model/synthetic_joint-31


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:27<00:00,  3.01s/batch]


loading model from path: model/synthetic_joint-33


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:01<00:00,  2.77s/batch]


loading model from path: model/synthetic_joint-35


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:17<00:00,  2.92s/batch]


loading model from path: model/synthetic_joint-37


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:14<00:00,  2.89s/batch]


loading model from path: model/synthetic_joint-39


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [04:58<00:00,  2.74s/batch]


loading model from path: model/synthetic_joint-41


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:11<00:00,  2.85s/batch]


loading model from path: model/synthetic_joint-43


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:04<00:00,  2.79s/batch]


loading model from path: model/synthetic_joint-45


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:10<00:00,  2.85s/batch]


loading model from path: model/synthetic_joint-47


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:29<00:00,  3.02s/batch]


loading model from path: model/synthetic_joint-49


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processing: 100%|██████████| 109/109 [05:26<00:00,  3.00s/batch]


In [160]:
from transformers import T5ForConditionalGeneration
import torch


# Instantiate the two saved checkpoints that will be compared below
# (loaded in this order: synthetic_joint-01, then synthetic_joint-49).
model1, model2 = (
    T5ForConditionalGeneration.from_pretrained(checkpoint_dir)
    for checkpoint_dir in ("model/synthetic_joint-01", "model/synthetic_joint-49")
)

# Function to compare two models
def are_models_identical(model1, model2):
    """Return True when both models hold exactly the same state.

    Prints the number of ``state_dict`` entries of each model, then compares
    the two state dicts entry by entry with ``torch.equal``. The comparison
    stops at the first differing tensor, printing ``mismatch`` once.

    Args:
        model1: Object exposing ``state_dict()`` that maps names to tensors.
        model2: Object exposing ``state_dict()`` that maps names to tensors.

    Returns:
        False if the two state dicts have different key sets or if any tensor
        differs; True otherwise.
    """
    model1_dict = model1.state_dict()
    model2_dict = model2.state_dict()
    print(len(model1_dict))
    print(len(model2_dict))

    # Different sets of parameter names: the models cannot be identical.
    if set(model1_dict.keys()) != set(model2_dict.keys()):
        return False

    # Short-circuit on the first difference; no mismatch counter is kept
    # because the function returns as soon as one is found.
    for param, tensor in model1_dict.items():
        if not torch.equal(tensor, model2_dict[param]):
            print('mismatch')
            return False

    return True

# Compare the two loaded checkpoints and report the outcome in one sentence.
are_same = are_models_identical(model1, model2)

verdict = "the same" if are_same else "different"
print(f"The models are {verdict}.")


260
260
mismatch
The models are different.
