In [11]:
import os
from torch import nn
from transformers import AutoModel

import warnings
warnings.filterwarnings("ignore")

In [12]:
# Relative path of the directory whose entries are scanned for model folders.
ALL_MODELS_FOLDER = "../models/"

In [13]:
def load_model_from_checkpoint(model_folder: str) -> str:
    """Return the path of the checkpoint with the highest step number.

    Only sub-directories named ``checkpoint-<integer>`` are considered, so a
    stray entry such as ``checkpoint-tmp`` or a plain file with a matching
    name is skipped instead of breaking the numeric comparison.

    Args:
        model_folder (str): model folder which may contain multiple checkpoints.

    Returns:
        str: path (``model_folder`` joined with the entry name) of the
        checkpoint directory with the highest step number.

    Raises:
        FileNotFoundError: if ``model_folder`` contains no usable checkpoint.
    """
    steps_by_name = {}
    for entry in os.listdir(model_folder):
        if not entry.startswith("checkpoint-"):
            continue
        # The step number is whatever follows the last dash.
        suffix = entry.split("-")[-1]
        # isdecimal() accepts exactly the digit strings int() can parse.
        if not suffix.isdecimal():
            continue
        if not os.path.isdir(os.path.join(model_folder, entry)):
            continue
        steps_by_name[entry] = int(suffix)

    if not steps_by_name:
        # Raise instead of calling exit(): terminating the interpreter from a
        # helper hides the cause and takes the whole session down with it.
        raise FileNotFoundError(f"No checkpoints found in {model_folder}")

    # Find the checkpoint with the highest step number
    latest = max(steps_by_name, key=steps_by_name.get)
    return os.path.join(model_folder, latest)


def print_num_params(model) -> str:
    """
    Prints the total number of parameters in the model in millions.

    Args:
        model: The model whose parameters are to be counted. It must expose
            ``parameters()`` yielding objects that provide ``numel()``.

    Returns:
        str: the count in millions with two decimals and an ``M`` suffix
        (e.g. ``"7.95M"``), which is also the value that gets printed.
    """
    total = sum(p.numel() for p in model.parameters())
    formatted = f"{total / 1e6:.2f}M"
    # The function previously only returned the string, so callers that
    # ignored the return value showed nothing despite the function's name.
    print(f"Number of parameters: {formatted}")
    return formatted


def process_model_folders(base_dir: str) -> dict:
    """
    Loads the highest checkpoint of every model folder and collects its parameter count.

    An entry of ``base_dir`` is treated as a model folder when it is a
    directory whose name ends in a digit (e.g. ``model_es_BPE_vs10000``).

    Args:
        base_dir (str): The base directory containing model folders.

    Returns:
        dict: maps each processed folder name to the value returned by
        ``print_num_params`` for the model loaded from that folder.
    """
    num_params = {}
    # Sorted so the processing order does not depend on the file system.
    for folder_name in sorted(os.listdir(base_dir)):
        if not folder_name[-1].isdigit():
            continue

        model_folder = os.path.join(base_dir, folder_name)
        # A plain file whose name ends in a digit is not a model folder.
        if not os.path.isdir(model_folder):
            continue

        checkpoint_dir = load_model_from_checkpoint(model_folder)
        model = AutoModel.from_pretrained(checkpoint_dir)

        # Record the parameter count instead of discarding it, so the
        # declared dict return value is actually populated.
        num_params[folder_name] = print_num_params(model)
        print(f"Processed model from folder: {model_folder}")

    return num_params

In [14]:
# Process every model folder found under ALL_MODELS_FOLDER.
process_model_folders(ALL_MODELS_FOLDER)

Some weights of BertModel were not initialized from the model checkpoint at ../models/model_es_BPE_vs10000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 7.95M
Processed model from folder: ../models/model_es_BPE_vs10000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_es_BPE_vs20000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 11.07M
Processed model from folder: ../models/model_es_BPE_vs20000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_es_BPE_vs30000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 14.19M
Processed model from folder: ../models/model_es_BPE_vs30000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_es_BPE_vs40000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 17.31M
Processed model from folder: ../models/model_es_BPE_vs40000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_es_Unigram_vs10000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 7.95M
Processed model from folder: ../models/model_es_Unigram_vs10000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_es_Unigram_vs20000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 11.07M
Processed model from folder: ../models/model_es_Unigram_vs20000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_es_Unigram_vs30000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 14.19M
Processed model from folder: ../models/model_es_Unigram_vs30000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_es_Unigram_vs40000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 17.31M
Processed model from folder: ../models/model_es_Unigram_vs40000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_es_Wordpiece_vs10000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 7.95M
Processed model from folder: ../models/model_es_Wordpiece_vs10000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_es_Wordpiece_vs20000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 11.07M
Processed model from folder: ../models/model_es_Wordpiece_vs20000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_es_Wordpiece_vs30000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 14.19M
Processed model from folder: ../models/model_es_Wordpiece_vs30000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_es_Wordpiece_vs40000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 17.31M
Processed model from folder: ../models/model_es_Wordpiece_vs40000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_tr_BPE_vs10000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 7.95M
Processed model from folder: ../models/model_tr_BPE_vs10000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_tr_BPE_vs20000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 11.07M
Processed model from folder: ../models/model_tr_BPE_vs20000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_tr_BPE_vs30000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 14.19M
Processed model from folder: ../models/model_tr_BPE_vs30000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_tr_BPE_vs40000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertModel were not initialized from the model checkpoint at ../models/model_tr_Unigram_vs10000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 17.31M
Processed model from folder: ../models/model_tr_BPE_vs40000
Number of parameters: 7.95M
Processed model from folder: ../models/model_tr_Unigram_vs10000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_tr_Unigram_vs20000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 11.07M
Processed model from folder: ../models/model_tr_Unigram_vs20000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_tr_Unigram_vs30000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 14.19M
Processed model from folder: ../models/model_tr_Unigram_vs30000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_tr_Unigram_vs40000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 17.31M
Processed model from folder: ../models/model_tr_Unigram_vs40000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_tr_Wordpiece_vs10000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 7.95M
Processed model from folder: ../models/model_tr_Wordpiece_vs10000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_tr_Wordpiece_vs20000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 11.07M
Processed model from folder: ../models/model_tr_Wordpiece_vs20000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_tr_Wordpiece_vs30000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 14.19M
Processed model from folder: ../models/model_tr_Wordpiece_vs30000


Some weights of BertModel were not initialized from the model checkpoint at ../models/model_tr_Wordpiece_vs40000\checkpoint-583 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Number of parameters: 17.31M
Processed model from folder: ../models/model_tr_Wordpiece_vs40000
