In [1]:
import os

# Process-wide runtime configuration. This cell runs before `torch` and
# `transformers` are imported in the next cell.
gpu_index = '0'      # Index of the GPU made visible to this process.
os.environ.update({
    'CUDA_DEVICE_ORDER': 'PCI_BUS_ID',
    'CUDA_VISIBLE_DEVICES': gpu_index,
    'HF_HOME': '../huggingface_cache/',      # Cache directory for huggingface.
})

In [2]:
import torch
from transformers import AutoConfig

In [3]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device: torch.device
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [4]:
# Root folder for all exported weights; every encoder below gets its own
# sub-directory underneath it.
base_dir = './pretrained_weights'

#### ==================================================

### WinKawaks/vit-tiny-patch16-224

In [5]:
from transformers import ViTModel
from transformers.models.vit.modeling_vit import ViTLayer

# Hub id of the ViT checkpoint to export.
vit_enc_name = 'WinKawaks/vit-tiny-patch16-224'

# The warning printed below reports that `vit.pooler.dense.*` is newly
# initialised; the export cell further down does not save the pooler.
vit_enc = ViTModel.from_pretrained(pretrained_model_name_or_path=vit_enc_name)

Some weights of ViTModel were not initialized from the model checkpoint at WinKawaks/vit-tiny-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
# Export into `<base_dir>/<repo name>`, where the repo name is the part of
# the hub id after the last '/'.
vit_weight_dir = os.path.join(base_dir, vit_enc_name.rsplit('/', 1)[-1])
os.makedirs(vit_weight_dir, exist_ok=True)    # No error if the folder already exists.
vit_weight_dir

'./pretrained_weights/vit-tiny-patch16-224'

In [7]:
# Place the model on the selected device, then end the cell with the bare
# name so the notebook prints the module tree.
vit_enc = vit_enc.to(device=device)
vit_enc

ViTModel(
  (embeddings): ViTEmbeddings(
    (patch_embeddings): ViTPatchEmbeddings(
      (projection): Conv2d(3, 192, kernel_size=(16, 16), stride=(16, 16))
    )
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): ViTEncoder(
    (layer): ModuleList(
      (0-11): 12 x ViTLayer(
        (attention): ViTSdpaAttention(
          (attention): ViTSdpaSelfAttention(
            (query): Linear(in_features=192, out_features=192, bias=True)
            (key): Linear(in_features=192, out_features=192, bias=True)
            (value): Linear(in_features=192, out_features=192, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
          (output): ViTSelfOutput(
            (dense): Linear(in_features=192, out_features=192, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
        )
        (intermediate): ViTIntermediate(
          (dense): Linear(in_features=192, out_features=768, bias=True)
          (intermediate_act_fn): GELUActi

In [10]:
# Save the weights separately for later use: one checkpoint file for the
# embeddings, one per encoder layer, and one for the final layernorm.
# NOTE(review): the state dicts are saved from whatever device the model is
# on at this point; loading on a CPU-only machine may need `map_location` -
# confirm against the loading code.
torch.save(
    vit_enc.embeddings.state_dict(),
    os.path.join(vit_weight_dir, 'vit_embeddings.pth'),
)

# Layer files are numbered by their position in `encoder.layer`.
for idx, layer in enumerate(vit_enc.encoder.layer):
    torch.save(
        layer.state_dict(),
        os.path.join(vit_weight_dir, 'vit_layer_{}.pth'.format(idx)),
    )

torch.save(
    vit_enc.layernorm.state_dict(),
    os.path.join(vit_weight_dir, 'vit_layernorm.pth'),
)

#### ==================================================

### microsoft/resnet-18

In [19]:
from transformers import ResNetModel

# Hub id of the ResNet-18 checkpoint to export.
resnet18_enc_name = 'microsoft/resnet-18'
resnet18_enc = ResNetModel.from_pretrained(pretrained_model_name_or_path=resnet18_enc_name)

In [20]:
# Output folder is named after the repo part of the hub id (text after the last '/').
resnet18_weight_dir = os.path.join(base_dir, resnet18_enc_name.rpartition('/')[2])
os.makedirs(resnet18_weight_dir, exist_ok=True)    # Safe to re-run: an existing folder is fine.
resnet18_weight_dir

'./pretrained_weights/resnet-18'

In [21]:
# Move ResNet-18 to the selected device and display its module tree.
resnet18_enc = resnet18_enc.to(device=device)
resnet18_enc

ResNetModel(
  (embedder): ResNetEmbeddings(
    (embedder): ResNetConvLayer(
      (convolution): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation): ReLU()
    )
    (pooler): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (encoder): ResNetEncoder(
    (stages): ModuleList(
      (0): ResNetStage(
        (layers): Sequential(
          (0): ResNetBasicLayer(
            (shortcut): Identity()
            (layer): Sequential(
              (0): ResNetConvLayer(
                (convolution): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (activation): ReLU()
              )
              (1): ResNetConvLayer(
                (convolution): Conv2d(6

In [26]:
# Save the weights separately for later use: the embedder goes into one
# file and every encoder stage into its own numbered file.
torch.save(
    resnet18_enc.embedder.state_dict(),
    os.path.join(resnet18_weight_dir, 'embedder.pth'),
)

# `layer` is one entry of `encoder.stages`; files are numbered by stage index.
for idx, layer in enumerate(resnet18_enc.encoder.stages):
    torch.save(
        layer.state_dict(),
        os.path.join(resnet18_weight_dir, 'stage_{}.pth'.format(idx)),
    )

#### ==================================================

### microsoft/resnet-34

In [11]:
from transformers import ResNetModel

# Hub id of the ResNet-34 checkpoint to export.
resnet34_enc_name = 'microsoft/resnet-34'
resnet34_enc = ResNetModel.from_pretrained(pretrained_model_name_or_path=resnet34_enc_name)

In [12]:
# Output folder: `<base_dir>/<text after the last '/' in the hub id>`.
resnet34_weight_dir = os.path.join(base_dir, resnet34_enc_name.rsplit('/', 1)[-1])
os.makedirs(resnet34_weight_dir, exist_ok=True)    # Existing folder is not an error.
resnet34_weight_dir

'./pretrained_weights/resnet-34'

In [13]:
# Display the module tree.
# NOTE(review): unlike the other encoder sections in this notebook, the model
# is not moved to `device` here before it is displayed and exported.
resnet34_enc

ResNetModel(
  (embedder): ResNetEmbeddings(
    (embedder): ResNetConvLayer(
      (convolution): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation): ReLU()
    )
    (pooler): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (encoder): ResNetEncoder(
    (stages): ModuleList(
      (0): ResNetStage(
        (layers): Sequential(
          (0): ResNetBasicLayer(
            (shortcut): Identity()
            (layer): Sequential(
              (0): ResNetConvLayer(
                (convolution): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (activation): ReLU()
              )
              (1): ResNetConvLayer(
                (convolution): Conv2d(6

In [14]:
# Save the weights separately for later use: one file for the embedder and
# one numbered file per encoder stage.
torch.save(
    resnet34_enc.embedder.state_dict(),
    os.path.join(resnet34_weight_dir, 'embedder.pth'),
)

# `layer` is one entry of `encoder.stages`; files are numbered by stage index.
for idx, layer in enumerate(resnet34_enc.encoder.stages):
    torch.save(
        layer.state_dict(),
        os.path.join(resnet34_weight_dir, 'stage_{}.pth'.format(idx)),
    )

#### ==================================================

### distilbert-base-uncased

In [11]:
from transformers import DistilBertModel, DistilBertTokenizer
from transformers.models.distilbert.modeling_distilbert import TransformerBlock

# Hub id shared by the tokenizer and the encoder.
distilbert_enc_name = 'distilbert-base-uncased'

# NOTE(review): the tokenizer is loaded here but is not used by the export
# cell below - confirm whether later code needs it.
distilbert_tokenizer = DistilBertTokenizer.from_pretrained(
    pretrained_model_name_or_path=distilbert_enc_name,
)
distilbert_enc = DistilBertModel.from_pretrained(
    pretrained_model_name_or_path=distilbert_enc_name,
)

In [12]:
# This hub id contains no '/', so the whole id becomes the folder name.
distilbert_weight_dir = os.path.join(base_dir, distilbert_enc_name.rsplit('/', 1)[-1])
os.makedirs(distilbert_weight_dir, exist_ok=True)    # Safe to re-run.
distilbert_weight_dir

'./pretrained_weights/distilbert-base-uncased'

In [13]:
# Move DistilBERT to the selected device and display its module tree.
distilbert_enc = distilbert_enc.to(device=device)
distilbert_enc

DistilBertModel(
  (embeddings): Embeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer): Transformer(
    (layer): ModuleList(
      (0-5): 6 x TransformerBlock(
        (attention): MultiHeadSelfAttention(
          (dropout): Dropout(p=0.1, inplace=False)
          (q_lin): Linear(in_features=768, out_features=768, bias=True)
          (k_lin): Linear(in_features=768, out_features=768, bias=True)
          (v_lin): Linear(in_features=768, out_features=768, bias=True)
          (out_lin): Linear(in_features=768, out_features=768, bias=True)
        )
        (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (ffn): FFN(
          (dropout): Dropout(p=0.1, inplace=False)
          (lin1): Linear(in_features=768, out_features=3072, bias=True)
          (lin2): Li

In [14]:
# Save the weights separately for later use: the embeddings go into one
# file and each transformer block into its own numbered file.
torch.save(
    distilbert_enc.embeddings.state_dict(),
    os.path.join(distilbert_weight_dir, 'distilbert_embeddings.pth'),
)

# Files are numbered by the block's position in `transformer.layer`.
for idx, layer in enumerate(distilbert_enc.transformer.layer):
    torch.save(
        layer.state_dict(),
        os.path.join(distilbert_weight_dir, 'distilbert_layer_{}.pth'.format(idx)),
    )

#### ==================================================

### google-bert/bert-base-uncased

In [5]:
from transformers import BertModel

# Hub id of the BERT checkpoint to export.
bert_enc_name = 'google-bert/bert-base-uncased'
bert_enc = BertModel.from_pretrained(pretrained_model_name_or_path=bert_enc_name)

In [6]:
# Output folder is named after the repo part of the hub id (text after the last '/').
bert_weight_dir = os.path.join(base_dir, bert_enc_name.rpartition('/')[2])
os.makedirs(bert_weight_dir, exist_ok=True)    # Existing folder is not an error.
bert_weight_dir

'./pretrained_weights/bert-base-uncased'

In [7]:
# Move BERT to the selected device and display its module tree.
bert_enc = bert_enc.to(device=device)
bert_enc

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          

In [8]:
# Save the weights separately for later use: one file for the embeddings
# and one numbered file per encoder layer.
torch.save(
    bert_enc.embeddings.state_dict(),
    os.path.join(bert_weight_dir, 'bert_embeddings.pth'),
)

# Files are numbered by the layer's position in `encoder.layer`.
for idx, layer in enumerate(bert_enc.encoder.layer):
    torch.save(
        layer.state_dict(),
        os.path.join(bert_weight_dir, 'bert_layer_{}.pth'.format(idx)),
    )

#### ==================================================

### google/mobilebert-uncased

In [13]:
from transformers import MobileBertModel

# Hub id of the MobileBERT checkpoint to export.
mobilebert_enc_name = 'google/mobilebert-uncased'
mobilebert_enc = MobileBertModel.from_pretrained(pretrained_model_name_or_path=mobilebert_enc_name)

In [14]:
# Output folder: `<base_dir>/<text after the last '/' in the hub id>`.
mobilebert_weight_dir = os.path.join(base_dir, mobilebert_enc_name.rsplit('/', 1)[-1])
os.makedirs(mobilebert_weight_dir, exist_ok=True)    # Safe to re-run.
mobilebert_weight_dir

'./pretrained_weights/mobilebert-uncased'

In [15]:
# Move MobileBERT to the selected device and display its module tree.
mobilebert_enc = mobilebert_enc.to(device=device)
mobilebert_enc

MobileBertModel(
  (embeddings): MobileBertEmbeddings(
    (word_embeddings): Embedding(30522, 128, padding_idx=0)
    (position_embeddings): Embedding(512, 512)
    (token_type_embeddings): Embedding(2, 512)
    (embedding_transformation): Linear(in_features=384, out_features=512, bias=True)
    (LayerNorm): NoNorm()
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): MobileBertEncoder(
    (layer): ModuleList(
      (0): MobileBertLayer(
        (attention): MobileBertAttention(
          (self): MobileBertSelfAttention(
            (query): Linear(in_features=128, out_features=128, bias=True)
            (key): Linear(in_features=128, out_features=128, bias=True)
            (value): Linear(in_features=512, out_features=128, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): MobileBertSelfOutput(
            (dense): Linear(in_features=128, out_features=128, bias=True)
            (LayerNorm): NoNorm()
          )
        )
    

In [18]:
# Save the weights separately for later use: one file for the embeddings
# and one numbered file per encoder layer.
torch.save(
    mobilebert_enc.embeddings.state_dict(),
    os.path.join(mobilebert_weight_dir, 'mobilebert_embeddings.pth'),
)

# Files are numbered by the layer's position in `encoder.layer`.
for idx, layer in enumerate(mobilebert_enc.encoder.layer):
    torch.save(
        layer.state_dict(),
        os.path.join(mobilebert_weight_dir, 'mobilebert_layer_{}.pth'.format(idx)),
    )

#### ==================================================

### MIT/ast-finetuned-audioset-10-10-0.4593

In [5]:
from transformers import ASTModel

# Hub id of the AST checkpoint to export. The disabled alternative id is
# kept here for reference:
#   'xpariz10/ast-finetuned-audioset-10-10-0.4593-finetuning-ESC-50-slower-LR'
ast_enc_name = 'MIT/ast-finetuned-audioset-10-10-0.4593'
ast_enc = ASTModel.from_pretrained(pretrained_model_name_or_path=ast_enc_name)

In [6]:
# Output folder is named after the repo part of the hub id (text after the last '/').
ast_weight_dir = os.path.join(base_dir, ast_enc_name.rpartition('/')[2])
os.makedirs(ast_weight_dir, exist_ok=True)    # Existing folder is not an error.
ast_weight_dir

'./pretrained_weights/ast-finetuned-audioset-10-10-0.4593'

In [None]:
# Move the AST model to the selected device and display its module tree.
ast_enc = ast_enc.to(device=device)
ast_enc

In [7]:
# Save the weights separately for later use: one checkpoint file for the
# embeddings, one per encoder layer, and one for the final layernorm.
torch.save(
    ast_enc.embeddings.state_dict(),
    os.path.join(ast_weight_dir, 'ast_embeddings.pth'),
)

# Files are numbered by the layer's position in `encoder.layer`.
for idx, layer in enumerate(ast_enc.encoder.layer):
    torch.save(
        layer.state_dict(),
        os.path.join(ast_weight_dir, 'ast_layer_{}.pth'.format(idx)),
    )

torch.save(
    ast_enc.layernorm.state_dict(),
    os.path.join(ast_weight_dir, 'ast_layernorm.pth'),
)

#### ==================================================

### bookbot/distil-ast-audioset

In [5]:
from transformers import ASTModel

# Hub id of the distilled AST checkpoint to export.
# NOTE(review): this reuses the `ast_enc_name` / `ast_enc` names that the
# MIT AST section also assigns, so whichever section runs last wins.
ast_enc_name = 'bookbot/distil-ast-audioset'
ast_enc = ASTModel.from_pretrained(pretrained_model_name_or_path=ast_enc_name)

In [6]:
# Output folder: `<base_dir>/<text after the last '/' in the hub id>`.
ast_weight_dir = os.path.join(base_dir, ast_enc_name.rsplit('/', 1)[-1])
os.makedirs(ast_weight_dir, exist_ok=True)    # Safe to re-run.
ast_weight_dir

'./pretrained_weights/distil-ast-audioset'

In [7]:
# Move the distilled AST model to the selected device and display its module tree.
ast_enc = ast_enc.to(device=device)
ast_enc

ASTModel(
  (embeddings): ASTEmbeddings(
    (patch_embeddings): ASTPatchEmbeddings(
      (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
    )
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): ASTEncoder(
    (layer): ModuleList(
      (0): ASTLayer(
        (attention): ASTAttention(
          (attention): ASTSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
          (output): ASTSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
        )
        (intermediate): ASTIntermediate(
          (dense): Linear(in_features=768, out_features=3072, bias=True)
          (intermediate_act_fn): GELUActivation()
      

In [8]:
# Save the weights separately for later use: one checkpoint file for the
# embeddings, one per encoder layer, and one for the final layernorm.
torch.save(
    ast_enc.embeddings.state_dict(),
    os.path.join(ast_weight_dir, 'ast_embeddings.pth'),
)

# Files are numbered by the layer's position in `encoder.layer`.
for idx, layer in enumerate(ast_enc.encoder.layer):
    torch.save(
        layer.state_dict(),
        os.path.join(ast_weight_dir, 'ast_layer_{}.pth'.format(idx)),
    )

torch.save(
    ast_enc.layernorm.state_dict(),
    os.path.join(ast_weight_dir, 'ast_layernorm.pth'),
)

#### ==================================================

### MIT/ast-finetuned-audioset-10-10-0.4593

In [15]:
from transformers import ASTModel

# Hub id of the AST checkpoint to export.
ast_enc_name = 'MIT/ast-finetuned-audioset-10-10-0.4593'
ast_enc = ASTModel.from_pretrained(pretrained_model_name_or_path=ast_enc_name)

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

In [16]:
# Output folder is named after the repo part of the hub id (text after the last '/').
ast_weight_dir = os.path.join(base_dir, ast_enc_name.rpartition('/')[2])
os.makedirs(ast_weight_dir, exist_ok=True)    # Existing folder is not an error.
ast_weight_dir

'./pretrained_weights/ast-finetuned-audioset-10-10-0.4593'

In [17]:
# Save the weights separately for later use: one checkpoint file for the
# embeddings, one per encoder layer, and one for the final layernorm.
# Unlike the other AST sections, no `.to(device)` cell precedes this export.
torch.save(
    ast_enc.embeddings.state_dict(),
    os.path.join(ast_weight_dir, 'ast_embeddings.pth'),
)

# Files are numbered by the layer's position in `encoder.layer`.
for idx, layer in enumerate(ast_enc.encoder.layer):
    torch.save(
        layer.state_dict(),
        os.path.join(ast_weight_dir, 'ast_layer_{}.pth'.format(idx)),
    )

torch.save(
    ast_enc.layernorm.state_dict(),
    os.path.join(ast_weight_dir, 'ast_layernorm.pth'),
)