In [1]:
import torch
from torch import nn
from transformers import AutoModel, AutoConfig
from pooling import *

In [2]:
class CustomModel(nn.Module):
    """Transformer backbone followed by a pooling layer and a 6-output linear head.

    Args:
        model: Model name or path handed to ``AutoConfig.from_pretrained`` /
            ``AutoModel.from_pretrained``.
        pooling_type: One of ``'MeanPooling'``, ``'WeightedLayerPooling'``,
            ``'LSTMPooling'`` or ``'GRUPooling'``.
        hidden_size: Width of the recurrent pooling output. Required for
            ``'LSTMPooling'`` / ``'GRUPooling'``, ignored otherwise.
        config_path: Optional path to a config object saved with ``torch.save``.
            When ``None`` the config is fetched via ``AutoConfig`` and all
            dropout settings are zeroed.
        pretrained: If true, load backbone weights with
            ``AutoModel.from_pretrained``; otherwise build the backbone from
            the config only.

    Raises:
        ValueError: If ``pooling_type`` is unknown, or if a recurrent pooling
            type is requested without ``hidden_size``.
    """

    # Pooling types whose output width is ``hidden_size`` rather than the
    # backbone's ``config.hidden_size``.
    _RECURRENT_POOLING = ('LSTMPooling', 'GRUPooling')
    _SUPPORTED_POOLING = ('MeanPooling', 'WeightedLayerPooling') + _RECURRENT_POOLING

    def __init__(self, model, pooling_type, hidden_size=None, config_path=None, pretrained=False):
        super().__init__()

        # Validate first so a typo fails immediately instead of after the
        # (potentially very large) backbone has been constructed.
        if pooling_type not in self._SUPPORTED_POOLING:
            raise ValueError('Unknown pooling type')
        if pooling_type in self._RECURRENT_POOLING and hidden_size is None:
            raise ValueError(f'hidden_size must be provided for {pooling_type}')

        self.pooling_type = pooling_type

        if config_path is None:
            self.config = AutoConfig.from_pretrained(model, output_hidden_states=True)
            # Zero every dropout setting. Several attribute spellings are set,
            # presumably to cover different config classes -- TODO confirm.
            self.config.hidden_dropout = 0.
            self.config.hidden_dropout_prob = 0.
            self.config.attention_dropout = 0.
            self.config.attention_probs_dropout_prob = 0.
        else:
            # NOTE(review): torch.load unpickles arbitrary objects -- only load
            # config files from a trusted source. Newer PyTorch releases may
            # also need weights_only=False to load a non-tensor object; confirm.
            self.config = torch.load(config_path)

        if pretrained:
            self.backbone = AutoModel.from_pretrained(model, config=self.config)
        else:
            self.backbone = AutoModel.from_config(self.config)

        if pooling_type == 'MeanPooling':
            self.pool = MeanPooling()
        elif pooling_type == 'WeightedLayerPooling':
            self.pool = WeightedLayerPooling(self.config.num_hidden_layers)
        else:
            # 'LSTMPooling' or 'GRUPooling'. The original constructor had no
            # branch for 'GRUPooling' and therefore always rejected it.
            # NOTE(review): assumes LSTMPooling uses a GRU when is_lstm=False --
            # confirm against pooling.py.
            self.pool = LSTMPooling(
                self.config.num_hidden_layers,
                self.config.hidden_size,
                hidden_size,
                0.1,
                is_lstm=(pooling_type == 'LSTMPooling'),
            )

        # Recurrent pooling emits `hidden_size` features; the other pooling
        # types keep the backbone width.
        if pooling_type in self._RECURRENT_POOLING:
            head_in_features = hidden_size
        else:
            head_in_features = self.config.hidden_size
        self.fc = nn.Linear(head_in_features, 6)

    def feature(self, inputs):
        """Run the backbone and pool its hidden states.

        Args:
            inputs: Mapping unpacked as keyword arguments into the backbone;
                must contain ``'attention_mask'`` when mean pooling is used.

        Returns:
            Tuple ``(backbone_outputs, pooled_feature)``.

        Raises:
            ValueError: If ``self.pooling_type`` is not a supported value.
        """
        outputs = self.backbone(**inputs)

        if self.pooling_type == 'MeanPooling':
            # outputs[0] is used as the last hidden state.
            feature = self.pool(outputs[0], inputs['attention_mask'])
        elif self.pooling_type == 'WeightedLayerPooling' or self.pooling_type in self._RECURRENT_POOLING:
            # outputs[1] is expected to be the per-layer hidden states, which
            # requires output_hidden_states=True in the config -- a config
            # loaded from `config_path` must already have it set (verify).
            all_hidden_states = torch.stack(outputs[1])
            feature = self.pool(all_hidden_states)
        else:
            raise ValueError('Unknown pooling type')

        return outputs, feature

    def forward(self, inputs):
        """Return ``(backbone_outputs, head_output)`` for ``inputs``."""
        original_outputs, feature = self.feature(inputs)
        output = self.fc(feature)
        return original_outputs, output

In [3]:
# Mean-pooling DeBERTa-v3-large built from a saved config file. With
# pretrained=False the backbone comes from AutoModel.from_config, so no
# pretrained backbone weights are loaded here.
m1 = CustomModel(
    "microsoft/deberta-v3-large",
    "MeanPooling",
    hidden_size=None,
    config_path="../../../input/model23/config.pth",
    pretrained=False,
)

In [4]:
def freeze(module):
    """Turn off gradient tracking for every parameter yielded by ``module.parameters()``."""
    for weight in module.parameters():
        weight.requires_grad = False

In [33]:
# Materialise the backbone's module iterator into a list for inspection below.
a = [*m1.backbone.modules()]


In [34]:
# How many modules the backbone iterator yielded.
len(a)

465

In [35]:
# Inspect the last module in the collected list.
a[-1]

LayerNorm((1024,), eps=1e-07, elementwise_affine=True)

In [36]:
# Inspect the second-to-last module in the collected list.
a[-2]

Embedding(512, 1024)

In [42]:
# Freeze every parameter currently registered on the model (backbone, pool, head).
freeze(m1)

In [70]:
# Show the current regression head.
m1.fc

Linear(in_features=1024, out_features=6, bias=True)

In [71]:
# Swap the head for a fresh single-output layer. `nn` is already imported in
# the first cell, and the input width is read from the existing head rather
# than hard-coded, so this cell keeps working for backbones of other sizes.
# The new layer is created after freeze(m1), so its parameters are trainable.
m1.fc = nn.Linear(in_features=m1.fc.in_features, out_features=1, bias=True)

In [73]:
# The head was replaced after freeze(m1), so its first parameter should still
# require gradients.
next(m1.fc.parameters()).requires_grad

True

In [79]:
# Backbone parameters were frozen by freeze(m1); the first attention parameter
# of encoder layer 0 should therefore not require gradients.
next(m1.backbone.encoder.layer[0].attention.parameters()).requires_grad

False