In [1]:
from transformers import BertModel
from transformers import BertTokenizer

import torch

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
class TweetBERTTail(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.pooler = torch.nn.Sequential(
            torch.nn.Linear(768, 2048),
            torch.nn.ReLU(),
            torch.nn.Linear(2048, 2048),
            torch.nn.ReLU(),
            torch.nn.Linear(2048, 768)
            #torch.nn.Tanh()
        )

    def forward(self, x):
        return self.pooler(x)

In [9]:
class PredictionModel(torch.nn.Module):
    def __init__(self, model_path: str = None):
        super().__init__()
        self.tokenizer = BertTokenizer.from_pretrained("distilbert-base-cased")
        self.embeding_model = BertModel.from_pretrained("distilbert-base-cased").to(device)
        self.tail = TweetBERTTail().to(device)

        if model_path is not None:
            self.tail.load_state_dict(model_path)

    def forward(self, x):
        tokens = self.tokenizer(x, return_tensors="pt").to(device)
        embeddings = self.embeding_model(**tokens).pooler_output
        embeddings = embeddings.squeeze()
        return self.tail(embeddings)

In [10]:
model = PredictionModel()

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.
You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at distilbert-base-cased were not used when initializing BertModel: ['distilbert.transformer.layer.4.attention.out_lin.weight', 'distilbert.transformer.layer.2.output_layer_norm.bias', 'distilbert.transformer.layer.1.ffn.lin1.weight', 'distilbert.transformer.layer.1.attention.k_lin.bias', 'distilbert.transformer.layer.0.attention.q_lin.weight', 'distilbert.transformer.layer.2.attention.out_lin.bias', 'distilbert.transformer.layer.2.attention.q_lin.bias', 'distilbert.transformer.layer.5.sa_layer_norm.weight', '

In [13]:
model("Marcin ma własne zdanie :)")

tensor([-5.1753e-02,  8.6723e-03,  1.8457e-02,  1.0705e-01,  4.7387e-02,
        -1.2550e-02, -4.1381e-02,  1.7415e-02, -9.4474e-03,  2.6718e-02,
        -6.1540e-02,  4.6875e-02,  1.2291e-02, -1.4739e-02,  6.5017e-03,
        -3.3910e-02, -2.3421e-02,  3.3783e-03, -4.5979e-02,  9.6297e-02,
         1.1235e-02, -6.0300e-02,  5.7197e-03, -5.0415e-02, -1.8855e-02,
         2.5474e-03,  2.9027e-02, -1.5125e-02, -7.9652e-03,  2.8523e-02,
         5.0679e-02, -3.9052e-02, -6.1008e-02,  1.8436e-02,  3.9764e-02,
        -1.1704e-02, -3.7170e-02,  2.9829e-02, -2.1245e-02,  5.8740e-02,
         2.2992e-02,  7.9015e-02, -9.8385e-05, -6.1546e-03,  1.1343e-02,
         9.2477e-03, -5.4877e-02, -5.5621e-02,  1.7883e-02,  1.1044e-01,
        -4.7329e-02, -1.3617e-02,  4.5889e-02,  4.9609e-02,  7.5569e-02,
        -5.7605e-02,  5.4447e-03, -4.9616e-02, -7.9300e-03, -7.9615e-02,
         1.0651e-02,  9.7496e-03,  1.8499e-02,  5.2637e-02,  4.3963e-03,
         6.2632e-02,  3.2581e-02, -2.4365e-02,  7.0