In [11]:
from transformers import pipeline
from transformers.tokenization_utils import TruncationStrategy

import tokenizers
import pandas as pd
import requests



# Registry of the fill-mask checkpoints this notebook can load.
# Keys are display names; each entry carries the checkpoint identifier passed
# to `pipeline(...)` and a short human-readable description.
models = {
    "AlephBERT-base": {
        "name_or_path": "onlplab/alephbert-base",
        "description": "AlephBERT base model",
    },
    "HeBERT-base-TAU": {
        "name_or_path": "avichr/heBERT",
        "description": "HeBERT model created by TAU",
    },
    "mBERT-base-multilingual-cased": {
        "name_or_path": "bert-base-multilingual-cased",
        "description": "Multilingual BERT model",
    },
}

def get_json_from_url(url):
    """Download and decode the JSON document served at ``url``.

    The previous version returned the module-level ``models`` registry before
    ever touching ``url``, leaving the HTTP request unreachable. The request is
    now actually performed; the local registry is kept only as a fallback so
    the notebook still works without network access.

    Args:
        url: Address of a JSON resource.

    Returns:
        The decoded JSON payload, or the module-level ``models`` dict when the
        request fails, returns an error status, or the body is not valid JSON.
    """
    try:
        # A timeout keeps a dead host from hanging the kernel indefinitely.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError):
        # Network failure, HTTP error status or undecodable body:
        # fall back to the registry defined in this notebook.
        return models

# models = get_json_from_url('https://huggingface.co/spaces/biu-nlp/AlephBERT/raw/main/models.json')



def load_model(model):
    """Create a fill-mask pipeline for the registry entry named ``model``.

    Args:
        model: Key into the module-level ``models`` dict.

    Returns:
        A ``(pipeline, tokenize)`` tuple: the fill-mask pipeline, and a helper
        that encodes text with that pipeline's tokenizer.
    """
    checkpoint = models[model]['name_or_path']
    fill_mask = pipeline('fill-mask', checkpoint)

    def tokenize_text(inputs):
        # Encode with special tokens and padding, explicitly without truncation,
        # returning tensors for whichever framework the pipeline runs on.
        return fill_mask.tokenizer(
            inputs,
            add_special_tokens=True,
            return_tensors=fill_mask.framework,
            padding=True,
            truncation=TruncationStrategy.DO_NOT_TRUNCATE,
        )

    def _parse_and_tokenize(inputs, tokenized=False, **kwargs):
        # Already-tokenized inputs pass through untouched; anything else is encoded.
        return inputs if tokenized else tokenize_text(inputs)

    # Replace the pipeline's `_parse_and_tokenize` attribute with the version above
    # (presumably the hook the pipeline uses to encode its inputs - confirm for the
    # installed transformers version).
    fill_mask._parse_and_tokenize = _parse_and_tokenize

    return fill_mask, tokenize_text






mode = 'Models'

if mode == 'Models':
    model = "AlephBERT-base"
    masking_level = 'Tokens'
    mask_token = '[MASK]'

    # Split the registry key into tags and prefix the first one with 'Model:'.
    model_tags = model.split('-')
    model_tags[0] = 'Model:' + model_tags[0]

    unmasker, tokenize = load_model(model)
    input_text = " [MASK] אתה טיפש "
    input_masked = None

    # Sub-word view of the raw input, computed once.
    tokenized = tokenize(input_text)
    ids = tokenized['input_ids'].tolist()[0]
    subwords = unmasker.tokenizer.convert_ids_to_tokens(ids)

    if masking_level == 'Tokens':
        tokens = str(input_text).split()
        # Position of the mask among the whitespace-separated tokens.
        # This used to be the string '[MASK]' itself, which never equals an
        # integer position, so the join below could not mask anything and an
        # input without a mask token was still sent to the pipeline.
        mask_idx = tokens.index(mask_token) if mask_token in tokens else None
        if mask_idx is not None:
            input_masked = ' '.join(
                mask_token if i == mask_idx else token
                for i, token in enumerate(tokens)
            )
            display_input = input_masked

    # Only query the model when there is a masked sentence to complete.
    if input_masked:
        res = unmasker(input_masked, top_k=5)
        if res:
            print(res)
            # Optional tabular view (relies on `st`, which is not imported in the cells shown):
            # res = [{'Prediction':r['token_str'], 'Completed Sentence':r['sequence'].replace('[SEP]', '').replace('[CLS]', ''), 'Score':r['score']} for r in res]
            # res_table = pd.DataFrame(res)
            # st.table(res_table)











[{'sequence': 'כי אתה טיפש', 'score': 0.4186239242553711, 'token': 1919, 'token_str': 'כי'}, {'sequence': 'כמה אתה טיפש', 'score': 0.05661720782518387, 'token': 2146, 'token_str': 'כמה'}, {'sequence': 'גם אתה טיפש', 'score': 0.030103864148259163, 'token': 1929, 'token_str': 'גם'}, {'sequence': 'אז אתה טיפש', 'score': 0.02940039522945881, 'token': 2039, 'token_str': 'אז'}, {'sequence': 'למה אתה טיפש', 'score': 0.02271442301571369, 'token': 2211, 'token_str': 'למה'}]


In [12]:
# Show the first prediction returned by the fill-mask call in the cell above.
res[0]

{'sequence': 'כי אתה טיפש',
 'score': 0.4186239242553711,
 'token': 1919,
 'token_str': 'כי'}

In [25]:
import pandas as pd
import numpy as np
import transformers
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm
from transformers import BertModel, BertTokenizerFast
# Fast tokenizer loaded from the same 'onlplab/alephbert-base' checkpoint that the model wrappers below load.
alephbert_tokenizer = BertTokenizerFast.from_pretrained('onlplab/alephbert-base')

class Encoder(nn.Module):
    """Thin ``nn.Module`` wrapper around the pretrained AlephBERT ``BertModel``."""

    def __init__(self):
        super(Encoder, self).__init__()
        self.alephbert_model = BertModel.from_pretrained("onlplab/alephbert-base")

    def forward(self, ids, attention_mask=None, token_type_ids=None):
        """Run the wrapped model on a batch of token ids.

        Args:
            ids: Token ids, passed to the wrapped model as its first argument.
            attention_mask: Optional mask forwarded unchanged to the wrapped
                model. Supply it for padded batches so padding positions can
                be ignored; ``None`` keeps the previous behaviour.
            token_type_ids: Optional segment ids forwarded unchanged;
                ``None`` keeps the previous behaviour.

        Returns:
            Whatever the wrapped ``BertModel`` returns for these inputs.
        """
        return self.alephbert_model(
            ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
    
# Instantiate the AlephBERT wrapper.
model = Encoder()

# Binary cross-entropy loss that takes raw logits.
loss_fn = nn.BCEWithLogitsLoss()

# Adam over every parameter of the wrapper, learning rate 1e-4.
optimizer = optim.Adam(model.parameters(), lr=1e-4)


Some weights of the model checkpoint at onlplab/alephbert-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight

In [26]:
class Decoder(nn.Module):
    """Thin ``nn.Module`` wrapper around the pretrained AlephBERT ``BertModel``.

    NOTE(review): this loads the same checkpoint as ``Encoder`` and adds no
    decoder-specific layers yet - confirm whether that is intended.
    """

    def __init__(self):
        super(Decoder, self).__init__()
        self.alephbert_model = BertModel.from_pretrained("onlplab/alephbert-base")

    def forward(self, ids, attention_mask=None, token_type_ids=None):
        """Run the wrapped model on a batch of token ids.

        Args:
            ids: Token ids, passed to the wrapped model as its first argument.
            attention_mask: Optional mask forwarded unchanged to the wrapped
                model. Supply it for padded batches so padding positions can
                be ignored; ``None`` keeps the previous behaviour.
            token_type_ids: Optional segment ids forwarded unchanged;
                ``None`` keeps the previous behaviour.

        Returns:
            Whatever the wrapped ``BertModel`` returns for these inputs.
        """
        return self.alephbert_model(
            ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
    
# Instantiate the Decoder wrapper; its constructor loads the pretrained checkpoint.
model_dec=Decoder()


Some weights of the model checkpoint at onlplab/alephbert-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight

AttributeError: 'list' object has no attribute 'dim'

In [10]:
# Print a per-layer parameter summary of the AlephBERT checkpoint.
# NOTE(review): `summary` is only imported in a later cell (`from torchsummary import summary`);
# on a fresh kernel that import has to run before this cell.
summary(BertModel.from_pretrained("onlplab/alephbert-base"))

Some weights of the model checkpoint at onlplab/alephbert-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight

Layer (type:depth-idx)                        Param #
├─BertEmbeddings: 1-1                         --
|    └─Embedding: 2-1                         39,936,000
|    └─Embedding: 2-2                         393,216
|    └─Embedding: 2-3                         768
|    └─LayerNorm: 2-4                         1,536
|    └─Dropout: 2-5                           --
├─BertEncoder: 1-2                            --
|    └─ModuleList: 2-6                        --
|    |    └─BertLayer: 3-1                    7,087,872
|    |    └─BertLayer: 3-2                    7,087,872
|    |    └─BertLayer: 3-3                    7,087,872
|    |    └─BertLayer: 3-4                    7,087,872
|    |    └─BertLayer: 3-5                    7,087,872
|    |    └─BertLayer: 3-6                    7,087,872
|    |    └─BertLayer: 3-7                    7,087,872
|    |    └─BertLayer: 3-8                    7,087,872
|    |    └─BertLayer: 3-9                    7,087,872
|    |    └─BertLayer: 3-10      

Layer (type:depth-idx)                        Param #
├─BertEmbeddings: 1-1                         --
|    └─Embedding: 2-1                         39,936,000
|    └─Embedding: 2-2                         393,216
|    └─Embedding: 2-3                         768
|    └─LayerNorm: 2-4                         1,536
|    └─Dropout: 2-5                           --
├─BertEncoder: 1-2                            --
|    └─ModuleList: 2-6                        --
|    |    └─BertLayer: 3-1                    7,087,872
|    |    └─BertLayer: 3-2                    7,087,872
|    |    └─BertLayer: 3-3                    7,087,872
|    |    └─BertLayer: 3-4                    7,087,872
|    |    └─BertLayer: 3-5                    7,087,872
|    |    └─BertLayer: 3-6                    7,087,872
|    |    └─BertLayer: 3-7                    7,087,872
|    |    └─BertLayer: 3-8                    7,087,872
|    |    └─BertLayer: 3-9                    7,087,872
|    |    └─BertLayer: 3-10      

In [11]:
# Print a per-layer parameter summary of the HeBERT checkpoint.
# NOTE(review): `summary` is only imported in a later cell (`from torchsummary import summary`);
# on a fresh kernel that import has to run before this cell.
summary(BertModel.from_pretrained("avichr/heBERT"))

HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=505.0), HTML(value='')))




HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=438146887.0), HTML(value='')))




Some weights of the model checkpoint at avichr/heBERT were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at avichr/heBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should prob

Layer (type:depth-idx)                        Param #
├─BertEmbeddings: 1-1                         --
|    └─Embedding: 2-1                         23,440,896
|    └─Embedding: 2-2                         393,216
|    └─Embedding: 2-3                         1,536
|    └─LayerNorm: 2-4                         1,536
|    └─Dropout: 2-5                           --
├─BertEncoder: 1-2                            --
|    └─ModuleList: 2-6                        --
|    |    └─BertLayer: 3-1                    7,087,872
|    |    └─BertLayer: 3-2                    7,087,872
|    |    └─BertLayer: 3-3                    7,087,872
|    |    └─BertLayer: 3-4                    7,087,872
|    |    └─BertLayer: 3-5                    7,087,872
|    |    └─BertLayer: 3-6                    7,087,872
|    |    └─BertLayer: 3-7                    7,087,872
|    |    └─BertLayer: 3-8                    7,087,872
|    |    └─BertLayer: 3-9                    7,087,872
|    |    └─BertLayer: 3-10    

Layer (type:depth-idx)                        Param #
├─BertEmbeddings: 1-1                         --
|    └─Embedding: 2-1                         23,440,896
|    └─Embedding: 2-2                         393,216
|    └─Embedding: 2-3                         1,536
|    └─LayerNorm: 2-4                         1,536
|    └─Dropout: 2-5                           --
├─BertEncoder: 1-2                            --
|    └─ModuleList: 2-6                        --
|    |    └─BertLayer: 3-1                    7,087,872
|    |    └─BertLayer: 3-2                    7,087,872
|    |    └─BertLayer: 3-3                    7,087,872
|    |    └─BertLayer: 3-4                    7,087,872
|    |    └─BertLayer: 3-5                    7,087,872
|    |    └─BertLayer: 3-6                    7,087,872
|    |    └─BertLayer: 3-7                    7,087,872
|    |    └─BertLayer: 3-8                    7,087,872
|    |    └─BertLayer: 3-9                    7,087,872
|    |    └─BertLayer: 3-10    

In [None]:
class BertDataset(Dataset):
    """Dataset that tokenizes rows of the SST2 ``train.tsv`` file on access.

    Column 0 of the file is read as the text and column 1 as the target.
    """

    def __init__(self, tokenizer, max_length, root_dir=None):
        """Load the training table and remember the tokenization settings.

        Args:
            tokenizer: Object exposing ``encode_plus``.
            max_length: Length every encoded example is padded or truncated to.
            root_dir: Optional directory, stored as-is and not used by this
                class. It used to be read from an undefined name, which raised
                ``NameError`` on construction.
        """
        super(BertDataset, self).__init__()
        self.root_dir = root_dir
        self.train_csv = pd.read_csv('https://github.com/clairett/pytorch-sentiment-classification/raw/master/data/SST2/train.tsv', delimiter='\t', header=None)
        self.tokenizer = tokenizer
        self.target = self.train_csv.iloc[:, 1]
        self.max_length = max_length

    def __len__(self):
        """Return the number of rows in the training table."""
        return len(self.train_csv)

    def __getitem__(self, index):
        """Encode row ``index`` and return its tensors.

        Returns:
            Dict with ``ids``, ``mask``, ``token_type_ids`` and ``target``
            entries, each a ``torch.long`` tensor.
        """
        text = self.train_csv.iloc[index, 0]

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            # Replaces the deprecated `pad_to_max_length=True`.
            padding='max_length',
            # Without truncation, texts longer than max_length would yield
            # longer sequences than their neighbours and break batching.
            truncation=True,
            add_special_tokens=True,
            return_attention_mask=True,
            max_length=self.max_length,
        )
        ids = inputs["input_ids"]
        token_type_ids = inputs["token_type_ids"]
        mask = inputs["attention_mask"]

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'target': torch.tensor(self.train_csv.iloc[index, 1], dtype=torch.long)
            }
# Tokenizer for the 'bert-base-uncased' checkpoint.
tokenizer = transformers.BertTokenizer.from_pretrained("bert-base-uncased")

# Every example is encoded to a length of 100.
dataset = BertDataset(tokenizer, max_length=100)

# Serve the dataset in batches of 32.
dataloader = DataLoader(dataset=dataset, batch_size=32)


In [23]:
# Encode one masked Hebrew sentence and request PyTorch tensors back.
encoded_review = alephbert_tokenizer.encode_plus(
    "[MASK] הלכתי אתמול",
    return_tensors='pt',
)

In [24]:
# Use the GPU when one is available, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Move the model to the same device as its inputs; previously only the inputs
# were moved, which fails with a device mismatch on a CUDA machine.
model.to(device)

input_ids = encoded_review['input_ids'].to(device)
output = model(input_ids)


In [27]:
# Take the index of the largest value along dim 1 of the model's second output.
# NOTE(review): for a plain BertModel, output[1] is presumably the pooled sentence vector,
# so this index would point at a hidden feature rather than a token or class - confirm intent.
_, prediction = torch.max(output[1], dim=1)
print(prediction)


tensor([742])


In [5]:
from torchsummary import summary

In [19]:
from transformers import modeling_utils

In [20]:
# NOTE(review): this calls the method on the class with no arguments, which raises the
# TypeError recorded below; per that message it needs `self`, `attention_mask`,
# `input_shape` and `device`.
modeling_utils.ModuleUtilsMixin.get_extended_attention_mask()

TypeError: get_extended_attention_mask() missing 4 required positional arguments: 'self', 'attention_mask', 'input_shape', and 'device'