In [1]:
import tensorflow as tf
from transformers import AutoTokenizer,TFAutoModelForSequenceClassification
import numpy as np

In [3]:
# Checkpoint name of the pretrained sentiment classifier used throughout the notebook.
checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'

# Load the matching tokenizer and the TensorFlow sequence-classification model.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)

All model checkpoint layers were used when initializing TFDistilBertForSequenceClassification.

All the layers of TFDistilBertForSequenceClassification were initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


In [35]:
# Single example sentence for the first, unbatched forward pass.
sents = "I hate you"

In [36]:
# Split the sentence into tokens, then map each token to its vocabulary id.
# NOTE(review): this two-step path yields ids for the sentence's own tokens only
# (three ids for three words here); calling the tokenizer directly would also add
# the model's special tokens -- confirm the omission is intentional for this demo.
tokens = tokenizer.tokenize(sents)
ids = tokenizer.convert_tokens_to_ids(tokens)

In [37]:
ids

[1045, 5223, 2017]

In [38]:
# Wrap the ids in an outer list so the tensor carries a leading batch dimension,
# then run the model and keep only the raw classification logits.
input_ids = tf.constant([ids])
model_output = model(input_ids)
output_logits = model_output.logits

In [39]:
output_logits

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[-0.98728615,  1.1817394 ]], dtype=float32)>

In [43]:
# Convert the logits to class probabilities, then take the most likely class per row.
class_probabilities = tf.math.softmax(output_logits)
tf.argmax(class_probabilities, axis=1)

<tf.Tensor: shape=(1,), dtype=int64, numpy=array([1], dtype=int64)>

In [41]:
# Show which label each class index stands for, to interpret the predicted index.
model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}

In [62]:
# Batch of sentences with different lengths, used to demonstrate padding.
sents = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this so much",
    'I Like you soo much',
]

In [63]:
# Tokenize each sentence on its own, then convert every token list to vocabulary ids.
tokens = [tokenizer.tokenize(sentence) for sentence in sents]
ids = [tokenizer.convert_tokens_to_ids(sentence_tokens) for sentence_tokens in tokens]

In [64]:
# Right-pad every id list with the tokenizer's pad id so all rows match the
# longest sequence; tf.constant needs a rectangular batch.
# `default=0` keeps an empty batch a no-op instead of raising.
max_input_length = max((len(input_id) for input_id in ids), default=0)
ids = [
    # Build a fresh list per row: plain Python ints only (no NumPy scalars), and
    # re-running the cell leaves already-padded rows unchanged.
    list(input_id) + [tokenizer.pad_token_id] * (max_input_length - len(input_id))
    for input_id in ids
]

In [65]:
ids

[[1045,
  1005,
  2310,
  2042,
  3403,
  2005,
  1037,
  17662,
  12172,
  2607,
  2026,
  2878,
  2166,
  1012],
 [1045, 5223, 2023, 2061, 2172, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [1045, 2066, 2017, 17111, 2172, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

In [66]:
# Stack the equal-length id lists into one 2-D integer tensor (one row per sentence).
input_ids = tf.constant(ids)

In [67]:
input_ids

<tf.Tensor: shape=(3, 14), dtype=int32, numpy=
array([[ 1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
         2607,  2026,  2878,  2166,  1012],
       [ 1045,  5223,  2023,  2061,  2172,     0,     0,     0,     0,
            0,     0,     0,     0,     0],
       [ 1045,  2066,  2017, 17111,  2172,     0,     0,     0,     0,
            0,     0,     0,     0,     0]])>

In [68]:
# Run the padded batch through the model without an attention mask and show the logits.
batch_output = model(input_ids)
output_logits = batch_output.logits
print(output_logits)

tf.Tensor(
[[-2.72762    2.8789363]
 [ 2.7984848 -2.4121716]
 [-0.4905542  0.6014412]], shape=(3, 2), dtype=float32)


In [69]:
# Convert the logits to class probabilities, then take the most likely class per row.
class_probabilities = tf.math.softmax(output_logits)
tf.argmax(class_probabilities, axis=1)

<tf.Tensor: shape=(3,), dtype=int64, numpy=array([1, 0, 1], dtype=int64)>

### With Attention mask

In [73]:
# Convert the id tensor to a NumPy array and print it for inspection.
ids_np = input_ids.numpy()
print(ids_np)

[[ 1045  1005  2310  2042  3403  2005  1037 17662 12172  2607  2026  2878
   2166  1012]
 [ 1045  5223  2023  2061  2172     0     0     0     0     0     0     0
      0     0]
 [ 1045  2066  2017 17111  2172     0     0     0     0     0     0     0
      0     0]]


In [74]:
input_ids

<tf.Tensor: shape=(3, 14), dtype=int32, numpy=
array([[ 1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
         2607,  2026,  2878,  2166,  1012],
       [ 1045,  5223,  2023,  2061,  2172,     0,     0,     0,     0,
            0,     0,     0,     0,     0],
       [ 1045,  2066,  2017, 17111,  2172,     0,     0,     0,     0,
            0,     0,     0,     0,     0]])>

In [83]:
def create_attention_mask(tensor, pad_token_id=0):
    """Build a 0/1 attention mask for a batch of padded token-id sequences.

    Args:
        tensor: Iterable of rows, each an iterable of token ids (a nested
            list, a NumPy array or an eager tensor -- anything that yields
            rows and then scalar ids when iterated).
        pad_token_id: Id that marks padding positions. Defaults to 0, the
            value this function previously hard-coded; pass the tokenizer's
            own pad id for vocabularies where it differs.

    Returns:
        list[list[int]]: One list per input row, holding 1 where the id
        differs from ``pad_token_id`` and 0 where it equals it. An empty
        input yields an empty list.
    """
    return [
        # The truth test mirrors the original `if i != 0` check, so scalar
        # tensor elements are handled the same way as plain ints.
        [1 if token_id != pad_token_id else 0 for token_id in row]
        for row in tensor
    ]

In [84]:
# Mark real tokens with 1 and padding positions with 0 for every row of the batch.
attention_mask = create_attention_mask(input_ids)

In [85]:
attention_mask

[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

In [90]:
# Re-run the padded batch, this time passing the attention mask alongside the ids.
masked_output = model(input_ids, attention_mask=create_attention_mask(input_ids))
output_logits = masked_output.logits

In [91]:
output_logits


<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[-2.72762  ,  2.8789363],
       [ 3.174391 , -2.6848435],
       [-2.141855 ,  2.3865533]], dtype=float32)>

In [93]:
# Convert the logits to class probabilities, then take the most likely class per row.
class_probabilities = tf.math.softmax(output_logits)
tf.argmax(class_probabilities, axis=1)

<tf.Tensor: shape=(3,), dtype=int64, numpy=array([1, 0, 1], dtype=int64)>