In [19]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [20]:
tokenizer.encode("I love Pizza")

[306, 5360, 349, 24990]

In [21]:
tokenizer.decode([306, 5360, 349, 24990])

'I love Pizza'

In [22]:
from transformers import AutoTokenizer, BertModel
import torch

def generate_word_embeddings_bert(text, model_name="bert-base-uncased"):
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = BertModel.from_pretrained(model_name)

        # Tokenize the input text
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

        # Get the word embeddings
        with torch.no_grad():
            outputs = model(**inputs)

        # Extract embeddings and corresponding words
        embeddings = outputs.last_hidden_state
        input_ids = inputs["input_ids"][0]
        words = [tokenizer.decode(token_id) for token_id in input_ids]

        # Create a dictionary of word embeddings
        word_embeddings = {}
        for i, word in enumerate(words):
            if word not in ['[CLS]', '[SEP]', '[PAD]']:
                word_embeddings[word] = embeddings[0, i, :].numpy()

        return word_embeddings

    except Exception as e:
        print(f"An error occurred: {e}")
        return None

In [24]:
# Example usage:
text1 = "I was walking on the bank of river",
text2 = "I went to the bank to withdraw money"
embeddings = generate_word_embeddings_bert(text2)

if embeddings:
    for word, embedding in embeddings.items():
        print(f"{word}: {embedding[:5]}...")

i: [ 0.58092195 -0.2949463  -0.2789928   0.5095507   0.00133457]...
went: [ 0.4387233  -0.69758373  0.0393692   0.5055915  -0.04782405]...
to: [ 0.23749965 -0.23734832  0.10270531  0.20308943 -0.301301  ]...
the: [ 0.38239807 -0.6787178  -0.07699291  0.6969222   0.35928556]...
bank: [ 0.43778786 -0.44205457  0.01638133 -0.08081906  0.83776623]...
withdraw: [ 0.4293537   0.2597395   0.5665406  -0.37747672  0.46020743]...
money: [ 0.39729798 -0.47736964 -0.19738731 -0.03174865  0.3494065 ]...


In [None]:
bank: [-0.16297182 -0.1512853  -0.08349326 -0.42740345 -0.15986079]...
bank: [ 0.43778786 -0.44205457  0.01638133 -0.08081906  0.83776623]...

In [25]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [26]:
# Training data
sentences = [
    "the fluffy cat that was sleeping under the table woke up and stretched",
    "the old dog that had been chasing squirrels all day finally rested",
    "the small bird that flew away from the nest returned to its mother",
    "the big house that stood on the hill overlooked the entire valley",
    "the curious child who found a hidden door opened it carefully",
    "the talented musician who played the violin performed a beautiful piece",
    "the adventurous traveler who explored the ancient ruins discovered a secret passage",
    "the dedicated scientist who studied the rare flower found a new species"
]

In [27]:
# Create vocabulary
words = set()
for sentence in sentences:
    for word in sentence.split():
        words.add(word)
words = list(words)

In [28]:
words

['cat',
 'door',
 'stretched',
 'table',
 'bird',
 'returned',
 'musician',
 'traveler',
 'woke',
 'had',
 'away',
 'house',
 'beautiful',
 'new',
 'small',
 'it',
 'stood',
 'piece',
 'and',
 'curious',
 'species',
 'sleeping',
 'performed',
 'a',
 'finally',
 'flower',
 'the',
 'on',
 'hidden',
 'carefully',
 'all',
 'scientist',
 'opened',
 'ancient',
 'nest',
 'from',
 'overlooked',
 'valley',
 'passage',
 'explored',
 'was',
 'adventurous',
 'mother',
 'up',
 'ruins',
 'squirrels',
 'violin',
 'been',
 'studied',
 'chasing',
 'under',
 'discovered',
 'dog',
 'secret',
 'to',
 'hill',
 'fluffy',
 'day',
 'big',
 'dedicated',
 'old',
 'played',
 'its',
 'who',
 'talented',
 'rare',
 'found',
 'child',
 'rested',
 'that',
 'entire',
 'flew']

In [29]:
word_to_index = {word: i for i, word in enumerate(words)}
index_to_word = {i: word for i, word in enumerate(words)}

In [30]:
word_to_index

{'cat': 0,
 'door': 1,
 'stretched': 2,
 'table': 3,
 'bird': 4,
 'returned': 5,
 'musician': 6,
 'traveler': 7,
 'woke': 8,
 'had': 9,
 'away': 10,
 'house': 11,
 'beautiful': 12,
 'new': 13,
 'small': 14,
 'it': 15,
 'stood': 16,
 'piece': 17,
 'and': 18,
 'curious': 19,
 'species': 20,
 'sleeping': 21,
 'performed': 22,
 'a': 23,
 'finally': 24,
 'flower': 25,
 'the': 26,
 'on': 27,
 'hidden': 28,
 'carefully': 29,
 'all': 30,
 'scientist': 31,
 'opened': 32,
 'ancient': 33,
 'nest': 34,
 'from': 35,
 'overlooked': 36,
 'valley': 37,
 'passage': 38,
 'explored': 39,
 'was': 40,
 'adventurous': 41,
 'mother': 42,
 'up': 43,
 'ruins': 44,
 'squirrels': 45,
 'violin': 46,
 'been': 47,
 'studied': 48,
 'chasing': 49,
 'under': 50,
 'discovered': 51,
 'dog': 52,
 'secret': 53,
 'to': 54,
 'hill': 55,
 'fluffy': 56,
 'day': 57,
 'big': 58,
 'dedicated': 59,
 'old': 60,
 'played': 61,
 'its': 62,
 'who': 63,
 'talented': 64,
 'rare': 65,
 'found': 66,
 'child': 67,
 'rested': 68,
 'that': 

In [31]:
index_to_word

{0: 'cat',
 1: 'door',
 2: 'stretched',
 3: 'table',
 4: 'bird',
 5: 'returned',
 6: 'musician',
 7: 'traveler',
 8: 'woke',
 9: 'had',
 10: 'away',
 11: 'house',
 12: 'beautiful',
 13: 'new',
 14: 'small',
 15: 'it',
 16: 'stood',
 17: 'piece',
 18: 'and',
 19: 'curious',
 20: 'species',
 21: 'sleeping',
 22: 'performed',
 23: 'a',
 24: 'finally',
 25: 'flower',
 26: 'the',
 27: 'on',
 28: 'hidden',
 29: 'carefully',
 30: 'all',
 31: 'scientist',
 32: 'opened',
 33: 'ancient',
 34: 'nest',
 35: 'from',
 36: 'overlooked',
 37: 'valley',
 38: 'passage',
 39: 'explored',
 40: 'was',
 41: 'adventurous',
 42: 'mother',
 43: 'up',
 44: 'ruins',
 45: 'squirrels',
 46: 'violin',
 47: 'been',
 48: 'studied',
 49: 'chasing',
 50: 'under',
 51: 'discovered',
 52: 'dog',
 53: 'secret',
 54: 'to',
 55: 'hill',
 56: 'fluffy',
 57: 'day',
 58: 'big',
 59: 'dedicated',
 60: 'old',
 61: 'played',
 62: 'its',
 63: 'who',
 64: 'talented',
 65: 'rare',
 66: 'found',
 67: 'child',
 68: 'rested',
 69: 'tha

In [32]:
vocab_size = len(words)

In [33]:
vocab_size

72

In [35]:
X_pairs = []
y_next = []

In [36]:
for sentence in sentences:
    sentence_words = sentence.split()
    for i in range(len(sentence_words) - 2):
        X_pairs.append([word_to_index[sentence_words[i]], word_to_index[sentence_words[i + 1]]])
        y_next.append(word_to_index[sentence_words[i + 2]])

In [37]:
X_pairs

[[26, 56],
 [56, 0],
 [0, 69],
 [69, 40],
 [40, 21],
 [21, 50],
 [50, 26],
 [26, 3],
 [3, 8],
 [8, 43],
 [43, 18],
 [26, 60],
 [60, 52],
 [52, 69],
 [69, 9],
 [9, 47],
 [47, 49],
 [49, 45],
 [45, 30],
 [30, 57],
 [57, 24],
 [26, 14],
 [14, 4],
 [4, 69],
 [69, 71],
 [71, 10],
 [10, 35],
 [35, 26],
 [26, 34],
 [34, 5],
 [5, 54],
 [54, 62],
 [26, 58],
 [58, 11],
 [11, 69],
 [69, 16],
 [16, 27],
 [27, 26],
 [26, 55],
 [55, 36],
 [36, 26],
 [26, 70],
 [26, 19],
 [19, 67],
 [67, 63],
 [63, 66],
 [66, 23],
 [23, 28],
 [28, 1],
 [1, 32],
 [32, 15],
 [26, 64],
 [64, 6],
 [6, 63],
 [63, 61],
 [61, 26],
 [26, 46],
 [46, 22],
 [22, 23],
 [23, 12],
 [26, 41],
 [41, 7],
 [7, 63],
 [63, 39],
 [39, 26],
 [26, 33],
 [33, 44],
 [44, 51],
 [51, 23],
 [23, 53],
 [26, 59],
 [59, 31],
 [31, 63],
 [63, 48],
 [48, 26],
 [26, 65],
 [65, 25],
 [25, 66],
 [66, 23],
 [23, 13]]

In [38]:
y_next

[0,
 69,
 40,
 21,
 50,
 26,
 3,
 8,
 43,
 18,
 2,
 52,
 69,
 9,
 47,
 49,
 45,
 30,
 57,
 24,
 68,
 4,
 69,
 71,
 10,
 35,
 26,
 34,
 5,
 54,
 62,
 42,
 11,
 69,
 16,
 27,
 26,
 55,
 36,
 26,
 70,
 37,
 67,
 63,
 66,
 23,
 28,
 1,
 32,
 15,
 29,
 6,
 63,
 61,
 26,
 46,
 22,
 23,
 12,
 17,
 7,
 63,
 39,
 26,
 33,
 44,
 51,
 23,
 53,
 38,
 31,
 63,
 48,
 26,
 65,
 25,
 66,
 23,
 13,
 20]

In [39]:
X_pairs = np.array(X_pairs)
y_next = np.array(y_next)

In [40]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X_pairs, y_next, test_size=0.2, random_state=42)

In [42]:
# Model
embedding_dim = 5

In [43]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(vocab_size, activation='softmax')

  ])

In [44]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [45]:
model.fit(X_train, y_train, epochs=20, verbose=1)

Epoch 1/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step - accuracy: 0.0000e+00 - loss: 4.2765
Epoch 2/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.0000e+00 - loss: 4.2742
Epoch 3/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.0000e+00 - loss: 4.2722
Epoch 4/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.0208 - loss: 4.2707
Epoch 5/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.0312 - loss: 4.2695
Epoch 6/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.0417 - loss: 4.2658
Epoch 7/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.0312 - loss: 4.2643
Epoch 8/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.0312 - loss: 4.2631
Epoch 9/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x7fa5701de690>

In [46]:
model.summary()

In [57]:
input_pair = np.array([[word_to_index["the"], word_to_index["cat"]]])  # (1, 2)
predictions = model.predict(input_pair)
predictions

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step


array([[0.01328968, 0.01363665, 0.01360989, 0.01427745, 0.01379839,
        0.01339028, 0.01404645, 0.01404294, 0.01384243, 0.01360202,
        0.01389   , 0.0137523 , 0.01399522, 0.01439063, 0.01315064,
        0.01349746, 0.01379659, 0.0142926 , 0.01394381, 0.013308  ,
        0.01401608, 0.01391429, 0.01424812, 0.01498955, 0.01367047,
        0.01394971, 0.01439126, 0.01327554, 0.01425731, 0.01400397,
        0.01388592, 0.0132544 , 0.01416042, 0.01385876, 0.01432145,
        0.0140959 , 0.01409323, 0.01438518, 0.01436249, 0.01396414,
        0.01411854, 0.01343215, 0.01310041, 0.01399444, 0.01457609,
        0.01449963, 0.01399986, 0.01384221, 0.01419556, 0.01363858,
        0.01327474, 0.01417534, 0.0143313 , 0.01365592, 0.01375263,
        0.01403483, 0.01340851, 0.01317674, 0.01352353, 0.01366983,
        0.01336111, 0.01373747, 0.0134014 , 0.01447995, 0.01325776,
        0.0139938 , 0.01464404, 0.01440039, 0.01409761, 0.01389907,
        0.01406753, 0.01360739]], dtype=float32)

In [58]:
predicted_index = np.argmax(predictions[0])
print(f"RNN: Predicted next word for 'the cat': {index_to_word[predicted_index]}")

RNN: Predicted next word for 'the cat': a
