In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import re
import numpy as np


In [2]:
# Training corpus: a short English paragraph held in one string literal.
# It is tokenised by preprocess_text() and is the only data the model is trained on.
text="""My name is Aditya Shinde, and I am from Niphad.
I have completed my Bachelor’s degree in B.Sc. (Computer 
Science) from K.K.W. College, Chandori. Currently, I am
pursuing my Master’s degree in M.Sc. (Computer Science)
from HAL College, Ozar.
I have a strong foundation in web development and 
backend technologies, including HTML, CSS, JavaScript,
React.js, Python, Flask, and FastAPI. Along with this,
I have hands-on knowledge of Machine Learning, Deep 
Learning, and Neural Networks, where I have worked on
practical projects related to model building and deployment."""

In [3]:
def preprocess_text(text):
    """Turn raw text into a list of lowercase, letters-only tokens.

    The input is lowercased, then every character that is neither ``a``-``z``
    nor whitespace is deleted outright (it is *not* replaced by a space, so
    ``"React.js"`` becomes ``"reactjs"``). The remainder is split on runs of
    whitespace.

    Args:
        text: The raw string to tokenise.

    Returns:
        A list of tokens in their original order; empty if nothing survives.
    """
    letters_only = re.sub(r"[^a-z\s]", "", text.lower())
    return letters_only.split()
# Tokenise the corpus once; the vocabulary and the training windows are both built from `words`.
words =preprocess_text(text)


In [4]:
# Vocabulary: the distinct tokens in alphabetical order, so that the
# word <-> index assignment is deterministic for a given corpus.
vocab = sorted(set(words))

# Index -> word comes straight from the enumeration; word -> index is its inverse.
idx_to_word = dict(enumerate(vocab))
word_to_idx = {word: idx for idx, word in idx_to_word.items()}

# Number of distinct tokens.
vocab_size = len(vocab)


In [23]:
def create_sequences(words, seq_length, word_index=None):
    """Build (context window, next word) training pairs as index tensors.

    Every run of ``seq_length`` consecutive words becomes one input row and
    the word immediately following that run becomes its target.

    Args:
        words: Sequence of tokens; each must be a key of the mapping in use.
        seq_length: Number of context words per sample. Must be >= 1.
        word_index: Optional ``{word: index}`` mapping. When omitted, the
            module-level ``word_to_idx`` is used.

    Returns:
        A tuple ``(inputs, targets)`` of ``torch.long`` tensors with shapes
        ``(n, seq_length)`` and ``(n,)``, where
        ``n = max(len(words) - seq_length, 0)``.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
        KeyError: If a word is missing from the mapping.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    n_samples = len(words) - seq_length
    if n_samples <= 0:
        # Too few words for even one window. Return correctly shaped, integer
        # typed empty tensors; torch.tensor([]) would be float32 with shape
        # (0,), which has the wrong rank and a dtype nn.Embedding rejects.
        return (
            torch.empty((0, seq_length), dtype=torch.long),
            torch.empty((0,), dtype=torch.long),
        )

    mapping = word_to_idx if word_index is None else word_index

    # Encode each word once instead of once per window it appears in.
    encoded = [mapping[w] for w in words]

    inputs = [encoded[i:i + seq_length] for i in range(n_samples)]
    # The target of window i is the word at position i + seq_length.
    targets = encoded[seq_length:]

    return (
        torch.tensor(inputs, dtype=torch.long),
        torch.tensor(targets, dtype=torch.long),
    )
# Number of context words per training sample; also the maximum context
# length predict_next_words() feeds to the model.
SEQ_LENGTH=6
# X holds the index-encoded context windows, y the index of the word that follows each window.
X, y = create_sequences(words, SEQ_LENGTH)


In [24]:
class NextWordRNN(nn.Module):
    """Embedding -> single-layer RNN -> linear head that scores the next word.

    Args:
        vocab_size: Number of distinct word indices (embedding rows and
            output logits).
        embed_size: Width of each word embedding.
        hidden_size: Width of the RNN hidden state.
    """

    def __init__(self, vocab_size, embed_size, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # batch_first=True: the RNN takes and returns (batch, seq, feature).
        self.rnn = nn.RNN(embed_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        """Return next-word logits, one row per input sequence.

        Args:
            x: Integer tensor of word indices, indexed as (batch, seq).

        Returns:
            Tensor of unnormalised scores with ``vocab_size`` columns.
        """
        embedded = self.embedding(x)
        sequence_out, _ = self.rnn(embedded)
        # Only the output at the final time step feeds the classifier.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

In [25]:
# --- Hyperparameters --------------------------------------------------------
SEED = 42          # fixes the random weight initialisation across reruns
EMBED_SIZE = 50    # width of each word embedding vector
HIDDEN_SIZE = 64   # width of the RNN hidden state
LR = 0.01          # learning rate passed to Adam
EPOCHS = 300       # number of full-batch optimisation steps in the training loop

# Seed PyTorch's global RNG *before* the model is constructed: the embedding,
# RNN and linear weights are drawn from it, so without a seed every
# Restart & Run All yields a different model and different predictions.
torch.manual_seed(SEED)

model = NextWordRNN(vocab_size, EMBED_SIZE, HIDDEN_SIZE)
# CrossEntropyLoss takes raw logits and integer class targets.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)


In [26]:
# Explicitly (re-)enter training mode. predict_next_words() switches the model
# to eval mode and never switches it back, so re-running this cell after a
# prediction would otherwise train a model that is still in eval mode.
model.train()

LOG_EVERY = 50  # print the loss once every this many epochs

# Full-batch training: each epoch is one optimiser step over all of (X, y).
for epoch in range(EPOCHS):
    optimizer.zero_grad()              # clear gradients from the previous step
    outputs = model(X)                 # logits for every training window
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()

    if (epoch + 1) % LOG_EVERY == 0:
        print(f"Epoch [{epoch+1}/{EPOCHS}], Loss: {loss.item():.4f}")

Epoch [50/300], Loss: 0.0037
Epoch [100/300], Loss: 0.0014
Epoch [150/300], Loss: 0.0010
Epoch [200/300], Loss: 0.0007
Epoch [250/300], Loss: 0.0006
Epoch [300/300], Loss: 0.0005


In [29]:
def predict_next_words(model, input_words, num_words=5):
    """Greedily extend a seed phrase with the model's most likely next words.

    At each step the last ``SEQ_LENGTH`` words generated so far (fewer if
    not that many exist yet) are index-encoded with ``word_to_idx``, fed to
    ``model``, and the arg-max word is appended.

    Args:
        model: Module mapping a ``(1, seq)`` tensor of word indices to a
            ``(1, vocab)`` tensor of scores. It is switched to eval mode and
            left in eval mode on return.
        input_words: Seed tokens (any iterable of strings). Not modified.
            The tokens in the context window must be keys of ``word_to_idx``.
        num_words: How many words to generate; values <= 0 generate nothing.

    Returns:
        A new list: the seed tokens followed by the generated words.

    Raises:
        ValueError: If words are requested but ``input_words`` is empty.
        KeyError: If a word in the context window is not in the vocabulary.
    """
    model.eval()
    # list() copies the seed and also accepts tuples or other iterables.
    generated = list(input_words)

    if num_words > 0:
        if not generated:
            # An empty context would fail inside the model with an opaque
            # tensor error; fail early with a clear message instead.
            raise ValueError("input_words must contain at least one word")
        # Only the initial window can hold unknown words: every word
        # appended later comes from idx_to_word.
        unknown = [w for w in generated[-SEQ_LENGTH:] if w not in word_to_idx]
        if unknown:
            raise KeyError(
                f"words not in vocabulary: {unknown}; apply the same "
                "preprocessing as the training text (lowercase, letters only)"
            )

    with torch.no_grad():
        for _ in range(num_words):
            context = generated[-SEQ_LENGTH:]
            input_idx = torch.tensor(
                [[word_to_idx[w] for w in context]], dtype=torch.long
            )
            output = model(input_idx)
            # Greedy decoding: always take the highest-scoring word.
            predicted_idx = torch.argmax(output, dim=1).item()
            generated.append(idx_to_word[predicted_idx])

    return generated


In [33]:
# Demo: greedily generate 5 words from a one-word seed and print seed and result.
# NOTE(review): the seed has 1 word while every training window has SEQ_LENGTH
# words — presumably why the recorded continuation does not follow the source
# text; consider seeding with SEQ_LENGTH consecutive corpus words. TODO confirm.
test_input = ["aditya"]
result = predict_next_words(model, test_input, num_words=5)
print("Input:", " ".join(test_input))
print("Predicted:", " ".join(result))

Input: aditya
Predicted: aditya foundation in web development and
