In [28]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout, Bidirectional

In [29]:
# Load the text file
file_path = '/content/names.txt'
with open(file_path, 'r') as file:
    names = file.read().splitlines()

In [30]:
# Create a set of unique characters in the data
chars = sorted(set(''.join(names)))
char_to_index = {char: idx for idx, char in enumerate(chars)}
index_to_char = {idx: char for idx, char in enumerate(chars)}

In [31]:
# Encode the names into sequences of integers
encoded_names = [[char_to_index[char] for char in name] for name in names]

In [32]:
# Define constants
max_length = max(len(name) for name in names)
vocab_size = len(chars)
embedding_dim = 512
rnn_units = 256

In [33]:
# Prepare training data
def pad_sequences(sequences, max_length):
    return np.array([seq + [0] * (max_length - len(seq)) for seq in sequences])

padded_names = pad_sequences(encoded_names, max_length)
X = padded_names[:, :-1]
y = np.expand_dims(padded_names[:, 1:], -1)

In [41]:
# Define the model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=X.shape[1]),
    SimpleRNN(rnn_units, return_sequences=True),
    Dropout(0.5),
    Dense(vocab_size, activation='softmax')
])

model.compile(optimizer='RMSprop', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (None, 23, 512)           22528     
                                                                 
 simple_rnn_6 (SimpleRNN)    (None, 23, 256)           196864    
                                                                 
 dropout_1 (Dropout)         (None, 23, 256)           0         
                                                                 
 dense_6 (Dense)             (None, 23, 44)            11308     
                                                                 
Total params: 230700 (901.17 KB)
Trainable params: 230700 (901.17 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [42]:
# Train the model
model.fit(X, y, epochs=10, batch_size=32,validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x794ae86a2380>

In [43]:
def generate_name(model, start_char, char_to_index, index_to_char, max_length=24): # Use the correct max_length
    input_seq = [char_to_index[start_char]]
    name = start_char
    #t=84,v=64

    for _ in range(max_length - 1):
        # Pad to the correct max_length - 1 to match training data
        input_padded = np.pad(input_seq, (0, max_length - 1 - len(input_seq)), 'constant')
        input_padded = np.expand_dims(input_padded, 0)

        pred = model.predict(input_padded, verbose=0)
        next_char_index = np.argmax(pred[0][len(input_seq) - 1])
        next_char = index_to_char[next_char_index]

        if next_char == '\n':
            break

        name += next_char
        input_seq.append(next_char_index)

    return name




In [45]:
# Generate new names
for _ in range(10):
    start_char = np.random.choice(chars)
    print(generate_name(model, start_char, char_to_index, index_to_char))

[anani""""""""""""""""""
Nana""""""""""""""""""""
jani""""""""""""""""""""
""""""""""""""""""""""""
zarinder""""""""""""""""
wari""""""""""""""""""""
zarinder""""""""""""""""
[anani""""""""""""""""""
gani""""""""""""""""""""
olal""""""""""""""""""""
