In [5]:
# import statements 
import numpy as np 
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import GRU, Dense, Embedding 
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical 



In [54]:
# defining our sample names/data 
character_names = ['gabriella', 'ryan', 'cecilia', 'kari', 'dimmer', 'allan', 'sarah', 'eliza', 'aaron', 'katnis', 'henry', 'ava', 'alice', 'bob', 'jon', 'alberto', 'marcos', 'vex', 
                   'zach', 'shahbaaz', 'fourohfour', 'pharoah', 'mia', 'katherine', 'farrow', 'ivy', 'jack', 'emily', 'bob', 'charlie', 'radhika', 
                   'quinn', 'patrick', 'eva', 'diana', 'jeffrey', 'noah', 'liam', 'puck', 'burt', 'juan', 'jose', 'chris']


# create a vocabulary based on the sample names
vocab = sorted(set(''.join(character_names)))
char_to_idx = {char: idx for idx, char in enumerate(vocab)}
idx_to_char = {idx: char for char, idx in char_to_idx.items()}

# convert character names to sequences 
sequences = [[char_to_idx[char] for char in name] for name in character_names]

# padding our sequences to ensure equal name length later 
max_name_length = max(len(seq) for seq in sequences)
print(type(max_name_length))
padded_sequences = pad_sequences(sequences, maxlen=max_name_length, padding='post')

# create input + target sequences 
X = padded_sequences[:, :-1]
y = padded_sequences[:, 1:]

# check the shape of our input sequence 
print(X.shape)

#check the shape of our target sequence 
print(y.shape)

# convert our target sequence into a one-hot encoding 
y_one_hot = to_categorical(y, num_classes=len(vocab))

#defining characteristics for our gru-based character generation model 

vocab_size = len(vocab)
embedding_dim = 32

# created the model
# running into issues with input_length = max_name_length-1 in our Embedding function inside the model
# return statement said it did not recognize the variable/wasn't a valid integer, but when printing the value type it returned int

model = tf.keras.Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim), 
    GRU(128, return_sequences=True), 
    Dense(vocab_size, activation='softmax')
])

# compile the model 
model.compile(optimizer='adam', loss = 'categorical_crossentropy')

# train the model 
model.fit(X, y_one_hot, epochs = 50, verbose=1)




<class 'int'>
(43, 9)
(43, 9)
Epoch 1/50


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 3.2497
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 3.1823 
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 3.1119 
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 3.0125 
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 2.8649 
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 2.6715 
Epoch 7/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 2.4037 
Epoch 8/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 2.0832 
Epoch 9/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 1.7693 
Epoch 10/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 1.6613 
Epoch 11/50
[1m2/2[0m [32m━━

<keras.src.callbacks.history.History at 0x7f528bf85bd0>

In [98]:
# declare seed character 
seed = 'w'

# map our seed characters to indexes 
seed_idx = [char_to_idx[char] for char in seed]

# actually calling our model 
# finding the probabilities from the model 
# generating a name ! 
for _ in range(3):
    temperature = .95 # higher the temp -> the more random output will be 

    predicted_probs = model.predict(np.array(seed_idx).reshape(1, -1))[0]
    predicted_probs = predicted_probs[-1] # take last predicted probability from model 

    predicted_probs = np.power(predicted_probs, 1 /temperature)
    predicted_probs = predicted_probs / np.sum(predicted_probs)

    next_char = np.random.choice(list(char_to_idx.keys()), p = predicted_probs)

    seed += next_char 
    seed_idx.append(char_to_idx[next_char])

print(seed)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
wrlh
