In [43]:
#With help from ChatGPT

import numpy as np
import tensorflow as tf
from keras import layers
from keras.models import Model

# Predefined list of 100 names
names_list = [
    "Alice", "Bob", "Charlie", "David", "Eva", "Fay", "George", "Hannah", "Ivy", "Jack",
    "Kathy", "Liam", "Mia", "Noah", "Olivia", "Paul", "Quinn", "Rachel", "Sam", "Tina",
    "Ursula", "Victor", "Wendy", "Xander", "Yara", "Zane", "Adam", "Bella", "Carter", "Daisy",
    "Ella", "Finn", "Grace", "Henry", "Iris", "Jake", "Kara", "Leo", "Maya", "Nina",
    "Oscar", "Piper", "Riley", "Sophie", "Tom", "Uma", "Vera", "Will", "Xena", "Yvonne",
    "Zoe", "Amelia", "Benjamin", "Chloe", "Daniel", "Emily", "Frank", "Gina", "Harry", "Jade",
    "Kevin", "Luna", "Mason", "Natalie", "Oliver", "Parker", "Quincy", "Rebecca", "Steven", "Tara",
    "Ulysses", "Violet", "Walter", "Xander", "Yara", "Zane", "Alina", "Beck", "Cara", "Derek",
    "Ella", "Felix", "Gwen", "Hugo", "Isla", "Jasper", "Kira", "Liam", "Molly", "Nico",
    "Opal", "Pearl", "Quinn", "Rory", "Sage", "Theo", "Uma", "Vera", "Will", "Xena",
]

def process_names(names, *, unwanted=['(', ')', '-', '.', '/']):
    names = [name.lower() for name in names]
    print("Total names:", len(names))
    chars = sorted(list(set(''.join(names))))

    def has_unwanted(word):
        return any(char in unwanted for char in word)

    names = [name for name in names if not has_unwanted(name)]
    print("Amount of names after removing those with unwanted characters:", len(names))
    chars = [char for char in chars if char not in unwanted]
    print("Using the following characters:", chars)

    maxlen = max(len(name) for name in names)
    minlen = min(len(name) for name in names)
    print("Longest name is", maxlen, "characters long")
    print("Shortest name is", minlen, "characters long")
    
    # enchar indicates the end of the word
    endchars = '!£$%^&*()-_=+/?.>,<;:@[{}]#~'
    endchar = [ch for ch in endchars if ch not in chars][0]

    # ensures the character isn't already used & present in the training data
    assert endchar not in chars
    chars += [endchar]
    
    return names, chars

names, chars = process_names(names_list)

def make_sequences(names, seqlen):
    sequences, lengths, nextchars = [], [], []
    for name in names:
        if len(name) <= seqlen:
            sequences.append(name + chars[-1] * (seqlen - len(name)))
            nextchars.append(chars[-1])
            lengths.append(len(name))
        else:
            for i in range(len(name) - seqlen + 1):
                sequences.append(name[i:i+seqlen])
                nextchars.append(name[i + seqlen] if i + seqlen < len(name) else chars[-1])
                lengths.append(i + seqlen)

    print(len(sequences), "sequences of length", seqlen, "made")
    
    return sequences, lengths, nextchars

seqlen = 5
sequences, lengths, nextchars = make_sequences(names, seqlen)

def make_onehots(sequences, lengths, nextchars, chars):
    max_seq_length = max(len(seq) for seq in sequences)  # Determine max sequence length
    vocab_size = len(chars)  # Size of the vocabulary

    # Initialize arrays
    x = np.zeros(shape=(len(sequences), max_seq_length), dtype='int32')  # Sequences
    x2 = np.zeros(shape=(len(lengths), max(lengths)), dtype='int32')  # Lengths

    for i, seq in enumerate(sequences):
        for j, char in enumerate(seq):
            x[i, j] = chars.index(char)

    for i, l in enumerate(lengths):
        x2[i, l-1] = 1

    # Convert nextchars to integer indices for sparse categorical crossentropy
    y = np.zeros(shape=(len(nextchars), max_seq_length), dtype='int32')  # Adjust shape
    for i, char in enumerate(nextchars):
        y[i] = chars.index(char)

    return x, x2, y


x, x2, y = make_onehots(sequences=sequences, lengths=lengths, nextchars=nextchars, chars=chars)

class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)

class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

def create_model(input_shape, vocab_size, embed_dim, num_heads, ff_dim):
    inputs = layers.Input(shape=input_shape)
    
    # Example embedding layer
    embedding_layer = TokenAndPositionEmbedding(maxlen=input_shape[0], vocab_size=vocab_size, embed_dim=embed_dim)
    x = embedding_layer(inputs)
    
    # Example transformer block
    transformer_block = TransformerBlock(embed_dim=embed_dim, num_heads=num_heads, ff_dim=ff_dim)
    x = transformer_block(x)
    
    # Output layer with logits
    outputs = layers.Dense(vocab_size)(x)
    
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

input_shape = (seqlen,)
vocab_size = len(chars)
embed_dim = 64
num_heads = 4
ff_dim = 128
model = create_model(input_shape, vocab_size, embed_dim, num_heads, ff_dim)

# Train the model
model.fit(x=x, y=y, epochs=10, batch_size=64)

def generate_name(model, start, *, chars, temperature=0.4):
    maxlength = model.input_shape[1]  # Get sequence length from model input shape
    seqlen = maxlength
    result = start

    # Prepare initial input sequence
    sequence_input = np.zeros(shape=(1, seqlen), dtype='int32')
    for i, char in enumerate(start):
        sequence_input[0, i] = chars.index(char)

    # Generate name
    for _ in range(seqlen - len(start)):
        predictions = model.predict(sequence_input)
        predictions = predictions[0, -1, :]  # Get the predictions for the last character
        preds = np.log(predictions) / temperature
        exp_preds = np.exp(preds)
        preds = exp_preds / np.sum(exp_preds)
        next_char_index = np.argmax(np.random.multinomial(1, preds, 1))
        next_char = chars[next_char_index]

        result += next_char
        sequence_input[0, -1] = next_char_index  # Update input sequence with predicted character

    return result

def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds + 1e-7) / temperature
    exp_preds = np.exp(preds - np.max(preds))
    preds = exp_preds / np.sum(exp_preds)
    return np.argmax(np.random.multinomial(1, preds, 1))

def get_dictchars(names,seqlen):
    dictchars = [{} for _ in range(seqlen)]

    for name in names:
        if len(name) < seqlen:
            continue
        dictchars[0][name[0]] = dictchars[0].get(name[0],0) + 1
        for i in range(1,seqlen):
            if dictchars[i].get(name[i-1],0) == 0:
                dictchars[i][name[i-1]] = {name[i]: 1}
            elif dictchars[i][name[i-1]].get(name[i],0) == 0:
                dictchars[i][name[i-1]][name[i]] = 1
            else:
                dictchars[i][name[i-1]][name[i]] += 1
    return dictchars

def generate_start_seq(dictchars):
    res = "" # The starting sequence will be stored here
    p = sum([n for n in dictchars[0].values()]) # total amount of letter occurences
    r = np.random.randint(0,p) # random number used to pick the next character
    tot = 0
    for key, item in dictchars[0].items():
        if r >= tot and r < tot + item:
            res += key
            break
        else:
            tot += item

    for i in range(1,len(dictchars)):
        ch = res[-1]
        if dictchars[i].get(ch,0) == 0:
            l = list(dictchars[i].keys())
            ch = l[np.random.randint(0,len(l))]
        p = sum([n for n in dictchars[i][ch].values()])
        r = np.random.randint(0,p)
        tot = 0
        for key, item in dictchars[i][ch].items():
            if r >= tot and r < tot + item:
                res += key
                break
            else:
                tot += item
    return res
                
dictchars = get_dictchars(names,seqlen)

def generate_random_name(model, *, chars, dictchars, temperature=0.4):
    start = generate_start_seq(dictchars)
    return generate_name(model, start, chars=chars, temperature=temperature)

print(generate_random_name(model, chars=chars, dictchars=dictchars, temperature=0.4))

Total names: 100
Amount of names after removing those with unwanted characters: 100
Using the following characters: ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
Longest name is 8 characters long
Shortest name is 3 characters long
131 sequences of length 5 made
Epoch 1/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 33ms/step - accuracy: 0.1190 - loss: 9.4807
Epoch 2/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.6657 - loss: 4.5917
Epoch 3/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.7544 - loss: 4.0206 
Epoch 4/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.7762 - loss: 3.4853
Epoch 5/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.7567 - loss: 3.4279
Epoch 6/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [44]:
def try_model(model, *, x=x, y=y, chars=chars, dictchars=dictchars, total_epochs=180, print_every=60, temperature=0.4, verbose=True):
    for i in range(total_epochs // print_every):
        history=model.fit(x=x, y=y, 
                            epochs=print_every,
                            batch_size=64,
                            validation_split=0.05,
                            verbose=0)
                            
        if verbose:
            print("\nEpoch", (i + 1) * print_every)
            print("First loss:            %1.4f" % (history.history['loss'][0]))
            print("Last loss:             %1.4f" % (history.history['loss'][-1]))
            print("First validation loss: %1.4f" % (history.history['val_loss'][0]))
            print("Last validation loss:  %1.4f" % (history.history['val_loss'][-1]))
            print("\nGenerating random names:")
            for _ in range(10):
                print(generate_random_name(model, chars=chars,dictchars=dictchars, temperature=temperature))
    if not verbose:
        print("Model training complete, here are some generated names:")
        for _ in range(20):
            print(generate_random_name(model, chars=chars, dictchars=dictchars, temperature=0.4))

In [45]:
try_model(model)


Epoch 60
First loss:            3.3403
Last loss:             2.8575
First validation loss: 0.2533
Last validation loss:  0.7510

Generating random names:
grsul
chlte
dathi
rainn
renja
ameve
haspe
xarke
bella
ulila

Epoch 120
First loss:            2.8380
Last loss:             2.5144
First validation loss: 0.6780
Last validation loss:  0.0013

Generating random names:
nachy
quice
darke
grsoe
pannn
rende
uranc
solon
alisy
frace

Epoch 180
First loss:            2.4671
Last loss:             2.3824
First validation loss: 0.0000
Last validation loss:  0.0000

Generating random names:
chlon
chace
solli
dannc
quiss
vichy
alila
olina
rarke
quily
