# Name Generative AI

Steps:
1. Set up the imports
2. Read the names from the file
3. Create the vocab
4. Create the training data (previous letter -> next letter)
5. Train a neural network (a multilayer perceptron from scikit-learn)
6. Generate ~20 test names using a function

In [None]:
# set up imports
import random
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [5]:
# load the names file (one name per line)
# the encoding is given explicitly so decoding does not depend on the
# platform's locale default
with open('names.txt', encoding='utf-8') as file:
    names = file.read().splitlines()

len(names)

32033

In [10]:
# create the 'vocabulary'
all_names = ' '.join(names)

# sorted list of the unique characters in names; the ' ' start/end marker is
# added explicitly so it is present even when fewer than two names are loaded
# (otherwise it only enters the vocabulary as the join separator)
vocab = sorted(set(all_names) | {' '})

len(vocab)

27

In [16]:
# lookup tables in both directions: index -> character and character -> index
int_to_char = dict(enumerate(vocab))
char_to_int = {ch: idx for idx, ch in int_to_char.items()}

int_to_char[1]

'a'

In [30]:
# create training data: integer-encoded (current character, next character) pairs
X = []
y = []

for raw_name in names:
    # pad with the space marker so the start and end of each name are included
    padded = ' ' + raw_name + ' '

    # walk the padded name pairing every character with its successor;
    # zip stops at the shorter sequence, so the last character has no pair
    for current, following in zip(padded, padded[1:]):
        current_int = char_to_int[current]
        following_int = char_to_int[following]

        X.append(current_int)
        y.append(following_int)

len(X)

228146

In [32]:
# turn the flat list of character indices into a 2D array:
# one row per sample, a single feature column
X = np.array(X).reshape(-1, 1)

# multilayer perceptron with default settings and a fixed random seed
clf = MLPClassifier(random_state=42)

# fit the network to predict the next character index from the current one
clf.fit(X, y)



In [43]:
# predicted distribution over the next character when the current character
# has index 1 ('a' in int_to_char); one sample with a single feature
a_features = [[1]]
clf.predict_proba(a_features)[0]

array([0.2097248 , 0.01536762, 0.01317595, 0.01435252, 0.03495945,
       0.01894531, 0.00406621, 0.00371721, 0.06332731, 0.04505359,
       0.00573439, 0.01668456, 0.07541908, 0.03994562, 0.17329411,
       0.00154919, 0.00186683, 0.00140668, 0.07322134, 0.03928328,
       0.01787318, 0.0125836 , 0.02592623, 0.00388824, 0.00580593,
       0.06959244, 0.0132353 ])

In [None]:
# function to generate a name
def generate_name():
    """Sample one name, character by character, from the trained model.

    Generation starts from the ' ' boundary marker. At each step the
    classifier's predicted distribution for the current character is used to
    draw the next one, until the boundary marker is drawn again.

    Returns:
        The generated name as a string, without the boundary marker. It can
        be empty if the marker is drawn on the very first step.
    """
    end_marker = ' '

    # collect the letters in a list and join once at the end
    letters = []
    current_char = end_marker

    while True:
        # probabilities of each possible next character
        current_int = char_to_int[current_char]
        probs = clf.predict_proba([[current_int]])[0]

        # the columns of predict_proba follow clf.classes_, so sample from
        # those labels rather than assuming they equal range(len(vocab))
        next_char_int = np.random.choice(clf.classes_, p=probs)
        next_char = int_to_char[int(next_char_int)]

        # the marker ends the name and is not part of it
        if next_char == end_marker:
            break

        letters.append(next_char)
        current_char = next_char

    return ''.join(letters)

# sample 20 names and print them one per line
samples = [generate_name() for _ in range(20)]
print('\n'.join(samples))



va 
n 
ldos 
ennn 
es 
minniniddia 
da 
an 
ksn 
arzleonarynn 
acia 
viroyden 
fnn 
j 
ach 
dr 
bsrce 
zn 
thnin 
zn 
