# Exploring Names
In this notebook, we use daproli in order to explore US first names taken from https://github.com/smashew/NameDatabases.

In [1]:
import sys
sys.path.append("../..")

In [2]:
import numpy as np
import daproli as dp

Let's start by loading the names.

In [3]:
first_names = np.loadtxt(fname="data/first_names.txt", delimiter='\n', dtype=np.str)

In [4]:
', '.join(first_names[:10]) + " ..."

'Aaron, Abbey, Abbie, Abby, Abdul, Abe, Abel, Abigail, Abraham, Abram ...'

Let's do a quick lowercase transformation.

In [5]:
first_names = dp.map(lambda name : name.lower(), first_names)

Now, let's find all names that are symmetric.

In [6]:
def is_symmetric(name):
    half_idx = int(len(name) / 2)
    return name[:half_idx] == name[half_idx:][::-1]

In [7]:
dp.filter(is_symmetric, first_names)

['alla', 'anna', 'hannah', 'otto']

Let's group all names that contain the same letters and have more than 5 variations.

In [8]:
dp.PipelineTransformer(
    dp.SplitTransformer(lambda name : ''.join(sorted(name))),
    dp.FilterTransformer(lambda names : len(names) > 5)
).transform(first_names)

[['adina', 'daina', 'dania', 'diana', 'nadia', 'naida'],
 ['dane', 'dean', 'dena', 'edna', 'enda', 'neda'],
 ['ashlie', 'elisha', 'leisha', 'sheila', 'shelia', 'shiela'],
 ['aline', 'elina', 'laine', 'lanie', 'liane', 'nelia'],
 ['alise', 'elias', 'elisa', 'isela', 'leisa', 'lesia']]

Let's find all names that contain all vowels.

In [9]:
dp.filter(lambda name : all(vowel in name for vowel in 'aeiou'), first_names)

['aurelio', 'eustolia', 'louvenia', 'marylouise']

Let's find all names that contain xy.

In [10]:
dp.filter(lambda name : all(char in name for char in 'xy'), first_names)

['roxy']

Let's find all names of size 9 that only contain unique characters.

In [11]:
dp.PipelineTransformer(
    dp.FilterTransformer(lambda name : len(name) == 9),
    dp.FilterTransformer(lambda name : len(name) == np.unique(list(name)).shape[0])
).transform(first_names)

['christena',
 'cornelius',
 'cristobal',
 'florencia',
 'francoise',
 'hortencia',
 'hortensia',
 'jacquelin',
 'jacquelyn',
 'jacquline',
 'josephina',
 'marcelino',
 'petronila',
 'philomena',
 'thomasine']