# The regular expression pattern used in this function checks for the following:

- The email address starts with 1 to 64 alphanumeric characters, dots, underscores, percent signs, plus signs, or hyphens (the username part).
- The "@" symbol follows the username part of the email.
- The domain name of the email contains one or more alphanumeric characters, dots, or hyphens.
- The domain extension contains two or more alphabetical characters.

In [1]:
import re 

In [5]:
# Run the helper notebook, then call generate_random_emails() to build the sample list.
# NOTE(review): %run treats everything after the file name as arguments, so the
# trailing "import generate_random_emails" is presumably not executed as an
# import statement — confirm against the IPython %run documentation.
%run ./generate_email.ipynb import generate_random_emails

email_list = generate_random_emails()

email_list

['cvhgmqz2@gmail.edu',
 'sk2ec8sd@icloud.com',
 '4wl7yexd@gmail.com',
 'tlomc4zt@yahoo.com',
 '61sep0j3@yahoo.io',
 'l3bl3o1w@hotmail.edu',
 'qoaju9l8@live.net',
 'tvt6bfai@aol.edu',
 'xl3zpcfb@icloud.net',
 'vcs6unz8@live.com',
 'l9o391lg@aol.edu',
 'iohuiz0s@gmail.io',
 '9w1ub5m8@gmail.net',
 'poj8htd1@outlook.edu',
 'pq32app5@yahoo.edu',
 'a5ehuj58@icloud.edu',
 'y8ples4u@yahoo.net',
 'kdpzlhk7@yahoo.edu',
 'jcqkjyui@icloud.edu',
 '3ga0i035@gmail.gov',
 's712sa71@gmail.edu',
 'dxsnhxus@live.gov',
 'n507loe9@gmail.com',
 'plgb9hrr@gmail.com',
 'l4ulc6mq@icloud.net',
 'v71raevh@yahoo.edu',
 'mgey36ju@hotmail.edu',
 'y3ujlixk@gmail.com',
 'vynbn96c@outlook.gov',
 'uy8st6oq@yahoo.edu',
 'A@yahooooo.com',
 'dlcm@notreal.none',
 'GmaSSzek@yahooooo.com',
 'D@geemail.net',
 'kGgimsdx@geemail.net',
 'bErHBPQMjL@geemail.net',
 'IKgwT@notreal.none',
 'ByxUrYkRc@notreal.none',
 'dh@invalid.ion',
 'IxCAdc@yahooooo.com']

- `[a-zA-Z0-9._%+-]{1,64}`: between 1 and 64 alphanumeric characters, underscores, dots, percent signs, plus signs, or hyphens, representing the username part of the email address.

- `@`: a literal at sign, separating the username from the domain name.

- `[a-zA-Z0-9.-]+`: one or more alphanumeric characters, dots, or hyphens, representing the domain name part of the email address.

- `\.`: a literal dot, separating the domain name from the top-level domain.

- `[a-zA-Z]{2,}`: two or more alphabetical characters, representing the top-level domain (e.g., com, net, org).

In [10]:
def validate_email(email):
    """Return True if *email*, taken as a whole, looks like an email address.

    This is a pragmatic regular-expression check, not a full RFC 5322 parser.
    The string must consist of, in order:

    - a username of 1 to 64 characters from ``[a-zA-Z0-9._%+-]``
    - a literal ``@``
    - a domain name of one or more characters from ``[a-zA-Z0-9.-]``
    - a literal ``.`` followed by a top-level domain of two or more letters

    Args:
        email: The string to check.

    Returns:
        bool: True when the entire string matches the pattern, False otherwise.
    """
    pattern = r'[a-zA-Z0-9._%+-]{1,64}@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    # fullmatch rather than match: re.match only anchors the start of the
    # string, so input such as "user@example.com!!!" (a valid-looking prefix
    # followed by arbitrary text) would otherwise be accepted.
    return re.fullmatch(pattern, email) is not None

In [11]:
# Keep only the addresses that validate_email accepts.
output = list(filter(validate_email, email_list))
output

['cvhgmqz2@gmail.edu',
 'sk2ec8sd@icloud.com',
 '4wl7yexd@gmail.com',
 'tlomc4zt@yahoo.com',
 '61sep0j3@yahoo.io',
 'l3bl3o1w@hotmail.edu',
 'qoaju9l8@live.net',
 'tvt6bfai@aol.edu',
 'xl3zpcfb@icloud.net',
 'vcs6unz8@live.com',
 'l9o391lg@aol.edu',
 'iohuiz0s@gmail.io',
 '9w1ub5m8@gmail.net',
 'poj8htd1@outlook.edu',
 'pq32app5@yahoo.edu',
 'a5ehuj58@icloud.edu',
 'y8ples4u@yahoo.net',
 'kdpzlhk7@yahoo.edu',
 'jcqkjyui@icloud.edu',
 '3ga0i035@gmail.gov',
 's712sa71@gmail.edu',
 'dxsnhxus@live.gov',
 'n507loe9@gmail.com',
 'plgb9hrr@gmail.com',
 'l4ulc6mq@icloud.net',
 'v71raevh@yahoo.edu',
 'mgey36ju@hotmail.edu',
 'y3ujlixk@gmail.com',
 'vynbn96c@outlook.gov',
 'uy8st6oq@yahoo.edu',
 'A@yahooooo.com',
 'dlcm@notreal.none',
 'GmaSSzek@yahooooo.com',
 'D@geemail.net',
 'kGgimsdx@geemail.net',
 'bErHBPQMjL@geemail.net',
 'IKgwT@notreal.none',
 'ByxUrYkRc@notreal.none',
 'dh@invalid.ion',
 'IxCAdc@yahooooo.com']

In [15]:
# Invalid email examples from Wikipedia.
# Raw strings keep every backslash literal: in a normal string "\a" would
# become the BEL control character, "\"" would lose its backslash, and "\\"
# would collapse to a single backslash, so the samples would no longer match
# the published examples.
invalid_emails = [
    r'Abc.example.com',
    r'A@b@c@example.com',
    r'a"b(c)d,e:f;g<h>i[j\k]l@example.com',
    r'just"not"right@example.com',
    r'this is"not\allowed@example.com',
    r'this\ still\"not\\allowed@example.com',
    r'1234567890123456789012345678901234567890123456789012345678901234+x@example.com',
    r'i_like_underscore@but_its_not_allowed_in_this_part.example.com',
    r'QA[icon]CHOCOLATE[icon]@test.com',
]

In [20]:
# Label each sample address with the validator's verdict.
result = [
    'invalid' if not validate_email(address) else 'valid'
    for address in invalid_emails
]
result

['invalid',
 'invalid',
 'invalid',
 'invalid',
 'invalid',
 'invalid',
 'invalid',
 'invalid',
 'invalid']