## Dummy email generator

Uses the Faker package to create dummy emails that can be used in modelling projects.

In [1]:
import re
from faker import Faker
from faker.providers import BaseProvider

import random
from math import floor
from random_word import RandomWords

In [2]:
# Seed Faker's shared random generator so Faker-generated values are repeatable between runs.
Faker.seed(0)

# Module-level Faker instance using the British English locale; company_email() draws its TLDs from it.
fake = Faker('en_GB')


In [3]:
def random_company(sample_size):
    """
    Build a shuffled list of `sample_size` made-up company names.

    Half of the names (rounded down) come from Faker's `company()` provider, which
    gives names based on people. The remainder are single random words in title
    case, giving examples closer to `Apple`, `Google` etc.
    Returns a list of size `sample_size`.
    """
    name_source = Faker()
    word_source = RandomWords()
    faker_count = floor(sample_size/2)
    # Faker names are generated first, then the random words, before mixing them together.
    names = [name_source.company() for _ in range(faker_count)]
    names += [word_source.get_random_word().title() for _ in range(sample_size - faker_count)]
    random.shuffle(names)
    return names

In [4]:

# Create a list of 10 random company names (half from Faker, half random words)
companies = random_company(10)
# Bare expression: the notebook shows the list as the cell output
companies

['Faulkner-Howard',
 'Terceroon',
 'Sheppard-Tucker',
 'Musterer',
 'Dewlike',
 'Wagner LLC',
 'Campos PLC',
 'Chang-Fisher',
 'Jumblers',
 'Daboia']

### Generate email domains based on company names

In [5]:
# Regex character class of punctuation, symbols and digits to strip from company names.
# Written as a raw string so the escaped brackets (\] and \[) reach the regex engine
# without relying on invalid string escape sequences. The hyphen is not part of the
# class, so hyphenated names keep their hyphen.
punctuationlist = r'''[_`.'?/!%,\]\[:";=+£#@*$()<>&|0-9]'''
# Whole-word patterns for filler words and company suffixes that are dropped from a
# name; company_email() applies them to the lower-cased name.
remove = [r'\bthe\b', r'\band\b', r'\bltd\b', r'\bplc\b', r'\binc\b', r'\bco\b', r'\bcorp\b', r'\bllc\b']

def punctuations_remover(string, punctuationlist):
    """
    Remove every match of the regex `punctuationlist` from `string`.

    Args:
        string: Text to clean.
        punctuationlist: Regular expression (typically a character class) whose
            matches are deleted from `string`.

    Returns:
        The cleaned text. An empty string is returned when nothing is left after
        stripping, so callers can always treat the result as a `str`.

    Raises:
        Exception: If the substitution fails (for example an invalid pattern or a
            non-string input); the original error is attached as the cause.
    """
    try:
        return re.sub(punctuationlist, "", string)
    except Exception as e:
        raise Exception("Error in Class - Text_processor in function - punctuations_remover(). ERROR - "+str(e)) from e

In [6]:
def company_email(company):
    """
    Derive an email domain from a company name.

    The name is lower-cased, every pattern in `remove` (filler words and company
    suffixes) is deleted, characters matched by `punctuationlist` are stripped,
    the result is title-cased and its spaces are dropped. A TLD obtained from
    `fake.tld()` is then appended after a dot.
    """
    cleaned = company.lower()
    for filler_pattern in remove:
        cleaned = re.sub(filler_pattern, '', cleaned)
    cleaned = punctuations_remover(cleaned, punctuationlist)
    # Title-case while the words are still space-separated, then squeeze the spaces out.
    domain_name = re.sub(' +', '', cleaned.title())
    return domain_name + '.' + fake.tld()

In [7]:
# Generate a list of dictionaries for companies: one record per name, holding the
# original company name and the email domain derived from it by company_email().
company_dict = []
for company in companies:
    email = company_email(company)
    row = {'CompanyName': company, 'Email': email}
    company_dict.append(row)

In [8]:
company_dict

[{'CompanyName': 'Faulkner-Howard', 'Email': 'Faulkner-Howard.com'},
 {'CompanyName': 'Terceroon', 'Email': 'Terceroon.net'},
 {'CompanyName': 'Sheppard-Tucker', 'Email': 'Sheppard-Tucker.org'},
 {'CompanyName': 'Musterer', 'Email': 'Musterer.com'},
 {'CompanyName': 'Dewlike', 'Email': 'Dewlike.com'},
 {'CompanyName': 'Wagner LLC', 'Email': 'Wagner.com'},
 {'CompanyName': 'Campos PLC', 'Email': 'Campos.com'},
 {'CompanyName': 'Chang-Fisher', 'Email': 'Chang-Fisher.co.uk'},
 {'CompanyName': 'Jumblers', 'Email': 'Jumblers.com'},
 {'CompanyName': 'Daboia', 'Email': 'Daboia.info'}]