In [1]:
import os

import argparse
import datetime as dt
import regex as re

import yaml

import requests
import smtplib
import ssl
import mimetypes
from email.message import EmailMessage

import pandas as pd

import db_handler

In [17]:

def parse_config(config: dict):
    """
    Pull the credentials section out of a loaded configuration.

    Parameters
    -------
    config (dict{any}): Dictionary of configurations.

    Returns
    -------
    (dict(any)): The value stored under the "credentials" key.
    """
    # Direct indexing on purpose: a config without "credentials" raises KeyError.
    return config["credentials"]

def get_csv_as_list(filepath: str) -> list:
    """
    Read a header-less csv and return its first column as a list.

    Parameters
    -------
    filepath (str): Filepath to csv.

    Returns
    -------
    (list): Values of the first column, in file order.
    """
    # header=None: the first row is treated as data and columns are labelled 0, 1, ...
    frame = pd.read_csv(filepath, header=None)
    first_column = frame[0]
    return first_column.values.tolist()


class Clearbit():
    """A class for working with Clearbit API."""
    def get_name(self, response: requests.models.Response) -> (str, str):
        """
        Parse response for HTTP request.

        Parameters
        -------
        response (requests.models.Response): Http response with person data.

        Returns
        -------
        name (str, str): Tuple of first and last name. None (after printing a
            notice) when `response` is not a requests Response.

        Raises
        -------
        KeyError / TypeError: When the decoded body has no usable
            "person" -> "name" data, e.g. an error body such as an over-quota reply.
        """
        if not isinstance(response, requests.models.Response):
            print("Not a request.")
            return
        person = response.json()["person"]
        full_name = person["name"]
        return (full_name["givenName"], full_name["familyName"])


    def get_names_from_email_list(self, recipients_list:[str], username=None, password=None, api_key=None):
        """
        Given a list of recipients, use Clearbit to retrieve their names. Other data may be retrieved but at a later stage.
        The username is the api_key from Clearbit. Read their docs for more info.

        Addresses whose response cannot be parsed are reported with print() and
        left out of the result, so one failed lookup does not abort the batch.

        Parameters
        -------
        recipients_list ([str]): Email addresses to look up.
        username (str): Optional. Username.
        password (str): Optional. Password.
        api_key (str): API key.

        Returns
        -------
        names {}: Dict of emails to names.
        """
        # Function-scope import so this block does not depend on the import cell.
        from urllib.parse import quote

        names = {}
        # NOTE: may want to batch this in the future or too many requests will be attempted too quickly.
        for email in recipients_list:
            # The ":email" shown in Clearbit's docs is a placeholder, so the
            # (URL-encoded) address follows "email=" directly, with no colon.
            encoded_email = quote(email, safe="")
            url = f"https://person.clearbit.com/v2/combined/find?email={encoded_email}"
            clearbit_response = get_response(url, username=username, password=password, api_key=api_key)
            try:
                names[email] = self.get_name(clearbit_response)
            except (KeyError, TypeError, ValueError) as e:
                # ValueError also covers a body that is not valid JSON.
                print(f"{e!r}\nName fetching failed for: {email}. Skipping this name.")
        return names


def get_response(url: str, username=None, password=None, api_key=None, timeout=None):
    """
    Get a response from API using HTTP.

    Credentials are sent as HTTP basic auth. When no username is given the
    api_key is used as the basic-auth username (the convention the Clearbit
    docstring in this file describes). When neither is given, no auth is sent.

    Parameters
    -------
    url (str): Url for API request.
    username (str): Optional. Username.
    password (str): Optional. Password. A missing password is sent as "".
    api_key (str): Optional. API key, used as the username when none is given.
    timeout (float): Optional. Seconds to wait before giving up; None (the
        default) waits indefinitely, as before.

    Returns
    -------
    api_response (requests.models.Response): Response.
    """
    auth_user = username if username is not None else api_key
    if auth_user is None:
        auth = None
    else:
        # NOTE(review): requests appears to stringify non-str credentials, so a
        # None password would be transmitted as the text "None" - confirm.
        auth = (auth_user, "" if password is None else password)
    return requests.get(url, auth=auth, timeout=timeout)

In [18]:
"""Jmailer method."""

# inputs = parse_args()

# config_path = inputs.config_path
# db_identifier = inputs.db_identifier
# db_table = inputs.db_table
# sender = inputs.sender
# recipients = inputs.recipients
# recipients_path = inputs.recipients_path
# email_config_path = inputs.email_config_path
# subject = inputs.subject
# credentials_path = inputs.credentials_path
# test_mode = inputs.test_mode

# NOTE(review): the values below are hardcoded stand-ins for the CLI arguments
# named in the comments. The absolute paths are machine-specific; consider
# reading them from a config cell or environment variables.
# --config_path
config_path = "/Users/jaimemerizalde/Desktop/JOBS 2023/software/jmailer/config.yaml"
# --sender
sender = "jaime.meriz13@gmail.com"
# --db_identifier
db_identifier = "1t1wGAQvZuwEWOOgcgtBaqbZoafG_ZCfTV5QGyMfYHTg"
# --db_table
db_table = "contacts"
# --recipients_path
recipients_path = "/Users/jaimemerizalde/Desktop/JOBS 2023/email_recipients.csv"
# --body_path
body_path = "/Users/jaimemerizalde/Desktop/JOBS 2023/Email Outreaches/generic_1/body.txt"
# --email_config_path
email_config_path = "/Users/jaimemerizalde/Desktop/JOBS 2023/Email Outreaches/generic_1/config.yaml"
# --attachments_path
attachments_path = "/Users/jaimemerizalde/Desktop/JOBS 2023/lopez_jaime_resume_2sep2023.pdf"
# --credentials_path
credentials_path = "/Users/jaimemerizalde/Desktop/JOBS 2023/software/jmailer/secrets/db_secret.json"
# --test_mode
test_mode = "Y"
body = None


# Convert the "Y"/other flag into a boolean for the rest of the notebook.
if test_mode == "Y":
    print(f"Running in test mode. Emails will be sent to {sender}")
    test_mode = True
else:
    test_mode = False

if body is not None and (body_path is not None or email_config_path is not None):
    print("Error: Provide body or body_path and email_config_path but not both. Defaulting to body provided.")
    # return

print("Parsed args flow complete.")

# Context managers close the yaml files once they have been parsed.
with open(config_path) as config_file:
    config = yaml.safe_load(config_file)
credentials = parse_config(config)
print("Loaded credentials flow complete.")

gmail_password = credentials["gmail"]["app_password"]
print("Get passwords flow complete.")

recipients = get_csv_as_list(recipients_path)
print("Get recipients flow complete.")

# Always bind email_config so later cells do not hit a NameError when no path is given.
email_config = None
if email_config_path is not None:
    with open(email_config_path) as email_config_file:
        email_config = yaml.safe_load(email_config_file)
    print("Load email config flow complete.")

clearbit_api_key = credentials["clearbit"]["api_key"]
print("Clearbit user flow complete.")

# Open an implicit-TLS SMTP session to Gmail and authenticate as the sender.
context = ssl.create_default_context()
smpt_connection = smtplib.SMTP_SSL("smtp.gmail.com", 465, context=context)
smpt_connection.login(sender, gmail_password)
print("SMPT connection flow complete.")

# # THIS RIGHT NOW IS A PROBLEM. I can't rely on Clearbit to get the names so I have to get a workaround...
# names = Clearbit().get_names_from_email_list(recipients, username=clearbit_api_key)


Running in test mode. Emails will be sent to jaime.meriz13@gmail.com
Parsed args flow complete.
Loaded credentials flow complete.
Get passwords flow complete.
Get recipients flow complete.
Load email config flow complete.
Clearbit user flow complete.
SMPT connection flow complete.


KeyError: 'person'

Here is the problem

In [22]:
recipient = "jaime.meriz13@gmail.com"
# The ":email" in Clearbit's docs is a placeholder, so the address follows
# "email=" directly rather than after a literal colon.
url = f"https://person.clearbit.com/v2/combined/find?email={recipient}"
clearbit_response = get_response(url, username=clearbit_api_key, password=None, api_key=None)
# names[email] = self.get_name(clearbit_response)

<Response [402]>

In [23]:
# Show the decoded JSON body of the Clearbit response fetched in the previous cell.
clearbit_response.json()

{'error': {'type': 'over_quota',
  'message': 'Your account is over its free quota. Please add a credit card to https://clearbit.com/billing'}}

Here is the solution: use a different API, because the Clearbit account is over its free quota.

Let's give it a shot with People Data Labs.

I have already signed up for an account.

# Workshop

In [24]:
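# I'm given this list:
# recipients

# Now we have to get names from these addresses... there's gotta be a service.
# NOTE: people_data_api_key must be defined before the next cell runs; load it
# from the config or environment rather than hardcoding it here.
# people_data_api_key = ""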



In [46]:
# Set the Person Enrichment API URL
PDL_URL = "https://api.peopledatalabs.com/v5/person/enrich"

# Build the query-string parameters for the lookup.
# NOTE(review): people_data_api_key is not assigned in any visible cell - it must
# already exist in the kernel for this cell to run.
PARAMS = {
    "api_key": people_data_api_key,
    # "profile": ["linkedin.com/in/seanthorne"]
    "email": "emma.alvarez@sphanalytics.com"
}

# Pass the parameters object to the Person Enrichment API.
# Despite its name, json_response holds the value returned by requests.get
# (a Response object); call .json() on it to get the decoded body.
json_response = requests.get(PDL_URL, params=PARAMS)

# Print the API response in JSON format
# print(json_response)

In [32]:
# Decode the body so the cell shows the JSON payload rather than the Response repr.
json_response.json()  # Success!
# Except with this new method, we have to format things differently.

{'status': 200,
 'likelihood': 6,
 'data': {'id': 'RSdJGMyFacjaKBi29bdxqw_0000',
  'full_name': 'emma alvarez',
  'first_name': 'emma',
  'middle_initial': 'y',
  'middle_name': 'ybarra',
  'last_initial': 'a',
  'last_name': 'alvarez',
  'gender': 'female',
  'birth_year': None,
  'birth_date': None,
  'linkedin_url': 'linkedin.com/in/emma-alvarez-83540b165',
  'linkedin_username': 'emma-alvarez-83540b165',
  'linkedin_id': '660917993',
  'facebook_url': 'facebook.com/emma.alvarez.50',
  'facebook_username': 'emma.alvarez.50',
  'facebook_id': '100000316219152',
  'twitter_url': None,
  'twitter_username': None,
  'github_url': None,
  'github_username': None,
  'work_email': None,
  'personal_emails': [],
  'recommended_personal_email': None,
  'mobile_phone': None,
  'industry': 'hospital & health care',
  'job_title': 'senior manager, talent acquisition',
  'job_title_role': 'human_resources',
  'job_title_sub_role': 'recruiting',
  'job_title_levels': ['manager', 'senior'],
  'job

# Integrating the Lab work into production

In [None]:
# Current method: names are looked up through the Clearbit class, with the API
# key passed as the username.
names = Clearbit().get_names_from_email_list(recipients, username=clearbit_api_key)

# So build a new class exposing the same method for People Data Labs.



In [47]:
# Display the object returned by the enrichment request.
json_response

<Response [200]>

In [38]:
# json_response is the Response returned by requests.get, which cannot be
# subscripted; decode the body once and index into the resulting dict.
person_data = json_response.json()["data"]
person_data["first_name"]  #.keys()

person_data.keys()

dict_keys(['id', 'full_name', 'first_name', 'middle_initial', 'middle_name', 'last_initial', 'last_name', 'gender', 'birth_year', 'birth_date', 'linkedin_url', 'linkedin_username', 'linkedin_id', 'facebook_url', 'facebook_username', 'facebook_id', 'twitter_url', 'twitter_username', 'github_url', 'github_username', 'work_email', 'personal_emails', 'recommended_personal_email', 'mobile_phone', 'industry', 'job_title', 'job_title_role', 'job_title_sub_role', 'job_title_levels', 'job_company_id', 'job_company_name', 'job_company_website', 'job_company_size', 'job_company_founded', 'job_company_industry', 'job_company_linkedin_url', 'job_company_linkedin_id', 'job_company_facebook_url', 'job_company_twitter_url', 'job_company_location_name', 'job_company_location_locality', 'job_company_location_metro', 'job_company_location_region', 'job_company_location_geo', 'job_company_location_street_address', 'job_company_location_address_line_2', 'job_company_location_postal_code', 'job_company_loca

In [67]:
class PeopleDataLabs():
    """A class for working with People Data Labs API."""

    # Seconds to wait on each enrichment request before giving up on that address.
    REQUEST_TIMEOUT_SECONDS = 30

    def get_name(self, response: requests.models.Response) -> (str, str):
        """
        Parse response for HTTP request.

        Parameters
        -------
        response (requests.models.Response): Http response with person data.

        Returns
        -------
        name (str, str): Tuple of first and last name, exactly as returned by the
            API. None (after printing a notice) when `response` is not a requests
            Response.

        Raises
        -------
        KeyError: When the decoded body has no "data" entry (e.g. a not-found
            reply) or the entry lacks "first_name" / "last_name".
        """
        if not isinstance(response, requests.models.Response):
            print("Not a request.")
            return
        response_json = response.json()
        if "data" not in response_json:
            # Same exception type as plain indexing, but the message says what was missing.
            raise KeyError(f"no 'data' in response (HTTP status {response.status_code})")
        data = response_json["data"]
        return (data["first_name"], data["last_name"])


    def get_names_from_email_list(self, recipients_list:[str], username=None, password=None, api_key=None):
        """
        Given a list of recipients, use People Data Labs to retrieve their names.
        Other data may be retrieved but at a later stage.

        Addresses whose lookup fails are logged through db_handler and left out of
        the result, so one failure does not abort the batch.

        Parameters
        -------
        recipients_list ([str]): Email addresses to look up.
        username (str): Unused; kept so the signature matches the Clearbit class.
        password (str): Unused; kept so the signature matches the Clearbit class.
        api_key (str): API key, sent as the "api_key" query parameter.

        Returns
        -------
        names {}: Dict of emails to names.
        """
        names = {}
        # NOTE: may want to batch this in the future or too many requests will be attempted too quickly.
        url = "https://api.peopledatalabs.com/v5/person/enrich"
        for email in recipients_list:
            params = {
                "api_key": api_key,
                "email": email
            }
            try:
                # The request sits inside the try so a network failure skips this
                # address instead of aborting the loop.
                response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT_SECONDS)
                names[email] = self.get_name(response)
            except requests.exceptions.RequestException as e:
                # Log only the exception type: the message may include the request
                # URL, whose query string carries the API key.
                msg = type(e).__name__ + f"\nName fetching failed for: {email}. Skipping this name."
                db_handler.Timers().exec_time(msg)
            except Exception as e:
                msg = str(e) + f"\nName fetching failed for: {email}. Skipping this name."
                db_handler.Timers().exec_time(msg)
        return names


In [70]:
# Test this class: first against the single response fetched earlier, then
# against the full recipient list.
json_response
PeopleDataLabs().get_name(json_response) # YES! Success!

# Addresses whose lookup fails are logged and left out of `names`.
names = PeopleDataLabs().get_names_from_email_list(recipients, api_key=people_data_api_key)  # Also successful

'data'
Name fetching failed for: jaime.meriz13@gmail.com. Skipping this name. Timestamp: 15:58:51 - 2023-09-12


In [128]:
# Show the email -> (first, last) mapping built by get_names_from_email_list.
names

{'pholland-riddle@themyersgroup.net': ('pam', 'holland-riddle'),
 'emma.alvarez@sphanalytics.com': ('emma', 'alvarez'),
 'mark.logsdon@pressganey.com': ('mark', 'logsdon'),
 'katelynn.hottinger@pressganey.com': ('katelynn', 'hottinger')}

In [498]:
# # names
# # You have names....
# # Now format them appropriately.

# Sample inputs for format_name: a plain lowercase name, and one with leading
# spaces, hyphens and an underscore.
first = 'pam'
last = '   holland-riddle-more_names' 

name = (first, last)
names1 = {"@email": name}

import regex as re

def format_name(name: str):
    """
    Takes a name with potential special characters and extra spaces to return a properly-formatted name.
    Capitalizes the first letter of the name and of every part that follows a
    separator (space, hyphen, underscore, apostrophe, ...).

    Leading/trailing whitespace is dropped and internal runs of whitespace are
    collapsed to one space, so multi-word names keep their word boundaries.

    Parameters
    -------
    name (str): Name with potential special characters or spaces.

    Returns
    -------
    f_name (str): Formatted name.
    """
    # split() with no argument trims the ends and splits on any whitespace run.
    normalized = " ".join(name.split())

    # Group 1: start of string or any non-alphanumeric separator; group 2: the
    # alphanumeric character after it. \W is Unicode-aware for str patterns, so
    # accented letters count as letters instead of as separators.
    pattern = r"(^|[\W_])([^\W_])"
    f_name = re.sub(pattern, lambda m: m.group(1) + m.group(2).upper(), normalized)
    return f_name


In [500]:
# Despite its name, remove_spaces holds the value returned by format_name.
remove_spaces = format_name(first)
remove_spaces

'Pam'

In [497]:
remove_spaces
# Group 1: start of string or one non-alphanumeric character.
# Group 2: the alphanumeric character that follows it.
pattern = "(^|[^a-zA-Z0-9])([a-zA-Z0-9])"
def method_one(s):
    """Replacement callback: keep group 1 as-is and upper-case group 2 of match `s`."""
    separator, first_char = s.group(1), s.group(2)
    return separator + first_char.upper()

# Upper-case the character after each separator using the named callback.
# NOTE(review): the recorded output implies remove_spaces held the hyphenated
# sample when this ran - confirm cell order.
re.sub(pattern, method_one, remove_spaces)

'Holland-Riddle-More_Names'

In [504]:
# Print every looked-up name as "First Last" after formatting both parts.
for key, name in names.items():
    formatted_first = format_name(name[0])
    formatted_last = format_name(name[1])
    print(formatted_first, formatted_last)



Pam Holland-Riddle
Emma Alvarez
Mark Logsdon
Katelynn Hottinger


In [513]:
class PeopleDataLabs():
    """A class for working with People Data Labs API."""

    # Seconds to wait on each enrichment request before giving up on that address.
    REQUEST_TIMEOUT_SECONDS = 30

    def get_name(self, response: requests.models.Response) -> (str, str):
        """
        Parse response for HTTP request.

        Parameters
        -------
        response (requests.models.Response): Http response with person data.

        Returns
        -------
        name (str, str): Tuple of formatted first and last name. A missing or
            empty part is replaced by "<FIRST_NAME>" / "<LAST_NAME>". None (after
            printing a notice) when `response` is not a requests Response.

        Raises
        -------
        KeyError: When the decoded body has no "data" entry (e.g. a not-found reply).
        """
        if not isinstance(response, requests.models.Response):
            print("Not a request.")
            return
        response_json = response.json()
        if "data" not in response_json:
            # Same exception type as plain indexing, but the message says what was missing.
            raise KeyError(f"no 'data' in response (HTTP status {response.status_code})")
        data = response_json["data"]
        # `or` rather than a .get() default: the API reports unknown fields as
        # explicit nulls, which a .get() default would not replace.
        first_name = self.format_name(data.get("first_name") or "<FIRST_NAME>")
        last_name = self.format_name(data.get("last_name") or "<LAST_NAME>")
        return (first_name, last_name)


    def format_name(self, name: str):
        """
        Takes a name with potential special characters and extra spaces to return a properly-formatted name.
        Capitalizes the first letter of the name and of every part that follows a
        separator (space, hyphen, underscore, apostrophe, ...).

        Leading/trailing whitespace is dropped and internal runs of whitespace are
        collapsed to one space, so multi-word names keep their word boundaries.

        Parameters
        -------
        name (str): Name with potential special characters or spaces.

        Returns
        -------
        f_name (str): Formatted name.
        """
        # split() with no argument trims the ends and splits on any whitespace run.
        normalized = " ".join(name.split())

        # Group 1: start of string or any non-alphanumeric separator; group 2: the
        # alphanumeric character after it. \W is Unicode-aware for str patterns, so
        # accented letters count as letters instead of as separators.
        pattern = r"(^|[\W_])([^\W_])"
        f_name = re.sub(pattern, lambda m: m.group(1) + m.group(2).upper(), normalized)
        return f_name


    def get_names_from_email_list(self, recipients_list:[str], username=None, password=None, api_key=None):
        """
        Given a list of recipients, use People Data Labs to retrieve their names.
        Other data may be retrieved but at a later stage.

        Addresses whose lookup fails are logged through db_handler and left out of
        the result, so one failure does not abort the batch.

        Parameters
        -------
        recipients_list ([str]): Email addresses to look up.
        username (str): Unused; kept so the signature matches the Clearbit class.
        password (str): Unused; kept so the signature matches the Clearbit class.
        api_key (str): API key, sent as the "api_key" query parameter.

        Returns
        -------
        names {}: Dict of emails to names.
        """
        names = {}
        # NOTE: may want to batch this in the future or too many requests will be attempted too quickly.
        url = "https://api.peopledatalabs.com/v5/person/enrich"
        for email in recipients_list:
            params = {
                "api_key": api_key,
                "email": email
            }
            try:
                # The request sits inside the try so a network failure skips this
                # address instead of aborting the loop.
                response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT_SECONDS)
                names[email] = self.get_name(response)
            except requests.exceptions.RequestException as e:
                # Log only the exception type: the message may include the request
                # URL, whose query string carries the API key.
                msg = type(e).__name__ + f"\nName fetching failed for: {email}. Skipping this name."
                db_handler.Timers().exec_time(msg)
            except Exception as e:
                msg = str(e) + f"\nName fetching failed for: {email}. Skipping this name."
                db_handler.Timers().exec_time(msg)
        return names

In [516]:
# recipients
# PeopleDataLabs().get_names_from_email_list(recipients, api_key=people_data_api_key)
# Replay one iteration of get_names_from_email_list by hand for a single address.
url = f"https://api.peopledatalabs.com/v5/person/enrich"

# for email in recipients_list:
email = "emma.alvarez@sphanalytics.com"
api_key=people_data_api_key
params = {
    "api_key": api_key,
    "email": email
}
response = requests.get(url, params=params)
# The remaining get_name steps are left commented out.
# response_json = response.json()
# data = response_json["data"]
# first_name = self.format_name(data.get("first_name", "<FIRST_NAME>"))
# last_name = self.format_name(data.get("last_name", "<LAST_NAME>"))
# name = (first_name, last_name)



{'status': 404,
 'error': {'type': 'not_found',
  'message': 'No records were found matching your request'}}