In [383]:
import os
from pathlib import Path

import argparse
import datetime as dt
import regex as re

import yaml

import pandas as pd

import requests
import smtplib
import ssl
import mimetypes
from email.message import EmailMessage

from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pygsheets


In [597]:
def parse_config(config: dict):
    """
    Pull the credentials section out of a loaded configuration.

    Parameters
    -------
    config (dict{any}): Dictionary of configurations (the parsed yaml).

    Returns
    -------
    credentials (dict(any)): The value stored under the "credentials" key.
    """
    return config["credentials"]


def get_csv_as_list(filepath: str) -> list:
    """
    Read a header-less csv and return its first column as a list.

    Parameters
    -------
    filepath (str): Filepath to csv.

    Returns
    -------
    ([str]): Values of the first column, in file order.
    """
    frame = pd.read_csv(filepath, header=None)
    first_column = frame[0]
    return first_column.values.tolist()


class Clearbit():
    """A class for working with Clearbit API."""

    def get_names_from_email_list(self, recipients_list: list, username=None, password=None, api_key=None):
        """
        Given a list of recipients, use Clearbit to retrieve their names. Other data may be retrieved at a later stage.
        The username is the api_key from Clearbit. Read their docs for more info.

        Parameters
        -------
        recipients_list ([str]): Email addresses to look up.
        username (str): Optional. Username.
        password (str): Optional. Password.
        api_key (str): API key.

        Returns
        -------
        names {}: Dict of emails to (first_name, last_name) tuples.
        """
        # Imported here so this block does not rely on the file-level import cell.
        from urllib.parse import quote

        names = {}
        # NOTE: may want to batch this in the future or too many requests will be attempted too quickly.
        for email in recipients_list:
            # The ":email" in Clearbit's docs is a placeholder; no literal colon belongs in
            # the query. Quoting keeps characters such as "+" from being read as a space.
            url = f"https://person.clearbit.com/v2/combined/find?email={quote(email, safe='@')}"
            clearbit_response = get_response(url, username=username, password=password, api_key=api_key)
            person_name = clearbit_response.json()["person"]["name"]

            names[email] = (person_name["givenName"], person_name["familyName"])
        return names


def get_response(url: str, username=None, password=None, api_key=None, timeout=30):
    """
    Get a response from API using HTTP.

    Credentials are applied as follows: when a username and/or password is given,
    HTTP basic auth is used (a missing half is sent as an empty string). Otherwise,
    when an api_key is given, it is sent as a bearer token. With no credentials the
    request is sent unauthenticated.

    Parameters
    -------
    url (str): Url for API request.
    username (str): Optional. Username.
    password (str): Optional. Password.
    api_key (str): Optional. API key, used only when no username/password is given.
    timeout (float): Optional. Seconds to wait for the server before giving up. Defaults to 30.

    Returns
    -------
    api_response (requests.models.Response): Response.
    """
    auth = None
    headers = {}
    if username is not None or password is not None:
        # requests deprecates non-string basic-auth parts, so never pass None through.
        auth = ("" if username is None else username, "" if password is None else password)
    elif api_key is not None:
        headers["Authorization"] = f"Bearer {api_key}"

    # Without a timeout requests can block forever on an unresponsive server.
    return requests.get(url, auth=auth, headers=headers, timeout=timeout)


def add_attachment(email: EmailMessage, filepath: str) -> EmailMessage:
    """
    Attach the file at filepath to an email message, which may or may not
    already carry attachments.

    Parameters:
    -------
    email (EmailMessage): EmailMessage to attach to (modified in place).
    filepath (str): Filepath to attachment.

    Returns
    -------
    email (EmailMessage): The same message, now with the new attachment.
    """
    with open(filepath, "rb") as handle:
        payload = handle.read()

    # Fall back to a generic bag-of-bits type when no type can be guessed or
    # when the file is encoded (compressed).
    guessed_type, guessed_encoding = mimetypes.guess_type(filepath)
    usable_guess = guessed_type is not None and guessed_encoding is None
    content_type = guessed_type if usable_guess else "application/octet-stream"
    maintype, subtype = content_type.split("/", 1)

    email.add_attachment(
        payload,
        maintype=maintype,
        subtype=subtype,
        filename=os.path.basename(filepath),
    )
    print(f"Successfully attached: {filepath}")
    return email


def build_text(text_path: str, text_vars=None) -> str:
    """
    Fill the "{variable}" placeholders of the text file at text_path using a
    dictionary of variable names to values. When no dictionary is passed the
    text is returned as-is. Placeholders with no matching variable are replaced
    by an empty string.

    Variable names are matched exactly first; if there is no exact match the
    lookup falls back to a case-insensitive match.

    Parameters
    -------
    text_path (str): Path to text file.
    text_vars (dict): Dictionary of variables. Optional

    Returns
    -------
    text (str): Filled-in text body by value using the text_vars dictionary.
    """

    DEFAULT_FILL_IN = ""

    # Read-only access is enough; "r+" would fail on read-only template files.
    with open(text_path, "r") as f:
        text = f.read()
    if text_vars is None:
        return text

    # Fallback table for the case-insensitive lookup.
    folded_vars = {str(key).casefold(): value for key, value in text_vars.items()}

    def _fill(match):
        """Return the replacement text for one "{variable}" placeholder."""
        var = match.group(1)
        if var in text_vars:
            value = text_vars[var]
        else:
            value = folded_vars.get(var.casefold(), DEFAULT_FILL_IN)
        return str(value)

    # A callable replacement inserts values literally: backslashes in a value are
    # not read as regex escapes, and placeholder names never need escaping.
    return re.sub(r"\{(.*?)\}", _fill, text)


def build_bodies(names, body_path, body_config):
    """
    Builds email bodies based off full names and corresponding email addresses.

    Each recipient's first name is made available to the body template as the
    "addressee" variable. The passed-in body_config is not modified.

    Parameters
    -------
    names (dict[tuple]): Dictionary of tuples corresponding to first and last names for recipient email address.
    body_path (str): Email body path to be parsed using the body config. Recommended to use HTML formatting.
    body_config (dict): Optional. Variables for the body template; None is treated as no variables.

    Returns
    -------
    bodies (dict): Dictionary of recipient email addresses to formatted body text.
    """
    base_config = dict(body_config) if body_config else {}
    bodies = {}
    for recipient, name in names.items():
        # Work on a per-recipient copy so the caller's config is left untouched.
        recipient_config = {**base_config, "addressee": name[0]}
        bodies[recipient] = build_text(body_path, recipient_config)
    return bodies


def send_email(sender: str, recipients: list, smpt_connection, subject="", body="", attachments=None,
               test_recipients=("jaime.meriz13@gmail.com", "hexarunner@gmail.com")):
    """
    Send an email via SMTP. Recommended body is provided as HTML formatted text.

    By default the function runs in test mode: the message headers name the
    real recipients, but the message is delivered only to test_recipients.
    Pass test_recipients=None to deliver to the real recipients.

    Parameters
    -------
    sender (str): Sender as a string.
    recipients ([str]): List of recipients.
    smpt_connection (smtplib.SMTP_SSL): SMPT SSL connection object.
    subject (str): Optional. Email subject line. None is treated as empty.
    body (str): Optional. Body of message.
    attachments ([str]): Optional. List of filepath(s) to attachment(s).
    test_recipients ([str]): Optional. Addresses that receive the message instead of
        recipients. Defaults to the developer test addresses; None or empty disables test mode.

    Returns
    -------
    email (EmailMessage): Email that was sent.
    """
    email = EmailMessage()
    # "From"/"To" are the standard header names; "Sender"/"Recipients" left the
    # message without proper addressing headers.
    email["From"] = sender
    email["To"] = ", ".join(recipients)
    email["Subject"] = subject or ""

    email.set_content(body, subtype="html")
    for attachment in attachments or []:
        email = add_attachment(email, attachment)

    envelope_recipients = list(recipients)
    if test_recipients:
        print("runnign this in test mode")
        envelope_recipients = list(test_recipients)

    # send_message serializes the message itself, so non-ASCII content is handled.
    smpt_connection.send_message(email, from_addr=sender, to_addrs=envelope_recipients)
    print("Message sent!")
    return email


def send_emails(sender: str, recipients: list, smpt_connection, bodies=None, subject=None, attachments=None):
    """
    Sends one email per recipient, each with its own body when one is provided.

    Parameters
    -------
    sender (str): Sender as a string.
    recipients ([str]): List of recipients.
    smpt_connection (smtplib.SMTP_SSL): SMPT SSL connection object.
    bodies ({str}): Optional. Dictionary of recipient address to message body.
        Recipients without an entry get an empty body.
    subject (str): Optional. Email subject line. None is sent as an empty subject.
    attachments ([str]): Optional. List of filepath(s) to attachment(s).

    Returns
    -------
    None
    """
    # Avoid a shared mutable default; None means "no bodies provided".
    bodies = {} if bodies is None else bodies
    # A None subject cannot be stored as a header value, so normalize it here.
    subject = "" if subject is None else subject

    for recipient in recipients:
        # The fallback must be a string: the message body cannot be built from a tuple.
        body = bodies.get(recipient, "")
        send_email(sender, [recipient], smpt_connection, subject=subject, body=body, attachments=attachments)
    print("Sent some emails!")


class Google():
    """A class to connect to Google services."""

    # Kept as a class attribute: the methods below reach it through self.pickle.
    import pickle

    def google_connect(self, credentials_path=None, service_account_env_var=None):
        """
        Connects to google drive and spreadsheets. Requires '[...]/client_secrets[...].json" and or
        a service account variable in the form of a name (str).
        Will create a token in '.' to track authentication.
        Returns a service object to allow connections to google drive files.
        Warning: do not share your token or anyone will have access to all content on your drive.

        Parameters
        -------
        credentials_path (str): Optional. Path to client secrets json. Defaults to None.
        service_account_env_var (str): Optional. Name of environment variable for google connection. Defaults to None.

        Returns
        -------
        gdrive (googleapiclient.discovery.Resource object): Resource object with connection to google drive.
            None when connecting through a service account, or when no credentials are given.
        gsheets (pygsheets client object): pygsheets client object to manipulate gsheets.
            None when no credentials are given.
        """

        SCOPES = ["https://www.googleapis.com/auth/drive"]
        gdrive, gsheets = None, None

        if credentials_path is not None:
            creds = None

            # Authentication flow.
            # NOTE: unpickling runs arbitrary code; token.pickle must only ever be a file
            # written by this method.
            if Path("token.pickle").exists():
                with open("token.pickle", "rb") as token:
                    creds = self.pickle.load(token)
            if not creds or not creds.valid:
                if creds and creds.expired and creds.refresh_token:
                    creds.refresh(Request())
                else:
                    flow = InstalledAppFlow.from_client_secrets_file(credentials_path, SCOPES)
                    creds = flow.run_local_server(port=0)
                # Save the access token for future use. This covers refreshed credentials
                # too, otherwise the stale token is reloaded and refreshed on every run.
                with open("token.pickle", "wb") as token:
                    self.pickle.dump(creds, token)

            gdrive = build("drive", "v3", credentials=creds)
            gsheets = pygsheets.authorize(custom_credentials=creds)

        elif service_account_env_var is not None:
            # dev note: not getting gdrive in this case yet.
            gsheets = pygsheets.authorize(service_account_env_var=service_account_env_var)

        return gdrive, gsheets


    def write_to_googlesheets(self, gsheetkey: str, data: pd.DataFrame, wks_title: str, set_df_start="A2", credentials_path=None) -> None:
        """
        Push DataFrame to Googlesheet via key. The worksheet is cleared first.

        Parameters
        -------
        gsheetkey (str): Key to google sheet.
        data (pd.DataFrame): Dataframe with data to push.
        wks_title (str): Worksheet title.
        set_df_start (str): Optional. Defaults to A2. Set where the dataframe starting cell will write. Use A2 formatting.
        credentials_path (str): Optional. Filepath to local credentials files. Defaults to None.

        Returns
        -------
        (None)
        """
        # Work on a copy so the caller's frame is never touched.
        df0 = data.copy(deep=True)
        if credentials_path is not None:
            _, gsheets = self.google_connect(credentials_path=credentials_path)
        else:
            # NOTE(review): SERVICE_ACCOUNT, logger and Timers are not defined in the visible
            # part of this file; presumably they come from elsewhere. Confirm before relying
            # on this branch.
            _, gsheets = self.google_connect(service_account_env_var=SERVICE_ACCOUNT)

        sh = gsheets.open_by_key(gsheetkey)

        wks = sh.worksheet("title", wks_title)
        wks.clear(start="A1", end=None)

        # NOTE(review): this compares against the data rows only; the header row and the
        # set_df_start offset are not counted. Confirm the resized sheet is tall enough.
        if wks.rows < len(df0):
            msg = "Warning: Data rows exceeds worksheet rows available. Expanding worksheet."
            logger.warning(msg)
            Timers.exec_time(msg)

            wks.resize(rows=len(df0))

        wks.set_dataframe(df0, start=set_df_start, copy_head=True)

        log_msg = f"Pushed data to gsheet with key:{gsheetkey}"
        logger.info(log_msg)
        Timers.exec_time(log_msg)
        return

In [374]:

# def jmailer():
# Jmailer driver: load the config, look up recipient names, build the bodies and send.

# inputs = parse_args()

config_path = "../config.yaml" #inputs.config_path

sender = "jaime.meriz13@gmail.com"  # inputs.sender

recipients = None # inputs.to
recipients_path  ="/Users/jaimemerizalde/Desktop/JOBS 2023/email_recipients.csv" # inputs.to_path
subject = "[DEV] Email sending." # inputs.subject

body = None #inputs.body
body_path = "/Users/jaimemerizalde/Desktop/JOBS 2023/Email Outreaches/generic_1/body.txt" #inputs.body_path
body_cfg_path = "/Users/jaimemerizalde/Desktop/JOBS 2023/Email Outreaches/generic_1/body.yaml"  # inputs.body_cfg_path
if body is not None and (body_path is not None or body_cfg_path is not None):
    print("Provide body or body_path and body_cfg_path but not both.")
    # return

attachments = [
    "/Users/jaimemerizalde/Desktop/JOBS 2023/software/jmailer/jmailer/SF_Ryu.png",
    "/Users/jaimemerizalde/Desktop/JOBS 2023/lopez_jaime_resume_2sep2023.pdf"
]
    # inputs.attachments_path
print("Parsed args flow complete.")


# Context managers close the config files instead of leaking the handles.
with open(config_path) as config_file:
    config = yaml.safe_load(config_file)
credentials = parse_config(config)
print("Loaded credentials flow complete..")

gmail_password = credentials["gmail"]["app_password"]
print("Get passwords flow complete.")

recipients = get_csv_as_list(recipients_path)
print("Get recipients flow complete.")

# Always bind body_config so the build_bodies call below cannot hit an undefined name.
body_config = {}
if body_cfg_path is not None:
    with open(body_cfg_path) as body_cfg_file:
        body_config = yaml.safe_load(body_cfg_file)
    print("Load body config flow complete.")

clearbit_api_key = credentials["clearbit"]["api_key"]
print("Clearbit user flow complete.")

context = ssl.create_default_context()
# The with-block ends the SMTP session even when a lookup or a send fails.
with smtplib.SMTP_SSL("smtp.gmail.com", 465, context=context) as smpt_connection:
    smpt_connection.login(sender, gmail_password)
    print("SMPT connection flow complete.")

    names = Clearbit().get_names_from_email_list(recipients, username=clearbit_api_key)

    ### Start the Meat of the Message.
    if body is not None:
        # Every recipient gets the same literal body; zipping would pair recipients
        # with single characters of the body string.
        bodies = {recipient: body for recipient in recipients}
    else:
        bodies = build_bodies(names, body_path, body_config)

    send_emails(sender, recipients, smpt_connection, bodies, subject=subject, attachments=attachments)

# return



Parsed args flow complete.
Loaded credentials flow complete..
Get passwords flow complete.
Get recipients flow complete.
Load body config flow complete.
Clearbit user flow complete.
SMPT connection flow complete.
Successfully attached: /Users/jaimemerizalde/Desktop/JOBS 2023/software/jmailer/jmailer/SF_Ryu.png
Successfully attached: /Users/jaimemerizalde/Desktop/JOBS 2023/lopez_jaime_resume_2sep2023.pdf
runnign this in test mode
Message sent!
Successfully attached: /Users/jaimemerizalde/Desktop/JOBS 2023/software/jmailer/jmailer/SF_Ryu.png
Successfully attached: /Users/jaimemerizalde/Desktop/JOBS 2023/lopez_jaime_resume_2sep2023.pdf
runnign this in test mode
Message sent!
Successfully attached: /Users/jaimemerizalde/Desktop/JOBS 2023/software/jmailer/jmailer/SF_Ryu.png
Successfully attached: /Users/jaimemerizalde/Desktop/JOBS 2023/lopez_jaime_resume_2sep2023.pdf
runnign this in test mode
Message sent!
Sent some emails!


In [367]:
# email=EmailMessage()
email.add_attachment


{}

# Gsheets connectivity

In [489]:
gkey = "1t1wGAQvZuwEWOOgcgtBaqbZoafG_ZCfTV5QGyMfYHTg"

In [520]:
dt.datetime.today().strftime('%Y-%m-%d')

'2023-09-06'

What is the goal here?  

The goal is to connect to my spreadsheet and populate some FIELDS 
using the email recipients list I already have.

Using those recipients we want to then add their details.  

We want to update most of these details:

contacts:
['CREATEDATETIME', # IF DOES NOT EXIST: dt.datetime.today().strftime('%Y-%m-%d')
 'FIRST_NAME', names[recipient][0]
 'LAST_NAME', names[recipient][1]
 'EMAIL', recipient
 'COMPANY',  response_as_json["company"]["name"]
 'LAST_OUTREACH',  dt.datetime.today().strftime('%Y-%m-%d') 
 'FIRST_OUTREACH' # IF CONDITIONAL dt.datetime.today().strftime('%Y-%m-%d')
 ] 

applications
[
--'JOB', 
--'STATUS',
 'COMPANY', 
-- 'SITE', 
-- 'LINK',
-- 'CREATEDATETIME',
-- 'APPLIED_DATETIME',
 'EMAILS',
 'LASTOUTREACH_DATETIME',
 'OUTREACH_COUNTER',
 --'LAST_ACTION',
--'NOTES']

In [541]:
# Google connectivity
# Authenticate with the client-secrets file; google_connect returns (gdrive, gsheets)
# and only the pygsheets client is kept here.
gg = Google()
credentials_path = "/Users/jaimemerizalde/Desktop/JOBS 2023/software/jmailer/secrets/jmailer_secret.json"
_, gsheets = gg.google_connect(credentials_path)

In [542]:
# Open the spreadsheet and index its worksheets, and their DataFrame views, by title.
sh = gsheets.open_by_key(gkey)

worksheets = sh.worksheets()
titles = [sheet.title for sheet in worksheets]

wks_dict = {title: sheet for title, sheet in zip(titles, worksheets)}
wks_dfs = {title: sheet.get_as_df() for title, sheet in zip(titles, worksheets)}



Let's work on filling out this table first...  

explore using clearbit

In [556]:

# Explore a single Clearbit lookup. The ":email" in Clearbit's docs is a placeholder,
# so the address goes into the query without a leading colon.
email = "evelyn.siu@getgarner.com"
url = f"https://person.clearbit.com/v2/combined/find?email={email}"
clearbit_api_key = credentials["clearbit"]["api_key"]
clearbit_response = get_response(url, username=clearbit_api_key, password=None, api_key=None)


In [566]:
# clearbit_response
# NOTE(review): the Clearbit class in the visible part of this file has no
# get_person_details method, and `recipient` is not assigned in this cell; presumably
# both came from earlier kernel state. Confirm before re-running.
person_details = {}
person_details[recipient] = Clearbit().get_person_details(clearbit_response)


In [569]:
Okay now what....

Fill these into the sheet above

SyntaxError: invalid syntax (901599455.py, line 1)

Now we have to automate the call and response mechanism for this.

What is it you want?

We are first sending off an email based on the list - check.

Then when we have this email, we want to UPDATE our contact list. so the first thing we do is get the information from that table.

In parallel, we can look up the information for every person we just emailed, i.e. the "provided email list". 
We populate it into a dictionary (more work than it sounds to automate). 
With this we pipe it into the spreadsheet by looking up name and email matches; if they don't exist, we create them. 

here we go.

In [573]:
# Step 1. Get the info from the table (contacts)

# wks_dfs maps worksheet titles to DataFrames; keep the "contacts" sheet and its column names.
contacts_df = wks_dfs["contacts"]
contacts_fields = contacts_df.columns.to_list()

In [594]:
# Step 2.
# `recipients` is who we emailed.

# we want to get their information: one raw Clearbit response per recipient.
recipient_data = {}
clearbit_api_key = credentials["clearbit"]["api_key"]
for recipient in recipients:
    # The ":email" in Clearbit's docs is a placeholder; no literal colon belongs in the query.
    url = f"https://person.clearbit.com/v2/combined/find?email={recipient}"
    clearbit_response = get_response(url, username=clearbit_api_key, password=None, api_key=None)
    recipient_data[recipient] = clearbit_response


# next we want to create a dictionary per recipient of their formatted responses. Or we we create a dataframe???
# we need a datastructure. Probably best to use a dataframe 
# best start with a dictionary, then with a collection of dictionaries with the same keys, create a dataframe

# contacts_fields... continue sooon



In [685]:
# Get the columns: `contacts_fields` lists the columns of the contacts sheet.

# Compute the date once so every field of every row carries the same value.
today = dt.datetime.today().strftime('%Y-%m-%d')

# fill in what clearbit gives you
recipient_push_data = {}
for recipient, response in recipient_data.items():
    response_json = response.json()
    # Treat a missing or null "person"/"company" as empty, so the field becomes None
    # instead of aborting the whole run.
    person_name = (response_json.get("person") or {}).get("name") or {}
    company = response_json.get("company") or {}
    recipient_push_data[recipient] = {
        "CREATEDATETIME": today,  # IF DOES NOT EXIST
        "FIRST_NAME": person_name.get("givenName"),
        "LAST_NAME": person_name.get("familyName"),
        "EMAIL": recipient,
        "COMPANY": company.get("name"),
        "LAST_OUTREACH": today,
        "FIRST_OUTREACH": today,  # IF DOES NOT EXIST
    }

# One row per recipient; the email already lives in the EMAIL column, so drop the index.
recipient_push_data = pd.DataFrame.from_dict(recipient_push_data, orient="index")
recipient_push_data.reset_index(drop=True, inplace=True)

# We can include a check here to make sure we either have all the fields as much as possible or report on the ones we do not. 

In [686]:
recipient_push_data

Unnamed: 0,CREATEDATETIME,FIRST_NAME,LAST_NAME,EMAIL,COMPANY,LAST_OUTREACH,FIRST_OUTREACH
0,2023-09-06,Marco,Starger,marco.starger@getgarner.com,Garner Health,2023-09-06,2023-09-06
1,2023-09-06,Austin,Lovell,austin.lovell@getgarner.com,Garner Health,2023-09-06,2023-09-06
2,2023-09-06,Evelyn,Siu,evelyn.siu@getgarner.com,Garner Health,2023-09-06,2023-09-06


Now that you have your prepped data, you can compare it to the data already in Google.

First, make a dataframe with the given data above. X complete



In [737]:
# pd.DataF(recipient_push_data, contacts_df, how="inner", on=["FIRST_NAME", "LAST_NAME", "EMAIL", "COMPANY"]) # you can see the same data is preserved. 


# # contacts_df
# recipient_push_data.merge(contacts_df, how="left")

Unnamed: 0,CREATEDATETIME,FIRST_NAME,LAST_NAME,EMAIL,COMPANY,LAST_OUTREACH,FIRST_OUTREACH
0,2023-09-06,Marco,Starger,marco.starger@getgarner.com,Garner Health,2023-09-06,2023-09-06
1,2023-09-06,Austin,Lovell,austin.lovell@getgarner.com,Garner Health,2023-09-06,2023-09-06
2,2023-09-06,Evelyn,Siu,evelyn.siu@getgarner.com,Garner Health,2023-09-06,2023-09-06


**How can I tell this will merge with existing data**?  

Try a small toy problem.

In [967]:
# Toy frames with the same columns: df1 plays the new data, df2 the existing table.
columns1 = ["A", "B", "C", "D"]
data1 = [
    [1, 2, 3, 4],
    [4, 5, 6, 7],
    [7, 8, 9, 11],
]
df1 = pd.DataFrame(data1, columns=columns1)

columns2 = ["A", "B", "C", "D"]
data2 = [
    [1, 2, 3, 4],
    [4, 5, 14, 8],
    [11, 12, 13, 14],
]
df2 = pd.DataFrame(data2, columns=columns2)

# df1
# df2

# # this is what they both have in common
# compare_on = ["A","B"] 
# df_inner = df1.merge(df2, how="inner", on=compare_on, suffixes=("_new", ""))


# # these columns you want to keep as-is from the original ground-truth table
# fixed_columns = ["C"]
# fixed_columns = compare_on + fixed_columns


# # these columns you want to update.
# update_columns = ["D"]


# # do some duplicate dropping here!
# # now we want to replace the " new"


# # so this is what they have in common / existing that is in process of being updated. By nature of this merge, we can Choose which columns to update

In [905]:
# list(map(lambda x: x.replace("_new", ""), df_inner.columns.to_list))

# # drop update columns. The old name still applies
# df_inner.drop(update_columns, axis=1, inplace=True)

# # replace old and new column names

# original_names = [col + "_new" for col in update_columns]
# new_names = update_columns
# rename_dict = dict(zip(original_names, new_names))
# df_inner.rename(columns=rename_dict, inplace=True)
# 


In [912]:
# now drop any columns that still have the "new" tag 
# NOTE(review): drop_list is only defined in the commented-out line below, and df_inner
# only in commented-out code in earlier cells; this cell relies on earlier kernel state.
# drop_list = [col for col in df_inner.columns.to_list() if "_new" in col]
df_inner.drop(columns=drop_list, inplace=True)


In [922]:
df_updated = df_inner[columns1]

In [916]:
# Now the question is.... are we preserving order ... we need to have some rearrangements here. 

# two, are we "updating" what we expect correctly? # if we do the above we will instantly permute the columns into the location and order we want. 

df_inner #

Unnamed: 0,A,B,C,D
0,1,2,3,4
1,4,5,14,7


In [918]:
# columns C should just like it is in df2
# any new data from df1 should appear wwell they won't just yet but they will.... 

 Let's redo it again.

In [1076]:
# set up the data: df1 is the incoming data, df2 the existing table
columns1 = ["A", "B", "C", "D"]
data1 = [
    [1, 2, 3, 4],
    [4, 5, 6, 7],
    [7, 8, 9, 11],
]
df1 = pd.DataFrame(data1, columns=columns1)

columns2 = ["A", "B", "C", "D"]
data2 = [
    [1, 2, 3, 4],
    [4, 5, 14, 8],
    [11, 12, 13, 14],
]
df2 = pd.DataFrame(data2, columns=columns2)


In [1077]:
# 
# Left-merge on the key columns: every df1 row is kept, df1's overlapping columns get
# the "_new" suffix and df2's keep their plain names.
merge_on = ["A","B"]
df3 = df1.merge(df2, on=merge_on, how="left", suffixes=("_new",""))

In [1078]:
df3

Unnamed: 0,A,B,C_new,D_new,C,D
0,1,2,3,4,3.0,4.0
1,4,5,6,7,14.0,8.0
2,7,8,9,11,,


In [1079]:
# Columns whose existing (un-suffixed) values must be kept as-is.
fixed_columns = ["C"]
# update everything else: any column that is neither fixed nor a merge key
# (the merge keys live in `merge_on`; `merged_on` was an undefined name)
update_columns = [col for col in columns1 if col not in fixed_columns and col not in merge_on]


# Drop the stale copies: the old values of updated columns and the "_new" values of fixed columns.
drop_columns = update_columns + [col + "_new" for col in fixed_columns]


In [1080]:
df3.drop(columns=drop_columns, inplace=True)

In [1081]:
df3

Unnamed: 0,A,B,D_new,C
0,1,2,4,3.0
1,4,5,7,14.0
2,7,8,11,


In [1082]:
# rename and re-order: strip the "_new" suffix so updated columns take their plain names
original_columns = df3.columns.to_list()
new_columns = [name.replace("_new", "") for name in original_columns]

columns_mapper = dict(zip(original_columns, new_columns))
df3.rename(columns=columns_mapper, inplace=True)

# re-order to the original column layout
df3 = df3[columns1]

Unnamed: 0,A,B,C,D
0,1,2,3.0,4
1,4,5,14.0,7
2,7,8,,11
