In [1]:
# Using simplegmail abstraction instead of writing gmail logic from scratch
# https://github.com/jeremyephron/simplegmail
from simplegmail import Gmail, query
# langchain
from langchain_community.llms import Ollama
from langchain import PromptTemplate
import pandas as pd


In [7]:
# Create the Gmail client through simplegmail. Authentication may be needed
# the first time - see https://github.com/jeremyephron/simplegmail
gmail = Gmail()

In [3]:
# LLM handle: llama3.1 served through Ollama, with <|eot_id|> registered as a
# stop sequence for generation.
llm = Ollama(
    model="llama3.1:latest",
    stop=["<|eot_id|>"],
)

In [4]:
def get_model_response(user_prompt, system_prompt):
    """Send a system/user prompt pair to the module-level ``llm`` and return its reply.

    The two strings are substituted into the chat template below (which wraps
    them in the header / end-of-turn tokens) and the formatted prompt is
    passed to the model.

    Args:
        user_prompt: Text inserted into the ``user`` turn of the template.
        system_prompt: Text inserted into the ``system`` turn of the template.

    Returns:
        The value returned by ``llm.invoke`` for the formatted prompt.
    """
    # NOTE: No f string and no whitespace in curly braces - the placeholders
    # are filled in by PromptTemplate, not by Python string interpolation.
    template = """
        <|begin_of_text|>
        <|start_header_id|>system<|end_header_id|>
        {system_prompt}
        <|eot_id|>
        <|start_header_id|>user<|end_header_id|>
        {user_prompt}
        <|eot_id|>
        <|start_header_id|>assistant<|end_header_id|>
        """

    prompt = PromptTemplate(
        input_variables=["system_prompt", "user_prompt"],
        template=template
    )
    formatted_prompt = prompt.format(
        system_prompt=system_prompt, user_prompt=user_prompt
    )

    # Use .invoke(): calling the LLM object directly (llm(...)) is the
    # deprecated entry point and emits a deprecation warning.
    return llm.invoke(formatted_prompt)

In [5]:
from bs4 import BeautifulSoup

def get_text_from_html(html: str) -> str:
    """Return the text content of an HTML document or fragment.

    ``<script>`` and ``<style>`` elements are removed first so their contents
    do not end up in the result. Text pieces are stripped and joined with
    newlines.

    Args:
        html: HTML markup; may be a full document or a fragment with no
            ``<body>`` element.

    Returns:
        The extracted text, one stripped text node per line.
    """
    soup = BeautifulSoup(html, features="html.parser")
    # kill all script and style elements
    for element in soup(["script", "style"]):
        element.extract()    # rip it out
    # html.parser does not add a <body> around fragments, so soup.body can be
    # None; fall back to the whole parsed tree instead of raising
    # AttributeError on None.get_text().
    root = soup.body if soup.body is not None else soup
    return root.get_text(separator='\n', strip=True)


In [10]:
# Fetch INBOX messages newer than `numberOf` days.
numberOf = 2
inbox_query = query.construct_query(
    newer_than=(numberOf, "day"),
    labels=[["INBOX"]],
)
messages = gmail.get_messages(query=inbox_query)

In [11]:
# Collect the per-message fields in a single pass and assemble them into a
# DataFrame with a placeholder 'result' column to be filled in later.
print(f'Total messages: {len(messages)}')

ids, dates, froms, subjects, bodies = [], [], [], [], []
for message in messages:
    print(f"""--- from: {message.sender} date:{message.date} ---
        subject: {message.subject}""")

    ids.append(message.id)
    dates.append(message.date)
    froms.append(message.sender)
    subjects.append(message.subject)

    # Prefer the plain-text part; otherwise extract text from the HTML part;
    # otherwise store a fixed placeholder string.
    if message.plain:
        content = message.plain
    elif message.html:
        content = get_text_from_html(message.html)
    else:
        content = 'No content - answer N/A'
    bodies.append(content)

df = pd.DataFrame({
    'id': ids,
    'date': dates,
    'from': froms,
    'subject': subjects,
    'body': bodies,
})
# '-' marks rows that have not been given a result yet.
df['result'] = ['-' for _ in range(len(df.index))]

Total messages: 42
--- from: renewtallahassee@emailrelay.com date:2024-08-25 09:33:14-04:00 ---
        subject: Maintenance Status changed to Completed
--- from: careers-noreply@google.com date:2024-08-24 16:05:29-04:00 ---
        subject: New job(s) match your search on Google Careers
--- from: careers-noreply@google.com date:2024-08-24 16:05:19-04:00 ---
        subject: New job(s) match your search on Google Careers
--- from: renewtallahassee@emailrelay.com date:2024-08-24 14:37:49-04:00 ---
        subject: Renters Insurance Balance Due
--- from: renewtallahassee@emailrelay.com date:2024-08-24 14:36:41-04:00 ---
        subject: Renters Insurance Balance Due
--- from: do-not-reply adobe <adobe@myworkday.com> date:2024-08-24 13:57:19-04:00 ---
        subject: Thanks for applying to Adobe
--- from: LinkedIn <jobs-noreply@linkedin.com> date:2024-08-24 13:45:39-04:00 ---
        subject: Your application to Software Developer - Remote Ohio at Crown Equipment Corporation
--- from: Li

In [12]:
# System prompt passed to the model for every email: asks for a Yes/No answer
# on whether the email concerns a software job application.
# NOTE(review): the text contains typos ("recieved", "infromation") and asks
# the model both to "Think step by step" and to reply with just Yes or No -
# consider tightening the wording; left unchanged here because it is runtime
# input to the model.
system_mail_prompt = 'You are given the body of an email I recieved. Provide a simple Yes or No answer to the question - "Is this mail I recieved is regarding a SOFTWARE JOB APPLICATION that I applied for?" Think step by step. Look for job related infromation like job title, company name etc. Then decide your answer. You don\'t have to provide a long answer. Just a very precise Yes or No.'
# user_mail_prompt = """"""

In [13]:
# Ask the model about every email that does not yet hold a bare Yes/No answer.
# Rows that already have one are skipped, so re-running this cell only retries
# the emails whose previous answer was something else.
FINAL_ANSWERS = {'yes', 'yes.', 'no', 'no.'}

print(f'Total messages: {len(df.index)}')
for idx, row in df.iterrows():
    # Columns are addressed by name rather than tuple position so the loop
    # keeps working if columns are added or reordered. The comparison ignores
    # case and surrounding whitespace.
    if str(row['result']).strip().lower() in FINAL_ANSWERS:
        continue
    print(f"from:{row['from']}, subject:{row['subject']}, date:{row['date']}")
    df.at[idx, 'result'] = get_model_response(
        user_prompt=row['body'], system_prompt=system_mail_prompt
    )
df

Total messages: 42
from:renewtallahassee@emailrelay.com, subject:Maintenance Status changed to Completed, date:2024-08-25 09:33:14-04:00


  warn_deprecated(


from:careers-noreply@google.com, subject:New job(s) match your search on Google Careers, date:2024-08-24 16:05:29-04:00
from:careers-noreply@google.com, subject:New job(s) match your search on Google Careers, date:2024-08-24 16:05:19-04:00
from:renewtallahassee@emailrelay.com, subject:Renters Insurance Balance Due, date:2024-08-24 14:37:49-04:00
from:renewtallahassee@emailrelay.com, subject:Renters Insurance Balance Due, date:2024-08-24 14:36:41-04:00
from:do-not-reply adobe <adobe@myworkday.com>, subject:Thanks for applying to Adobe, date:2024-08-24 13:57:19-04:00
from:LinkedIn <jobs-noreply@linkedin.com>, subject:Your application to Software Developer - Remote Ohio at Crown Equipment Corporation, date:2024-08-24 13:45:39-04:00
from:LinkedIn <jobs-noreply@linkedin.com>, subject:Your application to Software Engineer at EPITEC, date:2024-08-24 13:45:23-04:00
from:LinkedIn <jobs-noreply@linkedin.com>, subject:Your application to Software Engineer at Robert Half, date:2024-08-24 13:38:12-

Unnamed: 0,id,date,from,subject,body,result
0,19189bdb10f1082a,2024-08-25 09:33:14-04:00,renewtallahassee@emailrelay.com,Maintenance Status changed to Completed,MAINTENANCE REQUEST 26558485\nCOMPLETE\nBathro...,No
1,19185fe71ab48986,2024-08-24 16:05:29-04:00,careers-noreply@google.com,New job(s) match your search on Google Careers,"<table border=""0"" cellpadding=""0"" cellspacing=...",It appears you're sharing a job alert email fr...
2,19185fe485ee14a1,2024-08-24 16:05:19-04:00,careers-noreply@google.com,New job(s) match your search on Google Careers,"<table border=""0"" cellpadding=""0"" cellspacing=...",Here is the rewritten version of the job alert...
3,19185ae2c2c97004,2024-08-24 14:37:49-04:00,renewtallahassee@emailrelay.com,Renters Insurance Balance Due,"Dear Residents,\nIf you have recently moved in...",No
4,19185ad223d486b5,2024-08-24 14:36:41-04:00,renewtallahassee@emailrelay.com,Renters Insurance Balance Due,"Dear Residents,\nIf you have recently moved in...",No.
5,191858919a8bb00c,2024-08-24 13:57:19-04:00,do-not-reply adobe <adobe@myworkday.com>,Thanks for applying to Adobe,"Dear Mahidhar Narala ,\nWe wanted to let you k...",Yes
6,191857e6c959232a,2024-08-24 13:45:39-04:00,LinkedIn <jobs-noreply@linkedin.com>,Your application to Software Developer - Remot...,Your update from Crown Equipment Corporation\r...,No.
7,191857e2e3f2a1f7,2024-08-24 13:45:23-04:00,LinkedIn <jobs-noreply@linkedin.com>,Your application to Software Engineer at EPITEC,Your update from EPITEC\r\n\r\n---------------...,No.
8,191857798e16eb2a,2024-08-24 13:38:12-04:00,LinkedIn <jobs-noreply@linkedin.com>,Your application to Software Engineer at Rober...,Your update from Robert Half\r\n\r\n----------...,No.
9,191857738115a768,2024-08-24 13:37:48-04:00,LinkedIn <jobs-noreply@linkedin.com>,Your application to Backend Software Engineer ...,Your update from Corsha\r\n\r\n---------------...,Yes.
