Test for the Gmail API reader
Following the instructions on this page: https://developers.google.com/gmail/api/quickstart/python

In [1]:
import os.path

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

In [2]:
# OAuth scopes requested for this notebook: read-only Gmail access.
# The same list is passed both when loading cached credentials and when
# running the interactive authorization flow.
SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"]

In [3]:
# Location of the cached OAuth token.
json_file = "../../secrets_vault/token.json"

# Reuse previously authorized credentials when the cache file exists;
# otherwise leave `creds` as None.
creds = (
    Credentials.from_authorized_user_file(json_file, SCOPES)
    if os.path.exists(json_file)
    else None
)

# Create my credentials (token.json) if needed

In [5]:
# Obtain usable credentials:
#   1. keep the cached ones when they are still valid,
#   2. refresh them silently when they have expired and a refresh token exists,
#   3. otherwise run the interactive OAuth flow.
if not creds or not creds.valid:
    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
        # NOTE(review): bind_addr='0.0.0.0' makes the local callback server
        # listen on every interface. Kept as-is (presumably needed when the
        # notebook runs inside a container) -- confirm it is intentional.
        creds = flow.run_local_server(port=8080, bind_addr='0.0.0.0')

    # Persist the token to the SAME path the loading cell reads from, so the
    # next run finds it instead of re-authorizing every time.
    token_dir = os.path.dirname(json_file)
    if token_dir:
        os.makedirs(token_dir, exist_ok=True)
    with open(json_file, "w") as token:
        token.write(creds.to_json())

[?1l>Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=1085373024194-n8rtj5ob0b8a6r96am65f846r0m1m6je.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fgmail.readonly&state=urFUzAv4jK8VyBBsHX27XY0KEEq9h9&access_type=offline                      [5;1H                                                                                [6;1H                                                                                [7;1H                                                                                [8;1H                                                                                [9;1H                                                                                [10;1H                                                                                [11;1H                                                                                [12

# Test the Gmail API

In [4]:
# Build the Gmail API service client (API 'gmail', version 'v1') from the
# credentials prepared above. `build` is already imported in the imports cell
# at the top of the notebook, so it is not imported again here.
service = build('gmail', 'v1', credentials=creds)

In [89]:
# List the authenticated user's messages without an explicit page size.
results = (
    service.users()
    .messages()
    .list(userId='me')
    .execute()
)
# A response with no 'messages' key is treated as an empty list.
messages = results['messages'] if 'messages' in results else []
print(f"By default, you only receive {len(messages)} messages.")

By default, you only receive 7 messages.


In [90]:
# Same listing, this time capped at five entries via maxResults.
results = (
    service.users()
    .messages()
    .list(userId='me', maxResults=5)
    .execute()
)
# A response with no 'messages' key is treated as an empty list.
messages = results['messages'] if 'messages' in results else []
print(f"Now we have received {len(messages)} messages.")

Now we have received 5 messages.


In [92]:
# Inspect the first listed entry. The list call only returns stubs holding
# 'id' and 'threadId' (see output); the message itself is fetched with get().
messages[0]

{'id': '19378a89533c25da', 'threadId': '19378a89533c25da'}

In [94]:
# Fetch the first listed message and print the name of every header it carries.
message = (
    service.users()
    .messages()
    .get(userId='me', id=messages[0]['id'])
    .execute()
)
for hd in message['payload']['headers']:
    print(hd['name'])

Delivered-To
Received
X-Google-Smtp-Source
X-Received
ARC-Seal
ARC-Message-Signature
ARC-Authentication-Results
Return-Path
Received
Received-SPF
Authentication-Results
Received
DKIM-Signature
DKIM-Signature
Return-Path
X-HS-Cid
List-Unsubscribe
Date
From
Reply-To
To
Message-ID
Subject
MIME-Version
Content-Type
Precedence
X-Report-Abuse-To
List-Unsubscribe-Post
Feedback-ID


In [5]:
def _header_value(headers, name, default="(missing)"):
    """Return the value of the first header called `name`, or `default`.

    Header names are compared case-insensitively, and a missing header yields
    `default` instead of raising StopIteration.

    Args:
        headers: list of {'name': ..., 'value': ...} dicts from a message payload.
        name: header name to look up, e.g. 'Subject'.
        default: value returned when no header matches.
    """
    wanted = name.lower()
    return next(
        (header['value'] for header in headers if header['name'].lower() == wanted),
        default,
    )


# Gmail search query: everything from this sender after the given date.
query = 'after:2024/11/27 from:dan@tldrnewsletter.com'
results = service.users().messages().list(userId='me', q=query).execute()
messages = results.get('messages', [])

for msg in messages:
    print('='*10)
    email_id = msg['id']
    # Only three headers are displayed, so request metadata for just those
    # headers instead of downloading the whole message body.
    message = service.users().messages().get(
        userId='me',
        id=email_id,
        format='metadata',
        metadataHeaders=['Subject', 'From', 'Date'],
    ).execute()

    # Extract headers
    headers = message['payload'].get('headers', [])
    subject = _header_value(headers, 'Subject')
    sender = _header_value(headers, 'From')
    date = _header_value(headers, 'Date')

    print(f"Subject: {subject}")
    print(f"From: {sender}")
    print(f"Date: {date}")
    print(f"ID: {email_id}")

Subject: Alibaba new model 🌐, Allen AI Open Instruct 🧑‍🏫, xAI app 📱
From: TLDR AI <dan@tldrnewsletter.com>
Date: Fri, 29 Nov 2024 14:16:46 +0000
ID: 19378480b1d8b929
Subject: Bluesky called out by the EU 📢, Gold-based cancer drug ⭐️, Buying Chrome 🤝
From: TLDR <dan@tldrnewsletter.com>
Date: Fri, 29 Nov 2024 11:28:59 +0000
ID: 19377ae6d00bdb0c
Subject: Misunderstood PMs 🤔, being data-informed not data-driven 📊, defining an active user 🚴‍♀️
From: TLDR Product <dan@tldrnewsletter.com>
Date: Fri, 29 Nov 2024 11:06:34 +0000
ID: 1937799e4ca5f9fc
Subject: TLDR AI - Confirm your signup
From: TLDR <dan@tldrnewsletter.com>
Date: Fri, 29 Nov 2024 03:30:55 +0000
ID: 19375f8c0ee524ef


# Parse a message with simple mimeType

In [116]:
# Fetch one specific message by its id (the signup-confirmation email listed above).
email_id = '19375f8c0ee524ef'
message = (
    service.users()
    .messages()
    .get(userId='me', id=email_id)
    .execute()
)

## message

In [117]:
# The fetched message is a plain Python dict (see output).
type(message)

dict

In [118]:
# Top-level fields present on the fetched message.
message.keys()

dict_keys(['id', 'threadId', 'labelIds', 'snippet', 'payload', 'sizeEstimate', 'historyId', 'internalDate'])

In [119]:
# Message identifier; the same value that was passed as id= to get().
message['id']

'19375f8c0ee524ef'

In [120]:
# Thread identifier; for this message it equals the message id (see output).
message['threadId']

'19375f8c0ee524ef'

In [121]:
# Labels attached to this message.
message['labelIds']

['CATEGORY_UPDATES', 'INBOX']

In [122]:
# Size estimate reported for the message -- presumably in bytes; confirm
# against the API reference.
message['sizeEstimate']

9445

In [123]:
# Value of the 'historyId' field, returned as a string.
message['historyId']

'2710'

In [124]:
# Internal timestamp, returned as a string. The value looks like epoch
# milliseconds (it lines up with the Date header printed for this message in
# the query cell) -- confirm against the API reference.
message['internalDate']

'1732851055000'

## payload

In [125]:
# Shorthand for the payload dict, which the following cells inspect field by field.
payload = message['payload']

In [126]:
# Show the payload's type and its keys. These are two separate statements on
# purpose: joining the calls with a comma builds a tuple of the prints' None
# results, which the notebook then echoes as a spurious `(None, None)`.
print(type(payload))
print(payload.keys())

<class 'dict'>
dict_keys(['partId', 'mimeType', 'filename', 'headers', 'body'])


(None, None)

In [127]:
# Part identifier; an empty string for this message (see output).
payload['partId']

''

In [133]:
# MIME type of the email. Here it is text/html, i.e. a single part whose body
# can be decoded and parsed directly.
payload['mimeType']

'text/html'

In [129]:
# Filename field; an empty string for this message (see output).
payload['filename']

''

In [130]:
# For this single-part message the body dict carries both 'size' and 'data'.
payload['body'].keys()

dict_keys(['size', 'data'])

In [131]:
import base64

# The body arrives URL-safe base64 encoded: decode it to bytes, then read
# those bytes as UTF-8 text.
body_data = message['payload']['body']['data']
decoded_body = str(base64.urlsafe_b64decode(body_data), 'utf-8')
decoded_body

'\r\n  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\r\n    <html>\r\n    <head>\r\n        <title>Thanks for subscribing!</title>\r\n    </head>\r\n    <body>\r\n    <table width="100%" cellspacing="0" cellpadding="0" style="font-family: Arial, Helvetica, sans-serif; max-width: 700px; margin: 0 auto; padding: 15px;">\r\n        <tr>\r\n            <td>\r\n                <p style="font-size: 16px;"><b>Welcome to TLDR AI! Before we get into what to expect, one quick thing:</b></p>\r\n                <p style="font-size: 16px;">1) <b>Reply \'OK\' to this email.</b> (You can skip this step if you\'re signing up an RSS reader, Google Group, or Slack Group you are automatically verified).</p>\r\n                <p style="font-size: 16px;">2) If this email is in the spam or promotions folder make sure to <b>move it to your primary inbox.</b> Here\'s how to do it:</p>\r\n                <p><b>For Gmail on your phone:

In [132]:
from bs4 import BeautifulSoup

# Strip the markup from the decoded HTML body and print the remaining text.
plain_text = BeautifulSoup(markup=decoded_body, features='html.parser').get_text()
print(plain_text)





Thanks for subscribing!





Welcome to TLDR AI! Before we get into what to expect, one quick thing:
1) Reply 'OK' to this email. (You can skip this step if you're signing up an RSS reader, Google Group, or Slack Group you are automatically verified).
2) If this email is in the spam or promotions folder make sure to move it to your primary inbox. Here's how to do it:
For Gmail on your phone: Hit the 3 dots at top right corner, click "Move to" then "Primary."
For Gmail on your computer: Back out of this email then drag and drop this email into the "Primary" tab near the top left of your screen.
For Apple mail users: Tap on our email address at the top of this email (next to "From:" on mobile) and click "Add to VIPs."
For everyone else: Please follow these instructions.
3) When you’re done with steps 1 and 2, click the button below to finalize your signup!







Confirm Signup








PS: If you did not subscribe to TLDR AI, please click here to unsubscribe.








# Parse an email with multiple parts

In [6]:
# Fetch one specific message by its id (the first newsletter listed by the query above).
email_id = '19378480b1d8b929'
message = (
    service.users()
    .messages()
    .get(userId='me', id=email_id)
    .execute()
)

In [7]:
# MIME type of the email.
# For a multipart message (HTML and plain text), the desired format has to be
# picked from the payload's 'parts' list.
message['payload']['mimeType']

'multipart/alternative'

In [8]:
# Compared with the single-part message, the payload now also has a 'parts' key.
message['payload'].keys()

dict_keys(['partId', 'mimeType', 'filename', 'headers', 'body', 'parts'])

In [9]:
# In a multipart email the top-level body carries no data (size 0, see output).
message['payload']['body']

{'size': 0}

In [10]:
# The content lives under the extra 'parts' key instead; count its entries.
len(message['payload']['parts'])

2

In [12]:
import base64

# Walk the top-level parts and print every text/plain part after decoding its
# URL-safe base64 body as UTF-8 text.
for pp in message['payload']['parts']:
    if pp['mimeType'] != 'text/plain':
        continue
    body_data = pp['body']['data']
    decoded_body = str(base64.urlsafe_b64decode(body_data), 'utf-8')
    print(decoded_body)

Elon Musk's xAI plans to launch a standalone app for its Grok chatbot
by December, competing directly with OpenAI's
ChatGPT. ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌  ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ 


 Sign Up [1] |Advertise [2]|View Online [3] 

		TLDR

		TOGETHER WITH [Incogni] [4]

TLDR AI 2024-11-29

 🎁 THIS HOLIDAY SEASON, PROTECT YOUR PRIVACY: DATA BROKERS ARE
SELLING THEM LEGALLY (SPONSOR) [4] 

 This holiday season, you deserve peace of mind—especially when it
comes to your personal data.

Check out Incogni [4] — it's the hassle-free way to protect your
data privacy:

 	* Incogni scans people search sites for your personal information
and sends removal requests on your behalf.
 	* Within ±14 days, your records are off the dark corners of the
internet.
 	* Every 10 days, Incogni does it all over again.
 	* You stay in the loop with regular privacy reports.

Take back control. Reduce spam, scam, and cyber risk.

🎄 SPECIAL HOLIDAY OFFER: Get 58% off

In [150]:
# Walk the top-level parts and, for every text/html part, decode its URL-safe
# base64 body as UTF-8, strip the markup, and print the remaining text.
for pp in message['payload']['parts']:
    if pp['mimeType'] != 'text/html':
        continue
    body_data = pp['body']['data']
    decoded_body = str(base64.urlsafe_b64decode(body_data), 'utf-8')
    plain_text = BeautifulSoup(markup=decoded_body, features='html.parser').get_text()
    print(plain_text)

TLDR AI
Elon Musk's xAI plans to launch a standalone app for its Grok chatbot by December, competing directly with OpenAI's ChatGPT. ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌  ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ 










Sign Up
|Advertise|View Online



TLDR

Together With 



TLDR AI 2024-11-29







🎁 This Holiday Season, Protect Your Privacy: Data Brokers Are Selling Them Legally (Sponsor)





                                    This holiday season, you deserve peace of mind—especially when it comes to your personal data.Check out Incogni — it's the hassle-free way to protect your data privacy:

Incogni scans people search sites for your personal information and sends removal requests on your behalf.
Within ±14 days, your records are off the dark corners of the internet.
Every 10 days, Incogni does it all over again.
You stay in the loop with regular privacy reports.

Take back control. Reduce spam, scam, and cyber risk.
🎄 Special Holiday Offer: Get 58