<a href="https://colab.research.google.com/github/alx-zhu/email-sentiment-analysis/blob/main/EmailSentimentAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing and Importing Libraries

In [None]:
!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting google-api-python-client
  Downloading google_api_python_client-2.86.0-py2.py3-none-any.whl (11.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m72.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: google-api-python-client
  Attempting uninstall: google-api-python-client
    Found existing installation: google-api-python-client 2.84.0
    Uninstalling google-api-python-client-2.84.0:
      Successfully uninstalled google-api-python-client-2.84.0
Successfully installed google-api-python-client-2.86.0


In [None]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117

Looking in indexes: https://download.pytorch.org/whl/cu117, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip3 install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.29.2-py3-none-any.whl (7.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m51.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.29.2


In [None]:
from __future__ import print_function

import os.path

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from bs4 import BeautifulSoup
import base64
import html
import re
import pandas as pd

# Getting Google Credentials

In [None]:
def google_get_creds():
  """Return Gmail API credentials, reusing or refreshing a cached token if possible.

  Resolution order:
    1. A valid token cached in ``token.json`` is returned unchanged.
    2. An expired cached token that carries a refresh token is refreshed
       without user interaction.
    3. Otherwise the stale ``token.json`` (if any) is removed and the
       interactive OAuth flow is started from ``credentials.json``.

  Refreshed or newly obtained credentials are written back to ``token.json``.

  Returns:
    The credentials object, requested with the ``https://mail.google.com/``
    scope.
  """
  # Imported locally so this cell stays self-contained.
  from google.auth.exceptions import RefreshError

  # From the Gmail API
  SCOPES = ['https://mail.google.com/']
  TOKEN_PATH = 'token.json'

  creds = None
  # The file token.json stores the user's access and refresh tokens, and is
  # created automatically when the authorization flow completes for the first
  # time.
  if os.path.exists(TOKEN_PATH):
    creds = Credentials.from_authorized_user_file(TOKEN_PATH, SCOPES)

  if creds and creds.valid:
    return creds

  if creds and creds.expired and creds.refresh_token:
    # Try a silent refresh before bothering the user with a browser login.
    try:
      creds.refresh(Request())
    except RefreshError:
      creds = None
  else:
    creds = None

  if creds is None:
    # Nothing usable is cached: drop the stale token and let the user log in.
    if os.path.exists(TOKEN_PATH):
      os.remove(TOKEN_PATH)
    flow = InstalledAppFlow.from_client_secrets_file(
        'credentials.json', SCOPES)
    creds = flow.run_local_server(port=0)

  # Save the credentials for the next run
  with open(TOKEN_PATH, 'w') as token:
    token.write(creds.to_json())
  return creds

# Getting Gmail Labels

In [None]:
def gmail_get_labels(creds):
  """Fetch the user's Gmail labels as a ``{label name: label id}`` mapping.

  Args:
    creds: Credentials handed to the Gmail API client.

  Returns:
    dict mapping each label name to its label id. The dict is empty when the
    account reports no labels or when the API call fails; a message is
    printed in both cases.
  """
  label_dict = dict()
  try:
    # Call the Gmail API
    service = build('gmail', 'v1', credentials=creds)
    results = service.users().labels().list(userId='me').execute()
    labels = results.get('labels', [])

    if not labels:
      print('No labels found.')
      return label_dict

    for label in labels:
      label_dict[label['name']] = label['id']

  except HttpError as error:
    # TODO(developer) - Handle errors from gmail API.
    print(f'An error occurred: {error}')

  # Always hand back a dict so callers can look labels up without a None check.
  return label_dict

# Generating Sentiment Score Using BERT

In [None]:
# Hugging Face checkpoint used for scoring.
_SENTIMENT_MODEL_NAME = 'nlptown/bert-base-multilingual-uncased-sentiment'

# Filled on first use so the tokenizer and model are loaded once per kernel
# session instead of once per message.
_sentiment_resources = {}


def _get_sentiment_resources():
  """Return the cached ``(tokenizer, model)`` pair, loading it on first use."""
  if not _sentiment_resources:
    # Load both into locals first so a failed load never leaves a half-filled cache.
    tokenizer = AutoTokenizer.from_pretrained(_SENTIMENT_MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(_SENTIMENT_MODEL_NAME)
    model.eval()
    _sentiment_resources['tokenizer'] = tokenizer
    _sentiment_resources['model'] = model
  return _sentiment_resources['tokenizer'], _sentiment_resources['model']


def sentiment_score(message):
  """Score the sentiment of ``message`` with the pretrained classifier.

  Args:
    message: Text to classify. Input longer than the tokenizer's limit is
      truncated.

  Returns:
    int: index of the highest-scoring class plus one.
    NOTE(review): presumably 1 (most negative) to 5 (most positive) for this
    checkpoint - confirm against the model card.
  """
  tokenizer, model = _get_sentiment_resources()
  tokens = tokenizer.encode(message, return_tensors="pt", truncation=True)
  # Inference only: skip autograd bookkeeping.
  with torch.no_grad():
    result = model(tokens)
  return int(torch.argmax(result.logits)) + 1

In [None]:
def remove_replies(message):
  """Strip a quoted reply from ``message``.

  Looks for the first ``On <weekday>, ... wrote:`` header and returns only the
  text that precedes it. The message is returned unchanged when no such
  header is present.
  """
  quote_header = re.search(r'On (Mon|Tue|Wed|Thu|Fri|Sat|Sun),.*wrote:', message)
  if quote_header is None:
    return message
  # Keep everything before the header, including any trailing whitespace.
  return message[:quote_header.start()]

In [None]:
def gmail_label_sentiments(creds):
  """Score up to 10 unread messages and tag each with a ``SENTIMENT<score>`` label.

  For every unread message, the first MIME part is decoded, the text of its
  first ``<p>`` element is whitespace-normalised and stripped of quoted
  replies, and the result is scored with ``sentiment_score``. The message is
  then given the matching ``SENTIMENT<score>`` label together with ``UNREAD``
  and ``STARRED``.

  Messages without parts, without body data, without a ``<p>`` element, or
  whose sentiment label does not exist in the account are skipped.

  Args:
    creds: Credentials handed to the Gmail API client.

  Returns:
    None. Progress and Gmail API errors are printed.
  """
  try:
    # Call the Gmail API
    service = build('gmail', 'v1', credentials=creds)

    # Guard against a missing label mapping so lookups below cannot raise.
    labels = gmail_get_labels(creds) or {}

    # request a list of the unread messages
    result = service.users().messages().list(userId='me', labelIds=['UNREAD'], maxResults=10).execute()
    # .get() yields None when the response has no 'messages' key.
    messages = result.get('messages') or []

    for msg in messages:
      txt = service.users().messages().get(userId='me', id=msg['id']).execute()
      parts = (txt.get("payload") or {}).get("parts")
      if not parts:
        print("Could not read message body. Skipping.")
        continue

      data = (parts[0].get("body") or {}).get("data")
      if not data:
        continue

      # The body is base64url encoded; restore any stripped '=' padding first.
      decoded_data = base64.urlsafe_b64decode(data + "=" * (-len(data) % 4))
      paragraph = BeautifulSoup(decoded_data, "lxml").find('p')
      if paragraph is None:
        continue

      decoded = html.unescape(paragraph.text)
      no_spaces = " ".join(decoded.split())
      message = remove_replies(no_spaces)
      score = sentiment_score(message)

      label_name = f'SENTIMENT{score}'
      label_id = labels.get(label_name)
      if label_id is None:
        # Skip this message rather than abort the whole run on a missing label.
        print(f"Label {label_name} not found. Skipping.")
        continue

      service.users().messages().modify(
          userId='me',
          id=msg['id'],
          body={"addLabelIds": [label_id, 'UNREAD', 'STARRED']},
      ).execute()
      print(f"Sentiment Score: SENTIMENT{score}")

  except HttpError as error:
    print(f'An error occurred: {error}')


In [None]:
# Obtain Gmail credentials, then score and label up to 10 unread messages.
creds = google_get_creds()
gmail_label_sentiments(creds)

# Filter Automatic Responses

In [None]:
def gmail_filter_autoresponses(creds):
  """Print a table of up to 10 non-starred messages tagged with category "auto".

  For every message matching ``-is:starred``, the first MIME part is decoded
  and the text of its first ``<p>`` element is whitespace-normalised and
  stripped of quoted replies. The collected texts are placed in a DataFrame
  with a ``Message`` column and a constant ``Category`` column, which is
  printed.

  Messages without parts, without body data, or without a ``<p>`` element are
  skipped.

  Args:
    creds: Credentials handed to the Gmail API client.

  Returns:
    None. The table and any Gmail API error are printed.
  """
  try:
    # Call the Gmail API
    service = build('gmail', 'v1', credentials=creds)

    # request a list of the non-starred messages
    result = service.users().messages().list(userId='me', q="-is:starred", maxResults=10).execute()
    # .get() yields None when the response has no 'messages' key.
    messages = result.get('messages') or []

    message_list = []

    for msg in messages:
      txt = service.users().messages().get(userId='me', id=msg['id']).execute()
      parts = (txt.get("payload") or {}).get("parts")
      if not parts:
        print("Could not read message body. Skipping.")
        continue

      data = (parts[0].get("body") or {}).get("data")
      if not data:
        continue

      # The body is base64url encoded; restore any stripped '=' padding first.
      decoded_data = base64.urlsafe_b64decode(data + "=" * (-len(data) % 4))
      paragraph = BeautifulSoup(decoded_data, "lxml").find('p')
      if paragraph is None:
        continue

      decoded = html.unescape(paragraph.text)
      no_spaces = " ".join(decoded.split())
      message_list.append(remove_replies(no_spaces))

    df = pd.DataFrame(message_list, columns =['Message'])
    df['Category'] = "auto"
    print(df)

  except HttpError as error:
    print(f'An error occurred: {error}')

In [None]:
# Obtain Gmail credentials, then print the table of up to 10 non-starred
# messages built by gmail_filter_autoresponses.
creds = google_get_creds()
gmail_filter_autoresponses(creds)

Could not read message body. Skipping.
Could not read message body. Skipping.
                                             Message Category
0  Playlists for you. These playlists might earn ...     spam
1  Your job alert for software engineer in Pittsb...     spam
2  Your job alert for software engineer in New Yo...     spam
3  Your job alert for intern in Pittsburgh 10 new...     spam
4  Your job alert for intern in New York 30+ new ...     spam
5  Discover https://discover.app.link/3p?%243p=e_...     spam
6  Elin Jang has accepted your invitation. Let's ...     spam
