In [96]:
from google.oauth2 import service_account
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from datetime import datetime, timedelta
import re
import base64
from transformers import pipeline

In [97]:
def authenticate_gmail():
    SCOPES = ['https://www.googleapis.com/auth/gmail.modify']
    flow = InstalledAppFlow.from_client_secrets_file('client_secret_208589094238-nqpmnoec2lmkr5jbu637ea2u3chsg4lt.apps.googleusercontent.com.json', SCOPES)
    creds = flow.run_local_server(port=0)
    service = build('gmail', 'v1', credentials=creds)
    return service

In [98]:
def search_emails(service, query):
    results = service.users().messages().list(userId='me', q=query).execute()
    messages = results.get('messages', [])
    return messages

def filter_emails(service):
    keywords = ["Assignment", "Exam", "Deadline"]
    important_senders = ["professor@college.edu", "admin@college.edu"]
    today = datetime.now().date()
    three_days_ago = (today - timedelta(days=3)).strftime("%Y/%m/%d")

    # Construct search query for Gmail
    query = f"({ ' OR '.join(keywords) }) from:({ ' OR '.join(important_senders) }) after:{three_days_ago}"
    messages = search_emails(service, query)

    important_emails = []
    moderate_emails = []
    low_priority_emails = []

    for msg in messages:
        email_data = service.users().messages().get(userId='me', id=msg['id']).execute()
        payload = email_data['payload']
        headers = payload['headers']

        # Extracting Subject and From fields
        subject = next(header['value'] for header in headers if header['name'] == 'Subject')
        sender = next(header['value'] for header in headers if header['name'] == 'From')

        # Classify urgency
        urgency = detect_urgency(subject)

        # Tag emails based on importance and urgency
        if any(keyword.lower() in subject.lower() for keyword in keywords):
            if urgency:
                important_emails.append((msg['id'], subject))
            else:
                moderate_emails.append((msg['id'], subject))
        else:
            low_priority_emails.append((msg['id'], subject))

    return important_emails, moderate_emails, low_priority_emails

def detect_urgency(text):
    urgent_keywords = ["urgent", "asap", "important", "immediate"]
    return any(kw in text.lower() for kw in urgent_keywords)

In [99]:
# Step 4: Manage Labels
def sanitize_label_name(label_name):
    # Keep only alphanumeric characters and underscores, replace spaces with underscores
    sanitized_name = re.sub(r'[^a-zA-Z0-9_]', '_', label_name)  # Replace invalid characters with underscores
    return sanitized_name[:225]  # Ensure it doesn't exceed 225 characters


def get_or_create_label(service, label_name):
    # Static, sanitized label name for testing
    label_name = "ImportantEmails"  # Test with a simple label name first
    
    try:
        # Check if label already exists
        existing_labels = service.users().labels().list(userId='me').execute().get('labels', [])
        for label in existing_labels:
            if label['name'] == label_name:
                print(f"Label '{label_name}' exists, using existing label ID.")
                return label['id']
        
        # Create new label if it doesn't exist
        label = {
            'name': label_name,
            'labelListVisibility': 'labelShow',
            'messageListVisibility': 'show'
        }
        new_label = service.users().labels().create(userId='me', body=label).execute()
        print(f"Label '{label_name}' created with ID: {new_label['id']}")
        return new_label['id']
    
    except Exception as e:
        print(f"Failed to create or retrieve label '{label_name}': {e}")
        raise


def apply_labels(service, email_ids, label_name):
    label_id = get_or_create_label(service, label_name)
    for email_id in email_ids:
        service.users().messages().modify(userId='me', id=email_id, body={'addLabelIds': [label_id]}).execute()

In [100]:
def summarize_email(content):
    summarizer = pipeline("summarization")
    return summarizer(content, max_length=60, min_length=10, do_sample=False)[0]['summary_text']

In [101]:
def create_digest(service, emails):
    digest_content = "Your Email Digest:\n\n"
    for label, email_list in emails.items():
        digest_content += f"--- {label} Emails ---\n\n"
        for email_id, subject in email_list:
            message = service.users().messages().get(userId='me', id=email_id, format='full').execute()
            content = message['snippet']
            summary = summarize_email(content)
            digest_content += f"Subject: {subject}\nSummary: {summary}\n\n"

    # Set your email address here
    recipient_email = "aryakashyapayush@gmail.com"  # Replace with your actual email
    send_email(service, recipient_email, "Daily Digest", digest_content)



def send_email(service, recipient, subject, message_text):
    if not recipient:
        raise ValueError("Recipient address is required.")

    # Prepare the email message
    message = {
        'raw': base64.urlsafe_b64encode(f"From: {recipient}\nTo: {recipient}\nSubject: {subject}\n\n{message_text}".encode("utf-8")).decode("utf-8")
    }
    
    try:
        # Send the email
        service.users().messages().send(userId="me", body=message).execute()
        print(f"Email sent to {recipient} with subject: {subject}")
    except Exception as e:
        print(f"An error occurred while sending email: {e}")

In [102]:
def main():
    service = authenticate_gmail()
    
    # Filter and prioritize emails
    important_emails, moderate_emails, low_priority_emails = filter_emails(service)

    # Tag emails by importance
    apply_labels(service, [email_id for email_id, _ in important_emails], "Important")
    apply_labels(service, [email_id for email_id, _ in moderate_emails], "Moderate")
    apply_labels(service, [email_id for email_id, _ in low_priority_emails], "Low Priority")

    # Prepare the daily digest content
    emails_by_category = {
        "Important": important_emails,
        "Moderate": moderate_emails,
        "Low Priority": low_priority_emails,
    }
    create_digest(service, emails_by_category)

    print("Email automation complete!")

if __name__ == "__main__":
    main()

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=208589094238-nqpmnoec2lmkr5jbu637ea2u3chsg4lt.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A57791%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fgmail.modify&state=hWRD3qbpnkmR6gaSBo17LdYZ2NgISI&access_type=offline
Label 'ImportantEmails' exists, using existing label ID.
Label 'ImportantEmails' exists, using existing label ID.
Label 'ImportantEmails' exists, using existing label ID.
Email sent to aryakashyapayush@gmail.com with subject: Daily Digest
Email automation complete!


In [104]:
# code to get mails from a specific sender

from __future__ import print_function
import os.path
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# If modifying these SCOPES, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

def authenticate_gmail():
    """Authenticate and return a Gmail service instance."""
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    else:
        flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
        creds = flow.run_local_server(port=0)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return build('gmail', 'v1', credentials=creds)

def get_emails_from_senders(service, senders):
    """Get emails from specific senders."""
    try:
        unread_emails = []
        for sender in senders:
            # Use the query to filter emails from the specified sender
            results = service.users().messages().list(userId='me', q=f'from:{sender}').execute()
            messages = results.get('messages', [])

            if not messages:
                print(f"No emails found from {sender}.")
                continue

            for msg in messages:
                message = service.users().messages().get(userId='me', id=msg['id'], format='full').execute()
                subject = next(header['value'] for header in message['payload']['headers'] if header['name'] == 'Subject')
                snippet = message.get('snippet', '')
                unread_emails.append({'subject': subject, 'snippet': snippet, 'from': sender})

        return unread_emails

    except Exception as error:
        print(f'An error occurred: {error}')
        return []

if __name__ == '__main__':
    # Authenticate and create the Gmail service
    service = authenticate_gmail()
    
    # Define the specific email addresses you want to read from
    specific_senders = ['ayusharya.kashyap@iiitb.ac.in']  # Add the specific senders here

    # Get and print emails from specific senders
    emails = get_emails_from_senders(service, specific_senders)
    print("Emails from Specific Senders:")
    for email in emails:
        print(f"From: {email['from']}, Subject: {email['subject']}, Snippet: {email['snippet']}")


Emails from Specific Senders:
From: ayusharya.kashyap@iiitb.ac.in, Subject: , Snippet: I have an important assignment tomorrow.
From: ayusharya.kashyap@iiitb.ac.in, Subject: , Snippet: Hi, this is just the beginning!


In [107]:
# this code gives us all the subjects of the unread mails from all senders

from __future__ import print_function
import os.path
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# If modifying these SCOPES, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

def authenticate_gmail():
    """Authenticate and return a Gmail service instance."""
    creds = None
    # Check if token.json file exists
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    else:
        # If not, perform OAuth2 flow
        flow = InstalledAppFlow.from_client_secrets_file('client_secret_208589094238-nqpmnoec2lmkr5jbu637ea2u3chsg4lt.apps.googleusercontent.com.json', SCOPES)
        creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return build('gmail', 'v1', credentials=creds)

def get_unread_emails(service):
    """Get all unread emails."""
    try:
        # Call the Gmail API to fetch unread emails
        results = service.users().messages().list(userId='me', q='is:unread').execute()
        messages = results.get('messages', [])

        if not messages:
            print("No unread emails found.")
            return []

        unread_emails = []
        for msg in messages:
            # Get the full message details
            message = service.users().messages().get(userId='me', id=msg['id'], format='full').execute()
            subject = next(header['value'] for header in message['payload']['headers'] if header['name'] == 'Subject')
            snippet = message.get('snippet', '')
            unread_emails.append({'subject': subject, 'snippet': snippet})

        return unread_emails

    except Exception as error:
        print(f'An error occurred: {error}')
        return []

if __name__ == '__main__':
    # Authenticate and create the Gmail service
    service = authenticate_gmail()
    
    # Get and print unread emails
    unread_emails = get_unread_emails(service)
    print("Unread Emails:")
    for email in unread_emails:
        print(f"Subject: {email['subject']}, Snippet: {email['snippet']}")


Unread Emails:
Subject: , Snippet: Hi, this is a test run for all senders.


In [118]:
# this code gives us the subject, snippet and link of all the unread mails from all senders 

from __future__ import print_function
import os.path
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# If modifying these SCOPES, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

def authenticate_gmail():
    """Authenticate and return a Gmail service instance."""
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    else:
        flow = InstalledAppFlow.from_client_secrets_file('client_secret_208589094238-nqpmnoec2lmkr5jbu637ea2u3chsg4lt.apps.googleusercontent.com.json', SCOPES)
        creds = flow.run_local_server(port=0)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return build('gmail', 'v1', credentials=creds)

def get_unread_emails(service):
    """Get all unread emails."""
    try:
        results = service.users().messages().list(userId='me', q='is:unread').execute()
        messages = results.get('messages', [])

        if not messages:
            print("No unread emails found.")
            return []

        unread_emails = []
        for msg in messages:
            message = service.users().messages().get(userId='me', id=msg['id'], format='full').execute()
            subject = next(header['value'] for header in message['payload']['headers'] if header['name'] == 'Subject')
            snippet = message.get('snippet', '')
            link = f"https://mail.google.com/mail/u/3/#inbox/{msg['id']}"  # Create link to email
            unread_emails.append({'subject': subject, 'snippet': snippet, 'link': link})

        return unread_emails

    except Exception as error:
        print(f'An error occurred: {error}')
        return []

if __name__ == '__main__':
    # Authenticate and create the Gmail service
    service = authenticate_gmail()
    
    # Get and print unread emails
    unread_emails = get_unread_emails(service)
    print("Unread Emails:")
    for email in unread_emails:
        print(f"Subject: {email['subject']}, Snippet: {email['snippet']}, Link: {email['link']}")

Unread Emails:
Subject: Stalls for Synergy, Snippet: Dear Students, We are excited to announce an amazing opportunity to showcase your talents, skills, and innovative ideas at our upcoming tech fest, Synergy! This is your chance to bring your creativity, Link: https://mail.google.com/mail/u/3/#inbox/192f1029e209b9de


In [5]:
# this code uses transformers library from hugging face open source for email summarization

from __future__ import print_function
import os.path
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from transformers import pipeline

# If modifying these SCOPES, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

# Set up the Hugging Face summarization model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def authenticate_gmail():
    """Authenticate and return a Gmail service instance."""
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    else:
        flow = InstalledAppFlow.from_client_secrets_file('client_secret_208589094238-nqpmnoec2lmkr5jbu637ea2u3chsg4lt.apps.googleusercontent.com.json', SCOPES)
        creds = flow.run_local_server(port=0)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return build('gmail', 'v1', credentials=creds)

def summarize_email(content):
    """Use Hugging Face model to summarize the email content."""
    summary = summarizer(content, max_length=150, min_length=40, do_sample=False)
    return summary[0]['summary_text']

def extract_keywords(text, num_keywords=5):
    vectorizer = CountVectorizer(stop_words='english')
    X = vectorizer.fit_transform([text])
    keywords = vectorizer.get_feature_names_out()
    # Get the highest scoring keywords
    scores = X.sum(axis=0).A1
    top_keywords = sorted(zip(scores, keywords), reverse=True)[:num_keywords]
    return [keyword for score, keyword in top_keywords]

def get_unread_emails(service):
    """Get all unread emails and summarize them."""
    try:
        results = service.users().messages().list(userId='me', q='is:unread').execute()
        messages = results.get('messages', [])

        if not messages:
            print("No unread emails found.")
            return []

        unread_emails = []
        for msg in messages:
            message = service.users().messages().get(userId='me', id=msg['id'], format='full').execute()
            subject = next(header['value'] for header in message['payload']['headers'] if header['name'] == 'Subject')
            snippet = message.get('snippet', '')
            link = f"https://mail.google.com/mail/u/3/#inbox/{msg['id']}"  # Adjust to your Gmail account URL
            
            # Summarize the email snippet
            summary = summarize_email(snippet)
            unread_emails.append({'subject': subject, 'snippet': snippet, 'summary': summary, 'link': link})

        return unread_emails

    except Exception as error:
        print(f'An error occurred: {error}')
        return []

if __name__ == '__main__':
    # Authenticate and create the Gmail service
    service = authenticate_gmail()
    
    # Get and print unread emails
    unread_emails = get_unread_emails(service)
    print("Unread Emails:")
    for email in unread_emails:
        print(f"Subject: {email['subject']}\nSnippet: {email['snippet']}\nSummary: {email['summary']}\nLink: {email['link']}\n")


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


No unread emails found.
Unread Emails:


In [2]:
# this code gives me the subject, summary and link alongwith the keywords of the unread emails

from __future__ import print_function
import os.path
import re
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from transformers import pipeline
from sklearn.feature_extraction.text import CountVectorizer

# If modifying these SCOPES, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

def authenticate_gmail():
    """Authenticate and return a Gmail service instance."""
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    else:
        flow = InstalledAppFlow.from_client_secrets_file('client_secret.json', SCOPES)
        creds = flow.run_local_server(port=0)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return build('gmail', 'v1', credentials=creds)

def get_unread_emails(service):
    """Get all unread emails."""
    try:
        results = service.users().messages().list(userId='me', q='is:unread').execute()
        messages = results.get('messages', [])

        if not messages:
            print("No unread emails found.")
            return []

        unread_emails = []
        for msg in messages:
            message = service.users().messages().get(userId='me', id=msg['id'], format='full').execute()
            subject = next(header['value'] for header in message['payload']['headers'] if header['name'] == 'Subject')
            snippet = message.get('snippet', '')
            link = f"https://mail.google.com/mail/u/3/#inbox/{msg['id']}"  # Create link to email
            unread_emails.append({'subject': subject, 'snippet': snippet, 'link': link, 'id': msg['id']})

        return unread_emails

    except Exception as error:
        print(f'An error occurred: {error}')
        return []

def clean_email_content(content):
    # """Clean and preprocess the email content."""
    # # Remove signatures, disclaimers, and unnecessary whitespace
    # content = re.sub(r'Sincerely,.*', '', content, flags=re.DOTALL)
    # content = re.sub(r'Best regards,.*', '', content, flags=re.DOTALL)
    # content = re.sub(r'\n\s*\n', '\n', content)  # Remove extra newlines
    # return content.strip()

    # Remove typical introductory phrases or sections
    content = re.sub(r'(?:From|Sent|To|Subject|Date|Regards|Sincerely|Best regards|On \w+day, \d{1,2} \w+ \d{4}).*$', '', content, flags=re.IGNORECASE)
    content = re.sub(r'\n\s*\n', '\n', content)  # Remove extra newlines
    return content.strip()

def extract_keywords(text, num_keywords=5):
    """Extract keywords from the given text."""
    vectorizer = CountVectorizer(stop_words='english')
    X = vectorizer.fit_transform([text])
    keywords = vectorizer.get_feature_names_out()
    # Get the highest scoring keywords
    scores = X.sum(axis=0).A1
    top_keywords = sorted(zip(scores, keywords), reverse=True)[:num_keywords]
    return [keyword for score, keyword in top_keywords]

def summarize_email(email_content):
    """Summarize the email content using a Hugging Face model."""
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    summary = summarizer(email_content, max_length=150, min_length=40, do_sample=False)
    return summary[0]['summary_text']

if __name__ == '__main__':
    # Authenticate and create the Gmail service
    service = authenticate_gmail()
    
    # Get and process unread emails
    unread_emails = get_unread_emails(service)
    
    for email in unread_emails:
        email_content = email['snippet']  # Assuming snippet is enough; otherwise, you may need to fetch the full content
        cleaned_content = clean_email_content(email_content)
        
        if cleaned_content:
            summary = summarize_email(cleaned_content)
            keywords = extract_keywords(cleaned_content)

            print(f"Subject: {email['subject']}")
            print(f"Summary: {summary}")
            print(f"Keywords: {keywords}")
            print(f"Link: {email['link']}\n")


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Your max_length is set to 150, but your input_length is only 9. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=4)


Subject: Re: Regarding 4 Credit Project Elective for the Upcoming Semester
Summary: I have a project. Dear Students, I have aproject. I'm working on a project for my students. Please help me with the project. Please email me at jennifer.smith@mailonline.co.uk.
Keywords: ['students', 'project', 'dear']
Link: https://mail.google.com/mail/u/3/#inbox/192f6a090d899594



Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Your max_length is set to 150, but your input_length is only 58. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=29)


Subject: T1-24-25-DAS 703: Updating Project Group and Title in Googledoc
Summary: Updating Project Group and Title in Googledoc by Prof. Uttam Kumar - Monday, 4. T1-24-25-DAS 703 » Forums » Announcements » Updating Project group and title in Go Googleledoc
Keywords: ['uttam', 'updating', 'title', 'project', 'prof']
Link: https://mail.google.com/mail/u/3/#inbox/192f618c87eff600



Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Your max_length is set to 150, but your input_length is only 60. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=30)


Subject: T1-24-CSE 303: Project Deliverable 1
Summary: Project Deliverable 1 by IMT2021082 Vidhu Arora - Monday, 4 November 2024, 12:05 PM. T1-24-CSE 303 » Forums » Announcements » Project deliverable 1.
Keywords: ['vidhu', 'project', 'imt2021082', 'deliverable', 'arora']
Link: https://mail.google.com/mail/u/3/#inbox/192f5e8496928362



Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Your max_length is set to 150, but your input_length is only 4. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=2)


Subject: Microsoft is hiring Technical Program Manager Intern at INR 40,000/Month!
Summary: CNN.com will feature iReporter photos in a weekly Travel Snapshots gallery. Visit CNN.com/Travel each week for a new gallery of snapshots. Click here for the gallery.
Keywords: ['tap']
Link: https://mail.google.com/mail/u/3/#inbox/192f5a1b4889ee32



Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Your max_length is set to 150, but your input_length is only 19. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)


Subject: Samvaad talk on "An empirical analysis of prices of radio spectrum for mobile communication services" by Dr. V. Sridhar
Summary: This is a gentle reminder for the weekly Samvaad Talk happening on the blog. Greetings everyone, This is a Gentle reminder for  the weekly SAMVAAD Talk happening online. This week, we will be talking about the recent election in the U.S.
Keywords: ['weekly', 'talk', 'samvaad', 'reminder', 'happening']
Link: https://mail.google.com/mail/u/3/#inbox/192f59c27e45ae03



In [2]:
# this code carries out the same functionalities but also builds a web interface using Flask to do the same (Not Working)

from __future__ import print_function
import os.path
import re
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from transformers import pipeline
from sklearn.feature_extraction.text import CountVectorizer
from flask import Flask, render_template

# Initialize Flask app
app = Flask(__name__)

# If modifying these SCOPES, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

def authenticate_gmail():
    """Authenticate and return a Gmail service instance."""
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    else:
        flow = InstalledAppFlow.from_client_secrets_file('client_secret_208589094238-nqpmnoec2lmkr5jbu637ea2u3chsg4lt.apps.googleusercontent.com.json', SCOPES)
        creds = flow.run_local_server(port=0)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return build('gmail', 'v1', credentials=creds)

def get_unread_emails(service):
    """Get all unread emails."""
    try:
        results = service.users().messages().list(userId='me', q='is:unread').execute()
        messages = results.get('messages', [])

        if not messages:
            print("No unread emails found.")
            return []

        unread_emails = []
        for msg in messages:
            message = service.users().messages().get(userId='me', id=msg['id'], format='full').execute()
            subject = next(header['value'] for header in message['payload']['headers'] if header['name'] == 'Subject')
            snippet = message.get('snippet', '')
            link = f"https://mail.google.com/mail/u/0/#inbox/{msg['id']}"  # Create link to email
            unread_emails.append({'subject': subject, 'snippet': snippet, 'link': link, 'id': msg['id']})

        return unread_emails

    except Exception as error:
        print(f'An error occurred: {error}')
        return []

def clean_email_content(content):
    
    """Clean and preprocess the email content."""
    # Remove signatures, disclaimers, and unnecessary whitespace
    content = re.sub(r'Sincerely,.*', '', content, flags=re.DOTALL)
    content = re.sub(r'Best regards,.*', '', content, flags=re.DOTALL)
    content = re.sub(r'\n\s*\n', '\n', content)  # Remove extra newlines
    return content.strip()

def extract_keywords(text, num_keywords=5):
    """Extract keywords from the given text."""
    vectorizer = CountVectorizer(stop_words='english')
    X = vectorizer.fit_transform([text])
    keywords = vectorizer.get_feature_names_out()
    # Get the highest scoring keywords
    scores = X.sum(axis=0).A1
    top_keywords = sorted(zip(scores, keywords), reverse=True)[:num_keywords]
    return [keyword for score, keyword in top_keywords]

def summarize_email(email_content):
    """Summarize the email content using a Hugging Face model."""
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    summary = summarizer(email_content, max_length=150, min_length=40, do_sample=False)
    return summary[0]['summary_text']

@app.route('/')
def display_emails():
    """Fetch and display unread emails with summaries and keywords."""
    # Authenticate and create the Gmail service
    service = authenticate_gmail()
    unread_emails = get_unread_emails(service)
    
    # Process each email for summary and keywords
    processed_emails = []
    for email in unread_emails:
        email_content = email['snippet']  # Using snippet as email content
        cleaned_content = clean_email_content(email_content)
        
        if cleaned_content:
            summary = summarize_email(cleaned_content)
            keywords = extract_keywords(cleaned_content)
            processed_emails.append({
                'subject': email['subject'],
                'summary': summary,
                'keywords': keywords,
                'link': email['link']
            })

    return render_template('emails.html', emails=processed_emails)

if __name__ == '__main__':
    # Run the Flask app
    app.run(debug=True, port=5001)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5001
Press CTRL+C to quit
 * Restarting with stat
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/ayusharyakashyap/Library/Python/3.11/lib/python/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/ayusharyakashyap/Library/Python/3.11/lib/python/site-packages/traitlets/config/application.py", line 1074, in launch_instance
    app.initialize(argv)
  File "/Users/ayusharyakashyap/Library/Python/3.11/lib/python/site-packages/traitlets/config/application.py", line 118, in inner
    return method(app, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/ayusharyakashyap/Library/Python/3.11/lib/python/site-packages/ipykernel/kernelapp.py", line 692, in initialize
    self.init_sockets()
  File "/Users/ayusharyakashyap/Library/Python/3.11/lib/python/site-packages/ipykernel/kernelapp.py",

SystemExit: 1

In [1]:
from __future__ import print_function
import os.path
import re
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from transformers import pipeline
from sklearn.feature_extraction.text import CountVectorizer
from concurrent.futures import ThreadPoolExecutor, as_completed

# If modifying these SCOPES, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']

def authenticate_gmail():
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    else:
        flow = InstalledAppFlow.from_client_secrets_file('client_secret.json', SCOPES)
        creds = flow.run_local_server(port=0)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return build('gmail', 'v1', credentials=creds)

def get_unread_emails(service):
    try:
        results = service.users().messages().list(userId='me', q='is:unread').execute()
        messages = results.get('messages', [])

        unread_emails = []
        for msg in messages:
            message = service.users().messages().get(userId='me', id=msg['id'], format='full').execute()
            subject = next(header['value'] for header in message['payload']['headers'] if header['name'] == 'Subject')
            snippet = message.get('snippet', '')
            link = f"https://mail.google.com/mail/u/3/#inbox/{msg['id']}"
            unread_emails.append({'subject': subject, 'snippet': snippet, 'link': link, 'id': msg['id']})

        return unread_emails

    except Exception as error:
        print(f'An error occurred: {error}')
        return []

def clean_email_content(content):
    # Remove typical introductory phrases or sections
    content = re.sub(r'(?:From|Sent|To|Subject|Date|Regards|Sincerely|Best regards|On \w+day, \d{1,2} \w+ \d{4}).*$', '', content, flags=re.IGNORECASE)
    content = re.sub(r'\n\s*\n', '\n', content)  # Remove extra newlines
    return content.strip()

def extract_keywords(text, num_keywords=5):
    vectorizer = CountVectorizer(stop_words='english')
    X = vectorizer.fit_transform([text])
    keywords = vectorizer.get_feature_names_out()
    scores = X.sum(axis=0).A1
    top_keywords = sorted(zip(scores, keywords), reverse=True)[:num_keywords]
    return [keyword for score, keyword in top_keywords]

def summarize_email(email_content):
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    summary = summarizer(email_content, max_length=60, min_length=40, do_sample=False)
    return summary[0]['summary_text']

def process_email(email):
    email_content = email['snippet']
    cleaned_content = clean_email_content(email_content)
    if cleaned_content:
        summary = summarize_email(cleaned_content)
        keywords = extract_keywords(cleaned_content)
        return {
            'subject': email['subject'],
            'summary': summary,
            'keywords': keywords,
            'link': email['link']
        }

# def batch_process_emails(emails, batch_size=5):
#     processed_emails = []
#     with ThreadPoolExecutor() as executor:
#         for i in range(0, len(emails), batch_size):
#             batch = emails[i:i + batch_size]
#             future_to_email = {executor.submit(process_email, email): email for email in batch}
#             for future in as_completed(future_to_email):
#                 email_data = future.result()
#                 if email_data:
#                     processed_emails.append(email_data)
#     return processed_emails

if __name__ == '__main__':
    service = authenticate_gmail()
    unread_emails = get_unread_emails(service)
    
    # # Process emails in batches
    # processed_emails = batch_process_emails(unread_emails)
    
    # Display the processed emails
    for email in unread_emails:
        print(f"Subject: {email['subject']}")
        print(f"Summary: {email['summary']}")
        print(f"Keywords: {email['keywords']}")
        print(f"Link: {email['link']}\n")


Subject: Re: Regarding 4 Credit Project Elective for the Upcoming Semester


KeyError: 'summary'