In [1]:
#Version 1.2
import imaplib
import os
import pandas as pd
import email
import csv
import chardet
from bs4 import BeautifulSoup
import re
import unicodedata
from email.utils import parsedate_to_datetime

# Hosts shared by more than one mail domain, named once so the table below
# cannot drift out of sync between sibling domains.
_OFFICE365_IMAP_HOST = 'outlook.office365.com'
_YAHOO_IMAP_HOST = 'imap.mail.yahoo.com'

# Maps the domain part of an email address to the IMAP host used to read it.
imap_servers = {
    'gmail.com': 'imap.gmail.com',
    'outlook.com': _OFFICE365_IMAP_HOST,
    'hotmail.com': _OFFICE365_IMAP_HOST,
    'yahoo.com': _YAHOO_IMAP_HOST,
    'myyahoo.com': _YAHOO_IMAP_HOST,
    'aol.com': 'imap.aol.com',
    'mail.com': 'imap.mail.com',
    # Add other providers here
}



def normalize_text(text):
    """Return *text* in Unicode NFKD (compatibility-decomposed) form."""
    decomposed = unicodedata.normalize('NFKD', text)
    return decomposed



def clean_html_body(html):
    """Clean the HTML content and return plain text.

    Steps, in order:
      1. Strip markup with BeautifulSoup, one text node per line.
      2. Collapse runs of newlines and runs of spaces.
      3. Repair common "UTF-8 read as Windows-1252" mojibake sequences.
      4. Apply NFKD normalization via ``normalize_text``.

    The mojibake repair must run BEFORE normalization: NFKD decomposes the
    circumflexed letters that start every mojibake sequence into a base
    letter plus a combining mark, after which none of the keys below could
    ever match.

    Args:
        html: HTML (or plain text) content of an email body, as ``str``.

    Returns:
        The cleaned plain-text body with surrounding whitespace stripped.
    """
    soup = BeautifulSoup(html, 'html.parser')
    text = soup.get_text(separator='\n', strip=True)
    cleaned_text = re.sub(r'\n+', '\n', text)  # Remove multiple newlines
    cleaned_text = re.sub(r'[ ]{2,}', ' ', cleaned_text)  # Remove multiple spaces

    # Keys are written as escapes so the exact code points are unambiguous
    # regardless of how this source file itself is encoded or displayed.
    # Every three-character key is the Windows-1252 rendering of the UTF-8
    # bytes E2 80 xx.
    replacements = {
        '\u00e2\u20ac\u201c': '-',       # En dash (U+2013)
        '\u00e2\u20ac\u2039': '',        # Zero-width space (U+200B)
        '\u00e2\u20ac\u2122': "'",       # Right single quotation mark (U+2019)
        '\u00e2\u20ac\u0153': '"',       # Left double quotation mark (U+201C)
        '\u00e2\u20ac\x9d': '"',         # Right double quotation mark (U+201D)
        '\u00e2\u20ac\u017e': '"',       # Low double quotation mark (U+201E)
        '\u00e2\u20ac\u00a6': '...',     # Ellipsis (U+2026)
        '\u00e2\u20ac\u00a2': '\u2022',  # Bullet (U+2022)
        # Bare two-character prefix: what is left of a right double quote
        # when its third byte (0x9D, undefined in Windows-1252) was dropped
        # during decoding.
        '\u00e2\u20ac': '"',
        '\u00c2': '',     # Stray lead byte of a mis-decoded non-breaking space
        '&#39;': "'",     # HTML entity for apostrophe (double-escaped input)
        '\xa0': ' ',      # Non-breaking space in Unicode
    }

    # Longest keys first, so the bare two-character prefix cannot eat the
    # start of a longer sequence before that sequence has been matched.
    for search in sorted(replacements, key=len, reverse=True):
        cleaned_text = cleaned_text.replace(search, replacements[search])

    cleaned_text = normalize_text(cleaned_text)
    return cleaned_text.strip()



def _decode_subject(raw_subject):
    """Return the Subject header as text, joining every RFC 2047 fragment."""
    from email.header import decode_header

    if raw_subject is None:
        # Messages without a Subject header are legal.
        return ""

    pieces = []
    for fragment, charset in decode_header(raw_subject):
        if isinstance(fragment, bytes):
            try:
                fragment = fragment.decode(charset or 'utf-8', errors='replace')
            except LookupError:
                # Charset label Python does not know (e.g. "unknown-8bit").
                fragment = fragment.decode('utf-8', errors='replace')
        pieces.append(fragment)
    return ''.join(pieces)


def _format_date(raw_date):
    """Format a Date header as ``YYYY-MM-DD HH:MM:SS`` or return "Unknown"."""
    if not raw_date:
        return "Unknown"
    try:
        return parsedate_to_datetime(raw_date).strftime("%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError):
        # Malformed Date header; which exception is raised depends on the
        # Python version.
        return "Unknown"


def _decode_payload(part):
    """Decode one message part to text and run it through clean_html_body."""
    raw_body = part.get_payload(decode=True)
    if not raw_body:
        return ""
    detected = chardet.detect(raw_body)['encoding'] or 'utf-8'
    try:
        text = raw_body.decode(detected, errors='ignore')
    except LookupError:
        # chardet reported an encoding name Python has no codec for.
        text = raw_body.decode('utf-8', errors='ignore')
    return clean_html_body(text)


def _extract_body_and_attachments(msg):
    """Return ``(body_text, attachment_names)`` for a parsed message."""
    if not msg.is_multipart():
        return _decode_payload(msg), []

    html_body = ""
    plain_body = ""
    attachments = []
    for part in msg.walk():
        content_type = part.get_content_type()
        if content_type == "text/html":
            html_body = _decode_payload(part)
        elif 'attachment' in str(part.get("Content-Disposition")):
            # get_filename() returns None when no filename parameter is
            # present; joining None into the CSV cell would raise TypeError.
            attachments.append(part.get_filename() or "(unnamed)")
        elif content_type == "text/plain" and not plain_body:
            plain_body = _decode_payload(part)

    # Prefer the HTML rendering; fall back to plain text so multipart
    # messages without an HTML alternative do not end up with an empty body.
    return (html_body or plain_body), attachments


def fetch_emails(email_id, app_specific_password, imap_server, alias_id):
    """Download every message in the account's inbox into a CSV file.

    Connects to ``imap_server`` over SSL, logs in, and writes one row per
    message to ``email_data/<alias_id>_email_data.csv`` (the folder is created
    if missing; an existing file is overwritten). Columns: 'Email ID' (the
    From header), 'Subject', 'Date Time', 'Body', 'Attachments', 'Alias ID'.

    Connection-level failures (login, select, search, fetch) are reported on
    stdout and end processing for this account. A single message that cannot
    be parsed is reported and skipped so the rest of the mailbox is still
    exported. The IMAP session is logged out whether or not an error occurred.

    Args:
        email_id: Login name / email address of the account.
        app_specific_password: Password (or app password) for the account.
        imap_server: Host name of the IMAP server.
        alias_id: Identifier used in the CSV file name and last column.

    Returns:
        None. Results are written to disk; status is printed.
    """
    mail = None
    try:
        mail = imaplib.IMAP4_SSL(imap_server)
        mail.login(email_id, app_specific_password)
        status, detail = mail.select("inbox")
        if status != 'OK':
            # select() reports a refused mailbox via its status, not an exception.
            raise imaplib.IMAP4.error(f"SELECT inbox failed: {detail}")

        # Prepare folder and CSV file
        folder_name = "email_data"
        os.makedirs(folder_name, exist_ok=True)
        csv_file_name = os.path.join(folder_name, f"{alias_id}_email_data.csv")

        with open(csv_file_name, mode='w', newline='', encoding='utf-8') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(['Email ID', 'Subject', 'Date Time','Body', 'Attachments' , 'Alias ID'])

            # Search and fetch emails
            status, data = mail.search(None, "ALL")
            if status != 'OK':
                raise imaplib.IMAP4.error(f"SEARCH failed: {data}")

            for num in data[0].split():
                # Fetch outside the per-message guard: a broken connection
                # should stop the run instead of failing once per message.
                status, msg_data = mail.fetch(num, "(RFC822)")
                if status != 'OK' or not msg_data or not isinstance(msg_data[0], tuple):
                    print(f"Skipping message {num.decode()} in {email_id} : fetch returned no data")
                    continue

                try:
                    msg = email.message_from_bytes(msg_data[0][1])
                    body, attachments = _extract_body_and_attachments(msg)
                    row = [
                        msg.get("From"),
                        _decode_subject(msg["Subject"]),
                        _format_date(msg.get("Date")),
                        body,
                        ', '.join(attachments),
                        alias_id,
                    ]
                except Exception as e:
                    # One malformed message must not discard the whole mailbox.
                    print(f"Skipping message {num.decode()} in {email_id} : {e}")
                    continue

                # Write email data to CSV
                csv_writer.writerow(row)

        print(f"Emails have been successfully written to {csv_file_name}.")

    except Exception as e:
        print(f"Error accessing {email_id} : {e}")

    finally:
        if mail is not None:
            try:
                mail.logout()
            except (imaplib.IMAP4.error, OSError):
                # Best-effort cleanup: the connection may already be gone.
                pass


# Read Excel file containing email credentials
def read_credentials_and_fetch_emails(excel_file, sheet_name):
    """Fetch the inbox of every account listed in an Excel sheet.

    Reads ``sheet_name`` from ``excel_file`` and, for each row, takes the
    address from column 'EMAIL', the password from 'EMAIL PASSWORDS' and the
    alias identifier from '#'. The IMAP host is looked up in ``imap_servers``
    by the address's domain; rows whose domain is not configured are reported
    and skipped.

    Args:
        excel_file: Path to the workbook holding the credentials.
        sheet_name: Name of the sheet to read.

    Returns:
        None. Progress is printed; per-account output is produced by
        ``fetch_emails``.
    """
    # Load credentials from Excel
    df = pd.read_excel(excel_file, sheet_name=sheet_name)

    # Loop through each row in the dataframe
    for _, row in df.iterrows():
        email_id = row['EMAIL']
        password = row['EMAIL PASSWORDS']
        alias_id = row['#']

        # Blank cells come back from pandas as NaN (a float); calling
        # .split() on one would abort the whole run, so skip such rows.
        if not isinstance(email_id, str) or not email_id.strip():
            print(f"Skipping row with missing email address (alias {alias_id})")
            continue
        email_id = email_id.strip()

        # Extract the domain to determine the correct IMAP server. Domains
        # are case-insensitive, while the lookup table uses lowercase keys.
        domain = email_id.split('@')[-1].lower()
        imap_server = imap_servers.get(domain)

        if imap_server:
            print(f"Fetching emails from {email_id}")
            fetch_emails(email_id, password, imap_server, alias_id)
        else:
            print(f"IMAP server not configured for {email_id}")


# Workbook and sheet that list the accounts to export.
excel_file = r"Property & Alias Info.xlsx"
sheet_name = "ALIAS INFO"

# Only start the export when executed directly (script or notebook cell), so
# importing this module does not trigger network access and file writes.
if __name__ == "__main__":
    read_credentials_and_fetch_emails(excel_file, sheet_name)
        



Fetching emails from montgomery_august@aol.com
Emails have been successfully written to email_data\208_email_data.csv.
Fetching emails from tayloradams43@outlook.com
Error accessing tayloradams43@outlook.com : b'LOGIN failed.'
Fetching emails from campbellalex33@aol.com
Emails have been successfully written to email_data\210_email_data.csv.
Fetching emails from riley.brooks.work@hotmail.com
Error accessing riley.brooks.work@hotmail.com : b'LOGIN failed.'
Fetching emails from andavis.designs@gmail.com
Error accessing andavis.designs@gmail.com : b'[ALERT] Application-specific password required: https://support.google.com/accounts/answer/185833 (Failure)'
Fetching emails from whitmoreconsulting@outlook.com
Error accessing whitmoreconsulting@outlook.com : b'LOGIN failed.'
Fetching emails from edwards.workplace@yahoo.com
Error accessing edwards.workplace@yahoo.com : b'[AUTHENTICATIONFAILED] LOGIN Invalid credentials'
Fetching emails from official.lewiscasey@aol.com
Emails have been successf