# LEMAIYAN HEIGHTS RENT AUTOMATION BOT (PROOF OF CONCEPT, PROTOTYPE, DEPLOYMENT)

## PROOF OF CONCEPT

* Randomly generated dataset used to simulate payment notifications

In [2]:
import random
from faker import Faker
from pathlib import Path
import string

# Single Faker instance reused for every generated name and timestamp.
fake = Faker()

# Account codes are a letter A-G followed by a number 1-6 (A1 ... G6).
accounts = [letter + str(number) for letter in "ABCDEFG" for number in range(1, 7)]

# Text of one simulated payment notification; the placeholders are filled in
# per email with str.format().
email_template = (
    "Dear Customer, your payment of KES {amount} "
    "for account: PAYLEMAIYAN #{code} "
    "has been received from {name} {phone} "
    "on {date_time}. "
    "M-Pesa Ref: {mpesa_ref}"
)
def random_ref_code(length=10):
    """Return a random code of `length` characters drawn from A-Z and 0-9."""
    alphabet = string.ascii_uppercase + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)
# Seed both random sources so a fresh "Restart & Run All" regenerates the same
# dataset; change DATASET_SEED to draw a different batch of emails.
DATASET_SEED = 42
NUM_EMAILS = 200
random.seed(DATASET_SEED)
Faker.seed(DATASET_SEED)

dummy_emails = []
for _ in range(NUM_EMAILS):
    amount = f"{random.randint(5, 20) * 1000}.00"  # whole thousands, 5000.00 - 20000.00
    code = random.choice(accounts)
    name = fake.name()
    # Masked number: three digits, four asterisks, three digits.
    phone = f"{random.randint(700, 799)}****{random.randint(100,999)}"
    date_time = fake.date_time_this_year().strftime('%d/%m/%Y %I:%M %p')
    mpesa_ref = random_ref_code(10)
    email_text = email_template.format(
        amount=amount,
        code=code,
        name=name,
        phone=phone,
        date_time=date_time,
        mpesa_ref=mpesa_ref
    )
    dummy_emails.append(email_text)

# Save to data/dummy_emails_200.txt, one email per blank-line-separated chunk.
# The encoding is explicit so the file does not depend on the platform default.
out_path = Path("data/dummy_emails_200.txt")
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_text("\n\n".join(dummy_emails), encoding="utf-8")

print(f"Created {len(dummy_emails)} dummy emails in {out_path}")
print("Sample email:\n", dummy_emails[0])

Created 200 dummy emails in data\dummy_emails_200.txt
Sample email:
 Dear Customer, your payment of KES 18000.00 for account: PAYLEMAIYAN #A5 has been received from Misty Kim 715****630 on 28/07/2025 12:51 PM. M-Pesa Ref: NEYLBQ84MS


* Logic engine

In [3]:
# --------------------------------------------------------
# Reads MPESA email notifications from dummy_emails_200.txt,
# Parses payment info, updates dummy_rent_tracker.xlsx,
# Avoids double-logging by checking ProcessedRefs sheet.

import pandas as pd
import re
from pathlib import Path
from openpyxl import load_workbook
import warnings
warnings.filterwarnings('ignore')

# --- CONFIGURATION ---
DATA_DIR = Path('data')
EMAIL_FILE = DATA_DIR / 'dummy_emails_200.txt'
SPREADSHEET_FILE = DATA_DIR / 'dummy_rent_tracker.xlsx'

# --- 1. Load Dummy Emails ---
# Emails are separated by a blank line. The encoding is explicit so the result
# does not depend on the platform default, and blank chunks (empty file,
# trailing separator) are dropped so they are not counted as emails.
raw_email_text = EMAIL_FILE.read_text(encoding='utf-8')
email_texts = [chunk.strip() for chunk in raw_email_text.split('\n\n') if chunk.strip()]

print(f"Loaded {len(email_texts)} emails.")

Loaded 200 emails.


In [4]:
# Spot-check one raw email to see the exact text format the parser must handle.
email_texts[22]

'Dear Customer, your payment of KES 10000.00 for account: PAYLEMAIYAN #A1 has been received from Ruth York 712****863 on 11/06/2025 10:27 PM. M-Pesa Ref: LWSB4EJMW3'

In [5]:
# --- 2. Load Workbook and All Sheet Names ---
wb = load_workbook(SPREADSHEET_FILE)
sheet_names = wb.sheetnames

# --- 3. Load ProcessedRefs (deduplication) ---
# Check for the sheet and its 'Ref' column explicitly instead of catching every
# exception, so genuine read failures surface rather than silently resetting
# the deduplication set.
has_refs_sheet = 'ProcessedRefs' in sheet_names
if has_refs_sheet:
    processed_refs_df = pd.read_excel(SPREADSHEET_FILE, sheet_name='ProcessedRefs')
else:
    processed_refs_df = pd.DataFrame(columns=['Ref'])

if has_refs_sheet and 'Ref' in processed_refs_df.columns:
    # Normalise so lookups are insensitive to case and stray whitespace.
    processed_refs = {str(ref).strip().upper() for ref in processed_refs_df['Ref'] if pd.notna(ref)}
else:
    processed_refs = set()
    print("ProcessedRefs sheet is empty or missing. Will create it.")

print(f"Found {len(processed_refs)} previously processed refs.")

Found 50 previously processed refs.


In [6]:
# --- 4. Regex Parser Function ---
def extract_payment_info(email_body):
    """Parse one M-Pesa payment notification into a dict of payment fields.

    Args:
        email_body: Text of the notification email.

    Returns:
        A dict with keys 'Amount' (float), 'AccountCode' (upper-cased),
        'Payer', 'PhoneOrID', 'Date' (string as written in the email) and
        'Ref' (upper-cased), or None when the text is empty or does not match
        the expected wording.
    """
    if not email_body:
        return None

    pattern = (
        r'payment of KES ([\d,]+\.\d{2}) '
        r'for account: PAYLEMAIYAN\s*#?\s*([A-Za-z]\d{1,2})'
        r' has been received from (.+?) '
        # \S rather than '.': the phone/ID is a single token, so it can no
        # longer absorb the last word of the payer's name ("Misty Li 715...").
        r'(\S{1,13}) '
        r'on (\d{2}/\d{2}/\d{4} \d{1,2}:\d{2} [AP]M)\. '
        r'M-Pesa Ref: (\w+)'
    )
    match = re.search(pattern, email_body, flags=re.IGNORECASE)
    if not match:
        return None

    amount, account_code, payer, phone_or_id, paid_on, ref = match.groups()
    return {
        'Amount': float(amount.replace(',', '').strip()),
        'AccountCode': account_code.strip().upper(),
        'Payer': payer.strip(),
        'PhoneOrID': phone_or_id.strip(),
        'Date': paid_on.strip(),
        'Ref': ref.strip().upper(),
    }

In [7]:
# --- 5. Process Emails, Update or Create Sheets ---

TENANT_COLUMNS = ['Date', 'Amount', 'Ref', 'Payer', 'PhoneOrID', 'Payment Mode']
HISTORY_COLUMNS = TENANT_COLUMNS + ['AccountCode', 'TenantSheet']

updates_log = []
new_refs = []
updates_per_sheet = {}

# We'll use openpyxl to add new sheets if needed
wb = load_workbook(SPREADSHEET_FILE)
# The writer is left open here and closed by the later save step. pandas only
# saves the workbook when the writer is closed, so nothing written through it
# in this cell can be read back from disk yet.
writer = pd.ExcelWriter(SPREADSHEET_FILE, engine='openpyxl', mode='a', if_sheet_exists='overlay')

# Loading a Master payments file
try:
    payment_history_df = pd.read_excel(SPREADSHEET_FILE, sheet_name='PaymentHistory')
except (ValueError, KeyError):
    # Sheet does not exist yet: start an empty history with the expected columns.
    payment_history_df = pd.DataFrame(columns=HISTORY_COLUMNS)

seen_refs = set(processed_refs)  # local copy; also catches repeats inside this batch
pending_rows = {}                # sheet title -> payment rows to append in this run
history_rows = []                # rows destined for the PaymentHistory sheet

for email in email_texts:
    payment_data = extract_payment_info(email)
    if not payment_data:
        updates_log.append("Skipped email: Could not parse payment info.")
        continue

    ref = payment_data['Ref'].upper().strip()
    if ref in seen_refs:
        updates_log.append(f"Duplicate ignored (Ref {ref})")
        continue

    account_code = payment_data['AccountCode']
    payer_name = payment_data['Payer'].replace(" ", "_")[:15]

    # Try to match an existing tenant sheet: the first whitespace-separated
    # token of the sheet name (dashes removed) must equal the account code.
    target_sheet = None
    for s in sheet_names:
        name_parts = s.split()
        if not name_parts:
            continue
        sheet_token = name_parts[0].replace('-', '').upper().strip()
        if account_code == sheet_token and 'PROCESSEDREFS' not in s.upper() and 'PAYMENTHISTORY' not in s.upper():
            target_sheet = s
            break

    # --- 7. If no sheet found, CREATE it ---
    if target_sheet is None:
        target_sheet = f"{account_code} - {payer_name if payer_name else 'AutoAdded'}"
        print(f"Creating new sheet: {target_sheet} for new tenant {account_code}")
        updates_log.append(f"Created new sheet: {target_sheet}")
        sheet_names.append(target_sheet)  # So we don't create it twice

    # --- 8. Queue payment for the tenant sheet ---
    # Rows are collected in memory and written once per sheet after the loop.
    # Re-reading the sheet from disk for every payment would miss rows that
    # are still buffered in the writer, so a second payment for the same
    # tenant in one run would overwrite the first.
    payment_row = {
        'Date': payment_data['Date'],
        'Amount': payment_data['Amount'],
        'Ref': payment_data['Ref'],
        'Payer': payment_data['Payer'],
        'PhoneOrID': payment_data['PhoneOrID'],
        'Payment Mode': 'MPESA Payment',
    }
    pending_rows.setdefault(target_sheet, []).append(payment_row)
    updates_log.append(f"Logged payment for {account_code} - Ref {ref}")
    new_refs.append(ref)
    seen_refs.add(ref)
    updates_per_sheet[target_sheet] = updates_per_sheet.get(target_sheet, 0) + 1

    # --- 9. Queue the same payment for the PaymentHistory sheet ---
    history_rows.append({**payment_row, 'AccountCode': account_code, 'TenantSheet': target_sheet})

# Write each touched tenant sheet exactly once: rows already on disk + new rows.
for sheet_title, rows in pending_rows.items():
    try:
        existing_df = pd.read_excel(SPREADSHEET_FILE, sheet_name=sheet_title)
    except (ValueError, KeyError):
        # Sheet is new in this run (not on disk yet).
        existing_df = pd.DataFrame(columns=TENANT_COLUMNS)
    sheet_df = pd.concat([existing_df, pd.DataFrame(rows, columns=TENANT_COLUMNS)], ignore_index=True)
    sheet_df.to_excel(writer, sheet_name=sheet_title, index=False)

if history_rows:
    payment_history_df = pd.concat(
        [payment_history_df, pd.DataFrame(history_rows, columns=HISTORY_COLUMNS)],
        ignore_index=True,
    )

Creating new sheet: A5 - Misty_Kim for new tenant A5
Creating new sheet: C5 - Susan_Cook for new tenant C5
Creating new sheet: D1 - Michael_Thomas for new tenant D1
Creating new sheet: E3 - Todd_Harris for new tenant E3
Creating new sheet: C1 - Grace_Huffman for new tenant C1
Creating new sheet: C4 - Bobby_Alvarez for new tenant C4
Creating new sheet: A3 - Bradley_Turner for new tenant A3
Creating new sheet: B6 - Eric_Pruitt for new tenant B6
Creating new sheet: B5 - Andrew_Johnson for new tenant B5
Creating new sheet: A4 - Dawn_Mason for new tenant A4
Creating new sheet: D2 - Robert_Brown for new tenant D2
Creating new sheet: D6 - Mr._Robert_Elli for new tenant D6
Creating new sheet: C2 - Valerie_Frost for new tenant C2
Creating new sheet: B4 - Lauren_Lee for new tenant B4
Creating new sheet: A2 - Lisa_Andrews for new tenant A2
Creating new sheet: G4 - Jason_Reyes for new tenant G4
Creating new sheet: G2 - Jessica_Tran for new tenant G2
Creating new sheet: G5 - Roger_Mejia for new ten

In [8]:
# --- 10. Save PaymentHistory sheet
payment_history_df.to_excel(writer, sheet_name='PaymentHistory', index=False)

# --- 11. Update ProcessedRefs sheet
# Start from the refs already stored in the workbook (or an empty 'Ref' column
# when that sheet cannot be read), then append this run's refs.
try:
    refs_df = pd.read_excel(SPREADSHEET_FILE, sheet_name='ProcessedRefs')
except Exception:
    refs_df = pd.DataFrame({'Ref': []})

if new_refs:
    updated_refs = pd.concat(
        [refs_df, pd.DataFrame({'Ref': new_refs})],
        ignore_index=True,
    )
    updated_refs.to_excel(writer, sheet_name='ProcessedRefs', index=False)
    updates_log.append(f"ProcessedRefs updated with {len(new_refs)} new refs.")

# Closing the writer is what saves the workbook.
writer.close()

print("\n--- Processing Summary ---")
for entry in updates_log:
    print(entry)

print("\nUpdates per tenant sheet:")
for sheet_title in updates_per_sheet:
    print(f"{sheet_title}: {updates_per_sheet[sheet_title]} payments appended")


--- Processing Summary ---
Created new sheet: A5 - Misty_Kim
Logged payment for A5 - Ref NEYLBQ84MS
Logged payment for D4 - Ref 6X7SW0AA0C
Created new sheet: C5 - Susan_Cook
Logged payment for C5 - Ref TM9QKBFOT8
Logged payment for C5 - Ref IWH28PEGRQ
Created new sheet: D1 - Michael_Thomas
Logged payment for D1 - Ref WZC00PLXZD
Logged payment for C3 - Ref TME690D3J8
Created new sheet: E3 - Todd_Harris
Logged payment for E3 - Ref CQRTH2ABAE
Created new sheet: C1 - Grace_Huffman
Logged payment for C1 - Ref Z9DU3125QG
Logged payment for G1 - Ref D2JTSCXT25
Created new sheet: C4 - Bobby_Alvarez
Logged payment for C4 - Ref CMBGLCPABB
Logged payment for C1 - Ref HS3RFPZAT6
Logged payment for A5 - Ref 2OL2PNJB04
Logged payment for B3 - Ref LN9L8KDPWH
Logged payment for C4 - Ref 6JQJAW0CEX
Logged payment for E3 - Ref QKI8R6S5FK
Created new sheet: A3 - Bradley_Turner
Logged payment for A3 - Ref WPF5K5G4YC
Logged payment for G6 - Ref 6XK8J48LZV
Created new sheet: B6 - Eric_Pruitt
Logged payment

## PROTOTYPE

In [None]:
import re
import os
import pickle
import time
import schedule
import base64
from googleapiclient.discovery import build
from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
import gspread
from google.oauth2.service_account import Credentials

# Define API scopes for Gmail and Google Sheets
GMAIL_SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
GSHEETS_SCOPES = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive']

# ------------------ AUTHENTICATION SECTION ------------------

# Authenticate with Gmail API and return service object
def authenticate_gmail():
    """Build a Gmail API service object, reusing a cached token when possible.

    Credentials are loaded from 'token.pickle' if that file exists. When they
    are missing or not valid they are refreshed (if expired and a refresh token
    is present) or obtained through the local-server OAuth flow using
    'credentials.json'; the resulting credentials are then written back to
    'token.pickle'.

    Returns:
        The object returned by build('gmail', 'v1', credentials=...).
    """
    token_path = 'token.pickle'

    creds = None
    if os.path.exists(token_path):
        # NOTE(review): pickle.load can execute arbitrary code; only safe while
        # token.pickle is a trusted, locally written file.
        with open(token_path, 'rb') as token_file:
            creds = pickle.load(token_file)

    # Cached credentials are still usable: nothing to refresh or save.
    if creds and creds.valid:
        return build('gmail', 'v1', credentials=creds)

    can_refresh = creds and creds.expired and creds.refresh_token
    if can_refresh:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file('credentials.json', GMAIL_SCOPES)
        creds = flow.run_local_server(port=0)

    # Persist the new or refreshed credentials for the next run.
    with open(token_path, 'wb') as token_file:
        pickle.dump(creds, token_file)

    return build('gmail', 'v1', credentials=creds)

# ------------------ EMAIL FETCHING & PARSING ------------------

# Fetch recent NCBA transaction emails from Gmail inbox
def fetch_payment_emails(service, query=None, max_results=10):
    """Return the snippet text of recent messages matching a Gmail search.

    Args:
        service: Gmail API service object (as returned by authenticate_gmail).
        query: Gmail search string. Defaults to the NCBA transaction-status
            notifications this bot was written for.
        max_results: Maximum number of messages requested from the list call.

    Returns:
        A list of snippet strings, one per matching message that has a
        non-empty snippet. Empty when nothing matches.
    """
    if query is None:
        query = 'from:ncbacustomer@ncbagroup.com subject:"NCBA TRANSACTIONS STATUS UPDATE"'

    results = service.users().messages().list(
        userId='me',
        q=query,
        maxResults=max_results
    ).execute()

    email_data = []
    for msg in results.get('messages', []):
        msg_content = service.users().messages().get(userId='me', id=msg['id']).execute()
        # Only the preview text is used; a message without one has nothing
        # to parse, so skip it instead of raising KeyError.
        snippet = msg_content.get('snippet')
        if snippet:
            email_data.append(snippet)

    return email_data

# Extract payment details from email snippet using regex
def extract_payment_info(email_body):
    """Extract payment details from a payment-notification text.

    Args:
        email_body: Notification text (here, the Gmail snippet).

    Returns:
        A dict with keys 'Amount' (float), 'AccountCode', 'Payer', 'Phone',
        'Date' and 'Ref', or None when the text is empty or does not match.
    """
    if not email_body:
        return None

    pattern = (
        # Escaped dot: the amount must contain a real decimal point.
        r'payment of KES ([\d,]+\.\d{2}) '
        # The account is written as "PAYLEMAIYAN #E5" (with a space), which a
        # bare [\w#]+ cannot match; allow an optional "<space>#<code>" tail.
        r'for account: ([\w#]+(?:\s*#?\s*\w+)?) '
        r'has been received from (.+?) (\d{3}\*\*\*\*\d{3}) '
        r'on (\d{2}/\d{2}/\d{4} \d{1,2}:\d{2} [AP]M)\. '
        r'M-Pesa Ref: (\w+)'
    )
    match = re.search(pattern, email_body)
    if not match:
        return None

    amount_text, account, payer_name, phone, payment_date, mpesa_ref = match.groups()
    return {
        'Amount': float(amount_text.replace(',', '')),
        # Keep only the code after the last '#' or space: "PAYLEMAIYAN #E5" -> "E5"
        'AccountCode': re.split(r'[#\s]+', account)[-1],
        'Payer': payer_name,
        'Phone': phone,
        'Date': payment_date,
        'Ref': mpesa_ref
    }

# ------------------ GOOGLE SHEETS INTERACTION ------------------

# Check if MPESA Ref has already been processed (deduplication)
def is_ref_processed(ref, gc):
    """Report whether `ref` is already listed in the 'ProcessedRefs' worksheet.

    Opens the rent-tracking spreadsheet through `gc`. If the 'ProcessedRefs'
    worksheet does not exist it is created with a 'Ref' header row first.

    Returns:
        True if `ref` equals one of the values in the worksheet's first column.
    """
    spreadsheet = gc.open('2025 RENT TRACKING - Lemaiyan Heights')
    try:
        refs_sheet = spreadsheet.worksheet('ProcessedRefs')
    except gspread.exceptions.WorksheetNotFound:
        # First run: create the sheet and give it its header row.
        refs_sheet = spreadsheet.add_worksheet(title='ProcessedRefs', rows="1000", cols="1")
        refs_sheet.append_row(['Ref'])

    return ref in refs_sheet.col_values(1)

# Append new payment row to tenant sheet and log MPESA Ref
def update_google_sheet(payment_data, gc):
    """Append a payment row to the tenant's worksheet and record its reference.

    The tenant worksheet is the first one whose title contains the account
    code as a whole token (so "A1" matches "A1 - John" but not "A10 - Jane"),
    ignoring case. Worksheets whose title contains 'ProcessedRefs' are never
    treated as tenant sheets.

    Args:
        payment_data: Dict with keys 'Date', 'Amount', 'Ref', 'Payer', 'Phone'
            and 'AccountCode'.
        gc: Authorised gspread client.

    Returns:
        None. Prints a message and writes nothing when no worksheet matches.
    """
    sh = gc.open('2025 RENT TRACKING - Lemaiyan Heights')
    account_code = str(payment_data['AccountCode']).strip()

    tenant_ws = None
    if account_code:  # an empty code would otherwise "match" every title
        # The code must not be glued to other letters or digits on either side.
        code_as_token = re.compile(
            r'(?<![A-Za-z0-9])' + re.escape(account_code) + r'(?![A-Za-z0-9])',
            re.IGNORECASE,
        )
        for worksheet in sh.worksheets():
            if 'ProcessedRefs' in worksheet.title:
                continue
            if code_as_token.search(worksheet.title):
                tenant_ws = worksheet
                break

    if tenant_ws is None:
        print(f"No sheet found for Account Code: {payment_data['AccountCode']}")
        return

    # Structure of the new row to be appended
    new_row = [
        payment_data['Date'],
        payment_data['Amount'],
        payment_data['Ref'],
        payment_data['Payer'],
        payment_data['Phone'],
        'MPESA Payment'
    ]
    # Append the payment row to the tenant's sheet
    tenant_ws.append_row(new_row)
    print(f"Payment for {payment_data['AccountCode']} logged successfully.")

    # Also log the MPESA Ref to ProcessedRefs to avoid future duplicates
    processed_refs_ws = sh.worksheet('ProcessedRefs')
    processed_refs_ws.append_row([payment_data['Ref']])

# ------------------ BOT ORCHESTRATION ------------------

# Main bot function to execute the workflow
def bot_task():
    """Run one pass of the bot: fetch payment emails and log any new payments.

    Authenticates against Gmail and Google Sheets, then for each fetched email
    that parses successfully either logs it to the spreadsheet or reports it as
    a duplicate. Emails that cannot be parsed are skipped without a message.
    """
    print("Running Rent Payment Bot Task...")

    # Gmail uses the cached OAuth token; Sheets uses the service-account key.
    gmail_service = authenticate_gmail()
    sheets_creds = Credentials.from_service_account_file('service_account.json', scopes=GSHEETS_SCOPES)
    gc = gspread.authorize(sheets_creds)

    for email_body in fetch_payment_emails(gmail_service):
        payment_info = extract_payment_info(email_body)
        if not payment_info:
            continue
        if is_ref_processed(payment_info['Ref'], gc):
            print(f"Duplicate detected: Ref {payment_info['Ref']} already processed.")
            continue
        update_google_sheet(payment_info, gc)
# ------------------ SCHEDULER LOOP ------------------

# Schedule bot to run every 5 minutes
# Registered at module level, so the job is queued as soon as this code runs,
# before (and independently of) the __main__ check below.
schedule.every(5).minutes.do(bot_task)

# Infinite loop to keep bot running
# NOTE: this loop never returns; when executed inside a notebook cell it blocks
# the kernel until it is interrupted.
if __name__ == '__main__':
    print("Starting RPA Bot Scheduler...")
    while True:
        # Wake up every 10 seconds and run whatever jobs are due.
        schedule.run_pending()
        time.sleep(10)


## DEPLOYMENT CODE

In [None]:
import streamlit as st
from google_auth_oauthlib.flow import Flow
from googleapiclient.discovery import build
import gspread
from google.oauth2.credentials import Credentials
import pandas as pd
import re
import os
import pickle

# --- CONFIGURATION ---
CLIENT_SECRETS_FILE = 'client_secret.json'  # Download this from Google Cloud (OAuth Client ID)
# OAuth scopes requested from the user: read-only Gmail, plus Sheets and Drive.
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly',
          'https://www.googleapis.com/auth/spreadsheets',
          'https://www.googleapis.com/auth/drive']

# Title of the Google Sheets workbook opened by update_google_sheet().
SPREADSHEET_NAME = '2025 RENT TRACKING - Lemaiyan Heights'

# --- SESSION STATE ---
# Make sure the 'credentials' slot exists so the main flow can test it; it is
# filled in by authenticate_user() after a successful login.
if 'credentials' not in st.session_state:
    st.session_state.credentials = None

# --- AUTHENTICATION FUNCTION ---
def authenticate_user():
    """Render the Google sign-in step and store credentials on success.

    Shows an authorization link and a text box. The user pastes the URL they
    were redirected to (or just the authorization code); the code is extracted,
    exchanged for a token, and the resulting credentials are saved in
    st.session_state.credentials.

    Returns:
        None. Shows st.error and stores nothing when no authorization code can
        be found in the pasted text.
    """
    # Function-scope import keeps this fix self-contained (stdlib only).
    from urllib.parse import parse_qs, urlparse

    flow = Flow.from_client_secrets_file(
        CLIENT_SECRETS_FILE,
        scopes=SCOPES,
        redirect_uri='http://localhost:8501/'
    )
    auth_url, _ = flow.authorization_url(prompt='consent')

    st.write(f"[Click here to authorize access]({auth_url})")

    code = st.text_input('Paste the full redirect URL after authentication here:')
    if not code:
        return

    pasted = code.strip()
    # parse_qs percent-decodes values, so a code shown as "4%2F0A..." in the
    # redirect URL is sent to Google as "4/0A...". Slicing the raw string
    # left it encoded and raised IndexError when "code=" was absent.
    query_params = parse_qs(urlparse(pasted).query)
    if query_params.get('code'):
        parsed_code = query_params['code'][0]
    elif query_params.get('error'):
        st.error(f"Authorization was not granted: {query_params['error'][0]}")
        return
    elif '://' not in pasted and '=' not in pasted:
        # Looks like the bare authorization code rather than a URL.
        parsed_code = pasted
    else:
        st.error("No authorization code found in the pasted text. "
                 "Paste the complete URL you were redirected to.")
        return

    flow.fetch_token(code=parsed_code)
    creds = flow.credentials
    st.session_state.credentials = creds
    st.success('Authentication successful!')

# --- EMAIL PARSING LOGIC ---
def fetch_payment_emails(creds):
    """Return the snippet text of up to 10 matching NCBA notification emails.

    Args:
        creds: Google credentials used to build the Gmail API client.

    Returns:
        A list of snippet strings, one per message returned by the search
        (empty when there are no matches).
    """
    service = build('gmail', 'v1', credentials=creds)
    listing = service.users().messages().list(
        userId='me',
        q='from:ncbacustomer@ncbagroup.com subject:"NCBA TRANSACTIONS STATUS UPDATE"',
        maxResults=10
    ).execute()

    # The list call returns message references only; fetch each message and
    # keep its preview text.
    return [
        service.users().messages().get(userId='me', id=msg['id']).execute()['snippet']
        for msg in listing.get('messages', [])
    ]

# --- PAYMENT DATA EXTRACTION ---
def extract_payment_info(email_body):
    """Extract payment details from a payment-notification text.

    Args:
        email_body: Notification text (here, the Gmail snippet).

    Returns:
        A dict with keys 'Amount' (float), 'AccountCode', 'Payer', 'Phone',
        'Date' and 'Ref', or None when the text is empty or does not match.
    """
    if not email_body:
        return None

    pattern = (
        # Escaped dot: the amount must contain a real decimal point.
        r'payment of KES ([\d,]+\.\d{2}) '
        # Accept "PAYLEMAIYAN #E5" (space before '#') as well as "PAYLEMAIYAN#E5";
        # a bare [\w#]+ stops at the space and the whole match fails.
        r'for account: ([\w#]+(?:\s*#?\s*\w+)?) '
        r'has been received from (.+?) (\d{3}\*\*\*\*\d{3}) '
        r'on (\d{2}/\d{2}/\d{4} \d{1,2}:\d{2} [AP]M)\. '
        r'M-Pesa Ref: (\w+)'
    )
    match = re.search(pattern, email_body)
    if not match:
        return None

    amount_text, account, payer_name, phone, payment_date, mpesa_ref = match.groups()
    return {
        'Amount': float(amount_text.replace(',', '')),
        # Keep only the code after the last '#' or space.
        'AccountCode': re.split(r'[#\s]+', account)[-1],
        'Payer': payer_name,
        'Phone': phone,
        'Date': payment_date,
        'Ref': mpesa_ref
    }

# --- GOOGLE SHEETS UPDATE FUNCTION ---
def update_google_sheet(creds, payment_data):
    """Log one payment to its tenant worksheet unless its reference was seen.

    The 'ProcessedRefs' worksheet is created (with a 'Ref' header) when
    missing. If the payment's reference is already in its first column the
    payment is skipped. Otherwise the row is appended to the first worksheet
    whose title contains the account code as a whole token (so "A1" matches
    "A1 - John" but not "A10 - Jane"), ignoring case, and the reference is
    recorded.

    Args:
        creds: Google credentials used to authorise gspread.
        payment_data: Dict with keys 'Date', 'Amount', 'Ref', 'Payer', 'Phone'
            and 'AccountCode'.

    Returns:
        None. Outcomes are reported through Streamlit messages.
    """
    gc = gspread.authorize(creds)
    sh = gc.open(SPREADSHEET_NAME)

    # Check or create 'ProcessedRefs' sheet
    try:
        refs_ws = sh.worksheet('ProcessedRefs')
    except gspread.exceptions.WorksheetNotFound:
        refs_ws = sh.add_worksheet(title='ProcessedRefs', rows="1000", cols="1")
        refs_ws.append_row(['Ref'])

    processed_refs = refs_ws.col_values(1)
    if payment_data['Ref'] in processed_refs:
        st.warning(f"Ref {payment_data['Ref']} already processed. Skipping.")
        return

    # Find Tenant Sheet
    account_code = str(payment_data['AccountCode']).strip()
    tenant_ws = None
    if account_code:  # an empty code would otherwise "match" every title
        # The code must not be glued to other letters or digits on either side.
        code_as_token = re.compile(
            r'(?<![A-Za-z0-9])' + re.escape(account_code) + r'(?![A-Za-z0-9])',
            re.IGNORECASE,
        )
        for ws in sh.worksheets():
            if 'ProcessedRefs' in ws.title:
                continue
            if code_as_token.search(ws.title):
                tenant_ws = ws
                break

    if tenant_ws:
        new_row = [
            payment_data['Date'],
            payment_data['Amount'],
            payment_data['Ref'],
            payment_data['Payer'],
            payment_data['Phone'],
            'MPESA Payment'
        ]
        tenant_ws.append_row(new_row)
        refs_ws.append_row([payment_data['Ref']])
        st.success(f"Payment logged for {payment_data['AccountCode']} successfully.")
    else:
        st.error(f"No matching tenant sheet found for {payment_data['AccountCode']}")

# --- MAIN APP LOGIC ---
# Page flow: signed-in users get the "run" button, everyone else gets the
# Google sign-in step.
st.title("🏠 Lemaiyan Heights Rent Automation Bot")

if st.session_state.credentials:
    st.success("You're authenticated!")

    if st.button("🚀 Run Payment Bot"):
        st.info("Fetching latest payment emails...")
        emails = fetch_payment_emails(st.session_state.credentials)

        if emails:
            # Parse each email and log it; report the ones that do not parse.
            for email_body in emails:
                payment_info = extract_payment_info(email_body)
                if not payment_info:
                    st.warning("Could not parse email. Possible format mismatch.")
                else:
                    update_google_sheet(st.session_state.credentials, payment_info)
        else:
            st.warning("No new payment emails found.")
else:
    st.header("🔑 Authenticate with Google")
    authenticate_user()
