# LEMAIYAN HEIGHTS RENT AUTOMATION BOT (PROOF OF CONCEPT, PROTOTYPE, DEPLOYMENT)

## PROOF OF CONCEPT

* Random generated dataset to simulate results

In [1]:
import random
from faker import Faker
from pathlib import Path
import string

fake = Faker()

# Define possible account codes (A1-A6, B1-B6, ..., G1-G6)
accounts = [f"{l}{n}" for l in "ABCDEFG" for n in range(1, 7)]

email_template = ("Dear Customer, your payment of KES {amount} for account: PAYLEMAIYAN #{code} "
                  "has been received from {name} {phone} on {date_time}. "
                  "M-Pesa Ref: {mpesa_ref}")
def random_ref_code(length=10):
    return ''.join(random.choices(string.ascii_uppercase + string.digits, k=length))
dummy_emails = []
for _ in range(200):
    amount = f"{random.randint(5, 20) * 1000}.00"
    code = random.choice(accounts)
    name = fake.name()
    phone = f"{random.randint(700, 799)}****{random.randint(100,999)}"
    date_time = fake.date_time_this_year().strftime('%d/%m/%Y %I:%M %p')
    mpesa_ref = random_ref_code(10)
    email_text = email_template.format(
        amount=amount,
        code=code,
        name=name,
        phone=phone,
        date_time=date_time,
        mpesa_ref=mpesa_ref
    )
    dummy_emails.append(email_text)

# Save to data/dummy_emails_200.txt
out_path = Path("data/dummy_emails_200.txt")
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_text("\n\n".join(dummy_emails))

print(f"Created {len(dummy_emails)} dummy emails in {out_path}")
print("Sample email:\n", dummy_emails[0])

Created 200 dummy emails in data\dummy_emails_200.txt
Sample email:
 Dear Customer, your payment of KES 14000.00 for account: PAYLEMAIYAN #C3 has been received from Dawn Davis 774****102 on 05/07/2025 03:11 AM. M-Pesa Ref: EN2S5W1K15


* Logic engine

In [13]:
# --------------------------------------------------------
# Reads MPESA email notifications from dummy_emails.txt,
# Parses payment info, updates dummy_rent_tracker.xlsx,
# Avoids double-logging by checking ProcessedRefs sheet.

import pandas as pd
import re
from pathlib import Path
from openpyxl import load_workbook
import warnings
warnings.filterwarnings('ignore')

# --- CONFIGURATION ---
DATA_DIR = Path('data')
EMAIL_FILE = DATA_DIR / 'dummy_emails_200.txt'
SPREADSHEET_FILE = DATA_DIR / 'dummy_rent_tracker.xlsx'

# --- 1. Load Dummy Emails ---
with open(EMAIL_FILE, 'r') as f:
    email_texts = f.read().split('\n\n')

print(f"Loaded {len(email_texts)} emails.")

Loaded 200 emails.


In [3]:
email_texts[22]

'Dear Customer, your payment of KES 6000.00 for account: PAYLEMAIYAN #D2 has been received from Sean Watson 772****347 on 06/08/2025 12:37 PM. M-Pesa Ref: P3H9WOYU37'

In [4]:
# --- 2. Load Workbook and All Sheet Names ---
wb = load_workbook(SPREADSHEET_FILE)
sheet_names = wb.sheetnames

# --- 3. Load ProcessedRefs (deduplication) ---
try:
    processed_refs_df = pd.read_excel(SPREADSHEET_FILE, sheet_name='ProcessedRefs')
    processed_refs = set(str(ref).strip().upper() for ref in processed_refs_df['Ref'] if pd.notna(ref))
except Exception:
    processed_refs = set()
    print("ProcessedRefs sheet is empty or missing. Will create it.")

print(f"Found {len(processed_refs)} previously processed refs.")

Found 1050 previously processed refs.


In [5]:
# --- 4. Regex Parser Function ---
def extract_payment_info(email_body):
    pattern = (
        r'payment of KES ([\d,]+\.\d{2}) '
        r'for account: PAYLEMAIYAN\s*#?\s*([A-Za-z]\d{1,2})'
        r' has been received from (.+?) '
        r'(.{1,13}) '
        r'on (\d{2}/\d{2}/\d{4} \d{1,2}:\d{2} [APM]{2})\. '
        r'M-Pesa Ref: ([\w\d]+)'
    )
    match = re.search(pattern, email_body, flags=re.IGNORECASE)
    if match:
        return {
            'Amount': float(match.group(1).replace(',', '').strip()),
            'AccountCode': match.group(2).strip().upper(),
            'Payer': match.group(3).strip(),
            'PhoneOrID': match.group(4).strip(),
            'Date': match.group(5).strip(),
            'Ref': match.group(6).strip().upper(),
        }
    return None

In [6]:
# --- 5. Process Emails, Update or Create Sheets ---


updates_log = []
new_refs = []
updates_per_sheet = {}

# We'll use openpyxl to add new sheets if needed
wb = load_workbook(SPREADSHEET_FILE)
writer = pd.ExcelWriter(SPREADSHEET_FILE, engine='openpyxl', mode='a', if_sheet_exists='overlay')

# Loading a Master payments file
try:
    payment_history_df = pd.read_excel(SPREADSHEET_FILE, sheet_name='PaymentHistory')
except Exception:
    payment_history_df = pd.DataFrame(columns=[
        'Date', 'Amount', 'Ref', 'Payer', 'PhoneOrID', 'Payment Mode', 'AccountCode', 'TenantSheet'
    ])


for email in email_texts:
    payment_data = extract_payment_info(email)
    if not payment_data:
        updates_log.append("Skipped email: Could not parse payment info.")
        continue

    ref = payment_data['Ref'].upper().strip()
    if ref in processed_refs:
        updates_log.append(f"Duplicate ignored (Ref {ref})")
        continue

    account_code = payment_data['AccountCode']
    payer_name = payment_data['Payer'].replace(" ", "_")[:15]
    # Try to match an existing tenant sheet
    target_sheet = None
    for s in sheet_names:
        # Take just the code part from the sheet name
        sheet_token = s.split()[0].replace('-', '').upper().strip()
        if account_code == sheet_token and 'PROCESSEDREFS' not in s.upper() and 'PAYMENTHISTORY' not in s.upper():
            target_sheet = s
            break

    # --- 7. If no sheet found, CREATE it ---
    if target_sheet is None:
        target_sheet = f"{account_code} - {payer_name if payer_name else 'AutoAdded'}"
        print(f"Creating new sheet: {target_sheet} for new tenant {account_code}")
        new_tenant_df = pd.DataFrame(columns=[
            'Date', 'Amount', 'Ref', 'Payer', 'PhoneOrID', 'Payment Mode'
        ])
        new_tenant_df.to_excel(writer, sheet_name=target_sheet, index=False)
        updates_log.append(f"Created new sheet: {target_sheet}")
        sheet_names.append(target_sheet)  # So we don't create it twice

    # --- 8. Append payment to tenant sheet ---
    try:
        df = pd.read_excel(SPREADSHEET_FILE, sheet_name=target_sheet)
    except Exception:
        df = pd.DataFrame(columns=['Date', 'Amount', 'Ref', 'Payer', 'PhoneOrID', 'Payment Mode'])

    new_row = pd.DataFrame({
        'Date': [payment_data['Date']],
        'Amount': [payment_data['Amount']],
        'Ref': [payment_data['Ref']],
        'Payer': [payment_data['Payer']],
        'PhoneOrID': [payment_data['PhoneOrID']],
        'Payment Mode': ['MPESA Payment'],
    })
    df = pd.concat([df, new_row], ignore_index=True)
    df.to_excel(writer, sheet_name=target_sheet, index=False)
    updates_log.append(f"Logged payment for {account_code} - Ref {ref}")
    new_refs.append(ref)
    updates_per_sheet.setdefault(target_sheet, 0)
    updates_per_sheet[target_sheet] += 1

     # --- 9. Add to PaymentHistory sheet ---
    new_hist_row = new_row.copy()
    new_hist_row['AccountCode'] = account_code
    new_hist_row['TenantSheet'] = target_sheet
    payment_history_df = pd.concat([payment_history_df, new_hist_row], ignore_index=True)

In [7]:
# --- 10. Save PaymentHistory sheet
payment_history_df.to_excel(writer, sheet_name='PaymentHistory', index=False)

# --- 11. Update ProcessedRefs sheet
try:
    refs_df = pd.read_excel(SPREADSHEET_FILE, sheet_name='ProcessedRefs')
except Exception:
    refs_df = pd.DataFrame({'Ref': []})
if new_refs:
    new_refs_df = pd.DataFrame({'Ref': new_refs})
    updated_refs = pd.concat([refs_df, new_refs_df], ignore_index=True)
    updated_refs.to_excel(writer, sheet_name='ProcessedRefs', index=False)
    updates_log.append(f"ProcessedRefs updated with {len(new_refs)} new refs.")

writer.close()

print("\n--- Processing Summary ---")
for log in updates_log:
    print(log)
print("\nUpdates per tenant sheet:")
for k, v in updates_per_sheet.items():
    print(f"{k}: {v} payments appended")


--- Processing Summary ---
Logged payment for C3 - Ref EN2S5W1K15
Logged payment for E3 - Ref TDPBK94WB3
Logged payment for C3 - Ref 9R1ZXBXIEJ
Logged payment for E1 - Ref 1A2SAZ6ZXY
Logged payment for E4 - Ref MALRGW9Q2J
Logged payment for E6 - Ref OUHOP1L40S
Logged payment for C5 - Ref 6UC0B4NYFU
Logged payment for D2 - Ref WNL2SRV9N6
Logged payment for B1 - Ref 5TI40XT1D4
Logged payment for E1 - Ref 2C3KQZ7RV0
Logged payment for C6 - Ref Q58O0PQH3Y
Logged payment for C1 - Ref V4NRV9PA11
Logged payment for B5 - Ref JBWDP1V1PC
Logged payment for B3 - Ref AT43F9BAA6
Logged payment for A5 - Ref EHAUX3TWV0
Logged payment for A3 - Ref FSYAFA45RE
Logged payment for C2 - Ref X9NNQ4GARU
Logged payment for F3 - Ref LDXV7I787L
Logged payment for F4 - Ref VDYRMLQQZF
Logged payment for A5 - Ref JJAUPU41ZN
Logged payment for B1 - Ref L6IAV493B6
Logged payment for B5 - Ref J7BQ1QBTCE
Logged payment for D2 - Ref P3H9WOYU37
Logged payment for G3 - Ref 09W6DXTO2R
Logged payment for B5 - Ref WL1BPVYS

## PROTOTYPE

* Intergrating the proof of concept to the Google Platform
* First is to create a dummy account on gmail and populate it with dummy emails as above.

In [17]:
# ---------- SEND 200 TEST EMAILS INTO SANDBOX GMAIL ----------
import base64, random, string, time, datetime
from faker import Faker
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from email.mime.text import MIMEText

fake = Faker()
SCOPES = SCOPES = [
    'https://www.googleapis.com/auth/gmail.send',     # to inject test mail
    'https://www.googleapis.com/auth/gmail.readonly',
    'https://www.googleapis.com/auth/gmail.modify' # to call getProfile
]
flow   = InstalledAppFlow.from_client_secrets_file('bot_secret.json', SCOPES)
creds  = flow.run_local_server(port=0)
gmail  = build('gmail', 'v1', credentials=creds)
user_email = gmail.users().getProfile(userId='me').execute()['emailAddress']

accounts = [f"{l}{n}" for l in "ABCDEFG" for n in range(1,7)]
def rand_ref(): return ''.join(random.choices(string.ascii_uppercase+string.digits, k=10))

def make_msg(text):
    m = MIMEText(text)
    m['From'] = 'NCB <ncbcustomer@ncbgroup.com>'
    m['To']   = user_email
    m['Subject'] = 'NCBA TRANSACTIONS STATUS UPDATE'
    return {'raw': base64.urlsafe_b64encode(m.as_bytes()).decode()}

for _ in range(200):
    code  = random.choice(accounts)
    code_fragment = f"#{code}" if random.random()>.4 else code   # hash optional
    amt   = f"{random.randint(5,20)*1000:,}.00"
    name  = fake.name().upper()
    phone = f"{random.randint(700,799)}****{random.randint(100,999)}"
    dt    = fake.date_time_this_year().strftime('%d/%m/%Y %I:%M %p')
    ref   = rand_ref()
    body  = (f"Your M-Pesa payment of KES {amt} for account: PAYLEMAIYAN {code_fragment} "
             f"has been received from {name} {phone} on {dt}. M-Pesa Ref: {ref}. NCBA, Go for it.")
    gmail.users().messages().send(userId='me', body=make_msg(body)).execute()
    time.sleep(0.1)

print("✅ 200 dummy messages delivered to", user_email)

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=899105285450-50tdk35cnnrrich3nlr0d80kdp2qeovr.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A50902%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fgmail.send+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fgmail.readonly+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fgmail.modify&state=Ry2FKYRbX92meiokEjcApt49P6zyMh&access_type=offline


HttpError: <HttpError 429 when requesting https://gmail.googleapis.com/gmail/v1/users/me/messages/send?alt=json returned "User-rate limit exceeded.  Retry after 2025-08-07T14:26:16.318Z (Mail sending)". Details: "[{'message': 'User-rate limit exceeded.  Retry after 2025-08-07T14:26:16.318Z (Mail sending)', 'domain': 'global', 'reason': 'rateLimitExceeded'}]">

In [11]:
# ---------- ONE‑TIME MIGRATION EXCEL → GOOGLE SHEETS ----------
import pandas as pd, gspread, openpyxl
from google.oauth2.service_account import Credentials

SRC_EXCEL = 'data/2025 RENT TRACKING - Lemaiyan Heights.xlsx'  # original data file
DEST_SHEET = 'RENT TRACKING-Lemaiyan Heights' # New file in google sheets

creds = Credentials.from_service_account_file('bot_service.json',
    scopes=['https://www.googleapis.com/auth/spreadsheets',
            'https://www.googleapis.com/auth/drive'])
gc = gspread.authorize(creds)
sh = gc.open(DEST_SHEET)

wb = openpyxl.load_workbook(SRC_EXCEL, data_only=True)
for ws in wb.worksheets:
    title = ws.title[:99]  # Sheets title limit
    if title in [s.title for s in sh.worksheets()]:
        sheet = sh.worksheet(title)
    else:
        sheet = sh.add_worksheet(title, rows=2000, cols=20)

    data = [[str(cell) if cell is not None else '' for cell in row] for row in ws.iter_rows(values_only=True)]
    
#   Update the sheets to populate
    sheet.update(values=data, range_name='A1', value_input_option='USER_ENTERED')
    time.sleep(2)  # Wait 2 seconds per write

    # freeze first row and bold headers
    sheet.format('1:1', {'textFormat': {'bold': True}})
    sheet.freeze(rows=1)

print("Bootstrap complete – Google Sheet mirrors the Excel file.")


Bootstrap complete – Google Sheet mirrors the Excel file.


In [None]:
#Parameters

SCOPES = SCOPES = [
    'https://www.googleapis.com/auth/gmail.send',     # to inject test mail
    'https://www.googleapis.com/auth/gmail.readonly',
    'https://www.googleapis.com/auth/gmail.modify' # to call getProfile
]

GMAIL_Q       = 'subject:"NCBA TRANSACTIONS STATUS UPDATE" is:unread'
MAX_PHONE_LEN = 13
PAYMENT_COLS  = ['Date Paid','Amount Paid','REF Number','Payer','Phone','Comments']



# ---- FLEXIBLE EMAIL PARSER (hash optional, case-insensitive code) ----
PATTERN = re.compile(
    rf'payment of KES ([\d,]+\.\d{{2}}) '
    rf'for account: PAYLEMAIYAN\s*#?\s*([A-Za-z]\d{{1,2}})'
    rf' has been received from (.+?) '
    rf'(.{{1,{MAX_PHONE_LEN}}}) '
    rf'on (\d{{2}}/\d{{2}}/\d{{4}} \d{{1,2}}:\d{{2}} [APM]{{2}})\. '
    rf'M-Pesa Ref: ([A-Z0-9]{{10}})',
    flags=re.IGNORECASE
)

def parse_email(text:str):
    m = PATTERN.search(text)
    if not m: return None
    amt, code, payer, phone, dt, ref = m.groups()
    return {
        'Date Paid': dt,
        'Amount Paid': float(amt.replace(',', '')),
        'REF Number': ref.upper(),
        'Payer': payer.strip(),
        'Phone': phone.strip(),
        'Comments': 'MPESA Payment',
        'AccountCode': code.upper()
    }

# ---- LOAD/CREATE PROCESSED REFS ----
try:
    refs_ws = sh.worksheet('ProcessedRefs')
except gspread.WorksheetNotFound:
    refs_ws = sh.add_worksheet('ProcessedRefs', rows=2000, cols=1)
if refs_ws.row_count < 2: refs_ws.append_row(['Refs'])
refs = set(r[0].upper() for r in refs_ws.get_all_values()[1:])  # skip header

# ---- GMAIL: GET UNREAD MPESA PAYMENT EMAILS ----
msgs = gmail.users().messages().list(userId='me', q=GMAIL_Q, maxResults=500).execute().get('messages', [])
print(f"🔎 Scanning {len(msgs)} unread NCBA mails…\n")

payments, new_refs, logs = [], [], []
for msg_meta in msgs:
    msg = gmail.users().messages().get(userId='me', id=msg_meta['id']).execute()
    snippet = msg['snippet']
    data = parse_email(snippet)
    if not data:
        logs.append(f"⚠️  Couldn’t parse: {snippet[:60]}…")
        continue
    if data['REF Number'] in refs:
        logs.append(f"🔁 Duplicate ignored: {data['REF Number']}")
        continue
    payments.append(data)
    new_refs.append([data['REF Number']])
    refs.add(data['REF Number'])
    # Mark as read (to prevent re-processing)
    gmail.users().messages().modify(userId='me', id=msg_meta['id'], body={'removeLabelIds': ['UNREAD']}).execute()
    time.sleep(0.15)  # Slow down to avoid hitting Gmail API limits

print(f"✅ Parsed {len(payments)} new payments.\n")

# ---- SHEET UPDATE LOGIC ----
tenant_logs = {}
for p in payments:
    # Try to match an existing sheet by AccountCode
    tenant_sheet = None
    for ws in sh.worksheets():
        # Only match tenant sheets, not meta sheets
        if ws.title.upper().startswith(p['AccountCode']) and 'PROCESSEDREFS' not in ws.title.upper() and 'PAYMENTHISTORY' not in ws.title.upper():
            tenant_sheet = ws
            break
    # If not found, create a new sheet for the tenant
    if not tenant_sheet:
        title = f"{p['AccountCode']} - {p['Payer']}"
        tenant_sheet = sh.add_worksheet(title, rows=1000, cols=10)
        tenant_sheet.update(values=[PAYMENT_COLS], range_name='A1', value_input_option='USER_ENTERED')
        tenant_sheet.format('1:1', {'textFormat': {'bold': True}})
        tenant_sheet.freeze(rows=1)
        logs.append(f"➕ Created new tenant sheet: {title}")

    # Write payment record to the tenant sheet
    row = [p[col] for col in PAYMENT_COLS]
    tenant_sheet.append_row(row, value_input_option='USER_ENTERED')
    logs.append(f"Logged payment for {p['AccountCode']} - Ref {p['REF Number']}")
    tenant_logs.setdefault(tenant_sheet.title, 0)
    tenant_logs[tenant_sheet.title] += 1
    time.sleep(0.3)  # Avoid Sheets write API limits (429 error)

# ---- UPDATE PAYMENT HISTORY SHEET ----
try:
    hist_ws = sh.worksheet('PaymentHistory')
except gspread.WorksheetNotFound:
    hist_ws = sh.add_worksheet('PaymentHistory', rows=2000, cols=12)
    hist_ws.append_row(PAYMENT_COLS + ['AccountCode', 'TenantSheet', 'Month'])
hist_values = hist_ws.get_all_values()
header = hist_values[0] if hist_values else []
existing_hist_refs = set([r[2].upper() for r in hist_values[1:]])  # 'Ref' is third col





🔎 Scanning 51 unread NCBA mails…

✅ Parsed 50 new payments.



AttributeError: module 'datetime' has no attribute 'strptime'

In [24]:
# Add new payments to PaymentHistory with Month
for p in payments:
    if p['REF Number'] in existing_hist_refs:
        continue
    dt = datetime.datetime.strptime(p['Date Paid'], '%d/%m/%Y %I:%M %p')
    month = dt.strftime('%Y-%m')
    row = [p[c] for c in PAYMENT_COLS] + [p['AccountCode'], tenant_sheet.title, month]
    hist_ws.append_row(row, value_input_option='USER_ENTERED')
    time.sleep(0.3)

# ---- GROUPED PAYMENT HISTORY BY MONTH (display in notebook) ----
hist_df = pd.DataFrame(hist_ws.get_all_values()[1:], columns=hist_ws.get_all_values()[0])
if not hist_df.empty:
    hist_df['Amount Paid'] = hist_df['Amount Paid'].astype(float)
    grouped = hist_df.groupby('Month').agg(
        Payments=('REF Number','count'),
        TotalAmount=('Amount Paid','sum')
    ).reset_index().sort_values('Month')
    display(grouped)
else:
    print("No payment history yet.")

# ---- UPDATE PROCESSEDREFS SHEET ----
if new_refs:
    if refs_ws.row_count < len(refs)+10:
        refs_ws.add_rows(1000)
    refs_ws.append_rows(new_refs, value_input_option='RAW')

# ---- LOG SUMMARY ----
print('\n------ BOT LOG ------')
for log in logs:
    print(log)
print('\nPayments per tenant sheet:')
for t, c in tenant_logs.items():
    print(f'  {t}: {c} payments appended')
print('\nGrouped monthly payment summary (also see table above).')


Unnamed: 0,Month,Payments,TotalAmount
0,2025-01,18,288000.0
1,2025-02,8,76000.0
2,2025-03,20,194000.0
3,2025-04,8,104000.0
4,2025-05,10,170000.0
5,2025-06,12,164000.0
6,2025-07,22,262000.0
7,2025-08,2,36000.0



------ BOT LOG ------
⚠️  Couldn’t parse: You have reached a limit for sending mail. Your message was …
Logged payment for G5 - Ref FI1SREN6RH
Logged payment for E1 - Ref G04JAFLSZ1
Logged payment for A4 - Ref IFOKQINW89
Logged payment for E4 - Ref J5RZ6PXW23
Logged payment for E6 - Ref 1RHHVRF43D
Logged payment for B2 - Ref 0Q4D65D9MD
Logged payment for D5 - Ref IG3QRTWVOJ
Logged payment for A5 - Ref MB53F78HKM
Logged payment for D1 - Ref 7R0G7XHQEF
Logged payment for E6 - Ref QD9XW1AMW5
Logged payment for C1 - Ref FJD3Q20AQP
Logged payment for B6 - Ref SLU7SOUXNO
Logged payment for D5 - Ref FNLMEFOX0U
Logged payment for E3 - Ref LCQO8EQZJT
Logged payment for B2 - Ref 8BPHMYRGUI
Logged payment for F3 - Ref 1GBKASXJC3
Logged payment for F2 - Ref 1BGQ2NZVBS
Logged payment for A4 - Ref 26DQI7D352
Logged payment for C5 - Ref Q5RCOERYMK
➕ Created new tenant sheet: G6 - MADELINE RICHARDSON
Logged payment for G6 - Ref 5NUZ2QCONE
Logged payment for B3 - Ref 6ANKA93LR2
Logged payment for C6 -

## DEPLOYMENT CODE

In [None]:
import streamlit as st
from google_auth_oauthlib.flow import Flow
from googleapiclient.discovery import build
import gspread
from google.oauth2.credentials import Credentials
import pandas as pd
import re
import os
import pickle

# --- CONFIGURATION ---
CLIENT_SECRETS_FILE = 'client_secret.json'  # Download this from Google Cloud (OAuth Client ID)
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly',
          'https://www.googleapis.com/auth/spreadsheets',
          'https://www.googleapis.com/auth/drive']

SPREADSHEET_NAME = '2025 RENT TRACKING - Lemaiyan Heights'

# --- SESSION STATE ---
if 'credentials' not in st.session_state:
    st.session_state.credentials = None

# --- AUTHENTICATION FUNCTION ---
def authenticate_user():
    flow = Flow.from_client_secrets_file(
        CLIENT_SECRETS_FILE,
        scopes=SCOPES,
        redirect_uri='http://localhost:8501/'
    )
    auth_url, _ = flow.authorization_url(prompt='consent')

    st.write(f"[Click here to authorize access]({auth_url})")

    code = st.text_input('Paste the full redirect URL after authentication here:')
    if code:
        # Extract the code from URL
        parsed_code = code.split('code=')[1].split('&')[0]
        flow.fetch_token(code=parsed_code)
        creds = flow.credentials
        st.session_state.credentials = creds
        st.success('Authentication successful!')

# --- EMAIL PARSING LOGIC ---
def fetch_payment_emails(creds):
    service = build('gmail', 'v1', credentials=creds)
    results = service.users().messages().list(
        userId='me',
        q='from:ncbacustomer@ncbagroup.com subject:"NCBA TRANSACTIONS STATUS UPDATE"',
        maxResults=10
    ).execute()

    messages = results.get('messages', [])
    email_data = []

    for msg in messages:
        msg_content = service.users().messages().get(userId='me', id=msg['id']).execute()
        snippet = msg_content['snippet']
        email_data.append(snippet)

    return email_data

# --- PAYMENT DATA EXTRACTION ---
def extract_payment_info(email_body):
    pattern = r'payment of KES ([\d,]+.\d{2}) for account: ([\w#]+) has been received from (.+?) (\d{3}\*\*\*\*\d{3}) on (\d{2}/\d{2}/\d{4} \d{1,2}:\d{2} [APM]{2})\. M-Pesa Ref: ([\w\d]+)'
    match = re.search(pattern, email_body)
    if match:
        amount = float(match.group(1).replace(',', ''))
        account_code = match.group(2).split('#')[-1]
        payer_name = match.group(3)
        phone = match.group(4)
        payment_date = match.group(5)
        mpesa_ref = match.group(6)
        return {
            'Amount': amount,
            'AccountCode': account_code,
            'Payer': payer_name,
            'Phone': phone,
            'Date': payment_date,
            'Ref': mpesa_ref
        }
    return None

# --- GOOGLE SHEETS UPDATE FUNCTION ---
def update_google_sheet(creds, payment_data):
    gc = gspread.authorize(creds)
    sh = gc.open(SPREADSHEET_NAME)

    # Check or create 'ProcessedRefs' sheet
    try:
        refs_ws = sh.worksheet('ProcessedRefs')
    except gspread.exceptions.WorksheetNotFound:
        refs_ws = sh.add_worksheet(title='ProcessedRefs', rows="1000", cols="1")
        refs_ws.append_row(['Ref'])

    processed_refs = refs_ws.col_values(1)
    if payment_data['Ref'] in processed_refs:
        st.warning(f"Ref {payment_data['Ref']} already processed. Skipping.")
        return

    # Find Tenant Sheet
    tenant_ws = None
    for ws in sh.worksheets():
        if payment_data['AccountCode'] in ws.title and 'ProcessedRefs' not in ws.title:
            tenant_ws = ws
            break

    if tenant_ws:
        new_row = [
            payment_data['Date'],
            payment_data['Amount'],
            payment_data['Ref'],
            payment_data['Payer'],
            payment_data['Phone'],
            'MPESA Payment'
        ]
        tenant_ws.append_row(new_row)
        refs_ws.append_row([payment_data['Ref']])
        st.success(f"Payment logged for {payment_data['AccountCode']} successfully.")
    else:
        st.error(f"No matching tenant sheet found for {payment_data['AccountCode']}")

# --- MAIN APP LOGIC ---
st.title("🏠 Lemaiyan Heights Rent Automation Bot")

if not st.session_state.credentials:
    st.header("🔑 Authenticate with Google")
    authenticate_user()
else:
    st.success("You're authenticated!")

    if st.button("🚀 Run Payment Bot"):
        st.info("Fetching latest payment emails...")
        emails = fetch_payment_emails(st.session_state.credentials)

        if not emails:
            st.warning("No new payment emails found.")
        else:
            for email_body in emails:
                payment_info = extract_payment_info(email_body)
                if payment_info:
                    update_google_sheet(st.session_state.credentials, payment_info)
                else:
                    st.warning("Could not parse email. Possible format mismatch.")
