<a href="https://colab.research.google.com/github/aaron123908/Module2/blob/main/ReACT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:

import re
import pandas as pd
import random

# -----------------------------
# STAGE 1 — REASON / PLAN
# -----------------------------
# Announce the goal and the four-step plan in a single call; sep="\n" puts
# each message on its own line exactly as separate print() calls would.
print(
    "STAGE 1 — REASON / PLAN",
    "Goal: Generate random contact data and extract valid name-email pairs.",
    "Plan:",
    "1. Randomly generate names and emails.",
    "2. Mix in malformed entries.",
    "3. Extract valid pairs using regex.",
    "4. Remove duplicates and handle edge cases.\n",
    sep="\n",
)


# -----------------------------
# STAGE 2 — ACT (Generate Code)
# -----------------------------

def generate_random_contacts(n=8, seed=None):
    """
    Generate random contact entries with valid and invalid records.

    Each valid entry has the form "First Last - first.last@example.com".
    First and last names are drawn independently (with replacement) from
    fixed lists, so the same entry can appear more than once. Two malformed
    entries are always appended after the valid ones as edge cases.

    Args:
        n: Number of valid entries to generate. Zero or a negative value
            yields only the two malformed entries.
        seed: Optional seed for reproducible output. When None (the
            default) the module-level ``random`` generator is used.

    Returns:
        A single string with one entry per line, joined by newlines.
    """
    first_names = ["Liam", "Olivia", "Noah", "Emma", "Ava", "Elijah", "Sophia", "Mason"]
    last_names = ["Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Martinez"]

    # A private generator makes seeded runs reproducible without reseeding
    # (and thereby disturbing) the global random state.
    rng = random if seed is None else random.Random(seed)

    entries = []

    for _ in range(n):
        first = rng.choice(first_names)
        last = rng.choice(last_names)
        email = f"{first.lower()}.{last.lower()}@example.com"
        entries.append(f"{first} {last} - {email}")

    # Add malformed entries (edge cases): one with a non-email value after
    # the separator, one with no separator or email at all.
    entries.append("Invalid Entry - notanemail")
    entries.append("Random Text Without Email")

    return "\n".join(entries)


def extract_contacts(raw_text):
    """
    Extract name-email pairs from raw text.

    A name is two space-separated words, each an uppercase ASCII letter
    followed by one or more lowercase ASCII letters. The email must appear
    later on the same line, because the lazy ``.*?`` between the two groups
    does not match newlines. Lines without such a pair are ignored.

    Args:
        raw_text: Text to scan, typically one contact per line.

    Returns:
        A DataFrame with columns ['Name', 'Email'], exact duplicate rows
        removed and the index renumbered from 0. An empty DataFrame with
        the same columns is returned (and a message printed) when the input
        is not a non-empty string or when no pair is found.
    """
    columns = ["Name", "Email"]

    # Check the type before the truthiness: evaluating "not raw_text" on
    # objects such as a pandas Series or NumPy array raises ValueError, which
    # would escape instead of producing the documented empty result.
    if not isinstance(raw_text, str) or not raw_text:
        print("Error: Input must be a non-empty string.")
        return pd.DataFrame(columns=columns)

    # Regex pattern for name + email pair
    pattern = r"([A-Z][a-z]+ [A-Z][a-z]+).*?([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})"
    matches = re.findall(pattern, raw_text)

    if not matches:
        print("No valid contacts found.")
        return pd.DataFrame(columns=columns)

    # findall yields (name, email) tuples, one per match, in input order.
    df = pd.DataFrame(matches, columns=columns)

    # Remove exact duplicate rows, keeping the first occurrence.
    return df.drop_duplicates().reset_index(drop=True)


# Reaching this line only means the two helper functions above were defined
# without error; nothing has been executed or validated yet.
print("STAGE 2 — Code generated successfully.\n")


# -----------------------------
# STAGE 3 — RUN
# -----------------------------
print("STAGE 3 — RUN")

# Build the sample input: ten generated entries plus the malformed ones.
random_text = generate_random_contacts(n=10)

# sep="\n" reproduces the blank line between the heading and the data.
print("\nGenerated Raw Data:\n", random_text, sep="\n")


# -----------------------------
# STAGE 4 — OBSERVE
# -----------------------------
print("\nSTAGE 4 — OBSERVE (Extracted Output)")

# Parse the generated text into a DataFrame of name/email pairs. Note that
# extract_contacts may print its own diagnostic message before the table.
df = extract_contacts(random_text)
print(df)


# -----------------------------
# STAGE 5 — FIX CHECK
# -----------------------------
print("\nSTAGE 5 — FIX CHECK")

# Report whether the extraction step produced any rows at all.
print(
    "Extraction successful. Valid contacts extracted and cleaned."
    if not df.empty
    else "No valid data extracted. Check input format."
)

STAGE 1 — REASON / PLAN
Goal: Generate random contact data and extract valid name-email pairs.
Plan:
1. Randomly generate names and emails.
2. Mix in malformed entries.
3. Extract valid pairs using regex.
4. Remove duplicates and handle edge cases.

STAGE 2 — Code generated successfully.

STAGE 3 — RUN

Generated Raw Data:

Noah Johnson - noah.johnson@example.com
Sophia Johnson - sophia.johnson@example.com
Noah Miller - noah.miller@example.com
Liam Williams - liam.williams@example.com
Emma Martinez - emma.martinez@example.com
Emma Davis - emma.davis@example.com
Emma Johnson - emma.johnson@example.com
Mason Garcia - mason.garcia@example.com
Elijah Davis - elijah.davis@example.com
Liam Williams - liam.williams@example.com
Invalid Entry - notanemail
Random Text Without Email

STAGE 4 — OBSERVE (Extracted Output)
             Name                       Email
0    Noah Johnson    noah.johnson@example.com
1  Sophia Johnson  sophia.johnson@example.com
2     Noah Miller     noah.miller@example