# 🎨 AER v7.5 - Access Review Generator

## Production-Ready Notebook | All Stages Fully Implemented

---

### ✨ Key Features
- 🎨 **Apple-style design system** - Clean, modern interface
- 🔗 **Auto file loading** - Seamless stage-to-stage flow
- ✅ **Card-based manual review** - Organized groups with batch actions
- 🌳 **Enhanced org tree** - Toggle to show/hide non-dept-heads (default: hide)
- 📊 **Batch operations** - Select All, Deselect All, Skip All per group

### 📋 Complete Workflow
1. **Stage 1**: AD Authentication & User Download
2. **Stage 1.5**: Organization Tree Builder with Department Heads
3. **Stage 2**: Email/User Validation with Card-Based Review
4. **Stage 3**: Reviewer Assignment

---

**Version:** 7.5  
**Status:** ✅ Production Ready  
**Last Updated:** 2025-02-01

---

### 🚀 Quick Start
1. Run all cells in order
2. Each stage auto-loads output from previous stage
3. Manual review UI appears when validation issues found
4. Final output ready for distribution

---

In [None]:
# === Cell 1: Setup & Engine (v6.5 Directory Sync) ===
import os, sys, logging, glob, io, re, requests, json
import pandas as pd
import ipywidgets as widgets
from datetime import datetime
from dotenv import load_dotenv
from msal import PublicClientApplication
from concurrent.futures import ThreadPoolExecutor
from openpyxl import load_workbook
from openpyxl.worksheet.datavalidation import DataValidation
from openpyxl.utils import get_column_letter
from IPython.display import display, HTML, clear_output

# 1. Paths & Logging
# All Cell 1 artefacts live under output/<YYYY-MM-DD>/create.
today_str = datetime.now().strftime('%Y-%m-%d')
BASE_DIR = os.path.join("output", today_str, "create")
LOG_DIR = os.path.join(BASE_DIR, "logs")
MAPPING_DIR = os.path.join("input", "mapping")
os.makedirs(LOG_DIR, exist_ok=True)

log_file = os.path.join(LOG_DIR, f"aer_create_{today_str}_{datetime.now().strftime('%H%M')}.log")
logger = logging.getLogger("aer")
# Re-running the cell must not stack handlers; close the old ones so the
# previous log file handle is released instead of leaked.
for _old_handler in list(logger.handlers):
    logger.removeHandler(_old_handler)
    _old_handler.close()
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
fh = logging.FileHandler(log_file, encoding="utf-8")
fh.setFormatter(formatter)
logger.addHandler(fh)
logger.addHandler(logging.StreamHandler(sys.stdout))

# 2. Auth & Cache
# `headers` stays empty when authentication is skipped or fails; the lookup
# helpers below treat an empty `headers` as "mock mode".
load_dotenv()
headers = {}
try:
    tid, cid = os.getenv("AZURE_TENANT_ID"), os.getenv("AZURE_CLIENT_ID")
    if tid and cid:
        app = PublicClientApplication(cid, authority=f"https://login.microsoftonline.com/{tid}")
        res = app.acquire_token_interactive(scopes=["User.Read.All"], prompt="select_account")
        if "access_token" in res:
            headers = {"Authorization": f"Bearer {res['access_token']}"}
            logger.info("Authentication Successful")
        else:
            # MSAL reports failures in the result dict rather than raising;
            # surface them so mock mode is never entered silently.
            logger.error(
                f"Authentication failed: {res.get('error')} - {res.get('error_description')}"
            )
    else:
        logger.warning("AZURE_TENANT_ID / AZURE_CLIENT_ID not set - running in mock mode")
except Exception as e:
    logger.error(f"Auth Critical Error: {str(e)}")

ad_cache = {}
ad_directory_cache = {} # v6.5: Stores {name_lower: email} for all active users
# Shared HTTP session with a larger connection pool for Graph lookups.
session = requests.Session()
adapter = requests.adapters.HTTPAdapter(pool_connections=50, pool_maxsize=50)
session.mount('https://', adapter)

def fetch_full_directory():
    """
    v6.5 New Feature:
    Downloads all active users (approx 1600) to build a local Name->Email map.
    This is used to auto-resolve missing emails in the input file.

    Populates the module-level ``ad_directory_cache`` with
    ``{display name (lowercased, stripped): email (lowercased, stripped)}``.
    Does nothing when the cache is already populated. When ``headers`` is
    empty a single mock entry is stored instead of calling the Graph API.
    Returns None.
    """
    global ad_directory_cache
    if ad_directory_cache:
        return  # Already loaded

    if not headers:
        # Mock data for testing
        ad_directory_cache = {"mock user": "mock.user@example.com"}
        return

    # Use the full code point: "\ud83d\udce5" would be two lone surrogates in
    # a Python str, which cannot be encoded to UTF-8 by the log handlers.
    logger.info("\U0001F4E5 Pre-fetching Global Directory (Active Users)...")
    users_map = {}
    # Fetch only active users, get name and mail
    url = "https://graph.microsoft.com/v1.0/users?$filter=accountEnabled eq true&$select=displayName,mail,userPrincipalName&$top=999"
    complete = False

    try:
        while url:
            r = session.get(url, headers=headers, timeout=10)
            if r.status_code != 200:
                logger.error(f"Directory sync failed: {r.status_code}")
                break

            data = r.json()
            for u in data.get('value', []):
                name = u.get('displayName')
                # Prefer mail, fallback to UPN
                email = u.get('mail') or u.get('userPrincipalName')
                if name and email:
                    # Normalize name for better matching (lowercase, strip)
                    users_map[str(name).strip().lower()] = str(email).strip().lower()

            url = data.get('@odata.nextLink')  # Pagination
        else:
            # Loop ended because there was no next page, not because of a break.
            complete = True

        # Best effort: keep whatever pages were fetched, but never report a
        # truncated download as a successful load.
        ad_directory_cache = users_map
        if complete:
            logger.info(f"\u2705 Global Directory Loaded: {len(ad_directory_cache)} users")
        else:
            logger.warning(f"Global Directory incomplete: {len(ad_directory_cache)} users loaded before failure")
    except Exception as e:
        logger.error(f"Directory sync error: {str(e)}")

def fetch_ad(email):
    """ v5.7 Logic: Single user lookup

    Normalizes ``email`` (str, strip, lowercase) and returns a dict with the
    keys ``email``, ``status``, ``name``, ``dept`` and ``active``.

    ``status`` is one of:
      * "Invalid"   - the value does not look like an email address
      * "Mock"      - no auth ``headers`` are available
      * "Found"     - Graph returned the user (HTTP 200)
      * "Not Found" - Graph returned any other status
      * "Error"     - the request or response parsing raised

    Only definitive answers (HTTP 200 and 404) are stored in ``ad_cache``;
    transient failures are logged and left uncached so a later call can retry.
    """
    email = str(email).strip().lower()
    if not re.match(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', email):
        return {"email": email, "status": "Invalid", "name": "N/A", "dept": "N/A", "active": "N/A"}

    if email in ad_cache:
        return ad_cache[email]

    if not headers:
        return {"email": email, "status": "Mock", "name": "User", "dept": "Mock Dept", "active": True}

    try:
        url = f"https://graph.microsoft.com/v1.0/users/{email}?$select=displayName,department,accountEnabled"
        r = session.get(url, headers=headers, timeout=5)
        if r.status_code == 200:
            d = r.json()
            res = {
                "email": email,
                "status": "Found",
                "name": d.get("displayName", "N/A"),
                "dept": d.get("department") or "N/A",
                "active": d.get("accountEnabled", "N/A")
            }
        else:
            res = {"email": email, "status": "Not Found", "name": "N/A", "dept": "N/A", "active": "N/A"}
            if r.status_code != 404:
                # Throttling, expired token, server error, ...: not a real
                # "user does not exist" answer, so do not poison the cache.
                logger.warning(f"AD lookup for {email} returned HTTP {r.status_code}")
                return res
    except Exception as exc:
        # Stay best-effort (never raise to the caller) but do not swallow
        # KeyboardInterrupt/SystemExit and do not hide the cause.
        logger.warning(f"AD lookup error for {email}: {exc}")
        return {"email": email, "status": "Error", "name": "N/A", "dept": "N/A", "active": "N/A"}

    ad_cache[email] = res
    return res

def is_valid_reviewer_name(reviewer):
    """Return True when ``reviewer`` looks like a real name.

    Falsy values, NaN, known placeholder strings (compared after
    strip/lowercase), strings shorter than two characters and strings without
    any letter are all rejected.
    """
    if not reviewer or pd.isna(reviewer):
        return False

    cleaned = str(reviewer).strip().lower()
    placeholders = ("nan", "none", "", "null", "()", "n/a", "na", "tbd", "pending")
    if cleaned in placeholders:
        return False

    long_enough = len(cleaned) >= 2
    has_letter = any(ch.isalpha() for ch in cleaned)
    return long_enough and has_letter

In [None]:
# ========== CELL 2: Stage 1 - AD Authentication & User Download ==========

# Try importing Microsoft Graph SDK
# MSGRAPH_AVAILABLE records whether the optional SDK imports succeeded;
# download_ad_users() falls back to mock data when it is False.
try:
    from msgraph import GraphServiceClient
    from azure.identity import ClientSecretCredential
    MSGRAPH_AVAILABLE = True
except ImportError:
    MSGRAPH_AVAILABLE = False
    print("‚ö†Ô∏è Microsoft Graph SDK not installed. Using mock mode for demo.")
    print("   To use real AD: pip install msgraph-sdk azure-identity")

# Directory that receives the ad_users_<timestamp>.csv cache files.
AD_CACHE_DIR = os.path.join(BASE_DIR, "ad_cache")
os.makedirs(AD_CACHE_DIR, exist_ok=True)

# ========== GLOBAL STATE ==========
s1_ad_users = []            # user dicts produced by the last download
s1_cache_file = None        # path of the last CSV written by download_ad_users()
s1_is_downloading = False   # re-entrancy guard for the download button

# ========== UI COMPONENTS ==========
# Credential inputs; download_ad_users() uses mock data when any is blank.
s1_tenant_id = widgets.Text(
    description='Tenant ID:',
    placeholder='xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx',
    layout=widgets.Layout(width='500px')
)

s1_client_id = widgets.Text(
    description='Client ID:',
    placeholder='xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx',
    layout=widgets.Layout(width='500px')
)

s1_client_secret = widgets.Password(
    description='Client Secret:',
    placeholder='Enter client secret',
    layout=widgets.Layout(width='500px')
)

s1_btn_download = widgets.Button(
    description='üîê Download AD Users',
    button_style='',
    layout=widgets.Layout(width='200px', height='44px')
)
# NOTE(review): APPLE_COLORS is not defined in this cell or in Cell 1 -
# confirm the cell that defines the design system runs before this one.
s1_btn_download.style.button_color = APPLE_COLORS['blue']

# Status line, log output area and progress bar updated by download_ad_users().
s1_status = widgets.HTML("<i>Enter credentials to download users</i>")
s1_output = widgets.Output()
s1_progress = widgets.IntProgress(
    value=0, min=0, max=100,
    layout=widgets.Layout(width='100%', height='8px')
)

# ========== AD DOWNLOAD FUNCTION ==========

def download_ad_users(tenant_id, client_id, client_secret):
    """Download users from Microsoft Graph API

    Uses the Graph SDK when it is installed and all three credentials are
    non-empty; otherwise a fixed list of five mock users is generated.
    The resulting records are stored in ``s1_ad_users`` and written to
    ``AD_CACHE_DIR/ad_users_<YYYYmmdd_HHMM>.csv`` (path kept in
    ``s1_cache_file``). Progress and outcome are reported through the
    ``s1_progress``, ``s1_output`` and ``s1_status`` widgets and the logger.

    Args:
        tenant_id: Azure tenant ID (blank -> mock mode).
        client_id: Azure application (client) ID (blank -> mock mode).
        client_secret: Client secret (blank -> mock mode).

    Returns:
        None. Errors are caught, shown in the UI and logged, not raised.
    """
    import asyncio
    import inspect
    from concurrent.futures import ThreadPoolExecutor

    global s1_ad_users, s1_cache_file, s1_is_downloading

    # Ignore clicks while a download is already running.
    if s1_is_downloading:
        return

    s1_is_downloading = True
    s1_btn_download.disabled = True

    s1_output.clear_output()

    logger.info("Starting AD user download...")
    logger.info(f"Tenant: {tenant_id}")

    try:
        if MSGRAPH_AVAILABLE and tenant_id and client_id and client_secret:
            # Real Microsoft Graph API
            with s1_output:
                print("üîê Authenticating with Microsoft Graph API...")

            credential = ClientSecretCredential(
                tenant_id=tenant_id,
                client_id=client_id,
                client_secret=client_secret
            )

            client = GraphServiceClient(credentials=credential)

            with s1_output:
                print("‚úÖ Authentication successful")
                print("üì• Downloading user data...")

            s1_progress.value = 20

            # Get all users
            # NOTE(review): only the first page returned by users.get() is
            # processed, and no $select is sent - confirm whether department /
            # accountEnabled are populated and whether paging is required.
            users_response = client.users.get()
            if inspect.iscoroutine(users_response):
                # msgraph-sdk request builders are async. The notebook kernel
                # already runs an event loop on this thread, so the coroutine
                # is driven to completion on a short-lived worker thread.
                with ThreadPoolExecutor(max_workers=1) as pool:
                    users_response = pool.submit(asyncio.run, users_response).result()
            users = users_response.value or []

            s1_progress.value = 60

            # Process users
            ad_users = []
            for user in users:
                ad_users.append({
                    'email': user.mail or user.user_principal_name,
                    'displayName': user.display_name,
                    'department': user.department,
                    'accountEnabled': user.account_enabled,
                    'jobTitle': user.job_title,
                    'managerEmail': None,  # Will need separate call
                    'lastSignInDateTime': None
                })

            s1_progress.value = 90
            s1_ad_users = ad_users

        else:
            # Mock mode for demonstration
            with s1_output:
                print("üîÑ Running in MOCK MODE (for demonstration)")
                print("   Install msgraph-sdk for real AD connection")
                print()
                print("üì• Generating mock user data...")

            s1_progress.value = 20

            # Mock data
            mock_users = [
                {'email': 'ceo@company.com', 'displayName': 'Steven Bush',
                 'department': 'Corporate - Executive', 'accountEnabled': True,
                 'jobTitle': 'Chief Executive Officer', 'managerEmail': None,
                 'lastSignInDateTime': '2025-01-30T10:00:00Z'},
                {'email': 'vp.it@company.com', 'displayName': 'John Doe',
                 'department': 'Corporate - IT', 'accountEnabled': True,
                 'jobTitle': 'VP of IT', 'managerEmail': 'ceo@company.com',
                 'lastSignInDateTime': '2025-01-30T09:00:00Z'},
                {'email': 'vp.sales@company.com', 'displayName': 'Jane Smith',
                 'department': 'Corporate - Sales', 'accountEnabled': True,
                 'jobTitle': 'VP of Sales', 'managerEmail': 'ceo@company.com',
                 'lastSignInDateTime': '2025-01-30T08:00:00Z'},
                {'email': 'it.manager@company.com', 'displayName': 'Bob Wilson',
                 'department': 'Corporate - IT', 'accountEnabled': True,
                 'jobTitle': 'IT Manager', 'managerEmail': 'vp.it@company.com',
                 'lastSignInDateTime': '2025-01-29T14:00:00Z'},
                {'email': 'inactive@company.com', 'displayName': 'Former Employee',
                 'department': 'Corporate - IT', 'accountEnabled': False,
                 'jobTitle': 'Ex-Manager', 'managerEmail': 'vp.it@company.com',
                 'lastSignInDateTime': 'Never'},
            ]

            s1_progress.value = 60
            s1_ad_users = mock_users

        # Save to cache
        df = pd.DataFrame(s1_ad_users)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M')
        # AD_CACHE_DIR is a notebook global that a later cell may rebind to a
        # directory that has not been created yet, so ensure it exists here.
        os.makedirs(AD_CACHE_DIR, exist_ok=True)
        s1_cache_file = os.path.join(AD_CACHE_DIR, f"ad_users_{timestamp}.csv")
        df.to_csv(s1_cache_file, index=False)

        s1_progress.value = 100

        with s1_output:
            print()
            print("="*60)
            print(f"‚úÖ Downloaded {len(s1_ad_users)} users")
            print(f"üíæ Saved to: {s1_cache_file}")
            print("="*60)

        logger.info(f"Successfully downloaded {len(s1_ad_users)} users")
        logger.info(f"Cache file: {s1_cache_file}")

        s1_status.value = f"<span style='color:{APPLE_COLORS['green']};'>‚úÖ Downloaded {len(s1_ad_users)} users</span>"

    except Exception as e:
        with s1_output:
            print(f"\n‚ùå Error: {str(e)}")
        logger.error(f"AD download error: {str(e)}", exc_info=True)
        s1_status.value = f"<span style='color:{APPLE_COLORS['red']};'>‚ùå Error: {str(e)}</span>"

    finally:
        # Always release the guard and re-enable the button, even on failure.
        s1_is_downloading = False
        s1_btn_download.disabled = False
        s1_progress.value = 0

# ========== EVENT HANDLERS ==========

def on_s1_download(b):
    """Button callback: start the AD download with the values typed into the form."""
    credentials = (
        s1_tenant_id.value,
        s1_client_id.value,
        s1_client_secret.value,
    )
    download_ad_users(*credentials)

# Wire the download button to its click handler.
s1_btn_download.on_click(on_s1_download)

# ========== UI LAYOUT ==========

# Vertical stack: header, credential form, mock-mode hint, button, progress,
# status line and output area.
# NOTE(review): get_header_html is not defined in this cell or in Cell 1 -
# confirm the cell that defines it runs before this one.
stage1_ui = widgets.VBox([
    widgets.HTML(get_header_html(
        "üîê Stage 1: AD Authentication",
        "Download user data from Microsoft Graph API",
        'blue_purple'
    )),
    widgets.HTML("<p style='margin-bottom:16px;'><b>Enter Azure AD credentials:</b></p>"),
    s1_tenant_id,
    s1_client_id,
    s1_client_secret,
    widgets.HTML("<p style='margin:16px 0 8px 0;'><i>Leave blank to run in mock mode for demo</i></p>"),
    s1_btn_download,
    s1_progress,
    s1_status,
    s1_output
])

# Replace whatever this cell printed earlier with the assembled UI.
clear_output()
display(stage1_ui)

logger.info("Stage 1 UI initialized")


In [None]:
# === CELL 1.5: Org Tree Builder (v7.4 - Ultra Intuitive Tree View) ===
import os, glob, logging, io, math
import pandas as pd
import ipywidgets as widgets
from datetime import datetime
from IPython.display import display, HTML, clear_output

# Constants
CORP_PREFIX = "corporate"
MAX_TREE_LEVELS = 4  # Steven Bush = Level 1, then 3 more levels

# Paths
# NOTE(review): this rebinds BASE_DIR / AD_CACHE_DIR / LOG_DIR to
# output/<date>/..., whereas the earlier cells use output/<date>/create/... -
# confirm which location Stage 1 is expected to write its cache to.
today_str = datetime.now().strftime('%Y-%m-%d')
BASE_DIR = os.path.join("output", today_str)
AD_CACHE_DIR = os.path.join(BASE_DIR, "ad_cache")
ORG_DIR = os.path.join(BASE_DIR, "orgchart")
MAP_DIR = os.path.join("input", "mapping")
LOG_DIR = os.path.join(BASE_DIR, "logs")
os.makedirs(ORG_DIR, exist_ok=True)
os.makedirs(MAP_DIR, exist_ok=True)
# The rebound log and cache directories must exist too: FileHandler opens its
# file immediately, and the Stage 1 download writes into AD_CACHE_DIR.
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(AD_CACHE_DIR, exist_ok=True)

# Logging
log_file = os.path.join(LOG_DIR, f"aer_stage1_5_{datetime.now().strftime('%Y%m%d_%H%M')}.log")
logger_s15 = logging.getLogger("aer_stage1_5")
# Close handlers left over from a previous run of this cell before replacing
# them, so the old log file handle is released.
for _old_s15_handler in list(logger_s15.handlers):
    logger_s15.removeHandler(_old_s15_handler)
    _old_s15_handler.close()
logger_s15.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
fh = logging.FileHandler(log_file, encoding="utf-8")
fh.setFormatter(formatter)
logger_s15.addHandler(fh)
logger_s15.addHandler(logging.StreamHandler(sys.stdout))

# ========== GLOBAL STATE ==========
s15_ad_df = None
s15_tree_html = widgets.HTML()
s15_status = widgets.HTML(value="<i>Click Rebuild Tree to start</i>")
s15_output = widgets.Output()
s15_is_building = False
s15_dept_heads = {}  # Store dept heads globally
s15_nodes = {}
s15_children = {}
s15_levels = {}

# ========== EDITABLE MAPPING TABLE ==========
s15_mapping_data = []  # List of dicts: {dept, head_name, head_email, reviewer_name, reviewer_email}

# Widgets for editable table
mapping_container = widgets.VBox()
btn_add_row = widgets.Button(
    description='‚ûï Add Department', 
    button_style='info',
    icon='plus',
    layout=widgets.Layout(width='180px')
)
btn_save_mapping = widgets.Button(
    description='üíæ Save Mapping', 
    button_style='success',
    icon='save',
    layout=widgets.Layout(width='180px')
)
btn_refresh = widgets.Button(
    description='üîÑ Rebuild Tree', 
    button_style='primary',
    icon='refresh',
    layout=widgets.Layout(width='180px', height='40px')
)

# ========== HELPER FUNCTIONS ==========

def normalize_department(dept):
    """Return a cleaned department label.

    ``None`` and float NaN are treated as empty. Any value whose stripped text
    starts with "branch" (case-insensitive) collapses to ``'Branch'``; an
    empty result becomes ``'N/A'``; everything else is returned stripped.
    """
    is_missing = dept is None or (isinstance(dept, float) and math.isnan(dept))
    text = '' if is_missing else str(dept).strip()

    if text.lower().startswith('branch'):
        return 'Branch'
    return text if text else 'N/A'

def load_latest_ad():
    """Load the most recently modified AD cache CSV, keeping corporate users only.

    Returns:
        ``(DataFrame, message)`` on success, where the frame holds the rows
        whose department starts with ``CORP_PREFIX`` (case-insensitive), or
        ``(None, message)`` when no cache file exists or loading fails.
    """
    try:
        pattern = os.path.join(AD_CACHE_DIR, "ad_users_*.csv")
        candidates = glob.glob(pattern)
        if not candidates:
            return None, "No AD cache found. Run Stage 1 first."

        newest = max(candidates, key=os.path.getmtime)
        users = pd.read_csv(newest)

        # Missing departments become '' so they simply fail the prefix test.
        is_corporate = users['department'].fillna('').str.lower().str.startswith(CORP_PREFIX)
        users = users[is_corporate]

        return users, f"Loaded {len(users)} corporate users from {os.path.basename(newest)}"
    except Exception as e:
        return None, f"Error loading AD cache: {e}"

def find_root(df):
    """Find CEO (Steven/Steve Bush) as root

    Candidates are tried in this order and the first match wins:
      1. displayName is "steven bush" or "steve bush" (case/space-insensitive)
      2. jobTitle contains "chief executive", "ceo" or "president" as a whole
         word and is not a vice-president title
      3. managerEmail is missing, blank or 'N/A'
      4. the first row of ``df``

    Args:
        df: DataFrame with ``email``, ``displayName``, ``jobTitle`` and
            ``managerEmail`` columns.

    Returns:
        The root user's email, or None when ``df`` is empty.
    """
    def _text(column):
        # A column that is entirely NaN is read back as float64, on which the
        # .str accessor raises; normalise to plain lowercase strings first.
        return df[column].fillna('').astype(str).str.strip().str.lower()

    names = _text('displayName')
    mask_name = names.isin(['steven bush', 'steve bush'])
    if mask_name.any():
        return df.loc[mask_name, 'email'].iloc[0]

    titles = _text('jobTitle')
    looks_chief = titles.str.contains(r'\b(?:chief executive|ceo|president)\b', regex=True)
    # "Vice President" contains "president" but is not the top of the tree.
    looks_vice = titles.str.contains(r'\bvice[\s-]*president\b', regex=True)
    mask_title = looks_chief & ~looks_vice
    if mask_title.any():
        return df.loc[mask_title, 'email'].iloc[0]

    managers = df['managerEmail'].fillna('N/A').astype(str).str.strip()
    mask_nomgr = managers.isin(['', 'N/A'])
    if mask_nomgr.any():
        return df.loc[mask_nomgr, 'email'].iloc[0]

    return df.iloc[0]['email'] if len(df) else None

def build_org_structure(df, root_email):
    """Build the node, child and level maps of the org tree rooted at ``root_email``.

    Args:
        df: DataFrame with ``email``, ``displayName`` and ``managerEmail``
            columns (``jobTitle`` / ``department`` are read when present).
        root_email: email of the person at the top of the tree.

    Returns:
        ``(nodes, children, levels)`` where
          * ``nodes`` maps email -> {email, name, title, dept, manager}
          * ``children`` maps manager email -> list of direct-report emails;
            people whose manager is missing, unknown or themselves are
            attached directly under the root
          * ``levels`` maps email -> depth (root is 1), filled breadth-first
            down to ``MAX_TREE_LEVELS``
    """
    from collections import deque

    df = df.copy()
    df['managerEmail'] = df['managerEmail'].fillna('N/A')

    nodes = {}
    for _, row in df.iterrows():
        nodes[row['email']] = {
            'email': row['email'],
            'name': row['displayName'],
            'title': row.get('jobTitle', 'N/A'),
            'dept': normalize_department(row.get('department')),
            'manager': row['managerEmail']
        }

    children = {}
    for email, node in nodes.items():
        if email == root_email:
            # The root is never anyone's child, even if the data lists a
            # manager for it; otherwise it would reappear inside its own tree.
            continue
        mgr = node['manager']
        if mgr == 'N/A' or mgr not in nodes or mgr == email:
            # No usable manager (including "manages self"): hang under root
            # so the person is not lost from the tree.
            children.setdefault(root_email, []).append(email)
        else:
            children.setdefault(mgr, []).append(email)

    # Breadth-first walk from the root; `levels` doubles as the visited set,
    # so manager cycles cannot loop forever.
    levels = {root_email: 1}
    queue = deque([root_email])

    while queue:
        current = queue.popleft()
        current_level = levels[current]
        if current_level >= MAX_TREE_LEVELS:
            continue
        for child in children.get(current, []):
            if child not in levels:
                levels[child] = current_level + 1
                queue.append(child)

    return nodes, children, levels

def find_dept_heads(df, nodes, levels):
    """Pick a head and a reviewer for every department.

    Rows are grouped by normalized department ('N/A' is skipped). Within a
    group, only people present in ``levels`` are considered and the one with
    the smallest level (first such row on ties) is the head. The reviewer is
    found by walking up the head's manager chain (at most 10 hops) until
    someone outside the department is reached; if that fails, the head's
    direct manager is used, and failing that the head itself.

    Returns:
        dict mapping department -> {head_email, head_name, head_title,
        head_level, reviewer_email, reviewer_name}.
    """
    no_manager = 'N/A'
    hop_limit = 10
    result = {}

    normalized_depts = df['department'].apply(normalize_department)
    for dept_name, members in df.groupby(normalized_depts):
        if dept_name == 'N/A':
            continue

        candidates = [
            {
                'email': row['email'],
                'level': levels[row['email']],
                'name': row['displayName'],
                'title': row.get('jobTitle', 'N/A'),
            }
            for _, row in members.iterrows()
            if row['email'] in levels
        ]
        if not candidates:
            continue

        # min() returns the first minimal element, matching a stable sort.
        head = min(candidates, key=lambda person: person['level'])
        head_email = head['email']
        direct_manager = nodes[head_email]['manager']

        # Climb the chain while we stay inside the same department.
        reviewer_email = direct_manager
        seen = set()
        hops = 0
        while hops < hop_limit and reviewer_email != no_manager and reviewer_email in nodes:
            if reviewer_email in seen:
                break
            seen.add(reviewer_email)
            candidate = nodes[reviewer_email]
            if candidate['dept'] != dept_name:
                break
            reviewer_email = candidate['manager']
            hops += 1

        unresolved = (
            reviewer_email == no_manager
            or reviewer_email not in nodes
            or hops >= hop_limit
        )
        if unresolved:
            manager_usable = direct_manager != no_manager and direct_manager in nodes
            reviewer_email = direct_manager if manager_usable else head_email

        result[dept_name] = {
            'head_email': head_email,
            'head_name': head['name'],
            'head_title': head['title'],
            'head_level': head['level'],
            'reviewer_email': reviewer_email,
            'reviewer_name': nodes.get(reviewer_email, {}).get('name', 'N/A'),
        }

    return result

def get_dept_color(dept_name):
    """Return the background color for a department (the same light gray for every department)."""
    unified_background = '#F8F9FA'
    return unified_background

def analyze_children_depts(email, children, nodes):
    """Report whether every direct report of ``email`` shares the parent's department.

    Returns:
        ``(True, None)`` when ``email`` has no direct reports; otherwise
        ``(all_same, child_depts)`` where ``child_depts`` is the set of
        departments of the reports found in ``nodes`` and ``all_same`` is True
        only when that set is exactly the parent's department.
    """
    kids = children.get(email)
    if not kids:
        return True, None

    parent_dept = nodes[email]['dept']
    child_depts = {nodes[kid]['dept'] for kid in kids if kid in nodes}

    all_same = len(child_depts) == 1 and parent_dept in child_depts
    return all_same, child_depts

def render_ultra_intuitive_tree(root_email, nodes, children, levels, dept_heads, max_level=MAX_TREE_LEVELS):
    """
    Ultra intuitive tree rendering with:
    - Dept head highlighting
    - Smart folding (same dept = folded)
    - Department grouping
    - Color coding

    Args:
        root_email: email of the node rendered at the top.
        nodes: email -> {name, title, dept, ...} as built by build_org_structure.
        children: manager email -> list of direct-report emails.
        levels: email -> depth; people missing from it are not rendered as children.
        dept_heads: department -> info dict containing 'head_email'.
        max_level: deepest level to render (also shown in the header).

    Returns:
        An HTML string. All directory-sourced text (names, titles,
        departments, emails) is HTML-escaped before interpolation.
    """
    from html import escape

    def _esc(value):
        # Directory values are data, not markup; quote=True also protects the
        # single-quoted title='...' attribute (e.g. o'brien@company.com).
        return escape(str(value), quote=True)

    # Emails of all department heads (only membership is needed).
    head_emails = {info['head_email'] for info in dept_heads.values()}

    def render_node(email, current_level, parent_dept=None):
        """Recursively render node"""
        if current_level > max_level or email not in nodes:
            return ""

        node = nodes[email]
        level_num = levels.get(email, 0)

        # Check if this person is a dept head
        is_dept_head = email in head_emails
        dept_name = node['dept']
        dept_text = _esc(dept_name)
        name_text = _esc(node['name'])

        # Analyze children departments
        _, child_depts = analyze_children_depts(email, children, nodes)

        # Build node label
        if is_dept_head:
            # Department head - BOLD + HIGHLIGHTED
            dept_badge = f"<span style='background:#FFF3CD;color:#000;padding:3px 8px;border-radius:4px;font-weight:700;border:2px solid #856404;'>üìå {dept_text}</span>"
            name_html = f"<b style='font-size:1.1em;font-weight:700;'>{name_text}</b>"
        else:
            dept_badge = f"<span style='background:#F8F9FA;color:#495057;padding:2px 6px;border-radius:3px;font-weight:500;'>{dept_text}</span>"
            name_html = f"<b style='font-size:1.05em;'>{name_text}</b>"

        level_badge = f"<span style='background:#6C757D;color:white;padding:2px 6px;border-radius:3px;font-size:0.85em;'>L{level_num}</span>"
        title_html = f"<span style='color:#666;font-style:italic;'>{_esc(node['title'])}</span>"
        email_html = f"<span style='color:#999;font-size:0.85em;' title='{_esc(email)}'>üìß {_esc(str(email).split('@')[0])}</span>"

        # Build full label
        label = f"{name_html} {dept_badge} {level_badge}"
        sublabel = f"{title_html} ‚Ä¢ {email_html}"

        # Get children
        kids = children.get(email, [])
        kids_in_range = [k for k in kids if k in levels and levels[k] <= max_level]

        if not kids_in_range:
            # Leaf node
            return f"""
            <li style='padding:6px 0;'>
                <div>{label}</div>
                <div style='padding-left:20px;font-size:0.9em;'>{sublabel}</div>
            </li>
            """

        # Has children - decide on grouping strategy
        if len(child_depts) <= 1:
            # All children in same dept - simple collapsed tree
            kids_html = "".join([render_node(kid, current_level + 1, dept_name) for kid in kids_in_range])

            # Smart default: collapse if same dept as parent, expand if different
            is_open = "open" if dept_name != parent_dept else ""

            return f"""
            <details {is_open}>
                <summary style='cursor:pointer;padding:6px 0;list-style-position:outside;'>
                    <div style='display:inline-block;margin-left:8px;'>{label}</div>
                    <div style='padding-left:28px;font-size:0.9em;'>{sublabel}</div>
                </summary>
                <ul style='list-style:none;padding-left:28px;border-left:2px solid #DEE2E6;margin:4px 0;'>
                    {kids_html}
                </ul>
            </details>
            """
        else:
            # Multiple depts - group by department
            dept_groups = {}
            for kid in kids_in_range:
                kid_dept = nodes[kid]['dept']
                dept_groups.setdefault(kid_dept, []).append(kid)

            groups_html = ""
            for grp_dept in sorted(dept_groups.keys()):
                grp_kids = dept_groups[grp_dept]

                # Find dept head in this group
                grp_head = None
                for kid in grp_kids:
                    if kid in head_emails:
                        grp_head = nodes[kid]['name']
                        break

                head_label = f" (Head: {_esc(grp_head)})" if grp_head else ""

                grp_content = "".join([render_node(kid, current_level + 1, grp_dept) for kid in grp_kids])

                # AUTO EXPAND multi-dept branches
                groups_html += f"""
                <details open>
                    <summary style='cursor:pointer;padding:4px 8px;margin:4px 0;background:#E9ECEF;border-radius:4px;border-left:4px solid #6C757D;font-weight:600;'>
                        <b>üìÅ {_esc(grp_dept)}</b>{head_label} <span style='color:#666;'>({len(grp_kids)} people)</span>
                    </summary>
                    <ul style='list-style:none;padding-left:16px;margin:4px 0;'>
                        {grp_content}
                    </ul>
                </details>
                """

            return f"""
            <details open>
                <summary style='cursor:pointer;padding:6px 0;list-style-position:outside;'>
                    <div style='display:inline-block;margin-left:8px;'>{label}</div>
                    <div style='padding-left:28px;font-size:0.9em;'>{sublabel}</div>
                </summary>
                <div style='padding-left:20px;margin:8px 0;'>
                    {groups_html}
                </div>
            </details>
            """

    # The header reports the depth actually rendered instead of a fixed "4".
    tree_html = f"""
    <div style='font-family:system-ui,-apple-system,sans-serif;font-size:0.95em;line-height:1.6;'>
        <div style='background:linear-gradient(135deg,#667eea 0%,#764ba2 100%);color:white;padding:16px;border-radius:8px 8px 0 0;'>
            <h3 style='margin:0 0 8px 0;'>üå≥ Organization Hierarchy Tree ({max_level} Levels)</h3>
            <p style='margin:0;opacity:0.9;font-size:0.9em;'>
                üìå Bold + Highlighted = Department Head ‚Ä¢ Click to expand/collapse ‚Ä¢ 
                Multi-department branches auto-expand
            </p>
        </div>
        <div style='padding:16px;background:white;border:1px solid #ddd;border-top:none;border-radius:0 0 8px 8px;max-height:700px;overflow:auto;'>
            <ul style='list-style:none;padding-left:0;margin:0;'>
                {render_node(root_email, 1)}
            </ul>
        </div>
    </div>
    """

    return tree_html

# ========== EDITABLE MAPPING TABLE ==========

def render_editable_mapping():
    """Rebuild the widget rows of the department mapping table from ``s15_mapping_data``.

    Each row shows the department (read-only), editable head and reviewer
    names that write straight back into ``s15_mapping_data``, and a delete
    button. With no data a placeholder message is shown instead.
    """
    global s15_mapping_data

    if not s15_mapping_data:
        mapping_container.children = [widgets.HTML("<i>No departments yet. Rebuild tree to populate.</i>")]
        return

    def _store(change, row_idx, field_name):
        # Persist the edited text into the backing list.
        s15_mapping_data[row_idx][field_name] = change['new']

    def _make_name_input(row_idx, field_name, hint):
        # One editable text box bound to s15_mapping_data[row_idx][field_name].
        box = widgets.Text(
            value=s15_mapping_data[row_idx][field_name],
            placeholder=hint,
            layout=widgets.Layout(width='200px')
        )
        box.mapping_idx = row_idx
        box.field = field_name
        box.observe(lambda change, i=row_idx, f=field_name: _store(change, i, f), 'value')
        return box

    # Header
    table_rows = [widgets.HTML("""
        <div style='display:grid;grid-template-columns:200px 200px 200px 80px;gap:8px;padding:8px;background:#4CAF50;color:white;font-weight:bold;border-radius:4px 4px 0 0;'>
            <div>Department</div>
            <div>Department Head</div>
            <div>Reviewer</div>
            <div>Actions</div>
        </div>
    """)]

    # Data rows
    for row_idx, entry in enumerate(s15_mapping_data):
        dept_cell = widgets.HTML(
            value=f"<div style='padding:8px;'><b>{entry['dept']}</b></div>",
            layout=widgets.Layout(width='200px')
        )
        head_cell = _make_name_input(row_idx, 'head_name', 'Head name')
        reviewer_cell = _make_name_input(row_idx, 'reviewer_name', 'Reviewer name')

        remove_btn = widgets.Button(
            description='üóëÔ∏è',
            button_style='danger',
            layout=widgets.Layout(width='60px')
        )
        remove_btn.mapping_idx = row_idx
        remove_btn.on_click(lambda b, i=row_idx: delete_mapping_row(i))

        table_rows.append(widgets.HBox(
            [dept_cell, head_cell, reviewer_cell, remove_btn],
            layout=widgets.Layout(padding='4px 8px', border_bottom='1px solid #eee')
        ))

    mapping_container.children = table_rows

def delete_mapping_row(idx):
    """Remove the mapping at position ``idx`` and redraw the table; out-of-range indexes are ignored."""
    global s15_mapping_data
    if idx < 0 or idx >= len(s15_mapping_data):
        return
    s15_mapping_data.pop(idx)
    render_editable_mapping()

def add_mapping_row(_=None):
    """Append a blank 'New Department' mapping and redraw the table.

    The single argument is ignored; it only exists so the function can be
    used directly as a button click callback.
    """
    global s15_mapping_data
    blank_fields = dict.fromkeys(
        ('head_name', 'head_email', 'reviewer_name', 'reviewer_email'), ''
    )
    s15_mapping_data.append({'dept': 'New Department', **blank_fields})
    render_editable_mapping()

def save_mapping(_=None):
    """Save mapping to CSV

    Writes ``MAP_DIR/org_mapping_<YYYYmmdd_HHMM>.csv`` with two rows per
    department: one assigning the head to the reviewer, and one wildcard
    (``email='*'``) row assigning everyone else to the head. The email is
    used when present; otherwise the typed name is used. ``branch`` is blank
    for departments starting with "corporate" and 'Branch' for the rest.

    The single argument is ignored (button-callback compatibility).
    """
    if not s15_mapping_data:
        s15_status.value = "<span style='color:red;'>‚ùå No mappings to save</span>"
        return

    def _identity(mapping, email_key, name_key):
        # The email key always exists (manual rows store ''), so a plain
        # dict.get default never falls back; treat blank as missing instead.
        email = str(mapping.get(email_key) or '').strip()
        return email or mapping[name_key]

    # Convert to CSV format
    rows = []
    for mapping in s15_mapping_data:
        dept = mapping['dept']
        head = _identity(mapping, 'head_email', 'head_name')
        reviewer = _identity(mapping, 'reviewer_email', 'reviewer_name')
        branch = '' if str(dept).lower().startswith('corporate') else 'Branch'

        # Head row
        rows.append({
            'email': head,
            'department': dept,
            'reviewer': reviewer,
            'branch': branch
        })
        # Wildcard row
        rows.append({
            'email': '*',
            'department': dept,
            'reviewer': head,
            'branch': branch
        })

    df = pd.DataFrame(rows)
    ts = datetime.now().strftime('%Y%m%d_%H%M')
    os.makedirs(MAP_DIR, exist_ok=True)  # do not rely on an earlier cell having created it
    path = os.path.join(MAP_DIR, f"org_mapping_{ts}.csv")
    df.to_csv(path, index=False)

    s15_status.value = f"<span style='color:green;'>üíæ Saved: {os.path.basename(path)}</span>"
    logger_s15.info(f"Mapping saved to {path}")

    with s15_output:
        print(f"\n‚úÖ Mapping saved to: {path}")
        print(f"   Total rows: {len(rows)}")

# ========== REFRESH TREE ==========

def refresh_tree(_=None):
    """Rebuild the organization tree, the department mapping and their views.

    Steps, each reflected in a progress bar shown inside ``s15_output``:
    load the AD data via ``load_latest_ad()``, locate the root with
    ``find_root()``, build the hierarchy with ``build_org_structure()``,
    identify department heads with ``find_dept_heads()``, regenerate
    ``s15_mapping_data`` and finally render the tree HTML.

    Any exception is reported in ``s15_output`` / ``s15_status`` and logged
    through ``logger_s15``; it is not re-raised.

    Args:
        _: Ignored; present so the function can be used as a click callback.
    """
    global s15_ad_df, s15_is_building, s15_dept_heads, s15_nodes, s15_children, s15_levels, s15_mapping_data

    # Re-entrancy guard: ignore clicks while a rebuild is already running.
    if s15_is_building:
        return

    s15_is_building = True
    btn_refresh.disabled = True
    s15_output.clear_output()

    # Per-run progress widgets; they live inside the output area.
    progress = widgets.IntProgress(
        value=0, min=0, max=100, description='Building:',
        bar_style='info', layout=widgets.Layout(width='80%')
    )
    status_label = widgets.HTML(value="Initializing...")

    with s15_output:
        display(widgets.VBox([progress, status_label]))
        print("\n" + "="*70)
        print("üå≥ Building Ultra-Intuitive Organization Tree")
        print("="*70 + "\n")

    try:
        # Load data
        status_label.value = "üìÇ Loading AD cache..."
        progress.value = 20

        # load_latest_ad() returns (DataFrame or None, message); on failure
        # the message is used as the error text.
        df, msg = load_latest_ad()
        if df is None:
            raise Exception(msg)

        s15_ad_df = df
        with s15_output:
            print(f"‚úì {msg}\n")

        # Find root
        status_label.value = "üëë Finding CEO..."
        progress.value = 30

        root_email = find_root(df)
        if not root_email:
            raise Exception("Could not locate root (CEO)")

        # Display name of the first row whose 'email' equals the root e-mail.
        root_name = df[df['email'] == root_email].iloc[0]['displayName']
        with s15_output:
            print(f"‚úì CEO: {root_name} ({root_email})\n")

        # Build structure
        status_label.value = "üèóÔ∏è Building hierarchy..."
        progress.value = 50

        s15_nodes, s15_children, s15_levels = build_org_structure(df, root_email)

        with s15_output:
            print(f"‚úì Structure built: {len(s15_nodes)} people, {len(s15_levels)} in tree\n")

        # Find dept heads
        status_label.value = "üìä Identifying department heads..."
        progress.value = 70

        s15_dept_heads = find_dept_heads(df, s15_nodes, s15_levels)

        with s15_output:
            print(f"‚úì Found {len(s15_dept_heads)} departments\n")

        # Generate mapping
        status_label.value = "üó∫Ô∏è Generating mapping..."
        progress.value = 85

        # Replace any previous (possibly hand-edited) mapping with one entry
        # per detected department.
        s15_mapping_data = []
        for dept, info in s15_dept_heads.items():
            s15_mapping_data.append({
                'dept': dept,
                'head_name': info['head_name'],
                'head_email': info['head_email'],
                'reviewer_name': info['reviewer_name'],
                'reviewer_email': info['reviewer_email']
            })

        render_editable_mapping()

        # Render tree
        status_label.value = "üé® Rendering tree..."
        progress.value = 95

        tree_html = render_ultra_intuitive_tree(
            root_email, s15_nodes, s15_children, s15_levels, s15_dept_heads
        )
        s15_tree_html.value = tree_html

        # Complete
        progress.value = 100
        progress.bar_style = 'success'
        status_label.value = "<span style='color:green;font-weight:bold;'>‚úÖ Complete!</span>"

        with s15_output:
            print("="*70)
            print("‚úÖ Tree Built Successfully")
            print("="*70)
            print(f"üìä {len(s15_nodes)} people ‚Ä¢ {len(s15_dept_heads)} departments ‚Ä¢ {len(s15_mapping_data)} mappings")
            print("="*70)

        s15_status.value = f"<span style='color:green;'>‚úÖ Tree ready: {len(s15_dept_heads)} depts</span>"

    except Exception as e:
        # Surface the failure in the UI and keep the traceback in the log.
        with s15_output:
            print(f"\n‚ùå Error: {e}")
        s15_status.value = f"<span style='color:red;'>‚ùå {e}</span>"
        logger_s15.error(f"Error: {e}", exc_info=True)
    finally:
        # Always release the guard and re-enable the button.
        s15_is_building = False
        btn_refresh.disabled = False

# ========== EVENT BINDINGS ==========
# Wire the Stage 1.5 buttons to their handlers.
btn_add_row.on_click(add_mapping_row)
btn_save_mapping.on_click(save_mapping)
btn_refresh.on_click(refresh_tree)

# ========== UI LAYOUT ==========
# Top to bottom: banner, rebuild button + status, tree view, mapping editor
# heading, mapping rows, add/save buttons, log output.
stage15_ui = widgets.VBox([
    widgets.HTML("""
        <div style='background:linear-gradient(135deg,#5ee7df 0%,#b490ca 100%);padding:20px;border-radius:8px;color:white;margin-bottom:20px;'>
            <h2 style='margin:0 0 10px 0;'>üå≥ Stage 1.5: Ultra-Intuitive Org Tree Builder</h2>
            <p style='margin:0;opacity:0.9;'>
                ‚ú® Smart folding ‚Ä¢ üìå Bold dept head highlighting ‚Ä¢ üîì Multi-dept auto-expand ‚Ä¢ ‚úèÔ∏è Editable mapping
            </p>
        </div>
    """),
    widgets.HBox([btn_refresh, s15_status], layout=widgets.Layout(margin='0 0 16px 0')),
    s15_tree_html,
    widgets.HTML("<div style='margin:24px 0 12px 0;'><h3>üìã Department Reviewer Mapping (Editable)</h3><p style='color:#666;'>Edit names directly ‚Ä¢ Click üóëÔ∏è to delete ‚Ä¢ Click ‚ûï to add custom department</p></div>"),
    mapping_container,
    widgets.HBox([btn_add_row, btn_save_mapping], layout=widgets.Layout(margin='12px 0')),
    s15_output
])

# Replace whatever the cell printed so far with the assembled UI.
clear_output()
display(stage15_ui)

logger_s15.info("Stage 1.5 Ultra-Intuitive UI initialized")
logger_s15.info("="*60)
print("\n‚úÖ Stage 1.5 UI ready - Click 'üîÑ Rebuild Tree' to start")


In [None]:
# === CELL 3: Email/User Validation with Ultra Validation (v7.5) ===
import os, sys, logging, glob, io, re, unicodedata
import pandas as pd
import ipywidgets as widgets
from datetime import datetime
from IPython.display import display, HTML, clear_output

# Optional fuzzy-matching backend: prefer rapidfuzz, fall back to fuzzywuzzy.
# FUZZY_AVAILABLE records whether either import succeeded; when it is False
# the names `fuzz` and `process` are not defined.
try:
    from rapidfuzz import fuzz, process
    FUZZY_AVAILABLE = True
except ImportError:
    try:
        from fuzzywuzzy import fuzz, process
        FUZZY_AVAILABLE = True
        print("‚ö†Ô∏è Using fuzzywuzzy. Install rapidfuzz for better performance: pip install rapidfuzz")
    except ImportError:
        FUZZY_AVAILABLE = False
        print("‚ùå Fuzzy matching unavailable. Install: pip install rapidfuzz")

# Setup paths: everything for this stage lives under output/<today>/.
today_str = datetime.now().strftime('%Y-%m-%d')
BASE_DIR = os.path.join("output", today_str)
AD_CACHE_DIR = os.path.join(BASE_DIR, "ad_cache")
STAGE2_DIR = os.path.join(BASE_DIR, "stage2_validated")
LOG_DIR = os.path.join(BASE_DIR, "logs")
os.makedirs(STAGE2_DIR, exist_ok=True)
# logging.FileHandler does not create missing directories, so the log folder
# must exist before the handler below opens its file.
os.makedirs(LOG_DIR, exist_ok=True)

# Logging: one file per run (minute resolution) plus echo to stdout.
log_file = os.path.join(LOG_DIR, f"aer_stage2_{datetime.now().strftime('%Y%m%d_%H%M')}.log")
logger_s2 = logging.getLogger("aer_stage2")
# Re-running the cell must not stack handlers; close the old ones as they are
# removed so their log files are not left open.
for _old_handler in list(logger_s2.handlers):
    logger_s2.removeHandler(_old_handler)
    _old_handler.close()
logger_s2.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
fh = logging.FileHandler(log_file, encoding="utf-8")
fh.setFormatter(formatter)
logger_s2.addHandler(fh)
logger_s2.addHandler(logging.StreamHandler(sys.stdout))

# ========== GLOBAL STATE ==========
# Module-level state shared by the Stage 2 helpers and UI callbacks.
s2_ad_cache = {}  # email -> user data
s2_name_index = {}  # normalized_name -> email
s2_input_df = None  # DataFrame read from the uploaded file
s2_input_filename = ""  # name of the uploaded file
s2_validated_rows = []  # List of validated user dicts
s2_review_rows = []  # Rows needing manual review
s2_is_processing = False  # True while a validation run is in progress

# Validation categories: status key -> label shown in the summary and on
# review cards.
CATEGORIES = {
    'PERFECT_MATCH': '‚úÖ Perfect Match',
    'AUTO_FILLED_NAME': '‚úÖ Auto-filled Name',
    'AUTO_FILLED_EMAIL': '‚úÖ Auto-filled Email',
    'MISMATCH_EMAIL_PRIORITY': '‚ö†Ô∏è Mismatch (Email Priority)',
    'AMBIGUOUS_NAME': '‚ö†Ô∏è Ambiguous Name Match',
    'INACTIVE_ACCOUNT': '‚ö†Ô∏è Inactive Account',
    'NOT_FOUND': '‚ùå Not Found in AD',
    'INVALID_FORMAT': '‚ùå Invalid Email Format',
    'BOTH_EMPTY': '‚ùå Both Empty (Skipped)'
}

# ========== UI COMPONENTS ==========
# File picker for the user list (.xlsx or .csv).
s2_upload = widgets.FileUpload(
    accept='.xlsx, .csv',
    description="Upload User List",
    button_style='info'
)
s2_upload_status = widgets.HTML(value="<i>No file selected</i>")
# Both action buttons start disabled; the upload handler and the validation
# run enable them.
s2_btn_validate = widgets.Button(
    description="üîç Validate Users",
    button_style='warning',
    layout=widgets.Layout(width='180px'),
    disabled=True
)
s2_btn_save = widgets.Button(
    description="üíæ Save Validated File",
    button_style='success',
    layout=widgets.Layout(width='180px'),
    disabled=True
)
s2_status = widgets.HTML(value="<i>Please load AD cache and upload user list</i>")
s2_output = widgets.Output()  # console-style log area
s2_review_container = widgets.VBox()  # filled with review cards after validation

# ========== HELPER FUNCTIONS ==========

def normalize_email(email):
    """Return ``email`` trimmed and lower-cased, or ``None`` if it is blank.

    Missing values (``None``, NaN, empty) and textual null placeholders such
    as ``'n/a'`` or ``'-'`` all map to ``None``.
    """
    null_tokens = ['nan', 'none', '', 'n/a', 'na', '-', 'null']
    if pd.isna(email) or not email:
        return None
    cleaned = str(email).strip().lower()
    return None if cleaned in null_tokens else cleaned

def is_valid_email_format(email):
    """Return True if ``email`` looks like ``local@domain.tld``.

    This is a basic shape check, not full RFC validation. Empty values and
    non-string values are rejected.
    """
    if not email or not isinstance(email, str):
        return False
    # fullmatch instead of match + '$': '$' also matches just before a
    # trailing newline, which would let "user@example.com\n" through.
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    return re.fullmatch(pattern, email) is not None

def normalize_name(name):
    """Return a display-ready version of ``name``, or ``None`` if it is blank.

    The value is trimmed, NFKC-normalized and has runs of whitespace collapsed
    to a single space. Case is preserved. Missing values and textual null
    placeholders (``'n/a'``, ``'-'``, ...) yield ``None``.
    """
    if pd.isna(name) or not name:
        return None
    trimmed = str(name).strip()
    if trimmed.lower() in ('nan', 'none', '', 'n/a', 'na', '-', 'null'):
        return None
    return re.sub(r'\s+', ' ', unicodedata.normalize('NFKC', trimmed))

def normalize_name_for_index(name):
    """Return an aggressively normalized key for name matching.

    The key is case-folded, has diacritics removed, has punctuation and
    underscores replaced by spaces, and has whitespace collapsed. Letters and
    digits of any script are kept, so non-Latin names still produce a usable
    key. ``None``, empty and NaN inputs return ``""``.
    """
    # NaN is a truthy float; without this check it would be indexed as "nan".
    if not name or (isinstance(name, float) and name != name):
        return ""
    # Decompose first so accents become separate combining marks that can be
    # dropped ("é" -> "e"), then recompose and fold case.
    decomposed = unicodedata.normalize('NFKD', str(name))
    without_marks = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
    text = unicodedata.normalize('NFKC', without_marks).casefold()
    # Replace anything that is not a letter/digit/whitespace; '_' counts as a
    # word character in regexes, so it is listed explicitly.
    text = re.sub(r'[^\w\s]|_', ' ', text)
    return ' '.join(text.split())

def detect_columns(df, sample_size=20):
    """Guess which columns of ``df`` hold e-mail addresses and names.

    The e-mail column is the one with the most e-mail-looking values (contains
    both '@' and '.') among the first ``sample_size`` rows; ties go to the
    leftmost column. The name column is the first column that is not the
    e-mail column. When no column contains any e-mail-looking value, the first
    two columns are assumed to be e-mail and name respectively.

    Returns:
        ``(email_col_idx, name_col_idx, email_col_name, name_col_name)``, or
        four ``None`` values when ``df`` has fewer than two columns.
    """
    columns = df.columns
    if len(columns) < 2:
        return None, None, None, None

    head = df.head(sample_size)

    def looks_like_email(value):
        text = str(value).strip().lower()
        return '@' in text and '.' in text

    # One hit count per column, in column order.
    hits = [
        sum(1 for value in head[col] if looks_like_email(value))
        for col in columns
    ]

    best = max(hits)
    if best == 0:
        # Nothing resembles an e-mail: fall back to positional defaults.
        return 0, 1, columns[0], columns[1]

    email_idx = hits.index(best)  # index() returns the leftmost maximum
    name_idx = 1 if email_idx == 0 else 0
    return email_idx, name_idx, columns[email_idx], columns[name_idx]

def load_ad_cache():
    """Load the most recently modified ``ad_users_*.csv`` from AD_CACHE_DIR.

    Rebuilds ``s2_ad_cache`` (email -> user dict) and ``s2_name_index``
    (normalized name -> email) from scratch, so entries from an earlier load
    do not linger. Two-word names are also indexed in reversed order, but a
    reversed alias never replaces another user's real name entry.

    Returns:
        ``(True, message)`` on success, ``(False, message)`` when no cache
        file exists or loading fails.
    """
    try:
        cache_files = glob.glob(os.path.join(AD_CACHE_DIR, "ad_users_*.csv"))
        if not cache_files:
            return False, "No AD cache found. Please run Stage 1 first."

        latest_cache = max(cache_files, key=os.path.getmtime)
        df = pd.read_csv(latest_cache)

        # Build into fresh dicts first; the shared globals are only touched
        # once the whole file has been processed successfully.
        users = {}
        names = {}
        reversed_aliases = {}

        for _, row in df.iterrows():
            email = normalize_email(row['email'])
            if not email:
                continue

            users[email] = {
                'email': email,
                'name': row['displayName'],
                'dept': row['department'],
                'active': row['accountEnabled'],
                'jobTitle': row.get('jobTitle', 'N/A'),
                'lastSignIn': row.get('lastSignInDateTime', 'N/A')
            }

            display_name = row['displayName']
            if pd.isna(display_name):
                # A missing name would otherwise be indexed as the text "nan".
                continue

            norm_name = normalize_name_for_index(display_name)
            if not norm_name:
                continue
            names[norm_name] = email

            # "First Last" is also reachable as "Last First".
            parts = norm_name.split()
            if len(parts) == 2:
                reversed_aliases[f"{parts[1]} {parts[0]}"] = email

        # Real names take priority over reversed aliases of other users.
        for alias, alias_email in reversed_aliases.items():
            names.setdefault(alias, alias_email)

        # Update in place so other references to these dicts stay valid.
        s2_ad_cache.clear()
        s2_ad_cache.update(users)
        s2_name_index.clear()
        s2_name_index.update(names)

        return True, f"Loaded {len(s2_ad_cache)} users from {os.path.basename(latest_cache)}"

    except Exception as e:
        return False, f"Error loading AD cache: {str(e)}"

def fuzzy_match_name(target_name, top_n=5):
    """Find AD users whose indexed name matches ``target_name``.

    An exact hit on the normalized name returns a single candidate with score
    100. Otherwise the name index is searched with ``fuzz.token_sort_ratio``
    and candidates scoring at least 70 are returned, best first, one per
    e-mail address.

    Args:
        target_name: Name to look up.
        top_n: Maximum number of candidates to return.

    Returns:
        A list of dicts with keys ``email``, ``name``, ``dept``, ``score`` and
        ``active``. The list is empty when fuzzy matching is unavailable, the
        index is empty, nothing matches, or the search backend fails.
    """
    if not FUZZY_AVAILABLE or not s2_name_index or not target_name:
        return []

    norm_target = normalize_name_for_index(target_name)
    if not norm_target:
        return []

    # Exact match check
    if norm_target in s2_name_index:
        email = s2_name_index[norm_target]
        user = s2_ad_cache[email]
        return [{
            'email': email,
            'name': user['name'],
            'dept': user['dept'],
            'score': 100,
            'active': user['active']
        }]

    # Fuzzy search; over-fetch because several index keys (e.g. reversed
    # aliases) can point at the same e-mail.
    try:
        matches = process.extract(
            norm_target,
            list(s2_name_index),
            scorer=fuzz.token_sort_ratio,
            limit=top_n * 2
        )
    except Exception as exc:
        # Best effort: report the failure instead of hiding it, then behave
        # as "no match".
        logger_s2.warning(f"Fuzzy name search failed for {target_name!r}: {exc}")
        return []

    results = []
    seen = set()

    for match in matches:
        # rapidfuzz yields (choice, score, index) while fuzzywuzzy yields
        # (choice, score); index access works for both shapes.
        match_name, score = match[0], match[1]
        if score < 70:
            continue

        email = s2_name_index[match_name]
        if email in seen:
            continue
        seen.add(email)

        user = s2_ad_cache.get(email)
        if user is None:
            continue

        results.append({
            'email': email,
            'name': user['name'],
            'dept': user['dept'],
            'score': int(score),
            'active': user['active']
        })

        if len(results) >= top_n:
            break

    return results

def _fill_from_ad(result, email, ad_user):
    """Copy the identity fields of an AD record onto ``result``."""
    result['validated_email'] = email
    result['validated_name'] = ad_user['name']
    result['ad_status'] = 'Active' if ad_user['active'] else 'Inactive'
    result['department'] = ad_user['dept']
    result['job_title'] = ad_user['jobTitle']


def _flag_for_review(result, status, *notes):
    """Set ``status``, append ``notes`` and mark ``result`` for manual review."""
    result['validation_status'] = status
    result['validation_notes'].extend(notes)
    result['needs_review'] = True
    return result, True


def _accept_if_active(result, ok_status, ok_note):
    """Accept an AD-filled result, unless its account is inactive."""
    if result['ad_status'] == 'Inactive':
        return _flag_for_review(result, 'INACTIVE_ACCOUNT', 'Account is inactive in AD')
    result['validation_status'] = ok_status
    result['validation_notes'].append(ok_note)
    return result, result['needs_review']


def validate_user(input_email, input_name):
    """Validate one input row against the AD cache.

    Decision order:
      1. Both fields blank -> ``BOTH_EMPTY`` (not sent to review).
      2. Malformed e-mail -> ``INVALID_FORMAT``.
      3. E-mail given: looked up in ``s2_ad_cache``. Unknown -> ``NOT_FOUND``.
         Known with no name -> ``AUTO_FILLED_NAME``. Known with matching name
         -> ``PERFECT_MATCH``. Known with a different name ->
         ``MISMATCH_EMAIL_PRIORITY`` (AD data wins, sent to review).
      4. Name only: resolved through ``fuzzy_match_name``. No candidate ->
         ``NOT_FOUND``. One candidate -> ``AUTO_FILLED_EMAIL``. Several ->
         ``AMBIGUOUS_NAME`` with the candidates in ``review_options``.
    An otherwise accepted row whose account is disabled becomes
    ``INACTIVE_ACCOUNT`` and is sent to review.

    Returns:
        ``(validated_dict, needs_review)``.
    """
    # Normalize inputs
    email = normalize_email(input_email)
    name = normalize_name(input_name)

    result = {
        'original_email': input_email,
        'original_name': input_name,
        'validated_email': None,
        'validated_name': None,
        'ad_status': None,
        'validation_status': None,
        'validation_notes': [],
        'department': None,
        'job_title': None,
        'needs_review': False,
        'review_options': []
    }

    # Case 1: both empty -> skipped by the caller, never reviewed.
    if not email and not name:
        result['validation_status'] = 'BOTH_EMPTY'
        result['validation_notes'].append('Both email and name are empty')
        return result, False

    # Case 2: invalid e-mail format (checked before any lookup).
    if email and not is_valid_email_format(email):
        return _flag_for_review(result, 'INVALID_FORMAT', f'Invalid email format: {email}')

    # Cases 3 and 5: an e-mail is present and is the source of truth.
    if email:
        ad_user = s2_ad_cache.get(email)
        if ad_user is None:
            if name:
                # Keep the supplied name so the reviewer has something to work with.
                result['validated_name'] = name
            return _flag_for_review(result, 'NOT_FOUND', 'Email not found in AD')

        _fill_from_ad(result, email, ad_user)

        if not name:
            return _accept_if_active(result, 'AUTO_FILLED_NAME', 'Name auto-filled from AD')

        if normalize_name_for_index(ad_user['name']) == normalize_name_for_index(name):
            return _accept_if_active(result, 'PERFECT_MATCH', 'Email and name match AD perfectly')

        # Name disagrees with AD: keep the AD data but ask for confirmation.
        return _flag_for_review(
            result,
            'MISMATCH_EMAIL_PRIORITY',
            f"Name mismatch: Input '{name}' vs AD '{ad_user['name']}'",
            'Using email as source of truth'
        )

    # Case 4: only a name was provided.
    matches = fuzzy_match_name(name)

    if not matches:
        result['validated_name'] = name
        return _flag_for_review(result, 'NOT_FOUND', 'Name not found in AD')

    if len(matches) == 1:
        # Single candidate: auto-fill. The candidate dict carries no job
        # title, so take it from the cached AD record to stay consistent with
        # the e-mail based paths.
        match = matches[0]
        cached = s2_ad_cache.get(match['email'], {})
        _fill_from_ad(result, match['email'], {
            'name': match['name'],
            'active': match['active'],
            'dept': match['dept'],
            'jobTitle': cached.get('jobTitle')
        })
        return _accept_if_active(
            result,
            'AUTO_FILLED_EMAIL',
            f"Email auto-filled from AD (match score: {match['score']})"
        )

    # Several candidates: a human has to choose.
    result['validated_name'] = name
    result['review_options'] = matches
    return _flag_for_review(result, 'AMBIGUOUS_NAME', f'Found {len(matches)} possible matches')

# ========== FILE UPLOAD HANDLER ==========

def on_s2_upload_change(change):
    """Show the selected file name and unlock validation when AD data is loaded."""
    uploads = s2_upload.value
    if not uploads or not len(uploads) > 0:
        return

    fname = uploads[0]['name']
    s2_upload_status.value = f"<b style='color:green;'>‚úÖ Selected: {fname}</b>"
    # Validation needs the AD cache; leave the button disabled without it.
    if s2_ad_cache:
        s2_btn_validate.disabled = False

s2_upload.observe(on_s2_upload_change, names='value')

# ========== VALIDATION FUNCTION ==========

def do_stage2_validate(b):
    """Validate every row of the uploaded user list against the AD cache.

    Reads the uploaded file (CSV when the name ends in ``.csv`` in any letter
    case, otherwise Excel), auto-detects the e-mail and name columns, runs
    ``validate_user`` per row and fills ``s2_validated_rows`` and
    ``s2_review_rows``. Rows with both fields empty are counted in the summary
    but left out of both lists. Afterwards the review UI is rendered and the
    save button is enabled. Errors are shown in the output area and status
    line and are logged; they are not re-raised.

    Args:
        b: The button that triggered the run; disabled while processing.
    """
    global s2_input_df, s2_input_filename, s2_validated_rows, s2_review_rows, s2_is_processing

    # Ignore clicks while a run is in progress.
    if s2_is_processing:
        return

    s2_is_processing = True
    b.disabled = True
    s2_output.clear_output()
    s2_validated_rows = []
    s2_review_rows = []

    if not s2_upload.value:
        with s2_output:
            print("‚ùå Please upload a user list file")
        s2_is_processing = False
        b.disabled = False
        return

    try:
        with s2_output:
            print("\n" + "="*70)
            print("üîç Stage 2: Ultra Email/User Validation")
            print("="*70 + "\n")

        # Load file
        f_item = s2_upload.value[0]
        s2_input_filename = f_item['name']

        # Compare the extension case-insensitively so "USERS.CSV" is not
        # handed to the Excel reader.
        if s2_input_filename.lower().endswith('.csv'):
            s2_input_df = pd.read_csv(io.BytesIO(f_item['content']))
        else:
            s2_input_df = pd.read_excel(io.BytesIO(f_item['content']))

        logger_s2.info(f"Loaded input file: {s2_input_filename}, {len(s2_input_df)} rows")

        with s2_output:
            print(f"üìÑ Loaded: {s2_input_filename}")
            print(f"   Rows: {len(s2_input_df)}")
            print(f"   Columns: {list(s2_input_df.columns)}\n")

        # Detect columns
        email_idx, name_idx, email_col, name_col = detect_columns(s2_input_df)

        if email_idx is None or name_idx is None:
            raise Exception("Could not detect email and name columns")

        with s2_output:
            print(f"üìã Auto-detected columns:")
            print(f"   Email column: '{email_col}' (column {email_idx})")
            print(f"   Name column: '{name_col}' (column {name_idx})\n")

        logger_s2.info(f"Detected columns: Email='{email_col}', Name='{name_col}'")

        # Validate each user
        stats = {cat: 0 for cat in CATEGORIES.keys()}

        with s2_output:
            print("üîç Validating users...")
            print()

        for _, row in s2_input_df.iterrows():
            validated, needs_review = validate_user(row[email_col], row[name_col])

            status = validated['validation_status']
            # Count every outcome, including skipped rows, so the summary can
            # report how many rows were dropped as empty.
            stats[status] += 1
            if status == 'BOTH_EMPTY':
                continue

            # Carry the remaining input columns through to the output.
            for col in s2_input_df.columns:
                if col not in [email_col, name_col]:
                    validated[col] = row[col]

            s2_validated_rows.append(validated)
            if needs_review:
                s2_review_rows.append(validated)

        # Display statistics
        with s2_output:
            print("="*70)
            print("üìä Validation Summary")
            print("="*70)
            for cat, count in stats.items():
                if count > 0:
                    print(f"{CATEGORIES[cat]:40} {count:>5}")
            print("="*70)
            print(f"Total validated: {len(s2_validated_rows)}")
            print(f"Need review: {len(s2_review_rows)}")
            print("="*70 + "\n")

        logger_s2.info(f"Validation complete: {len(s2_validated_rows)} validated, {len(s2_review_rows)} need review")

        # Render review UI if needed
        if s2_review_rows:
            render_review_ui()
        else:
            s2_review_container.children = [
                widgets.HTML("<h3 style='color:green;'>‚úÖ All users validated successfully! No manual review needed.</h3>")
            ]

        s2_btn_save.disabled = False
        s2_status.value = f"<span style='color:green;'>‚úÖ Validation complete: {len(s2_validated_rows)} users</span>"

    except Exception as e:
        with s2_output:
            print(f"\n‚ùå Error: {str(e)}")
        logger_s2.error(f"Validation error: {str(e)}", exc_info=True)
        s2_status.value = f"<span style='color:red;'>‚ùå Error: {str(e)}</span>"
    finally:
        # Always release the guard and re-enable the button.
        s2_is_processing = False
        b.disabled = False

# ========== MANUAL REVIEW UI ==========

def render_review_ui():
    """Fill the review container with a banner and one card per review row.

    With no rows pending review the container is simply emptied.
    """
    if not s2_review_rows:
        s2_review_container.children = []
        return

    # Banner showing how many items are waiting.
    banner = widgets.HTML(f"""
        <div style='background:linear-gradient(135deg,#f093fb 0%,#f5576c 100%);color:white;padding:16px;border-radius:8px;margin:16px 0;'>
            <h3 style='margin:0 0 8px 0;'>‚ö†Ô∏è Manual Review Required ({len(s2_review_rows)} items)</h3>
            <p style='margin:0;opacity:0.9;'>Review and edit the suggestions below before saving</p>
        </div>
    """)

    # One card per row; the position in the list is the card's index.
    cards = [
        create_review_row_widget(position, review_row)
        for position, review_row in enumerate(s2_review_rows)
    ]

    s2_review_container.children = [banner, *cards]

def create_review_row_widget(idx, row):
    """Build the review card for one row that needs manual attention.

    The card shows a coloured status badge, the validation notes, the original
    input values, editable e-mail/name fields (plus a candidate dropdown for
    ambiguous names) and Keep/Skip buttons. Edits and button clicks are
    written straight into ``s2_review_rows[idx]``.

    Args:
        idx: Position of ``row`` in ``s2_review_rows``.
        row: The validated-row dict to present.

    Returns:
        A ``widgets.VBox`` containing the card.
    """
    # Local import: values from the uploaded file are interpolated into HTML
    # below and must be escaped first.
    import html

    # Status badge
    status = row['validation_status']
    status_label = str(CATEGORIES.get(status, status))

    # The severity marker is the prefix of the *label*; the status key itself
    # (e.g. 'NOT_FOUND') never starts with one.
    if status_label.startswith('‚ùå'):
        badge_color = '#dc3545'
    elif status_label.startswith('‚ö†Ô∏è'):
        badge_color = '#ffc107'
    else:
        badge_color = '#28a745'

    notes_html = ' ‚Ä¢ '.join(html.escape(str(note)) for note in row['validation_notes'])
    original_email_html = html.escape(str(row['original_email']))
    original_name_html = html.escape(str(row['original_name']))

    # Build UI
    status_html = widgets.HTML(f"""
        <div style='background:{badge_color};color:white;padding:8px 12px;border-radius:4px;font-weight:600;margin-bottom:8px;'>
            {status_label}
        </div>
        <div style='font-size:0.9em;color:#666;margin-bottom:12px;'>
            {notes_html}
        </div>
    """)

    # Input fields
    input_html = widgets.HTML(f"""
        <div style='display:grid;grid-template-columns:1fr 1fr;gap:12px;padding:8px;background:#f8f9fa;border-radius:4px;margin-bottom:8px;'>
            <div>
                <b>Original Email:</b><br>
                <code>{original_email_html}</code>
            </div>
            <div>
                <b>Original Name:</b><br>
                <code>{original_name_html}</code>
            </div>
        </div>
    """)

    # Editable fields
    email_input = widgets.Text(
        value=row.get('validated_email', '') or '',
        description='Email:',
        layout=widgets.Layout(width='45%')
    )

    name_input = widgets.Text(
        value=row.get('validated_name', '') or '',
        description='Name:',
        layout=widgets.Layout(width='45%')
    )

    # Handle ambiguous name case: offer the candidates in a dropdown.
    if status == 'AMBIGUOUS_NAME' and row.get('review_options'):
        options_list = []
        for opt in row['review_options']:
            label = f"{opt['name']} ({opt['email']}) - {opt['dept']} [Score: {opt['score']}]"
            options_list.append((label, opt))

        dropdown = widgets.Dropdown(
            options=[('-- Select --', None)] + options_list,
            description='Select:',
            layout=widgets.Layout(width='90%')
        )

        # Defaults bind the current values so each card keeps its own targets.
        def on_select_change(change, idx=idx, email_input=email_input, name_input=name_input):
            if change['new']:
                selected = change['new']
                email_input.value = selected['email']
                name_input.value = selected['name']
                s2_review_rows[idx]['validated_email'] = selected['email']
                s2_review_rows[idx]['validated_name'] = selected['name']

        dropdown.observe(on_select_change, 'value')

        fields = widgets.VBox([
            dropdown,
            widgets.HBox([email_input, name_input])
        ])
    else:
        fields = widgets.HBox([email_input, name_input])

    # Update handlers: mirror manual edits into the review row.
    def update_email(change, idx=idx):
        s2_review_rows[idx]['validated_email'] = change['new']

    def update_name(change, idx=idx):
        s2_review_rows[idx]['validated_name'] = change['new']

    email_input.observe(update_email, 'value')
    name_input.observe(update_name, 'value')

    # Action buttons
    keep_btn = widgets.Button(description='‚úì Keep', button_style='success', layout=widgets.Layout(width='100px'))
    skip_btn = widgets.Button(description='‚úó Skip', button_style='danger', layout=widgets.Layout(width='100px'))

    # Choosing one action resets the other button, so the card always shows
    # the decision that is actually stored and the choice can be reversed.
    def on_keep(b, idx=idx):
        s2_review_rows[idx]['action'] = 'keep'
        keep_btn.description = '‚úì Kept'
        keep_btn.disabled = True
        skip_btn.description = '‚úó Skip'
        skip_btn.disabled = False

    def on_skip(b, idx=idx):
        s2_review_rows[idx]['action'] = 'skip'
        skip_btn.description = '‚úó Skipped'
        skip_btn.disabled = True
        keep_btn.description = '‚úì Keep'
        keep_btn.disabled = False

    keep_btn.on_click(on_keep)
    skip_btn.on_click(on_skip)

    actions = widgets.HBox([keep_btn, skip_btn])

    # Container
    container = widgets.VBox([
        status_html,
        input_html,
        fields,
        actions
    ], layout=widgets.Layout(
        border='1px solid #ddd',
        border_radius='8px',
        padding='16px',
        margin='8px 0'
    ))

    return container

# ========== SAVE FUNCTION ==========

def do_stage2_save(b):
    """Export the validated Stage 2 rows to an Excel workbook in ``STAGE2_DIR``.

    Rows whose ``action`` was set to ``'skip'`` during manual review are left
    out. The review-only helper columns (``needs_review``, ``review_options``,
    ``action``) are dropped, the main identity/status columns are moved to the
    front, and list-valued ``validation_notes`` are joined with ``'; '``.

    Args:
        b: The save button that triggered the callback. It is disabled while
            the export runs and re-enabled afterwards, even on failure.
    """
    if not s2_validated_rows:
        with s2_output:
            print("‚ùå No validated data to save")
        return

    b.disabled = True

    try:
        # Apply review changes: drop rows the reviewer skipped. The cheap
        # 'action' test runs first so the list-membership scan only happens
        # for rows that are actually marked as skipped.
        final_rows = [
            row for row in s2_validated_rows
            if not (row.get('action') == 'skip' and row in s2_review_rows)
        ]

        # Build DataFrame
        df_output = pd.DataFrame(final_rows)

        # Reorder columns: priority columns first, everything else after,
        # minus the columns that only exist to drive the review UI.
        priority_cols = [
            'original_email', 'original_name',
            'validated_email', 'validated_name',
            'ad_status', 'validation_status',
            'validation_notes', 'department', 'job_title'
        ]
        helper_cols = {'needs_review', 'review_options', 'action'}

        other_cols = [
            c for c in df_output.columns
            if c not in priority_cols and c not in helper_cols
        ]
        final_cols = [c for c in priority_cols if c in df_output.columns] + other_cols
        df_output = df_output[final_cols]

        # Convert validation_notes list to string
        if 'validation_notes' in df_output.columns:
            df_output['validation_notes'] = df_output['validation_notes'].apply(
                lambda x: '; '.join(x) if isinstance(x, list) else x
            )

        # Generate filename. Only a trailing .csv/.xlsx extension is removed
        # (case-insensitively); the same text elsewhere in the name is kept.
        stem, ext = os.path.splitext(s2_input_filename)
        base_name = stem if ext.lower() in ('.csv', '.xlsx') else s2_input_filename
        date_str = datetime.now().strftime('%Y%m%d')
        output_filename = f"{base_name}_User_Listing_{date_str}_AD_verified.xlsx"
        output_path = os.path.join(STAGE2_DIR, output_filename)

        # Save (make sure the target directory is there first)
        os.makedirs(STAGE2_DIR, exist_ok=True)
        df_output.to_excel(output_path, index=False, sheet_name='Validated Users')

        with s2_output:
            print("\n" + "="*70)
            print("üíæ File Saved")
            print("="*70)
            print(f"Location: {output_path}")
            print(f"Rows: {len(df_output)}")
            print(f"Columns: {list(df_output.columns[:9])}...")
            print("="*70)

        s2_status.value = f"<span style='color:blue;'>‚úÖ Saved: {output_filename}</span>"
        logger_s2.info(f"Saved validated file: {output_path}")

    except Exception as e:
        # Top-level UI boundary: report to the user and keep the traceback in the log.
        with s2_output:
            print(f"\n‚ùå Save error: {str(e)}")
        logger_s2.error(f"Save error: {str(e)}", exc_info=True)
    finally:
        b.disabled = False

# ========== BIND EVENTS ==========
# Wire the two Stage 2 buttons to their click handlers.
s2_btn_validate.on_click(do_stage2_validate)
s2_btn_save.on_click(do_stage2_save)

# ========== INITIALIZE ==========
# load_ad_cache() reports a (success flag, message) pair; the message is shown
# in the status line (green on success, orange otherwise) and written to the log.
success, msg = load_ad_cache()
if success:
    s2_status.value = f"<span style='color:green;'>‚úÖ {msg}</span>"
    logger_s2.info(msg)
else:
    s2_status.value = f"<span style='color:orange;'>‚ö†Ô∏è {msg}</span>"
    logger_s2.warning(msg)

# ========== UI LAYOUT ==========
# Top to bottom: banner, upload row, action buttons, status line,
# manual-review container, and the text output area.
stage2_ui = widgets.VBox([
    widgets.HTML("""
        <div style='
            background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
            padding: 20px;
            border-radius: 8px;
            color: white;
            margin-bottom: 20px;
        '>
            <h2 style='margin: 0 0 10px 0;'>üîç Stage 2: Ultra Email/User Validation</h2>
            <p style='margin: 0; opacity: 0.9;'>
                ‚ú® Auto-detect columns ‚Ä¢ üéØ Smart matching ‚Ä¢ ‚ö†Ô∏è Edge case handling ‚Ä¢ ‚úèÔ∏è Manual review UI
            </p>
        </div>
    """),
    widgets.HBox([s2_upload, s2_upload_status]),
    widgets.HBox([s2_btn_validate, s2_btn_save]),
    s2_status,
    s2_review_container,
    s2_output
])

# Drop whatever this cell printed so far, then show the assembled UI.
clear_output()
display(stage2_ui)

logger_s2.info("Stage 2 Ultra Validation UI initialized")
logger_s2.info("="*60)


In [None]:
# === Cell 3: Enhanced Hybrid Logic (v6.5 with Auto-Resolve) ===

# --- 1. UI Styling ---
# CSS injected once into the notebook output; it tightens fonts of the
# built-in widget classes and defines the aer-* helper classes.
style_html = """
<style>
    .aer-row { border-bottom: 1px solid #e0e0e0; padding: 5px 0; align-items: center; }
    .aer-header { font-weight: bold; background-color: #f0f0f0; padding: 8px 0; border-bottom: 2px solid #ccc; }
    .aer-cell { overflow: hidden; white-space: nowrap; text-overflow: ellipsis; }
    .widget-label { font-size: 11px; font-weight: 600; }
    .widget-dropdown select, .widget-text input { font-size: 11px; }
</style>
"""
display(HTML(style_html))

# --- 2. Widgets ---
up_list = widgets.FileUpload(accept='.xlsx, .csv', description="1. User List", button_style='info')
txt_list = widgets.HTML(value="<i>No file selected</i>", layout=widgets.Layout(margin='0 10px'))
up_map = widgets.FileUpload(accept='.csv', description="2. Reviewer Map", button_style='info')

# Emoji are written as single \U escapes: a "\ud83d\ude80"-style pair would
# create two lone surrogates in a Python str, which cannot be UTF-8 encoded.
btn_process = widgets.Button(description="\U0001f680 Step 1: Analyze", button_style='warning', layout=widgets.Layout(width='180px'))
# Save stays disabled until an analysis has completed.
btn_save = widgets.Button(description="\U0001f4be Step 2: Save", button_style='success', layout=widgets.Layout(width='180px'), disabled=True)

# v6.5 New Widgets for Resolver
resolver_area = widgets.Output()  # missing-email resolver UI
out_area = widgets.Output()       # analysis progress, review table and messages

# Global State
review_registry = []   # one entry per user that needs manual review
processed_df = None    # auto-matched users from the last analysis
current_fname = ""     # name of the uploaded user list, reused for the output file
mapping_data = {"emails": {}, "depts": {}, "all_depts": [], "all_branches": []}
match_stats = {"email_match": 0, "dept_match": 0, "no_match": 0, "email_invalid": 0}
pending_df = None # Holds the dataframe while waiting for user to fix emails
pending_col_map = {} # Holds column names

# --- 3. Helper Functions ---
def get_latest_map():
    """Return the most recently modified CSV file in ``MAPPING_DIR``.

    Returns:
        Path of the newest ``*.csv`` (by modification time), or ``None`` when
        the directory contains no CSV files or a file cannot be inspected.
    """
    try:
        files = glob.glob(os.path.join(MAPPING_DIR, "*.csv"))
        return max(files, key=os.path.getmtime) if files else None
    except OSError:
        # A file can disappear or be unreadable between glob() and getmtime();
        # treat that as "no default map" but let programming errors surface.
        return None

def on_list_file_change(c):
    """Show the name of the uploaded user list and clear stale output.

    Args:
        c: Change notification from ``up_list``. It is not inspected; the
            current ``up_list.value`` is read instead.
    """
    from html import escape  # local import: only this handler needs it

    if not up_list.value or len(up_list.value) == 0:
        return

    try:
        fname = up_list.value[0]['name']
    except (IndexError, KeyError, TypeError):
        # The upload payload did not have the expected shape; say so instead
        # of silently leaving the previous label in place.
        txt_list.value = "<i>Could not read the uploaded file name</i>"
        return

    # The file name comes from the user's machine, so escape it before it is
    # embedded in HTML.
    txt_list.value = f"<b style='color:green;'>\u2705 Selected: {escape(str(fname))}</b>"
    resolver_area.clear_output() # Clear previous resolver if new file
    out_area.clear_output()

# Refresh the file-name label (and clear old output) whenever a user list is uploaded.
up_list.observe(on_list_file_change, 'value')

# The default reviewer map is resolved once, when this cell runs. txt_map shows
# its file name, or a "No map found" hint when get_latest_map() returned nothing.
map_path = get_latest_map()
txt_map = widgets.HTML(value=f"<b style='color:green;'>\u2705 Default: {os.path.basename(map_path)}</b>" if map_path else "<i>No map found</i>", layout=widgets.Layout(margin='0 10px'))

def identify_columns_smart(df):
    """Guess which of the first two columns holds emails and which holds names.

    The first 20 rows are inspected; a cell counts as email-like when it
    contains both ``'@'`` and ``'.'``. Ties go to the first column.

    Args:
        df: The uploaded user list.

    Returns:
        ``(email_column, name_column)``. With fewer than two columns the
        first column is returned for both.
    """
    columns = df.columns
    if len(columns) < 2:
        only = columns[0]
        return only, only

    first, second = columns[0], columns[1]
    preview = df.head(20).fillna('').astype(str)

    def email_hits(column):
        # Number of sampled cells in `column` that look like an address.
        return sum('@' in cell and '.' in cell for cell in preview[column])

    first_hits = email_hits(first)
    second_hits = email_hits(second)
    if first_hits >= second_hits:
        return first, second
    return second, first

def detect_map_column(df, candidates):
    """Find the first column whose name contains one of the candidate keywords.

    Column names are compared lower-cased and stripped. Candidates are tried
    in the order given, so an earlier keyword wins over a later one even if
    the later one matches a column further to the left.

    Args:
        df: The mapping table.
        candidates: Lower-case keywords, most preferred first.

    Returns:
        The original column label of the first match, or ``None``.
    """
    normalized = [(label, str(label).lower().strip()) for label in df.columns]
    matches = (
        label
        for keyword in candidates
        for label, lowered in normalized
        if keyword in lowered
    )
    return next(matches, None)

# --- 4. Logic: Analysis with Enhanced Features ---
def do_process(b):
    """Handle "Step 1: Analyze": load the user list and start the analysis.

    The uploaded file is read as CSV when its name ends in ``.csv`` (any
    letter case) and as Excel otherwise. If any row has a missing or blank
    email, the frame is parked in ``pending_df`` and the resolver UI is
    shown; otherwise the main analysis runs straight away.

    Args:
        b: The clicked button; disabled while the handler runs.
    """
    global pending_df, pending_col_map, current_fname
    out_area.clear_output()
    resolver_area.clear_output()

    if not up_list.value or len(up_list.value) == 0:
        with out_area: print("\u274c Error: Please upload a user list.")
        return

    b.disabled = True
    try:
        # Load User List
        f_item = up_list.value[0]
        current_fname = f_item['name']
        payload = io.BytesIO(f_item['content'])
        # Compare case-insensitively so "USERS.CSV" is not handed to read_excel.
        if current_fname.lower().endswith('.csv'):
            df_u = pd.read_csv(payload)
        else:
            df_u = pd.read_excel(payload)
        col_email, col_name = identify_columns_smart(df_u)

        # v6.5: Check for missing emails
        missing_mask = df_u[col_email].isna() | (df_u[col_email].astype(str).str.strip() == '')
        missing_count = missing_mask.sum()

        if missing_count > 0:
            # Trigger Resolver Flow; the analysis resumes from its confirm button.
            pending_df = df_u
            pending_col_map = {'email': col_email, 'name': col_name}
            show_resolver_ui(df_u[missing_mask], col_name, col_email)
            return

        # If clean, proceed to main logic
        execute_main_logic(df_u, col_email, col_name)

    except Exception as e:
        # Top-level UI boundary: show the message, keep the traceback in the log.
        with out_area: print(f"\u274c Error: {str(e)}")
        logger.error(f"Process error: {str(e)}", exc_info=True)
    finally:
        # Single place that re-enables the button for every exit path.
        b.disabled = False

def show_resolver_ui(missing_df, col_name, col_email):
    """v6.5: Display the widget table used to fill in missing emails.

    For every row a suggestion is looked up in ``ad_directory_cache`` by the
    lower-cased name. On confirm, every non-blank text box is written back
    into ``pending_df`` and the main analysis is started.

    Args:
        missing_df: Rows of the user list whose email is missing; its index
            labels are used to write the values back into ``pending_df``.
        col_name: Name column of the user list.
        col_email: Email column of the user list.
    """
    with resolver_area:
        print(f"\u26a0\ufe0f Found {len(missing_df)} rows with missing emails. Syncing Directory...")
        fetch_full_directory() # Ensure we have the map

        # \U escape instead of a surrogate pair, which Python cannot encode as UTF-8.
        print("\U0001f527 Please verify/enter emails below:")

        resolver_widgets = []

        # Header
        # NOTE(review): font_weight is passed to Layout here; confirm it has
        # any visual effect with the installed ipywidgets version.
        header = widgets.HBox([
            widgets.Label("Name (from File)", layout=widgets.Layout(flex='1', font_weight='bold')),
            widgets.Label("Email (Auto-Suggested)", layout=widgets.Layout(flex='1', font_weight='bold')),
            widgets.Label("Status", layout=widgets.Layout(width='100px', font_weight='bold'))
        ], layout=widgets.Layout(border_bottom='2px solid #ccc', padding='5px'))
        display(header)

        for idx, row in missing_df.iterrows():
            raw_name = str(row[col_name]).strip()
            clean_name = raw_name.lower()

            # Try to find in directory
            suggested_email = ad_directory_cache.get(clean_name, "")
            status_text = "\u2705 Found" if suggested_email else "\u274c Not Found"
            status_color = "green" if suggested_email else "red"

            txt_email = widgets.Text(value=suggested_email, placeholder="Enter email...", layout=widgets.Layout(flex='1'))
            lbl_name = widgets.Label(raw_name, layout=widgets.Layout(flex='1'))
            lbl_status = widgets.HTML(f"<span style='color:{status_color}'>{status_text}</span>", layout=widgets.Layout(width='100px'))

            row_box = widgets.HBox([lbl_name, txt_email, lbl_status], layout=widgets.Layout(border_bottom='1px solid #eee', padding='2px'))
            resolver_widgets.append({'idx': idx, 'widget': txt_email, 'box': row_box})
            display(row_box)

        btn_confirm = widgets.Button(description="\u2714 Confirm & Continue", button_style='success', layout=widgets.Layout(margin='10px 0'))

        def on_confirm(b):
            global pending_df
            # A column with no emails at all is read as float64; make it an
            # object column so the typed-in strings can be stored in it.
            if pending_df[col_email].dtype != object:
                pending_df[col_email] = pending_df[col_email].astype(object)

            # Update dataframe (blank boxes leave the row untouched)
            for item in resolver_widgets:
                val = item['widget'].value.strip()
                if val:
                    pending_df.at[item['idx'], col_email] = val

            resolver_area.clear_output()
            with out_area: print("\u2705 Missing emails resolved. Resuming analysis...")
            execute_main_logic(pending_df, col_email, col_name)

        btn_confirm.on_click(on_confirm)
        display(btn_confirm)

def _build_review_row(raw_name, raw_email, ad, reviewer):
    """Build the manual-review widget row for a user without a usable reviewer.

    Args:
        raw_name: User name as read from the uploaded list.
        raw_email: Lower-cased, stripped user email.
        ad: AD lookup result; ``status``, ``dept`` and ``active`` are read,
            ``name`` is optional.
        reviewer: Placeholder text shown in the editable reviewer field.

    Returns:
        ``(row_box, entry)``: the ``HBox`` to display and the dict stored in
        ``review_registry`` (it carries the checkbox and reviewer text
        widgets under ``'chk'`` and ``'res'``).
    """
    status = ad['status']
    is_active = ad['active'] is True
    # "Not Found" contains the substring "Found", so it must be excluded
    # explicitly; otherwise missing accounts are treated as existing ones.
    found = "Found" in status and "Not Found" not in status
    remove_default = not found

    if is_active:
        stat_html = f'<span style="color:green;font-weight:bold;">\u2713 {ad.get("name","N/A")}</span> | {ad["dept"]}'
    elif not found:
        stat_html = '<span style="color:red;font-weight:bold;">\u2717 Not Found</span>'
    else:
        stat_html = f'<span style="color:orange;font-weight:bold;">\u26a0 {ad.get("name","N/A")}</span> | {ad["dept"]}'

    chk = widgets.Checkbox(value=remove_default, indent=False, layout=widgets.Layout(width='40px'))
    tgl = widgets.ToggleButton(value=False, description='B', tooltip='Dept Mode', layout=widgets.Layout(width='35px', height='28px'))
    drp = widgets.Dropdown(options=mapping_data['all_depts'], value="", layout=widgets.Layout(flex='2', height='28px'))
    res_txt = widgets.Text(value=reviewer, layout=widgets.Layout(flex='1'), continuous_update=False)

    def on_tgl(change):
        # Toggle switches the dropdown between the branch and department lists.
        if change['new']:
            drp.options = mapping_data['all_branches']
            tgl.button_style = 'success'
            tgl.description = '\U0001f333'
        else:
            drp.options = mapping_data['all_depts']
            tgl.button_style = ''
            tgl.description = 'B'
        drp.value = ""

    def on_drp(change):
        # Picking a department/branch fills in its reviewer; the user's own
        # email mapping is the fallback.
        val = str(change['new']).strip()
        if val:
            matched = mapping_data['depts'].get(val.lower())
            if not matched:
                matched = mapping_data['emails'].get(raw_email)
            if matched:
                res_txt.value = matched

    tgl.observe(on_tgl, names='value')
    drp.observe(on_drp, names='value')

    row_box = widgets.HBox(
        [
            chk,
            widgets.Box([widgets.Label(raw_name)], layout=widgets.Layout(flex='2')),
            widgets.Box([widgets.Label(raw_email)], layout=widgets.Layout(flex='2')),
            widgets.Box([widgets.HTML(stat_html)], layout=widgets.Layout(flex='2')),
            tgl,
            widgets.Box([drp], layout=widgets.Layout(flex='2')),
            widgets.Box([res_txt], layout=widgets.Layout(flex='1')),
        ],
        layout=widgets.Layout(border='1px solid #e0e0e0', padding='8px 4px', margin='2px 0', border_radius='4px', background='#fafafa'),
    )

    def on_check_change(change):
        # Rows marked for removal get a red tint and border.
        row_box.layout.background = '#ffebee' if change['new'] else '#fafafa'
        row_box.layout.border = '1px solid #ef5350' if change['new'] else '1px solid #e0e0e0'

    chk.observe(on_check_change, names='value')

    entry = {'email': raw_email, 'name': raw_name, 'ad_name': ad.get('name', 'N/A'), 'ad_dept': ad['dept'], 'ad_status': ad['status'], 'is_active': is_active, 'reviewer': reviewer, 'chk': chk, 'res': res_txt}
    return row_box, entry


def execute_main_logic(df_u, col_email, col_name):
    """Assign a reviewer to every user and build the manual-review table.

    Steps: load the reviewer map (uploaded file first, default map second),
    look every email up in AD, then match each user by email and, failing
    that, by AD department. Matched users go to ``processed_df``; the rest
    get an editable row in ``out_area`` and an entry in ``review_registry``.

    Args:
        df_u: The user list.
        col_email: Email column of ``df_u``.
        col_name: Name column of ``df_u``.
    """
    global processed_df, review_registry, mapping_data, match_stats

    review_registry = []
    match_stats = {"email_match": 0, "dept_match": 0, "no_match": 0, "email_invalid": 0}

    try:
        logger.info("--- Starting Analysis (v6.5) ---")

        # A. Load Map
        m_src = None
        if up_map.value and len(up_map.value) > 0:
            m_src = io.BytesIO(up_map.value[0]['content'])
        elif map_path:
            m_src = map_path

        if not m_src:
            with out_area: print("\u274c Error: No Mapping CSV found!")
            return

        df_m = pd.read_csv(m_src)
        col_m_email = detect_map_column(df_m, ["email", "mail"])
        col_m_dept = detect_map_column(df_m, ["department", "dept"])
        col_m_rev = detect_map_column(df_m, ["reviewer", "owner", "manager"])
        col_m_br = detect_map_column(df_m, ["branch", "category", "type"])

        if not col_m_rev or not col_m_dept:
            with out_area: print(f"\u274c Error: Map needs 'Department' and 'Reviewer'. Found: {list(df_m.columns)}")
            return

        # B. Build Mapping (keys are lower-cased and stripped)
        mapping_data['emails'] = {}
        if col_m_email:
            mapping_data['emails'] = dict(zip(df_m[col_m_email].astype(str).str.lower().str.strip(), df_m[col_m_rev]))
        mapping_data['depts'] = dict(zip(df_m[col_m_dept].astype(str).str.lower().str.strip(), df_m[col_m_rev]))

        # Split Branch vs Dept. Both paths convert to str first so a blank or
        # numeric department cell cannot break sorted() or .lower() below.
        if col_m_br:
            unique_locs = df_m[[col_m_dept, col_m_br]].drop_duplicates()
            is_branch = unique_locs[col_m_br].astype(str).str.lower().str.contains("branch", na=False)
            list_branches = sorted(unique_locs[is_branch][col_m_dept].astype(str).unique())
            list_depts = sorted(unique_locs[~is_branch][col_m_dept].astype(str).unique())
        else:
            list_branches = []
            list_depts = sorted(df_m[col_m_dept].astype(str).unique())

        mapping_data['all_branches'] = [("Select Branch...", "")] + [(d, d.lower()) for d in list_branches]
        mapping_data['all_depts'] = [("Select Dept...", "")] + [(d, d.lower()) for d in list_depts]

        # C. AD Fetch
        emails = df_u[col_email].dropna().unique().tolist()
        total_emails = len(emails)

        with out_area:
            print(f"\U0001f504 Syncing {total_emails} emails with AD...")
            progress = widgets.IntProgress(value=0, min=0, max=total_emails, description='AD Sync:', bar_style='info', layout=widgets.Layout(width='80%'))
            progress_label = widgets.HTML(value=f"0 / {total_emails}")
            display(widgets.HBox([progress, progress_label]))

        # Lookups run in the pool; the progress widgets are only touched from
        # this thread as results arrive, so no counter is shared between workers.
        results = []
        with ThreadPoolExecutor(max_workers=25) as ex:
            for done, result in enumerate(ex.map(fetch_ad, emails), start=1):
                results.append(result)
                progress.value = done
                progress_label.value = f"{done} / {total_emails}"

        # Index by the email each result reports and, where that leaves a gap,
        # by the normalised email that was queried: the rows below are looked
        # up by the normalised form.
        ad_results = {}
        for queried, result in zip(emails, results):
            ad_results[result['email']] = result
            ad_results.setdefault(str(queried).strip().lower(), result)

        # D. Processing Loop
        final_rows = []
        widget_rows = []

        header_box = widgets.HBox([
            widgets.Label("Remove", layout=widgets.Layout(width='60px', font_weight='bold', font_size='12px')),
            widgets.Label("User Name", layout=widgets.Layout(flex='2', font_weight='bold', font_size='12px')),
            widgets.Label("User Email", layout=widgets.Layout(flex='2', font_weight='bold', font_size='12px')),
            widgets.Label("AD Status", layout=widgets.Layout(flex='2', font_weight='bold', font_size='12px')),
            widgets.Label("Mode", layout=widgets.Layout(width='40px', font_weight='bold', font_size='12px')),
            widgets.Label("Mapping", layout=widgets.Layout(flex='2', font_weight='bold', font_size='12px')),
            widgets.Label("Reviewer", layout=widgets.Layout(flex='1', font_weight='bold', font_size='12px')),
        ], layout=widgets.Layout(border_bottom='3px solid #1976d2', padding='10px 5px', background='linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%)', border_radius='4px 4px 0 0'))

        for _, row in df_u.iterrows():
            raw_email = str(row[col_email]).strip().lower()
            raw_name = str(row[col_name]).strip()

            ad = ad_results.get(raw_email, {"status": "Not Found", "dept": "N/A", "active": "N/A"})

            reviewer = None
            match_method = ""

            # 1) Direct email mapping, rejected when the mapped name is not valid.
            if raw_email in mapping_data['emails']:
                reviewer = mapping_data['emails'][raw_email]
                match_method = "email"
                if not is_valid_reviewer_name(reviewer):
                    reviewer = None
                    match_method = "email_invalid"
                    match_stats['email_invalid'] += 1
                else:
                    match_stats['email_match'] += 1

            # 2) Department mapping, using the part of the AD department after
            #    the last " - ". Skipped when the email mapping was invalid.
            if not reviewer:
                clean_dept = str(ad['dept']).split(" - ")[-1].strip().lower()
                dept_reviewer = mapping_data['depts'].get(clean_dept)
                if dept_reviewer and match_method != "email_invalid":
                    reviewer = dept_reviewer
                    match_method = "department"
                    match_stats['dept_match'] += 1

            is_problem = not reviewer or str(reviewer).strip().lower() in ["nan", "none", "", "null", "()"]

            if is_problem:
                reviewer = "(please manual review)"
                if match_method != "email_invalid": match_stats['no_match'] += 1

                row_box, entry = _build_review_row(raw_name, raw_email, ad, reviewer)
                widget_rows.append(row_box)
                review_registry.append(entry)
            else:
                final_rows.append({'User Name': raw_name, 'User Email': raw_email, 'AD Name': ad.get('name', 'N/A'), 'Department': ad['dept'], 'Reviewer': reviewer, 'AD Status': ad['status']})

        processed_df = pd.DataFrame(final_rows)

        if widget_rows:
            active_count = sum(1 for r in review_registry if r['is_active'])
            no_account_count = sum(1 for r in review_registry if "Not Found" in r['ad_status'])
            inactive_count = len(review_registry) - active_count - no_account_count

            summary_html = f"<div style='background: linear-gradient(135deg, #e8eaf6 0%, #c5cae9 100%); padding: 15px; margin: 10px 0 20px 0; border-radius: 8px; border-left: 5px solid #3f51b5; box-shadow: 0 2px 8px rgba(0,0,0,0.1);'><h4 style='margin: 0 0 10px 0; color: #3f51b5;'>\U0001f4ca Manual Review Summary</h4><div style='display: flex; justify-content: space-around;'><div style='text-align: center;'><div style='font-size: 24px; font-weight: bold; color: #4caf50;'>{active_count}</div><div style='font-size: 11px; color: #666;'>Active</div></div><div style='text-align: center;'><div style='font-size: 24px; font-weight: bold; color: #f44336;'>{no_account_count}</div><div style='font-size: 11px; color: #666;'>No Account</div></div><div style='text-align: center;'><div style='font-size: 24px; font-weight: bold; color: #ffc107;'>{inactive_count}</div><div style='font-size: 11px; color: #666;'>Inactive</div></div><div style='text-align: center;'><div style='font-size: 24px; font-weight: bold; color: #2196f3;'>{len(widget_rows)}</div><div style='font-size: 11px; color: #666;'>Total</div></div></div></div>"

            with out_area:
                print(f"\n\U0001f50d Manual Review Required: {len(review_registry)} users")
                # Summary card first, so the column header sits directly on
                # top of the rows it labels.
                display(widgets.HTML(summary_html))
                display(header_box)
                for w in widget_rows: display(w)
        else:
            with out_area: print(f"\u2705 All {len(final_rows)} users matched!")

        btn_save.disabled = False
    except Exception as e:
        # Top-level UI boundary: show the message, keep the traceback in the log.
        with out_area: print(f"\u274c Error: {str(e)}")
        logger.error(f"Process error: {str(e)}", exc_info=True)
    finally:
        # Also covers the early returns above and the resolver's confirm path.
        btn_process.disabled = False

# --- 5. Logic: Save ---
def do_save(b):
    """Handle "Step 2: Save": write matched and manually reviewed users to Excel.

    Manual-review rows whose "Remove" checkbox is ticked are left out; for
    the others the reviewer is taken from the row's text field. The workbook
    is written to ``BASE_DIR`` as ``<input name>_review_<timestamp>.xlsx``.

    Args:
        b: The clicked button; disabled while the save runs.
    """
    if not review_registry and processed_df is None:
        with out_area: print("\u274c Error: No data to save.")
        return
    b.disabled = True
    try:
        manual = [
            {'User Name': r['name'], 'User Email': r['email'], 'AD Name': r['ad_name'], 'Department': r['ad_dept'], 'Reviewer': r['res'].value, 'AD Status': r['ad_status']}
            for r in review_registry
            if not r['chk'].value
        ]

        # Concatenate only frames that hold rows; feeding empty frames to
        # concat is deprecated in recent pandas versions.
        frames = [f for f in (processed_df, pd.DataFrame(manual)) if f is not None and not f.empty]
        df_final = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

        # Strip only a trailing .csv/.xlsx extension (case-insensitively); the
        # same text elsewhere in the file name is kept.
        stem, ext = os.path.splitext(current_fname)
        fname_base = stem if ext.lower() in ('.csv', '.xlsx') else current_fname
        fname = f"{fname_base}_review_{datetime.now().strftime('%Y%m%d_%H%M')}.xlsx"
        fpath = os.path.join(BASE_DIR, fname)

        df_final.to_excel(fpath, index=False, sheet_name='Review')

        # Attach an Approved/Denied/Changes Required drop-down to the first
        # column whose header mentions "action" or "decision".
        # NOTE(review): the rows built in this cell only carry 'User Name',
        # 'User Email', 'AD Name', 'Department', 'Reviewer' and 'AD Status',
        # so no column matches and no drop-down is added - confirm whether a
        # decision column is meant to be part of the output.
        col = next(
            (i for i, c in enumerate(df_final.columns, 1)
             if 'action' in str(c).lower() or 'decision' in str(c).lower()),
            None,
        )

        if col and not df_final.empty:
            # Reopen the workbook only when there is something to add.
            wb = load_workbook(fpath)
            ws = wb.active
            dv = DataValidation(type="list", formula1='"Approved,Denied,Changes Required"', allow_blank=True)
            ws.add_data_validation(dv)
            letter = get_column_letter(col)
            dv.add(f"{letter}2:{letter}{len(df_final)+1}")
            wb.save(fpath)

        with out_area: display(HTML(f"<b style='color:blue;'>\u2714 Saved: {fpath}</b>"))
        logger.info(f"\u2705 Saved: {fname}, {len(df_final)} records")
    except Exception as e:
        # Top-level UI boundary: show the message, keep the traceback in the log.
        with out_area: print(f"\u274c Save Error: {str(e)}")
        logger.error(f"Save error: {str(e)}", exc_info=True)
    finally:
        b.disabled = False

# --- 6. Final UI ---
# Wire the two step buttons to their handlers.
btn_process.on_click(do_process)
btn_save.on_click(do_save)

# Top to bottom: title, user-list upload, reviewer-map upload, action buttons,
# missing-email resolver, analysis output.
ui = widgets.VBox([
    # \U escape instead of a surrogate pair, which Python cannot encode as UTF-8.
    widgets.HTML("<h4>\U0001f6e1\ufe0f AER Generator (v6.5 Auto-Resolve)</h4>"),
    widgets.HBox([up_list, txt_list]),
    widgets.HBox([up_map, txt_map]),
    widgets.HBox([btn_process, btn_save]),
    resolver_area,
    out_area,
])

# Drop whatever this cell printed so far, then show the assembled UI.
clear_output()
display(ui)