In [3]:
# ==========================================
# Estrazione email per ticket (EARLY BIRD)
# ==========================================
# Requisiti: pandas
# Se non installato:  !pip install pandas

import pandas as pd
from pathlib import Path
from datetime import datetime
import re

# === PARAMETERS ===
# Path of the sold-tickets CSV to read; the output files are written to the same folder.
csv_path = r"C:\Users\spina\Documents\Other_Codes\7chackras\Documenti\eda_outputs_ListaTicketVenduti_12Nov25\ListaTicketVenduti_12Nov25.csv"

# Set to True to remove duplicate emails (e.g. the same person holding several tickets)
DEDUP_EMAILS = True

# Column names expected in the CSV (from your latest version)
COL_TICKET_TYPE   = "Ticket Type"
COL_ATT_EMAIL     = "Attendee E-mail"
COL_BUYER_EMAIL   = "Buyer E-Mail"
COL_FIRST_NAME    = "First Name"
COL_LAST_NAME     = "Last Name"
COL_NAME_FALLBACK = "Name"            # optional full-name column
COL_PAYMENT_DATE  = "Payment Date"

# === FUNZIONI DI SUPPORTO ===
def parse_payment_date(x: object):
    """Parse a payment-date cell into a ``datetime``.

    The known export formats are tried first with an explicit ``strptime``
    format; anything else goes through ``pd.to_datetime`` with
    ``dayfirst=True``.

    Args:
        x: Raw cell value: a string, a missing value (``None``/NaN), or any
            object whose ``str()`` is a date.

    Returns:
        A ``datetime`` on success, otherwise ``None`` (missing input or a
        value that no parser understands).
    """
    if pd.isna(x):
        return None
    s = str(x).strip()
    # Most frequent formats, each paired with a regex gate so strptime is
    # only attempted on strings that have the right shape.
    patterns = [
        ("%d/%m/%Y - %H:%M", r"^\d{2}/\d{2}/\d{4}\s*-\s*\d{2}:\d{2}$"),
        ("%Y-%m-%dT%H:%M",   r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}$"),
        ("%d/%m/%Y",         r"^\d{2}/\d{2}/\d{4}$"),
        ("%d.%m.%Y",         r"^\d{2}\.\d{2}\.\d{4}$"),
        ("%d.%m.%y",         r"^\d{2}\.\d{2}\.\d{2}$"),
        ("%d/%m/%y",         r"^\d{2}/\d{2}/\d{2}$"),
    ]
    for fmt, rx in patterns:
        if re.match(rx, s):
            try:
                return datetime.strptime(s, fmt)
            except ValueError:
                # Right shape but not a valid date for this format
                # (e.g. 31/02/2025): keep going and let the fallback try.
                continue
    # Fallback: generic day-first parsing.
    try:
        ts = pd.to_datetime(s, dayfirst=True, errors="coerce")
    except (ValueError, TypeError, OverflowError):
        return None
    # errors="coerce" yields NaT rather than raising; map it to None so the
    # function honours its "datetime or None" contract.
    if pd.isna(ts):
        return None
    return ts.to_pydatetime()

def build_attendee_name(row):
    """Build a display name for one ticket row.

    Args:
        row: A mapping-like row (e.g. a pandas Series) exposing ``.get``.

    Returns:
        "First Last" when either part is present, else the value of the
        full-name fallback column, else a visible dash placeholder.
    """
    def _text(col):
        value = row.get(col, "")
        # Missing CSV cells arrive as NaN, which is truthy: without this
        # check it would be rendered as the literal string "nan".
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ""
        return str(value or "").strip()

    fn = _text(COL_FIRST_NAME)
    ln = _text(COL_LAST_NAME)
    if fn or ln:
        return (fn + " " + ln).strip()
    # Fall back to the full-name column when it is present.
    nf = _text(COL_NAME_FALLBACK)
    if nf:
        return nf
    # Visible placeholder when no name information is available at all.
    return "————————————"

def clean_email(s):
    """Normalize an email cell: trimmed and lower-cased, or "" when missing."""
    if not pd.isna(s):
        trimmed = str(s).strip()
        return trimmed.lower()
    return ""

# === READ CSV ===
df = pd.read_csv(csv_path, encoding="utf-8-sig")

# Fail fast when a column the rest of the script relies on is absent.
missing = [c for c in [COL_TICKET_TYPE, COL_ATT_EMAIL, COL_PAYMENT_DATE] if c not in df.columns]
if missing:
    raise ValueError(f"Mancano colonne obbligatorie nel CSV: {missing}")

# === EARLY BIRD FILTER ===
# A ticket counts as EARLY BIRD when its Ticket Type contains "EARLY BIRD" (case-insensitive).
mask_eb = df[COL_TICKET_TYPE].astype(str).str.contains("EARLY BIRD", case=False, na=False)
df_eb = df.loc[mask_eb].copy()

# === DERIVED FIELDS ===
df_eb["AttendeeName"] = df_eb.apply(build_attendee_name, axis=1)

# Email: the ticket holder's address; when it is empty, fall back to the buyer's.
df_eb["EmailTicket"] = df_eb[COL_ATT_EMAIL].apply(clean_email)
if COL_BUYER_EMAIL in df_eb.columns:
    no_email = df_eb["EmailTicket"] == ""
    df_eb.loc[no_email, "EmailTicket"] = df_eb.loc[no_email, COL_BUYER_EMAIL].apply(clean_email)

# Parsed purchase date plus an ISO rendering ("" when the date could not be parsed).
df_eb["PaymentDate_dt"] = df_eb[COL_PAYMENT_DATE].apply(parse_payment_date)
df_eb["PaymentDate_ISO"] = df_eb["PaymentDate_dt"].apply(
    lambda d: d.isoformat(timespec="minutes") if pd.notna(d) else ""
)

# Sort by purchase date, then name; rows without a date go last.
df_eb.sort_values(by=["PaymentDate_dt", "AttendeeName"], inplace=True, na_position="last")

# Columns written to the preview CSV (optional, handy for checking the result).
service_cols = [
    "AttendeeName",
    "EmailTicket",
    "PaymentDate_ISO",
    COL_TICKET_TYPE,
]

# === EMAIL LIST FOR THE TXT FILE ===
# Drop empty strings so the TXT contains only usable addresses.
emails = [e for e in df_eb["EmailTicket"].fillna("").tolist() if e]

# Optional de-duplication; dict.fromkeys keeps the first occurrence and the order.
if DEDUP_EMAILS:
    emails = list(dict.fromkeys(emails))

# === WRITE FILES NEXT TO THE CSV ===
csv_dir = Path(csv_path).parent
# Derive the suffix from the flag so the file name always reflects the setting
# actually used (it was previously hard-coded to "True").
dedup_tag = f"DEDUP_EMAILS{DEDUP_EMAILS}"
txt_out = csv_dir / f"attendee_emails_earlybird_{dedup_tag}.txt"
csv_check = csv_dir / f"attendee_emails_earlybird_preview_{dedup_tag}.csv"

# TXT: one email per line.
with open(txt_out, "w", encoding="utf-8") as f:
    f.writelines(f"{e}\n" for e in emails)

# Preview CSV (useful for auditing).
df_eb.to_csv(csv_check, index=False, encoding="utf-8-sig", columns=[*service_cols])

print(f"✅ Filtrati ticket EARLY BIRD: {len(df_eb)} righe")
print(f"📧 Email salvate in: {txt_out}")
print(f"🧾 Anteprima (con nome/data/tipo): {csv_check}")
# Suggest the opposite setting instead of always saying "change to True".
dedup_hint = (
    "cambia a False per mantenere i duplicati"
    if DEDUP_EMAILS
    else "cambia a True se vuoi email uniche"
)
print(f"ℹ️ DEDUP_EMAILS = {DEDUP_EMAILS} ({dedup_hint})")


✅ Filtrati ticket EARLY BIRD: 88 righe
📧 Email salvate in: C:\Users\spina\Documents\Other_Codes\7chackras\Documenti\eda_outputs_ListaTicketVenduti_12Nov25\attendee_emails_earlybird_DEDUP_EMAILSTrue.txt
🧾 Anteprima (con nome/data/tipo): C:\Users\spina\Documents\Other_Codes\7chackras\Documenti\eda_outputs_ListaTicketVenduti_12Nov25\attendee_emails_earlybird_preview_DEDUP_EMAILSTrue.csv
ℹ️ DEDUP_EMAILS = True (cambia a True se vuoi email uniche)
