# In [None]:
# final.py

import json
import csv
from typing import List, Optional

from google import genai
from google.genai import types
from pydantic import BaseModel, Field


# ============================================================
#  Data Models
# ============================================================

class CompanyAddress(BaseModel):
    """Postal address of a company, used as ``VerificationResult.address``.

    All five components are required strings (no defaults are declared).
    """
    street: str
    city: str
    state: str
    postal_code: str
    country: str


class VerificationResult(BaseModel):
    """Validated outcome of one company lookup.

    Every field except ``verification_sources`` is required and corresponds to a
    key of the JSON object requested in ``CorporateVerifier._prompt``.
    """
    legal_name: str
    name_source_url: str
    address: CompanyAddress
    phone_number: str
    source_type: str
    analysis: str
    # Not part of the prompted JSON: CorporateVerifier.verify fills this from the
    # response's grounding metadata. Defaults to an empty list.
    verification_sources: List[str] = Field(default_factory=list)


class Super7Record(BaseModel):
    """
    Example Super-7 record: one company row plus the verification data attached to it.

    Only ``company_name`` is required; every other field defaults to ``None``
    (or an empty list for ``evidence_urls``). ``save_csv`` takes its header from
    ``model_dump()`` of the first record, so the declaration order below is also
    the CSV column order.

    Modify field names if your actual schema differs.
    """
    company_name: str
    # Read from the input CSV; enrich_single fills it from the verified address when empty.
    country: Optional[str] = None

    # Contact fields read from the input CSV; enrich_single fills each one only when it is empty.
    street_address: Optional[str] = None
    city: Optional[str] = None
    state: Optional[str] = None
    postal_code: Optional[str] = None
    phone_number: Optional[str] = None

    # Verification metadata; enrich_single overwrites these on every call.
    legal_name: Optional[str] = None
    legal_name_source_url: Optional[str] = None
    legal_name_source_type: Optional[str] = None
    verification_analysis: Optional[str] = None
    # Source URLs accumulated by enrich_single, without duplicates.
    evidence_urls: List[str] = Field(default_factory=list)


class CorporateVerificationInput(BaseModel):
    """Company name and country for one lookup; the item type of ``CorporateVerifier.verify_batch``.

    Both fields are required strings.
    """
    company_name: str
    country: str


# ============================================================
#  Corporate Verifier (Gemini Search Grounding)
# ============================================================

class CorporateVerifier:
    """Verify company details through a Gemini model with the Google Search tool enabled.

    Each lookup sends one prompt, parses the JSON reply into a
    ``VerificationResult`` and attaches the web URLs cited in the response's
    grounding metadata as ``verification_sources``.
    """

    def __init__(self, project_id: str, location: str = "us-central1",
                 model_id: str = "gemini-2.5-flash"):
        """Create the API client and remember which model to query.

        Args:
            project_id: Forwarded to ``genai.Client`` as ``project``.
            location: Forwarded to ``genai.Client`` as ``location``.
            model_id: Model name used for every ``generate_content`` call.
        """
        # NOTE(review): project/location look like Vertex AI settings; the client
        # presumably only targets Vertex AI when `vertexai=True` is passed or
        # GOOGLE_GENAI_USE_VERTEXAI is set in the environment -- confirm.
        self.client = genai.Client(project=project_id, location=location)
        self.model_id = model_id

    def _prompt(self, company_name: str, country: str) -> str:
        """Return the instruction text for one company, including the JSON shape to reply with."""
        # Doubled braces are literal braces inside the f-string; the surrounding
        # newlines are removed by .strip().
        return f"""
You are a corporate verification engine.
Return legal name, address, phone, and evidence URLs.

Company: "{company_name}"
Country: "{country}"

Output strictly in JSON:
{{
  "legal_name": "...",
  "name_source_url": "...",
  "address": {{
    "street": "...",
    "city": "...",
    "state": "...",
    "postal_code": "...",
    "country": "..."
  }},
  "phone_number": "...",
  "source_type": "...",
  "analysis": "..."
}}
""".strip()

    @staticmethod
    def _grounding_urls(response) -> List[str]:
        """Return the distinct web URIs cited by the first candidate, in first-seen order.

        Missing or ``None`` candidates, grounding metadata, chunks or ``web``
        entries simply yield no URLs, so citation extraction stays best-effort
        without hiding unrelated exceptions.
        """
        candidates = getattr(response, "candidates", None) or []
        if not candidates:
            return []

        metadata = getattr(candidates[0], "grounding_metadata", None)
        chunks = getattr(metadata, "grounding_chunks", None) or []

        uris = []
        for chunk in chunks:
            uri = getattr(getattr(chunk, "web", None), "uri", None)
            if uri:
                uris.append(uri)

        # dict.fromkeys de-duplicates while keeping order, so repeated runs produce
        # the same evidence list (a set would not).
        return list(dict.fromkeys(uris))

    def verify(self, company_name: str, country: str) -> VerificationResult:
        """Look up one company and return the validated result.

        Args:
            company_name: Name inserted into the prompt.
            country: Country inserted into the prompt.

        Returns:
            The parsed reply, with ``verification_sources`` set to the URLs cited
            in the response's grounding metadata.

        Raises:
            ValueError: If the response has no text, the text is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError``), or the JSON is
                not an object.
            pydantic.ValidationError: If the JSON object does not match
                ``VerificationResult``.
        """
        # NOTE(review): JSON response mode is requested together with a tool;
        # confirm the configured model accepts that combination.
        response = self.client.models.generate_content(
            model=self.model_id,
            contents=self._prompt(company_name, country),
            config=types.GenerateContentConfig(
                tools=[types.Tool(google_search=types.GoogleSearch())],
                response_mime_type="application/json",
            ),
        )

        text = response.text
        if not text:
            raise ValueError(f"Empty model response for company {company_name!r}")

        raw = json.loads(text)
        if not isinstance(raw, dict):
            raise ValueError(
                f"Expected a JSON object for company {company_name!r}, "
                f"got {type(raw).__name__}"
            )

        raw["verification_sources"] = self._grounding_urls(response)
        return VerificationResult.model_validate(raw)

    def verify_batch(self, inputs: List[CorporateVerificationInput]) -> List[VerificationResult]:
        """Verify every input sequentially and return the results in the same order.

        The first failing lookup raises and aborts the batch.
        """
        return [self.verify(item.company_name, item.country) for item in inputs]


# ============================================================
#  Super-7 Enrichment Logic
# ============================================================

def enrich_single(record: Super7Record, result: VerificationResult) -> Super7Record:
    """Apply one verification result to ``record`` in place and return the same record.

    Verification metadata always replaces what the record held. Address and
    phone fields are copied only when the record's own value is empty, and
    evidence URLs are appended without duplicates. ``company_name`` is never
    modified.
    """
    # Metadata that is always taken from the verification result.
    record.legal_name = result.legal_name
    record.legal_name_source_url = result.name_source_url
    record.legal_name_source_type = result.source_type
    record.verification_analysis = result.analysis

    # Record field -> address component used to backfill it when the record is empty.
    address_backfill = {
        "street_address": "street",
        "city": "city",
        "state": "state",
        "postal_code": "postal_code",
        "country": "country",
    }
    for record_field, address_field in address_backfill.items():
        if getattr(record, record_field):
            continue
        setattr(record, record_field, getattr(result.address, address_field))

    if not record.phone_number:
        record.phone_number = result.phone_number

    # Append unseen URLs in first-seen order, mutating the record's own list.
    already_known = set(record.evidence_urls)
    additions = [
        url
        for url in dict.fromkeys(result.verification_sources)
        if url not in already_known
    ]
    record.evidence_urls.extend(additions)

    return record


def enrich_batch(records: List[Super7Record], results: List[VerificationResult]) -> List[Super7Record]:
    """Enrich each record with the result at the same position and return the records.

    Pairing uses ``zip``, so when the two lists differ in length the surplus
    items of the longer one are ignored.
    """
    return [enrich_single(record, result) for record, result in zip(records, results)]


# ============================================================
#  CSV Loading + Saving
# ============================================================

def load_csv(path: str) -> List[Super7Record]:
    """Read the input CSV and build one ``Super7Record`` per data row.

    Args:
        path: CSV file with a header row. The columns read are ``company_name``,
            ``country``, ``street_address``, ``city``, ``state``,
            ``postal_code`` and ``phone_number``; a column that is absent from
            the file is passed to the model as ``None``.

    Returns:
        The records in file order.

    Raises:
        OSError: If the file cannot be opened.
        pydantic.ValidationError: If a row cannot be turned into a
            ``Super7Record`` (for example when ``company_name`` is missing).
    """
    columns = (
        "company_name",
        "country",
        "street_address",
        "city",
        "state",
        "postal_code",
        "phone_number",
    )

    # utf-8-sig reads plain UTF-8 unchanged but also drops a leading byte-order
    # mark (written by Excel and similar tools); with plain utf-8 the BOM stays
    # glued to the first header name and "company_name" is never found.
    with open(path, newline="", encoding="utf-8-sig") as f:
        return [
            Super7Record(**{column: row.get(column) for column in columns})
            for row in csv.DictReader(f)
        ]


def _csv_cell(value):
    """Render one dumped field value as a CSV cell; lists and tuples become ' | '-joined text."""
    if isinstance(value, (list, tuple)):
        return " | ".join(str(item) for item in value)
    return value


def save_csv(records: List[Super7Record], path: str):
    """Write the records to ``path`` as CSV with a header row.

    The header is taken from the first record's ``model_dump()`` keys.
    List-valued fields such as ``evidence_urls`` are written as a single
    ``" | "``-separated cell instead of a Python list repr, so the file is
    usable by non-Python consumers. ``None`` values become empty cells.

    Args:
        records: Records to write. When empty, nothing is written and no file
            is created.
        path: Destination file; overwritten if it exists.
    """
    if not records:
        return

    rows = [rec.model_dump() for rec in records]

    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0]))
        writer.writeheader()
        for row in rows:
            writer.writerow({key: _csv_cell(value) for key, value in row.items()})


def save_json(records: List[Super7Record], path: str):
    """Write the records to ``path`` as an indented JSON array of ``model_dump()`` dicts.

    Args:
        records: Records to write; an empty list produces ``[]``.
        path: Destination file; overwritten if it exists.
    """
    # Dump before opening so a serialisation error cannot leave a truncated file.
    payload = [rec.model_dump() for rec in records]

    with open(path, "w", encoding="utf-8") as f:
        # The file is UTF-8, so keep non-ASCII names readable instead of \uXXXX escapes.
        json.dump(payload, f, indent=2, ensure_ascii=False)


# ============================================================
#  MAIN (CSV → Verified → Enriched → Output)
# ============================================================

if __name__ == "__main__":
    # 🔴 Enter your project id here before running.
    project_id = "YOUR_PROJECT_ID_HERE"  # Example: "my-gcp-vertex-01"

    # Input CSV and the two output files written at the end.
    input_csv = "input.csv"
    output_csv = "output_enriched.csv"
    output_json = "output_enriched.json"

    print("📥 Loading CSV...")
    records = load_csv(input_csv)

    print(f"🔍 Running corporate verification for {len(records)} companies...")
    verifier = CorporateVerifier(project_id)

    inputs = [
        CorporateVerificationInput(
            company_name=rec.company_name,
            # Super7Record.country is optional but the verification input requires
            # a string; fall back to "" so rows without a country do not abort the
            # run with a validation error.
            country=rec.country or "",
        )
        for rec in records
    ]

    # One result per record, in the same order, so they can be paired below.
    results = verifier.verify_batch(inputs)

    print("🔧 Enriching Super-7 records...")
    enriched_records = enrich_batch(records, results)

    print("📤 Saving outputs...")
    save_csv(enriched_records, output_csv)
    save_json(enriched_records, output_json)

    print("\n✔ All done!")
    print(f"→ {output_csv}")
    print(f"→ {output_json}")
