In [None]:
import re
from typing import Tuple
from app_v2_utils import normalize_model_display


def split_model_variant(model_variant: str) -> Tuple[str, str]:
    """Split a combined "model + variant" string into ``(model, variant)``.

    The model is whatever ``normalize_model_display`` returns for the whole
    stripped input. The variant is the stripped input with that model removed
    from its *start*, along with any separator characters (whitespace, ``-``,
    ``:``, en/em dash) that directly follow it.

    Args:
        model_variant: Raw combined text, e.g. ``"I 20 - ACTIVE S VTVT"``.

    Returns:
        A ``(model, variant)`` tuple. Both parts are ``""`` when the input is
        empty, ``None`` or whitespace only. When the normalized model does not
        occur at the start of the input, the variant is the whole stripped
        input.
    """
    cleaned = model_variant.strip() if model_variant else ""
    if not cleaned:
        # Nothing to split; avoid handing an empty string to the normalizer.
        return "", ""

    # Normalized model (per the original note, e.g. "I 20" -> "I20").
    model = normalize_model_display(cleaned).strip()

    # Allow optional whitespace between consecutive model characters so the
    # normalized form still matches the raw text ("I20" vs "I 20").
    # Whitespace that is part of the model itself is skipped: escaping it
    # would demand a literal space at that position, whereas the surrounding
    # r"\s*" already accepts any amount (including none) of whitespace.
    spaced_pattern = r"\s*".join(
        re.escape(char) for char in model if not char.isspace()
    )

    # Anchored at the start: only a leading model (plus trailing separators)
    # is removed, never an occurrence later in the string.
    pattern = re.compile(rf"^{spaced_pattern}[\s\-:–—]*", re.IGNORECASE)

    variant = pattern.sub("", cleaned).strip()

    return model, variant


# Ad-hoc manual check of split_model_variant on a sample string.
# NOTE(review): the result is neither printed nor stored; it is presumably
# only visible as the cell output when this runs inside a notebook.
var = "I 20 - ACTIVE S VTVT"
split_model_variant(var)

In [None]:
from app_v2_utils import normalize_make_display, normalize_model_display, split_model_variant, init_car_file_entry, merge_insurer_data_into_car_map, load_json_data
from typing import Dict, Any
from pathlib import Path

# Per-insurer loader names used by scan_all_car_data below. Every one of them
# is currently bound to the same generic load_json_data helper.
load_acko_data = load_json_data
load_icici_data = load_json_data
load_cholams_data = load_json_data
load_royal_sundaram_data = load_json_data

def _text(value: Any) -> str:
    """Return ``value`` as a stripped string; ``None`` becomes ``""``."""
    return "" if value is None else str(value).strip()


def _as_dict(value: Any) -> Dict[str, Any]:
    """Return ``value`` when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _iter_insurer_files(directory: Path, loader):
    """Yield ``(path, data)`` for every ``*.json`` in ``directory`` that loads."""
    if not directory.exists():
        return
    for file in directory.glob("*.json"):
        try:
            data = loader(str(file))
        except Exception:
            # Best-effort scan: a file that cannot be loaded is skipped
            # rather than aborting the whole run.
            continue
        yield file, data


def _stem_part(file: Path, index: int, default: str) -> str:
    """Return one "-"-separated piece of the file stem, or ``default`` if no "-"."""
    stem = file.stem
    return stem.split("-")[index] if "-" in stem else default


def scan_all_car_data() -> Dict[str, Any]:
    """Scan all data files and extract unique makes, models, and variants.

    Reads every ``*.json`` file under ``extracted/acko``, ``extracted/icici``,
    ``extracted/cholams`` and ``extracted/royal_sundaram`` (relative to the
    current working directory). Missing directories and files that fail to
    load are skipped. One debug line per processed record is printed.

    Acko records seed the result map directly; records from the other three
    insurers are collected into lists and handed to
    ``merge_insurer_data_into_car_map``.

    Missing, ``null`` or non-string field values are treated as empty text
    (non-string scalars are converted with ``str``), and JSON payloads of an
    unexpected shape are treated as empty records instead of raising.

    Returns:
        The car data map. Entries created here are keyed by
        ``(make, model, variant)`` tuples and initialised with
        ``init_car_file_entry()``.
        NOTE(review): the ``Dict[str, Any]`` annotation does not match those
        tuple keys; confirm against ``merge_insurer_data_into_car_map``
        before tightening it.
    """
    extracted_dir = Path("extracted")
    separator = "---------------------------------------"
    car_data_map = {}
    icici_data_list = []
    cholams_data_list = []
    royal_sundaram_data_list = []

    # --- Acko: make/model/variant are separate fields under "car_info". ---
    for file, data in _iter_insurer_files(extracted_dir / "acko", load_acko_data):
        car_info = _as_dict(_as_dict(data).get("car_info"))
        make = normalize_make_display(_text(car_info.get("vehicle_make")))
        model = normalize_model_display(_text(car_info.get("vehicle_model")))
        variant = _text(car_info.get("vehicle_variant"))

        print(f"acko make: {make}, model: {model}, variant: {variant}")

        if not (make and model and variant):
            continue
        key = (make, model, variant)
        if key not in car_data_map:
            car_data_map[key] = init_car_file_entry()
        car_data_map[key]["acko"].append(
            {
                "file": str(file),
                # Last "-"-separated piece of the file name is the status.
                "claim_status": _stem_part(file, -1, "not_claimed"),
                "registration": car_info.get("registration_number", ""),
            }
        )

    print(separator)

    # --- ICICI: the "model" field carries both model and variant text. ---
    for file, data in _iter_insurer_files(extracted_dir / "icici", load_icici_data):
        record = _as_dict(data)
        model_raw = _text(record.get("model"))

        make = normalize_make_display(_text(record.get("manufacturer")))
        model = normalize_model_display(model_raw)
        _, variant = split_model_variant(model_raw)

        print(f"icici make: {make}, model: {model}, variant: {variant}")

        if make and model:
            icici_data_list.append(
                {
                    "make": make,
                    "model": model,
                    "variant": variant,
                    "file": str(file),
                    # First "-"-separated piece of the file name.
                    "registration": _stem_part(file, 0, ""),
                }
            )

    print(separator)

    # --- Cholams: payload is a list; only its first element is used. ---
    for file, data in _iter_insurer_files(
        extracted_dir / "cholams", load_cholams_data
    ):
        if not (isinstance(data, list) and data):
            continue
        car_info = _as_dict(data[0])

        make = normalize_make_display(_text(car_info.get("make")))
        model = normalize_model_display(_text(car_info.get("model")))
        _, variant = split_model_variant(_text(car_info.get("variant")))

        print(f"cholams make: {make}, model: {model}, variant: {variant}")

        if make and model:
            cholams_data_list.append(
                {
                    "make": make,
                    "model": model,
                    "variant": variant,
                    "file": str(file),
                    "registration": car_info.get("registration_number", ""),
                }
            )

    print(separator)

    # --- Royal Sundaram: one combined "model_variant" field. ---
    for file, data in _iter_insurer_files(
        extracted_dir / "royal_sundaram", load_royal_sundaram_data
    ):
        car_details = _as_dict(_as_dict(data).get("car_details"))
        model_part, variant = split_model_variant(
            _text(car_details.get("model_variant"))
        )

        make = normalize_make_display(_text(car_details.get("manufacturer")))
        model = normalize_model_display(model_part)

        print(f"royal_sundaram make: {make}, model: {model}, variant: {variant}")

        if make and model:
            royal_sundaram_data_list.append(
                {
                    "make": make,
                    "model": model,
                    "variant": variant,
                    "file": str(file),
                    "registration": car_details.get("registration_number", ""),
                    "claim_status": _stem_part(file, -1, ""),
                }
            )

    merge_insurer_data_into_car_map(
        car_data_map,
        icici_data_list,
        "icici",
        ["file", "registration"],
    )
    merge_insurer_data_into_car_map(
        car_data_map,
        cholams_data_list,
        "cholams",
        ["file", "registration"],
    )
    merge_insurer_data_into_car_map(
        car_data_map,
        royal_sundaram_data_list,
        "royal_sundaram",
        ["file", "registration"],
        extra_fields_func=lambda entry: {"claim_status": entry["claim_status"]},
    )

    return car_data_map

# Run the scan immediately at cell/module level; the returned map is not
# stored in a variable here.
scan_all_car_data()
