## 📌 CSV to JSON Converter for Test Prices
This notebook reads a CSV file containing test categories and prices, and converts it into a structured JSON file.

In [10]:
import csv
import json
import re
import pandas as pd

### Step 1: Define Helper Function
A helper that detects whether a cell's text is a **category header** (an all-uppercase, non-numeric label).

In [11]:
def is_category(text):
    """Return True when ``text`` looks like a category header.

    A header is a label that, once surrounding whitespace is removed,
    contains at least one letter and is unchanged by upper-casing.

    Args:
        text: Cell text to inspect. Falsy values (``None``, ``""``) are
            never headers.

    Returns:
        bool: True for labels such as ``"HAEMATOLOGY"``; False for blank
        cells, mixed-case names, and cells with no letters at all
        (``"123"``, ``"12.5"``, ``"---"``).
    """
    if not text:
        return False
    label = text.strip()
    # Upper-casing leaves blank, numeric and punctuation-only text unchanged,
    # so the uppercase comparison alone would accept them as headers.
    if not any(ch.isalpha() for ch in label):
        return False
    return label == label.upper()

### Step 2: Main Function to Convert CSV → JSON

In [12]:
def csv_to_json(csv_file, json_file):
    # Load and skip first junk rows until 'TEST' appears
    df = pd.read_csv(csv_file, skiprows=4, names=["TEST", "PRICE"], usecols=[0, 1])
    data = []
    current_category = None

    for _, row in df.iterrows():
        test_name = str(row["TEST"]).strip() if pd.notna(row["TEST"]) else ""
        price_raw = str(row["PRICE"]).strip() if pd.notna(row["PRICE"]) else ""

        if is_category(test_name) and not price_raw:
            current_category = test_name.title()
        elif test_name and price_raw:
            try:
                price = int(re.sub(r"[^\d]", "", price_raw))
            except ValueError:
                price = None
            data.append({
                "category": current_category,
                "test": test_name.title(),
                "price": price
            })

    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    print(f"✅ Successfully wrote {len(data)} tests to {json_file}")


### Step 3: Run the Conversion
Make sure `EPICONSULT_PRICELIST_2025.csv` exists in the same folder as this notebook before running; the result is written to `tests.json`.

In [13]:
# Convert the price list; "tests.json" is opened in write mode, so any existing file is overwritten.
csv_to_json("EPICONSULT_PRICELIST_2025.csv", "tests.json")

✅ Successfully wrote 258 tests to tests.json


In [None]:
import pandas as pd
import json
import re

def is_category_row(row):
    """Tell whether a row is a category heading.

    A heading row has non-blank text in column 0 that is unchanged by
    upper-casing and is not the literal 'TEST', while columns 1, 2 and 3
    (those that exist in the row) are all NaN or blank.

    Args:
        row: A row indexed by integer column labels (e.g. a pandas Series
            from ``DataFrame.iterrows()`` on a header-less frame).

    Returns:
        bool: True when the row is a category heading.
    """
    first_cell = row[0]
    label = str(first_cell).strip() if pd.notna(first_cell) else ""
    if label == "":
        return False

    # Any populated cell in columns 1-3 means this is a data row, not a heading.
    for col in (1, 2, 3):
        if col not in row.index:
            continue
        cell = row.get(col)
        if not pd.isna(cell) and str(cell).strip() != "":
            return False

    # Uppercase-ish headings only; the column-title row 'TEST' is excluded.
    return label == label.upper() and label != "TEST"

def parse_price(cell):
    """Extract a whole-naira amount from a cell such as '69,000.00' or '₦15,000'.

    Args:
        cell: Raw cell value. May be ``None``, a float NaN, or any object
            whose ``str()`` contains the price.

    Returns:
        int | None: The first number found in the cell, with the naira sign
        and thousands separators removed, rounded to the nearest naira with
        halves rounding up. ``None`` when the cell is missing, is one of the
        placeholders ``?``, ``N/A``, ``NA``, ``-``, ``--``, or has no digits.
    """
    if cell is None or (isinstance(cell, float) and pd.isna(cell)):
        return None
    text = str(cell).strip()
    if not text or text in {"?", "N/A", "NA", "-", "--"}:
        return None
    cleaned = text.replace("₦", "").replace(",", "")
    match = re.search(r"(\d+)(?:\.(\d+))?", cleaned)
    if not match:
        return None
    whole, fraction = match.group(1), match.group(2) or ""
    # Round half up using the digits directly. The built-in round() sends
    # halves to the nearest even integer (1500.50 -> 1500, 1501.50 -> 1502),
    # and going through float can lose precision on long numbers.
    return int(whole) + (1 if fraction[:1] >= "5" else 0)

def csv_to_json(csv_file, json_file):
    """Convert a header-less price-list CSV into a JSON array of test records.

    Column 0 holds names. The price column is chosen heuristically: among the
    remaining columns, the one whose non-null cells most often contain a digit
    (highest share; the first such column wins ties). Rows accepted by
    ``is_category_row`` set the current category. Rows named 'TEST', 'NA' or
    blank are skipped. Every other row becomes a record unless it has neither
    a parsed price nor a current category.

    Args:
        csv_file: Path of the CSV file to read (all cells are read as text).
        json_file: Path of the JSON file to write (UTF-8, 2-space indent).

    Returns:
        None. Writes ``json_file`` and prints a one-line summary.
    """
    table = pd.read_csv(csv_file, header=None, dtype=str)

    def digit_share(column):
        # Fraction of non-null cells containing at least one digit.
        filled = column.dropna().astype(str)
        return (filled.str.contains(r"\d").sum()) / max(len(filled), 1)

    other_cols = [col for col in table.columns if col != 0]
    if other_cols:
        price_col = max(other_cols, key=lambda col: digit_share(table[col]))
    else:
        price_col = None

    records = []
    category = None

    for _, row in table.iterrows():
        raw_name = row.get(0)
        name = str(raw_name) if pd.notna(raw_name) else ""
        label = name.strip()

        if is_category_row(row):
            category = label.title()
            continue

        # Column-title rows, placeholders and blank names carry no test.
        if label.upper() in {"TEST", "", "NA"}:
            continue

        price = parse_price(row.get(price_col))
        # Before the first heading, rows without a price are preamble noise.
        if price is None and category is None:
            continue

        records.append({
            "category": category,
            "test": label.title(),  # use label instead to preserve acronyms
            "price": price
        })

    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(records, f, ensure_ascii=False, indent=2)

    print(f"✅ Successfully wrote {len(records)} tests to {json_file}")

# Run: csv_to_json loads its input with pd.read_csv, which cannot parse a PDF,
# so point it at the CSV file rather than the .pdf.
csv_to_json("EPICONSULT_PRICELIST_2025.csv", "tests.json")
