In [2]:
import pandas as pd
from pathlib import Path

# ===== EDIT THESE PATHS IF NEEDED =====
# Folder holding the exported grid; the result workbook is written here too
BASE_DIR = Path(r"C:\Users\Julian.Diaz\OneDrive - XENITH CONSULTING PTY LTD\Documents\05_Geodatabases\00_Projects\99_IsaNorth_CaseStudy")
INPUT_CSV = BASE_DIR.joinpath("Grid.csv")  # your exported CSV
OUTPUT_XLS = BASE_DIR.joinpath("Grid_ranking_by_company.xlsx")
# ======================================

# Load the exported grid table into a DataFrame
df = pd.read_csv(INPUT_CSV)

# Sanity check: list the column names once
print("Columns:", [*df.columns])

# Force Ranking to a numeric dtype; values that cannot be parsed become NaN
df["Ranking"] = df["Ranking"].pipe(pd.to_numeric, errors="coerce")

def classify_company(row):
    """
    Return the normalised company label for one grid row.

    ``row`` only needs a ``.get(key)`` method (a DataFrame row or a plain
    dict both work); the fields read are "Authorised" and "ExpiryDate".

    Result:
    - 'AVAILABLE'         when Authorised is missing, NaN or blank
    - '<Authorised>_(a)'  when ExpiryDate is missing, NaN or blank (applicant)
    - '<Authorised>'      otherwise
    The Authorised text is returned with surrounding whitespace stripped.
    """
    holder = row.get("Authorised")
    expiry = row.get("ExpiryDate")

    # Check for NaN before converting: str(NaN) is "nan", which would
    # otherwise be mistaken for a company name.
    holder_name = "" if pd.isna(holder) else str(holder).strip()
    if not holder_name:
        return "AVAILABLE"

    # A holder without a usable expiry date is labelled as an applicant.
    has_expiry = not pd.isna(expiry) and bool(str(expiry).strip())
    return holder_name if has_expiry else holder_name + "_(a)"

# Label every grid cell with its normalised company (row-wise classify_company)
df["CompanyNorm"] = df.apply(classify_company, axis=1)

# Cell tally for each (Ranking, CompanyNorm) pair, ordered by ascending
# Ranking and, inside a ranking, by descending cell count
pair_sizes = df.groupby(["Ranking", "CompanyNorm"]).size()
group = pair_sizes.reset_index(name="GridCount")
group = group.sort_values(by=["Ranking", "GridCount"], ascending=[True, False])

# Rows of the tally that describe unheld (AVAILABLE) cells
is_available = group["CompanyNorm"] == "AVAILABLE"

# Per Ranking: number of distinct CompanyNorm labels, AVAILABLE excluded
company_counts = (
    group.loc[~is_available]
    .groupby("Ranking")["CompanyNorm"]
    .nunique()
    .reset_index(name="NumCompanies")
)

# Per Ranking: number of AVAILABLE cells
available_counts = (
    group.loc[is_available]
    .groupby("Ranking")["GridCount"]
    .sum()
    .reset_index(name="NumAvailableCells")
)

# One summary row per Ranking. The outer join keeps rankings that appear on
# only one side; the gaps it leaves are filled with 0 so both counters can be
# stored as integers.
summary = (
    company_counts.merge(available_counts, on="Ranking", how="outer")
    .fillna(0)
    .astype({"NumCompanies": int, "NumAvailableCells": int})
    .sort_values("Ranking")
)

# Export both tables to one workbook, one sheet each
sheets = {"ByRankingCompany": group, "Summary": summary}
with pd.ExcelWriter(OUTPUT_XLS, engine="openpyxl") as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

print("Done.")
print("Excel written to:", OUTPUT_XLS)


Columns: ['id', 'left', 'top', 'right', 'bottom', 'row_index', 'col_index', 'source', 'source2', 'evapor', 'cross_stra', 'pinch', 'shortcut', 'seal', 'trap', 'alt', 'leaching', 'occurrence', 'preserv', 'depth', 'p_source', 'p_source2', 'p_evapor', 'p_cross_st', 'p_pinch', 'p_shortcut', 'p_seal', 'p_trap', 'p_alt', 'p_leaching', 'p_occurren', 'p_preserv', 'p_depth', 'c_source', 'c_source2', 'c_evapor', 'c_cross_st', 'c_pinch', 'c_shortcut', 'c_seal', 'c_trap', 'c_alt', 'c_leaching', 'c_occurren', 'c_preserv', 'c_depth', 'row', 'col', 'cell_code', 'Prospectiv', 'Confidence', 'Fertility', 'Pathway', 'Explorabil', 'energy', 'p_energy', 'c_energy', 'Authorised', 'ExpiryDate', 'Ranking']
Done.
Excel written to: C:\Users\Julian.Diaz\OneDrive - XENITH CONSULTING PTY LTD\Documents\05_Geodatabases\00_Projects\99_IsaNorth_CaseStudy\Grid_ranking_by_company.xlsx
