# 🏇 Unity Filter Colab v4 — ATR PDF → Top-2 per Race

**What’s new in v4**
- Uses `pdfplumber` to extract the text of the PDF.
- Step 2: Friendly file picker for uploading PDF.
- Step 3+4: Splits races by `(R1)`, `(R2)` headers so each race is separate.
- Step 5: Only parses runner lines (ignores ATR Verdict, Top Tip, etc.).
- Outputs Top-2 selections for *all races*.

In [None]:
# === Step 1: Install dependencies ===
!pip -q install pandas pdfplumber

In [None]:
# === Step 2: Upload ATR PDF (file picker) ===
from google.colab import files

def pick_pdf():
    """Prompt for a PDF through the Colab file picker and return its name.

    Returns:
        The first key of the mapping returned by ``files.upload()``, which is
        later passed to ``pdfplumber.open`` as the path of the racecard.

    Raises:
        ValueError: If the upload dialog returned no files (for example when
            it was cancelled), instead of an opaque ``StopIteration``.
    """
    print("📂 Please upload today’s ATR racecard PDF...")
    uploaded = files.upload()
    if not uploaded:
        raise ValueError(
            "No file was uploaded; re-run this cell and choose a PDF."
        )
    # Only the first uploaded file is used; any further uploads are ignored.
    pdf_path = next(iter(uploaded))
    print("✅ Uploaded:", pdf_path)
    return pdf_path

# Ask for the upload once; the returned file name is reused by the later steps.
pdf_path = pick_pdf()

In [None]:
# === Step 3+4: Extract races by header markers ===
import pdfplumber, re

def extract_races(path):
    """Split the text of a racecard PDF into one list of lines per race.

    A race starts at every line that begins with a header marker such as
    ``(R1)`` and runs until the next header (races may span page breaks).
    Text that appears before the first header is not part of any race and is
    dropped, so it can no longer show up as a bogus "race 1" and shift the
    numbering of the real races.

    Args:
        path: Location of the PDF, passed straight to ``pdfplumber.open``.

    Returns:
        A list of races, each a list of text lines whose first line is the
        header. If the document contains no header at all, every line is
        returned as a single race (the previous behaviour for such files).
        An empty document yields an empty list.
    """
    header = re.compile(r"\(R\d+\)")  # race header e.g. (R1), anchored by .match
    races = []
    preamble = []  # lines seen before the first header
    with pdfplumber.open(path) as pdf:
        for page in pdf.pages:
            # Guard against pages with no extractable text, for which
            # extract_text() may return None rather than an empty string.
            text = page.extract_text() or ""
            for line in text.splitlines():
                if header.match(line):
                    races.append([line])
                elif races:
                    races[-1].append(line)
                else:
                    preamble.append(line)
    if not races and preamble:
        # No header markers found: fall back to treating the file as one race.
        return [preamble]
    return races

# Split the uploaded PDF into races and report how many lines each one holds.
races = extract_races(pdf_path)
print("✅ Found", len(races), "races")
for race_no, race_lines in enumerate(races, start=1):
    line_count = len(race_lines)
    print(f"Race {race_no}: {line_count} lines")

In [None]:
# === Step 5: Convert each race to DataFrame ===
import pandas as pd

def parse_race(lines):
    """Build a DataFrame of runners from the text lines of one race.

    Only lines whose first character is a digit are treated as runner lines.
    Each such line is split on whitespace and read as::

        <number> [(draw)] [form] <name words...> <first all-digit token> ...

    Args:
        lines: Iterable of text lines belonging to a single race.

    Returns:
        A ``pandas.DataFrame`` with the columns ``name`` and ``form`` (one row
        per runner). The columns are present even when no runner was found.
        Lines that yield no horse name are skipped.
    """
    # Kept function-local so this cell does not rely on another cell's import.
    import re

    # A form token consists solely of digits, F/U/R/P and dashes. It must match
    # the WHOLE token: a prefix match would swallow the first word of names
    # such as "Frankel" or "Red Rum" as if it were the form string.
    form_token = re.compile(r"[0-9FURP\-]+")

    data = []
    for line in lines:
        if not line or not line[0].isdigit():  # only runner lines start with number
            continue
        parts = line.split()
        if len(parts) < 2:
            continue

        # parts[0] is the runner number; an optional "(draw)" token may follow.
        draw = parts[1] if parts[1].startswith("(") else ""
        i = 2 if draw else 1

        # optional form string like "321F" etc
        form = ""
        if i < len(parts) and form_token.fullmatch(parts[i]):
            form = parts[i]
            i += 1

        # collect horse name until an all-digit token appears
        name_parts = []
        while i < len(parts) and not parts[i].isdigit():
            name_parts.append(parts[i])
            i += 1
        name = " ".join(name_parts)
        if not name:
            # A digit-led line without any name words is not a runner line.
            continue
        data.append({"name": name, "form": form})
    return pd.DataFrame(data, columns=["name", "form"])

# Parse every race into its own DataFrame and report the runner counts.
race_dfs = list(map(parse_race, races))
print("✅ Parsed", len(race_dfs), "races")
for race_no, runners in enumerate(race_dfs, start=1):
    runner_count = len(runners)
    print(f"Race {race_no}: {runner_count} runners")

In [None]:
# === Step 6: Unity Filter Wrapper ===
class UnityFilter:
    """Scores runners from their form string and picks the two best per race."""

    def score_runner(self, row):
        """Return the form-based score of one runner.

        Args:
            row: Mapping-like object (e.g. a DataFrame row) with a ``form``
                entry; the value is converted with ``str`` before scoring.

        Returns:
            A float: +3 for every "1", +2 for every "2", +1 for every "3" and
            -1 for every F, U, R or P (either case). Other characters score 0.
        """
        points = {"1": 3, "2": 2, "3": 1}
        score = 0.0
        for ch in str(row["form"]):
            if ch in points:
                score += points[ch]
            elif ch.upper() in "FURP":
                score -= 1
        return score

    def top2(self, race_df):
        """Return the two highest-scoring runners of a race.

        The input frame is not modified; scoring happens on a copy that gets
        an extra ``Score`` column.

        Args:
            race_df: DataFrame with one row per runner and a ``form`` column.

        Returns:
            A tuple ``(first, second)`` of rows (pandas Series). ``second`` is
            ``None`` for a single-runner race and both are ``None`` when the
            frame has no rows. Runners with equal scores keep their original
            order in ``race_df``.
        """
        # Handle "no runners" before scoring: applying the scorer to a frame
        # without rows (or without columns) is not guaranteed to produce a
        # Series that can be assigned to a single column.
        if len(race_df) == 0:
            return None, None

        df = race_df.copy()
        df["Score"] = df.apply(self.score_runner, axis=1)
        # A stable sort makes ties deterministic (original row order wins);
        # the default sort algorithm gives no such guarantee.
        df = df.sort_values(
            "Score", ascending=False, kind="mergesort"
        ).reset_index(drop=True)

        second = df.iloc[1] if len(df) >= 2 else None
        return df.iloc[0], second

# Shared scorer instance used when ranking the parsed races.
uf = UnityFilter()

In [None]:
# === Step 7: Run all races and show grid ===
# Collect one summary row per race that has at least one parsed runner.
results = []
for race_no, race_df in enumerate(race_dfs, start=1):
    if race_df.empty:
        continue
    best, runner_up = uf.top2(race_df)
    if best is None:
        continue
    # Races with a single runner leave the second-place cells blank.
    if runner_up is None:
        second_name, second_score = "", ""
    else:
        second_name, second_score = runner_up["name"], runner_up["Score"]
    results.append({
        "Race #": race_no,
        "🥇 1st": best["name"],
        "Score1": best["Score"],
        "🥈 2nd": second_name,
        "Score2": second_score,
    })

results_df = pd.DataFrame(results)
results_df

---
### Notes
- v4 detects race headers such as `(R1)` … `(R7)` (any line starting with `(R<number>)`), so each race is separated.
- Only runner lines (starting with numbers) are parsed; ATR Verdict etc. ignored.
- Replace `score_runner` with your full Unity logic when ready.