<a href="https://colab.research.google.com/github/darshanananth/LINE_DETECTION/blob/main/gemini.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# @title 1) Install dependencies (quiet)
# Upgrade pip itself before installing the packages; -q keeps pip's output short.
!pip -q install --upgrade pip
# Packages used by the following cells (kornia-rs is not imported directly in this notebook).
!pip -q install kornia kornia-rs opencv-python matplotlib networkx

import sys, platform, torch
# Environment report: Python version, PyTorch version, CUDA availability and OS platform string.
print(f"Python {sys.version.split()[0]} | PyTorch {torch.__version__} | CUDA? {torch.cuda.is_available()} | {platform.platform()}")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.2/1.8 MB[0m [31m7.9 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.8/1.8 MB[0m [31m29.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[?25hPython 3.12.12 | PyTorch 2.10.0+cpu | CUDA? False | Linux-6.6.113+-x86_64-with-glibc2.35


In [2]:
# @title 2) Imports & helper functions (I/O, drawing, geometry)
from typing import List, Tuple, Dict
import cv2, json, math, numpy as np
import torch
import kornia as K
import kornia.feature as KF
import networkx as nx
from pathlib import Path
import matplotlib.pyplot as plt

def load_image_rgba(path: str):
    """
    Load an image from disk as 3-channel colour.

    Despite the "rgba" in the name, no alpha channel is produced: the file is
    decoded with ``cv2.IMREAD_COLOR`` (BGR, 3 channels).

    Returns a pair ``(rgb_f, bgr)``:
    - ``rgb_f``: the image in RGB channel order, float32, divided by 255.
    - ``bgr``:   the array exactly as OpenCV decoded it (handy for OpenCV drawing).

    Raises FileNotFoundError when OpenCV cannot read ``path``.
    """
    bgr = cv2.imread(path, cv2.IMREAD_COLOR)
    # cv2.imread signals failure by returning None instead of raising.
    if bgr is None:
        raise FileNotFoundError(f"Could not read: {path}")
    rgb_f = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    return rgb_f, bgr

def prepare_for_sold2(rgb_f: np.ndarray, target_max_side: int = 800):
    """
    Turn an image array into the single-channel batch tensor SOLD² consumes.

    The image is downscaled (never upscaled) so that max(H, W) <= target_max_side,
    converted to a torch tensor and reduced to one channel.

    Args:
        rgb_f: image as [H,W,3] (RGB) or [H,W] (grayscale); callers in this
            notebook pass float32 values in [0,1].
        target_max_side: upper bound, in pixels, for the longer image side.
            Must be positive.

    Returns:
        (t, scale, rgb_f) where
        - t is a tensor of shape [1,1,H',W'],
        - scale is the factor applied to the input size (1.0 if not resized),
        - rgb_f is the (possibly resized) input array.

    Raises:
        ValueError: if target_max_side is not positive.
        RuntimeError: if the converted tensor does not end up as [B,1,H,W].

    Notes:
    - SOLD² (config=None) is tuned for ~300–800 px per side for best OOTB results.
      https://github.com/cvg/SOLD2
    - Kornia/SOLD² expects a grayscale batch [B,1,H,W].
      https://www.kornia.org/tutorials/nbs/line_detection_and_matching_sold2.html
      https://kornia.readthedocs.io/en/latest/models/sold2.html
    """
    if target_max_side <= 0:
        raise ValueError(f"target_max_side must be positive, got {target_max_side}")

    H, W = rgb_f.shape[:2]
    scale = 1.0
    if max(H, W) > target_max_side:
        scale = target_max_side / max(H, W)
        # Clamp each side to >= 1 px: for very elongated images the short side
        # could otherwise round to 0, which cv2.resize rejects.
        new_h = max(1, int(round(H * scale)))
        new_w = max(1, int(round(W * scale)))
        rgb_f = cv2.resize(rgb_f, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # With keepdim=False Kornia adds the batch axis itself:
    # [H,W,3] -> [1,3,H,W] and [H,W] -> [1,1,H,W].
    t = K.image_to_tensor(rgb_f, keepdim=False)
    if t.dim() == 3:
        # Defensive: add the batch axis if a [C,H,W] tensor comes back anyway.
        t = t.unsqueeze(0)
    elif t.dim() != 4:
        raise RuntimeError(f"Unexpected tensor dims: {t.shape}")

    # Ensure 1 channel (grayscale).
    if t.shape[1] == 3:
        t = K.color.rgb_to_grayscale(t)          # -> [B,1,H,W]
    elif t.shape[1] != 1:
        # Unusual channel count: fall back to the per-pixel channel mean.
        t = t.mean(1, keepdim=True)

    # Final safety check: [B,1,H,W]
    if not (t.dim() == 4 and t.shape[1] == 1):
        raise RuntimeError(f"Expected [B,1,H,W], got {t.shape}")

    return t, scale, rgb_f  # return the (possibly resized) image too

def ij_to_xy(lines_ij: np.ndarray) -> np.ndarray:
    """
    Swap the coordinate order of line segments from (row, col) to (x, y).

    Kornia/SOLD² returns segments as an (N,2,2) array whose last axis is ij
    (row, col); see
    https://www.kornia.org/tutorials/nbs/line_detection_and_matching_sold2.html
    OpenCV drawing wants (x, y) = (col, row), so the last axis is reversed.

    Returns a new array with the same shape and dtype; the input is not modified.
    """
    # Reversing the last axis maps (i, j) -> (j, i) for both endpoints at once;
    # .copy() turns the reversed view into an independent array.
    return lines_ij[:, :, ::-1].copy()

def draw_lines_on_bgr(bgr: np.ndarray, lines_xy: np.ndarray, color=(0,0,255), thickness=2):
    """
    Draw line segments onto a copy of a BGR image.

    bgr:       image to draw on; it is copied, the caller's array is left untouched.
    lines_xy:  iterable of segments, each a 2x2 array [[x0, y0], [x1, y1]].
    color:     colour passed to cv2.line (default (0,0,255), i.e. red in BGR order).
    thickness: line thickness passed to cv2.line.

    Returns the annotated copy.
    """
    canvas = bgr.copy()
    for segment in lines_xy:
        # Pixel coordinates must be integers; astype(int) truncates toward zero.
        start, end = segment.astype(int)
        x_start, y_start = start
        x_end, y_end = end
        cv2.line(canvas, (x_start, y_start), (x_end, y_end), color, thickness, cv2.LINE_AA)
    return canvas

def save_json_lines(path: str, lines_xy: np.ndarray, scale_back: float = 1.0):
    """
    Write segment endpoints to a JSON file.

    File layout: {"lines": [{"x1":..,"y1":..,"x2":..,"y2":..}, ...]}

    Every coordinate is multiplied by ``scale_back`` before it is written; if the
    image was resized for inference, pass scale_back=1/scale to get coordinates in
    the original image size. Missing parent directories of ``path`` are created.
    """
    factor = float(scale_back)

    records = []
    for seg in lines_xy:
        # Convert to plain Python floats so the values are JSON-serialisable.
        x1, y1 = float(seg[0,0]) * factor, float(seg[0,1]) * factor
        x2, y2 = float(seg[1,0]) * factor, float(seg[1,1]) * factor
        records.append({"x1": x1, "y1": y1, "x2": x2, "y2": y2})

    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w") as fh:
        json.dump({"lines": records}, fh, indent=2)

def scale_lines(lines_xy: np.ndarray, factor: float) -> np.ndarray:
    """
    Return the line coordinates multiplied by ``factor``.

    ``factor`` is converted to a Python float first; the result is a new array
    and ``lines_xy`` itself is left unchanged.
    """
    multiplier = float(factor)
    return lines_xy * multiplier

def snap_points(points: np.ndarray, eps: float = 5.0) -> np.ndarray:
    """
    Merge nearby points within eps (pixels); returns snapped (M,2) array.

    Greedy clustering in input order: each point that has not yet been absorbed
    becomes the seed of a new cluster, and every still-unclustered point whose
    Euclidean distance to that *seed* is <= eps joins it. Each cluster is
    replaced by the mean of its members.

    Args:
        points: (K,2) array of coordinates.
        eps: merge radius around each seed, in the same units as ``points``.

    Returns:
        float32 array of cluster centres, one row per cluster, ordered by seed
        index. An empty input is returned unchanged.
    """
    if len(points) == 0:
        return points

    pts = np.asarray(points)
    used = np.zeros(len(pts), dtype=bool)
    centers = []
    for i in range(len(pts)):
        if used[i]:
            continue
        # One vectorised distance computation per seed instead of a Python-level
        # loop over every remaining point.
        dist = np.linalg.norm(pts - pts[i], axis=1)
        # All indices before i are already used, so this only picks up later,
        # still-unclustered points.
        members = (dist <= eps) & ~used
        members[i] = True  # the seed always belongs to its own cluster
        used |= members
        centers.append(pts[members].mean(axis=0))
    return np.array(centers, dtype=np.float32)

def build_connectivity_graph(lines_xy: np.ndarray, snap_eps: float = 5.0) -> nx.Graph:
    """
    Build an undirected graph from line segments.

    Nodes are the segment endpoints after merging with ``snap_points`` (radius
    ``snap_eps``); each node carries float attributes ``x`` and ``y``. Every
    segment contributes an edge between the nodes closest to its two endpoints,
    unless both endpoints map to the same node.

    An empty ``lines_xy`` yields an empty graph.
    """
    graph = nx.Graph()
    if lines_xy.size == 0:
        return graph

    # Flatten (N,2,2) segments into (2N,2) endpoints, then merge near-duplicates.
    node_xy = snap_points(lines_xy.reshape(-1, 2), eps=snap_eps)

    graph.add_nodes_from(
        (node_id, {"x": float(px), "y": float(py)})
        for node_id, (px, py) in enumerate(node_xy)
    )

    def closest_node(point) -> int:
        # Index of the snapped node with the smallest Euclidean distance.
        query = np.array(point, dtype=np.float32)
        return int(np.linalg.norm(node_xy - query, axis=1).argmin())

    for start, end in lines_xy:
        u, v = closest_node(start), closest_node(end)
        # Skip segments that collapse onto a single node (no self-loops).
        if u != v:
            graph.add_edge(u, v)
    return graph

def show_image(title, img_rgb):
    """Display ``img_rgb`` in a 10x6 inch matplotlib figure with ``title`` and hidden axes."""
    _, ax = plt.subplots(figsize=(10,6))
    ax.imshow(img_rgb)
    ax.set_title(title)
    ax.axis('off')
    plt.show()

In [None]:
# @title 3) Load SOLD² (pretrained) — uses Kornia integration
# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Build the detector with pretrained weights and the default config (config=None),
# move it to the selected device and put it into evaluation mode.
sold2 = KF.SOLD2(pretrained=True, config=None)
sold2 = sold2.to(device)
sold2 = sold2.eval()
# References:
# - SOLD² upstream repository: https://github.com/cvg/SOLD2
# - Kornia SOLD² model docs: https://kornia.readthedocs.io/en/latest/models/sold2.html
# - Kornia tutorial using this API with grayscale batches [B,1,H,W]:
#   https://www.kornia.org/tutorials/nbs/line_detection_and_matching_sold2.html

Downloading: "http://cmp.felk.cvut.cz/~mishkdmy/models/sold2_wireframe.pth" to /root/.cache/torch/hub/checkpoints/sold2_wireframe.pth


100%|██████████| 140M/140M [00:05<00:00, 26.6MB/s]


In [None]:
# @title 4) Quick demo on a sample image
sample_url = "https://github.com/cvg/SOLD2/raw/main/assets/images/terrace0.JPG"  # sample from the repo  # [1](https://github.com/cvg/SOLD2)
!wget -q -O /content/sample.jpg {sample_url}

rgb_f, bgr = load_image_rgba("/content/sample.jpg")
t, scale, resized_rgb = prepare_for_sold2(rgb_f, target_max_side=800)

# Sanity check: must be [1,1,H,W]
print("Input tensor shape to SOLD²:", t.shape)
assert t.dim() == 4 and t.shape[1] == 1, "Expected [B,1,H,W]"

with torch.inference_mode():
    outputs = sold2(t.to(device))

# Extract lines (ij -> xy) for drawing
line_segs_ij = outputs["line_segments"][0].detach().cpu().numpy()  # (N,2,2) in ij  # [3](https://www.kornia.org/tutorials/nbs/line_detection_and_matching_sold2.html)
lines_xy = ij_to_xy(line_segs_ij)

# Draw overlay on the (possibly resized) image
resized_bgr = cv2.cvtColor((resized_rgb*255).astype(np.uint8), cv2.COLOR_RGB2BGR)
overlay_bgr = draw_lines_on_bgr(resized_bgr, lines_xy, (0,0,255), 2)
overlay_rgb = cv2.cvtColor(overlay_bgr, cv2.COLOR_BGR2RGB)
show_image("SOLD² detected lines (red overlay)", overlay_rgb)

# Save overlay and JSON (at resized scale)
cv2.imwrite("/content/sample_sold2_overlay.png", overlay_bgr)
save_json_lines("/content/sample_sold2_lines.json", lines_xy, scale_back=1.0)
print("Saved:", "/content/sample_sold2_overlay.png", "and", "/content/sample_sold2_lines.json")

# (Optional) If you want lines at original resolution for downstream use:
if scale != 1.0:
    lines_xy_orig = scale_lines(lines_xy, factor=1.0/scale)
    save_json_lines("/content/sample_sold2_lines_original_size.json", lines_xy_orig, scale_back=1.0)
    print("Also saved original-size coordinates:", "/content/sample_sold2_lines_original_size.json")

In [None]:
# @title 5) Run SOLD² on your own images (upload)
from google.colab import files

# Opens Colab's upload dialog; select one or more diagram images.
uploaded = files.upload()

# One (overlay_png_path, lines_json_path) tuple per processed upload.
results = []
for fname in uploaded:
    rgb_f, bgr = load_image_rgba(fname)
    # Increase target_max_side if the lines are very thin.
    t, scale, resized_rgb = prepare_for_sold2(rgb_f, target_max_side=1200)
    print(f"\n{fname}: tensor {t.shape}, scale={scale:.3f}")

    with torch.inference_mode():
        outputs = sold2(t.to(device))

    # First (only) batch item -> NumPy, then swap to xy order for drawing.
    segments_t = outputs["line_segments"][0]
    line_segs_ij = segments_t.detach().cpu().numpy()
    lines_xy = ij_to_xy(line_segs_ij)

    # Overlay on the resized image that was fed to the network.
    resized_u8 = (resized_rgb*255).astype(np.uint8)
    resized_bgr = cv2.cvtColor(resized_u8, cv2.COLOR_RGB2BGR)
    overlay_bgr = draw_lines_on_bgr(resized_bgr, lines_xy, color=(0,0,255), thickness=2)
    overlay_rgb = cv2.cvtColor(overlay_bgr, cv2.COLOR_BGR2RGB)
    show_image(f"{fname} — SOLD² overlay", overlay_rgb)

    # Outputs are named after the uploaded file's stem.
    out_png  = f"/content/{Path(fname).stem}_sold2_overlay.png"
    out_json = f"/content/{Path(fname).stem}_sold2_lines.json"
    cv2.imwrite(out_png, overlay_bgr)
    save_json_lines(out_json, lines_xy, scale_back=1.0)

    # (Optional) also export coordinates mapped back to the original resolution.
    if scale != 1.0:
        out_json_orig = f"/content/{Path(fname).stem}_sold2_lines_original_size.json"
        rescaled_xy = scale_lines(lines_xy, 1.0/scale)
        save_json_lines(out_json_orig, rescaled_xy, scale_back=1.0)

    results.append((out_png, out_json))
print("\nDone.")

In [None]:
# @title 6) (Optional) Build a simple connectivity graph for the last image
# Segment endpoints become (snapped) nodes and segments become edges.
# Detected symbols can later be attached to their nearest node.

# 'lines_xy' is whatever the most recently executed detection cell left behind
# (after cell 5: the last processed upload).
G = build_connectivity_graph(lines_xy, snap_eps=5.0)
print(f"Graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")

# White canvas with the same size as the resized image used for the overlay.
H, W = overlay_rgb.shape[:2]
canvas = np.full((H, W, 3), 255, dtype=np.uint8)

# Edges as thin black lines.
for u, v in G.edges():
    start, end = G.nodes[u], G.nodes[v]
    cv2.line(canvas,
             (int(start['x']), int(start['y'])),
             (int(end['x']), int(end['y'])),
             (0,0,0), 1, cv2.LINE_AA)

# Nodes as filled dots; (0,0,255) is red because the canvas is treated as BGR.
for n, attrs in G.nodes(data=True):
    cv2.circle(canvas, (int(attrs['x']), int(attrs['y'])), 3, (0,0,255), -1, cv2.LINE_AA)

canvas_rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
show_image("Connectivity Graph (nodes + edges)", canvas_rgb)

# Export graph as JSON: node ids with coordinates, edges as id pairs.
graph_path = "/content/sold2_graph.json"
node_records = [
    {"id": int(n), "x": float(attrs["x"]), "y": float(attrs["y"])}
    for n, attrs in G.nodes(data=True)
]
edge_records = [{"u": int(u), "v": int(v)} for u, v in G.edges()]
with open(graph_path, "w") as f:
    json.dump({"nodes": node_records, "edges": edge_records}, f, indent=2)
print("Saved graph to:", graph_path)