In [None]:
import csv
import html
import math

def load_rows(path):
    """Read benchmark rows from a CSV file with a header row.

    The header must contain the columns ``MMLU``, ``B Params`` and
    ``Model``.  The ``MMLU`` value may carry ``%`` signs and surrounding
    whitespace; both are removed before conversion.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A list of ``(mmlu, bparams, name)`` tuples in file order, where
        ``mmlu`` and ``bparams`` are floats and ``name`` is the raw
        ``Model`` string.

    Raises:
        KeyError: If one of the required columns is missing.
        ValueError: If ``MMLU`` or ``B Params`` is not a valid number.
    """
    # newline='' is what the csv module documents for file objects: without
    # it, line endings embedded in quoted fields are silently rewritten.
    with open(path, newline='') as f:
        return [
            (
                float(row['MMLU'].strip().strip('%')),
                float(row['B Params']),
                row['Model'],
            )
            for row in csv.DictReader(f)
        ]

def build_frontier(rows, groups):
    """Pick the highest-scoring row of every size group.

    Args:
        rows: Iterable-safe sequence of ``(mmlu, bparams, name)`` tuples.
        groups: Sequence of ``(label, predicate)`` pairs; ``predicate`` is
            called with a row's ``bparams`` value and decides membership.
            The label is not used here.

    Returns:
        A dict mapping the winning row's name to the row tuple itself.
        Groups that match no row contribute nothing.  When several rows
        of a group share the top score, the first one in ``rows`` wins.
    """
    frontier = {}
    for _label, cond in groups:
        best = max(
            (r for r in rows if cond(r[1])),
            key=lambda r: r[0],
            default=None,
        )
        # An empty group used to make max() raise ValueError; skip it so a
        # sparse dataset still yields a frontier for the populated groups.
        if best is None:
            continue
        frontier[best[2]] = best
    return frontier

def overlaps(box, boxes):
    """Tell whether ``box`` intersects any rectangle in ``boxes``.

    Rectangles are ``(x1, y1, x2, y2)`` sequences.  Two rectangles count
    as overlapping unless one lies strictly beyond the other on the x or
    the y axis, so rectangles that merely touch are reported as
    overlapping.
    """
    return any(
        not (
            box[2] < left
            or box[0] > right
            or box[3] < top
            or box[1] > bottom
        )
        for left, top, right, bottom in boxes
    )

def ccw(ax, ay, bx, by, cx, cy):
    """Orientation test for the point triple A, B, C.

    Returns True when ``(cy - ay) * (bx - ax)`` is strictly greater than
    ``(by - ay) * (cx - ax)``, i.e. when the 2-D cross product of AB and
    AC is positive.  Collinear triples yield False.
    """
    rise_ac = cy - ay
    run_ab = bx - ax
    rise_ab = by - ay
    run_ac = cx - ax
    return rise_ac * run_ab > rise_ab * run_ac

def lines_intersect(seg1, seg2):
    """Report whether two segments cross, using orientation tests.

    Each segment is an ``(x1, y1, x2, y2)`` sequence.  The segments are
    considered intersecting when the endpoints of ``seg1`` get different
    ``ccw`` results relative to ``seg2`` and the endpoints of ``seg2``
    get different results relative to ``seg1``.  Because ``ccw`` is a
    strict comparison, collinear overlapping segments are reported as
    not intersecting.
    """
    ax, ay, bx, by = seg1
    cx, cy, dx, dy = seg2
    # Both endpoints of seg1 on the same side of seg2: no crossing.
    if ccw(ax, ay, cx, cy, dx, dy) == ccw(bx, by, cx, cy, dx, dy):
        return False
    # Otherwise seg2's endpoints must also be split by seg1.
    return ccw(ax, ay, bx, by, cx, cy) != ccw(ax, ay, bx, by, dx, dy)

def segment_hits_box(seg, box):
    """Report whether a segment touches a rectangle.

    ``seg`` is ``(x1, y1, x2, y2)`` and ``box`` is ``(x1, y1, x2, y2)``.
    The result is True when the segment crosses one of the rectangle's
    four sides (per ``lines_intersect``) or when either endpoint lies
    inside the rectangle, boundary included.
    """
    sx, sy, ex, ey = seg
    left, top, right, bottom = box
    outline = (
        (left, top, right, top),
        (right, top, right, bottom),
        (left, bottom, right, bottom),
        (left, top, left, bottom),
    )
    if any(lines_intersect(seg, side) for side in outline):
        return True

    def inside(px, py):
        # Inclusive containment test against the rectangle.
        return left <= px <= right and top <= py <= bottom

    # A segment lying entirely within the box crosses no side, so the
    # endpoints have to be checked separately.
    return inside(sx, sy) or inside(ex, ey)

def segment_hits_boxes(seg, boxes):
    """Return True if ``seg`` touches at least one rectangle in ``boxes``."""
    return any(segment_hits_box(seg, candidate) for candidate in boxes)

def box_hits_segments(box, segments):
    """Return True if ``box`` is touched by at least one of ``segments``."""
    return any(segment_hits_box(candidate, box) for candidate in segments)

def segment_hits_segments(seg, segments):
    """Return True if ``seg`` crosses at least one segment in ``segments``."""
    return any(lines_intersect(seg, other) for other in segments)

# Script body: read data.csv, plot MMLU score (x, linear 0-100) against
# parameter count (y, log10) as an SVG scatter plot, label every point
# while avoiding collisions, mark the per-group best models with a red
# polyline, and write the result to graph.svg.
rows = load_rows('data.csv')

# The y axis is logarithmic, so rows whose parameter count is not positive
# cannot be placed.  Drop them up front instead of crashing in log10 later.
plot_rows = [r for r in rows if r[1] > 0]
if not plot_rows:
    raise ValueError("data.csv has no rows with a positive 'B Params' value")

groups = [
    ('<10B', lambda b: b < 10),
    ('10-100B', lambda b: 10 <= b < 100),
    ('>100B', lambda b: b >= 100),
]

frontier = build_frontier(plot_rows, groups)
frontier_names = set(frontier.keys())

x_min = 0
x_max = 100
b_min = min(b for _, b, _ in plot_rows)
b_max = max(b for _, b, _ in plot_rows)
log_y_min = math.log10(b_min)
log_y_max = math.log10(b_max)
if log_y_max == log_y_min:
    # Every row has the same parameter count: pad the axis by half a decade
    # on each side so the y mapping does not divide by zero.
    log_y_min -= 0.5
    log_y_max += 0.5

width = 800
height = 600
margin = 60

# Already-placed label rectangles, leader lines and plotted points.
boxes = []
lines = []
points = []


def _x_pos(pct):
    """Map an MMLU percentage to a horizontal SVG coordinate."""
    return margin + (pct - x_min) / (x_max - x_min) * (width - 2 * margin)


def _y_pos(params):
    """Map a positive parameter count to a vertical SVG coordinate (log axis)."""
    frac = (math.log10(params) - log_y_min) / (log_y_max - log_y_min)
    return height - margin - frac * (height - 2 * margin)


def _place_label(x, y, label_x, label_y, text_w, text_h, anchor):
    """Move a label down until it is free of conflicts, then register it.

    The label rectangle and its leader line from ``(x, y)`` are tested
    against the module-level ``boxes`` and ``lines``.  On conflict the
    label is shifted down by ``text_h + 2``; the search gives up after a
    bounded number of shifts and keeps the last position.  The final
    rectangle and leader line are appended to ``boxes`` and ``lines``.

    ``anchor`` is ``'start'`` when the text extends to the right of
    ``label_x`` and anything else when it extends to the left.

    Returns the final ``label_y``.
    """
    attempts = 0
    while True:
        if anchor == 'start':
            box = [label_x, label_y - text_h, label_x + text_w, label_y]
        else:
            box = [label_x - text_w, label_y - text_h, label_x, label_y]
        seg = (x, y, label_x, label_y - text_h / 2)
        conflict = (
            overlaps(box, boxes)
            or box_hits_segments(box, lines)
            or segment_hits_boxes(seg, boxes)
            or segment_hits_segments(seg, lines)
        )
        if not conflict or attempts > 200:
            break
        label_y += text_h + 2
        attempts += 1
    boxes.append(box)
    lines.append(seg)
    return label_y


svg = []
svg.append(f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}">')
# Axes: x along the bottom, y along the left edge of the plot area.
svg.append(f'<line x1="{margin}" y1="{height - margin}" x2="{width - margin}" y2="{height - margin}" stroke="black"/>')
svg.append(f'<line x1="{margin}" y1="{height - margin}" x2="{margin}" y2="{margin}" stroke="black"/>')

# X ticks every 20 percentage points.
for i in range(0, 101, 20):
    x = _x_pos(i)
    svg.append(f'<line x1="{x}" y1="{height - margin}" x2="{x}" y2="{height - margin - 5}" stroke="black"/>')
    text_y = height - margin + 15
    svg.append(f'<text x="{x}" y="{text_y}" font-size="10" text-anchor="middle">{i}%</text>')

# Y ticks at every power of ten inside [b_min, b_max].  Start at 10**0, or
# lower when the data reaches below 1 so those decades get ticks as well.
exp = min(0, math.floor(math.log10(b_min)))
val = 10 ** exp
while val <= b_max:
    if val >= b_min:
        y = _y_pos(val)
        svg.append(f'<line x1="{margin}" y1="{y}" x2="{margin + 5}" y2="{y}" stroke="black"/>')
        svg.append(f'<text x="{margin - 5}" y="{y + 4}" font-size="10" text-anchor="end">{val}</text>')
    exp += 1
    val = 10 ** exp

for mmlu, b, name in plot_rows:
    x = _x_pos(mmlu)
    y = _y_pos(b)
    points.append((name, x, y, mmlu, b))
    # Rough text extent: 6 units per character, 10 units tall.
    text_w = len(name) * 6
    text_h = 10
    label_y = y - 8
    if label_y - text_h < margin:
        # The label would stick out above the plot area; start below the point.
        label_y = y + text_h + 8
    # Frontier labels go to the right of the point, all others to the left.
    if name in frontier_names:
        label_x = x + 8
        anchor = 'start'
    else:
        label_x = x - 8
        anchor = 'end'
    label_y = _place_label(x, y, label_x, label_y, text_w, text_h, anchor)
    svg.append(f'<circle cx="{x}" cy="{y}" r="4" fill="blue"/>')
    svg.append(f'<line x1="{x}" y1="{y}" x2="{label_x}" y2="{label_y - text_h / 2}" stroke="gray"/>')
    # Model names come from the CSV; escape them so '&' or '<' cannot break
    # the SVG markup.
    svg.append(f'<text x="{label_x}" y="{label_y}" font-size="10" text-anchor="{anchor}">{html.escape(name)}</text>')

# Connect the frontier points in order of increasing parameter count.
frontier_coords = []
for name, x, y, mmlu, b in sorted((p for p in points if p[0] in frontier_names), key=lambda p: p[4]):
    frontier_coords.append((x, y))
poly_points = ' '.join(f'{x},{y}' for x, y in frontier_coords)
svg.append(f'<polyline points="{poly_points}" fill="none" stroke="red"/>')

if frontier_coords:
    # Attach the caption to the second frontier point when there is one.
    fx, fy = frontier_coords[1] if len(frontier_coords) > 1 else frontier_coords[0]
    label = 'Efficient Frontier'
    text_w = len(label) * 6
    text_h = 10
    label_x = fx + 10
    label_y = fy - 10
    label_y = _place_label(fx, fy, label_x, label_y, text_w, text_h, 'start')
    svg.append(f'<line x1="{fx}" y1="{fy}" x2="{label_x}" y2="{label_y - text_h / 2}" stroke="red"/>')
    svg.append(f'<text x="{label_x}" y="{label_y}" font-size="10" fill="red" text-anchor="start">{label}</text>')

svg.append('</svg>')
# The SVG has no XML declaration, so parsers assume UTF-8; write it as such
# rather than in the platform's locale encoding.
with open('graph.svg', 'w', encoding='utf-8') as f:
    f.write('\n'.join(svg))
print('graph.svg generated')
