In [9]:
import csv
import math
from xml.sax.saxutils import escape

import numpy as np

# Read data
# Each record is expected to have three columns: a model name, an MMLU score
# (optionally suffixed with '%') and a numeric value `b`.
rows = []
# newline='' is what the csv module asks for so that it can handle line
# endings (including newlines embedded in quoted fields) itself.
with open('data.csv', 'r', newline='') as f:
    for record in csv.reader(f):
        # csv.reader yields an empty list for a blank line; unpacking it
        # into three names would raise ValueError, so skip it.
        if not record:
            continue
        name, mmlu, b = record
        # skip header
        if name == 'Model':
            continue
        # Strip surrounding whitespace first so a value such as "85% " still
        # loses its percent sign before conversion.
        rows.append((name, float(mmlu.strip().rstrip('%')), float(b)))

# Determine efficient frontier
def is_dominated(row_i, rows):
    """Return True if any row in *rows* dominates *row_i*.

    Rows are ``(name, mmlu, b)`` tuples. Another row dominates *row_i* when
    its MMLU is at least as high and its ``b`` is at least as low, with at
    least one of the two comparisons being strict. A row identical to
    *row_i* (including *row_i* itself, if present in *rows*) therefore never
    dominates it.
    """
    _, score, size = row_i
    return any(
        other_score >= score
        and other_size <= size
        and (other_score > score or other_size < size)
        for _, other_score, other_size in rows
    )

# Names of the rows that no other row dominates (the efficient frontier)
frontier_names = {row[0] for row in rows if not is_dominated(row, rows)}

# Canvas dimensions in pixels; `margin` is applied on all four sides
width = 800
height = 500
margin = 60

# Data extents used by the coordinate transforms
mmlu_vals = [row[1] for row in rows]
b_vals = [row[2] for row in rows]
min_mmlu = min(mmlu_vals)
max_mmlu = max(mmlu_vals)
min_b = min(b_vals)
max_b = max(b_vals)
# The `b` axis is logarithmic, so keep its extents in log10 space as well
log_min_b = math.log10(min_b)
log_max_b = math.log10(max_b)

def point_x(m):
    """Map an MMLU value *m* to a horizontal pixel coordinate.

    ``min_mmlu`` maps to the left edge of the plotting area (``margin``) and
    ``max_mmlu`` to the right edge (``width - margin``), linearly in between.
    When every row has the same MMLU value the range is empty; instead of
    dividing by zero, the horizontal centre of the plotting area is returned.
    """
    span = max_mmlu - min_mmlu
    if span == 0:
        return margin + (width - 2*margin) / 2
    return margin + (m - min_mmlu) / span * (width - 2*margin)

def point_y(b):
    """Map a value *b* to a vertical pixel coordinate on a log10 scale.

    The axis is inverted because SVG y grows downwards: ``min_b`` maps to the
    bottom of the plotting area (``height - margin``) and ``max_b`` to the top
    (``margin``). When every row has the same ``b`` the log range is empty;
    instead of dividing by zero, the vertical centre of the plotting area is
    returned.
    """
    log_span = log_max_b - log_min_b
    if log_span == 0:
        return margin + (height - 2*margin) / 2
    # invert y-axis for log scale
    return margin + (1 - (math.log10(b) - log_min_b) / log_span) * (height - 2*margin)

# Every row projected into pixel space: (name, px, py)
points = [(label, point_x(score), point_y(size)) for label, score, size in rows]

# Partition the points: non-frontier models go to the left label column,
# frontier models to the right one
left_points = []
right_points = []
for pt in points:
    if pt[0] in frontier_names:
        right_points.append(pt)
    else:
        left_points.append(pt)

# Order each column by the pixel y of its data point (ascending py)
left_sorted = list(left_points)
left_sorted.sort(key=lambda pt: pt[2])
right_sorted = list(right_points)
right_sorted.sort(key=lambda pt: pt[2])

# Vertical centre of the plotting area; each label stack is centred on it
centre_y = margin + (height - 2*margin) / 2
n_left = len(left_sorted)
n_right = len(right_sorted)

# Distance between consecutive labels: the stack spans the full plot height,
# and a stack of zero or one labels needs no spacing
if n_left > 1:
    spacing_left = (height - 2*margin) / (n_left - 1)
else:
    spacing_left = 0
if n_right > 1:
    spacing_right = (height - 2*margin) / (n_right - 1)
else:
    spacing_right = 0

# Index of the (possibly fractional) middle slot of each stack
if n_left > 0:
    mid_left = (n_left - 1) / 2
else:
    mid_left = 0
if n_right > 0:
    mid_right = (n_right - 1) / 2
else:
    mid_right = 0

# Font sizes: shrink the left labels (down to 6 at most) so they fit vertically
base_font_size = 10
font_size_left = base_font_size
if n_left > 0:
    max_label_height = (height - 2*margin) / n_left
    font_size_left = min(base_font_size, max(6, max_label_height * 0.8))
font_size_right = 10  # keep right labels consistent
# Estimated width of one character as a multiple of the font size
char_width = 0.6

# One dictionary per label: its data point, the label position and the
# estimated text extent. Labels are stacked evenly around centre_y in the
# order given by the sorted point lists.

# Left column: text starts just inside the left edge of the plotting area
left_labels = [
    {
        'name': label,
        'point': (dot_x, dot_y),
        'label_pos': (margin + 5, centre_y + (slot - mid_left) * spacing_left),
        'width': len(label) * font_size_left * char_width,
        'height': font_size_left,
        'font_size': font_size_left
    }
    for slot, (label, dot_x, dot_y) in enumerate(left_sorted)
]

# Right column: text ends just inside the right edge of the plotting area
right_labels = [
    {
        'name': label,
        'point': (dot_x, dot_y),
        'label_pos': (width - margin - 5, centre_y + (slot - mid_right) * spacing_right),
        'width': len(label) * font_size_right * char_width,
        'height': font_size_right,
        'font_size': font_size_right
    }
    for slot, (label, dot_x, dot_y) in enumerate(right_sorted)
]

# Bounding boxes of all labels, used for collision detection.
# Maps label name -> (xmin, ymin, xmax, ymax); each box is vertically centred
# on the label position.
boxes = {}
for side_labels, grows_right in ((left_labels, True), (right_labels, False)):
    for entry in side_labels:
        anchor_x, mid_y = entry['label_pos']
        half_height = entry['height'] / 2
        if grows_right:
            # left labels are anchored at their left edge and extend rightwards
            x_lo, x_hi = anchor_x, anchor_x + entry['width']
        else:
            # right labels are anchored at their right edge and extend leftwards
            x_lo, x_hi = anchor_x - entry['width'], anchor_x
        boxes[entry['name']] = (x_lo, mid_y - half_height, x_hi, mid_y + half_height)

# Exact test for a connector segment crossing any label box other than its own
def _segment_enters_box(x1, y1, dx, dy, box):
    """Return True if the segment (x1, y1) + t*(dx, dy), 0 <= t <= 1, has a
    point strictly inside *box* = (xmin, ymin, xmax, ymax)."""
    xmin, ymin, xmax, ymax = box
    t_enter, t_exit = 0.0, 1.0
    # Clip the parameter range against the x slab, then the y slab.
    for start, delta, lo, hi in ((x1, dx, xmin, xmax), (y1, dy, ymin, ymax)):
        if delta == 0:
            # Segment is parallel to this slab: it is either strictly inside
            # it for its whole length or never inside it.
            if not lo < start < hi:
                return False
            continue
        t_a = (lo - start) / delta
        t_b = (hi - start) / delta
        if t_a > t_b:
            t_a, t_b = t_b, t_a
        t_enter = max(t_enter, t_a)
        t_exit = min(t_exit, t_b)
        # An empty (or single-point, i.e. boundary-only) range means the
        # segment never reaches the interior.
        if t_enter >= t_exit:
            return False
    return True


def segment_collision(seg_from, seg_to, exclude_name, label_boxes=None):
    """Return True if the segment from *seg_from* to *seg_to* passes through
    the interior of any label box other than the one named *exclude_name*.

    The test is exact rather than based on sampling points along the segment,
    so a box that is thin compared with the segment length cannot be stepped
    over. Touching only a box's edge or corner does not count as a collision.

    Parameters:
        seg_from, seg_to: ``(x, y)`` endpoints of the segment.
        exclude_name: key of the box to ignore (the label's own box).
        label_boxes: mapping of name -> ``(xmin, ymin, xmax, ymax)``. Defaults
            to the module-level ``boxes`` dictionary.
    """
    if label_boxes is None:
        label_boxes = boxes
    x1, y1 = seg_from
    x2, y2 = seg_to
    dx, dy = x2 - x1, y2 - y1
    for name, box in label_boxes.items():
        if name == exclude_name:
            continue
        if _segment_enters_box(x1, y1, dx, dy, box):
            return True
    return False

# Connector anchor selection, shared by both label columns
def _choose_anchor(point, own_name, top_anchor, bottom_anchor):
    """Pick the label corner that the connector from *point* attaches to.

    The top corner is used if the segment to it hits no other label box,
    otherwise the bottom corner under the same condition. If both segments
    collide, the corner vertically closer to the point wins (bottom on ties).
    """
    if not segment_collision(point, top_anchor, own_name):
        return top_anchor
    if not segment_collision(point, bottom_anchor, own_name):
        return bottom_anchor
    gap_top = abs(point[1] - top_anchor[1])
    gap_bottom = abs(point[1] - bottom_anchor[1])
    return top_anchor if gap_top < gap_bottom else bottom_anchor


# Determine connector anchors for left labels (choose top-right or bottom-right)
left_connectors = []
for entry in left_labels:
    label_x, label_y = entry['label_pos']
    half_height = entry['height'] / 2
    # left labels extend rightwards, so the connector meets their right edge
    edge_x = label_x + entry['width']
    chosen = _choose_anchor(
        entry['point'],
        entry['name'],
        (edge_x, label_y - half_height),
        (edge_x, label_y + half_height),
    )
    left_connectors.append((entry['name'], entry['point'], chosen))

# Determine connector anchors for right labels (choose top-left or bottom-left)
right_connectors = []
for entry in right_labels:
    label_x, label_y = entry['label_pos']
    half_height = entry['height'] / 2
    # right labels extend leftwards, so the connector meets their left edge
    edge_x = label_x - entry['width']
    chosen = _choose_anchor(
        entry['point'],
        entry['name'],
        (edge_x, label_y - half_height),
        (edge_x, label_y + half_height),
    )
    right_connectors.append((entry['name'], entry['point'], chosen))

# Begin constructing SVG lines. Attribute values are quoted to produce valid XML.
svg = []
# SVG header
svg.append(f'<svg width="{width}" height="{height}" xmlns="http://www.w3.org/2000/svg" font-family="sans-serif">')
# Background
svg.append(f'<rect x="0" y="0" width="{width}" height="{height}" fill="white"/>')
# Axes: the x-axis is drawn at the pixel row of the smallest b value, the
# y-axis along the left edge of the plotting area
x_axis_y = point_y(min_b)
svg.append(f'<line x1="{margin}" y1="{x_axis_y}" x2="{width - margin}" y2="{x_axis_y}" stroke="black"/>')
y_axis_x = margin
svg.append(f'<line x1="{y_axis_x}" y1="{margin}" x2="{y_axis_x}" y2="{height - margin}" stroke="black"/>')
# X-axis ticks and labels, every 5 units. Start at the first integer that is
# not below min_mmlu: truncating a fractional minimum would place the first
# tick to the left of the axis.
for tick in range(math.ceil(min_mmlu), math.floor(max_mmlu) + 1, 5):
    tx = point_x(tick)
    svg.append(f'<line x1="{tx}" y1="{x_axis_y}" x2="{tx}" y2="{x_axis_y + 5}" stroke="black"/>')
    svg.append(f'<text x="{tx}" y="{x_axis_y + 20}" font-size="8" text-anchor="middle">{tick}</text>')
# Y-axis ticks and labels for each power of 10 that lies within the data
# range; powers outside [min_b, max_b] would fall beyond the ends of the axis.
for exponent in range(math.ceil(log_min_b), math.floor(log_max_b) + 1):
    val = 10 ** exponent
    ty = point_y(val)
    svg.append(f'<line x1="{y_axis_x}" y1="{ty}" x2="{y_axis_x - 5}" y2="{ty}" stroke="black"/>')
    label_str = f'10^{exponent}'
    svg.append(f'<text x="{y_axis_x - 10}" y="{ty + 3}" font-size="8" text-anchor="end">{label_str}</text>')
# Draw data points: frontier models in red, all others in blue
for name, px, py in points:
    color = 'red' if name in frontier_names else 'blue'
    svg.append(f'<circle cx="{px}" cy="{py}" r="3" fill="{color}"/>')
# Draw efficient frontier polyline if there are at least two points.
# Look up each frontier point's b value through a dictionary instead of
# rescanning `rows` for every point; setdefault keeps the first row seen for
# a name, matching a first-match scan.
b_by_name = {}
for nm, _, b in rows:
    b_by_name.setdefault(nm, b)
frontier_list = [(n, px, py, b_by_name[n]) for n, px, py in right_points if n in b_by_name]
if len(frontier_list) > 1:
    # Connect the frontier points in order of increasing b
    frontier_list.sort(key=lambda t: t[3])
    poly_points = ' '.join(f"{px},{py}" for _, px, py, _ in frontier_list)
    svg.append(f'<polyline fill="none" stroke="gray" stroke-width="1" points="{poly_points}"/>')
# Draw connectors and labels, left group first and then the right group.
# Each connector list was built by iterating over the matching label list, so
# the two are walked in lockstep instead of searching for every label by name.
# Left labels are start-anchored at their x position, right labels end-anchored.
for connectors, labels, text_anchor in (
    (left_connectors, left_labels, 'start'),
    (right_connectors, right_labels, 'end'),
):
    for (name, (x1, y1), (x2, y2)), lbl in zip(connectors, labels):
        svg.append(f'<line x1="{x1}" y1="{y1}" x2="{x2}" y2="{y2}" stroke="gray" stroke-width="0.5"/>')
        lx, ly = lbl['label_pos']
        fs = lbl['font_size']
        # Shift the baseline down so the text is roughly centred on ly
        text_y = ly + fs * 0.35
        # Names come from the input file; escape &, < and > so that they
        # cannot break the XML.
        svg.append(f'<text x="{lx}" y="{text_y}" font-size="{fs}" text-anchor="{text_anchor}">{escape(name)}</text>')
# Chart title and axis titles
svg.append(f'<text x="{width/2}" y="{margin/2}" font-size="14" text-anchor="middle" font-weight="bold">MMLU Score vs. Parameter Count</text>')
# A literal percent sign needs no escaping in SVG text; "%25" is its
# URL-encoded form and would be rendered verbatim.
svg.append(f'<text x="{width/2}" y="{height - 10}" font-size="10" text-anchor="middle">MMLU Score (%)</text>')
svg.append(f'<text x="15" y="{height/2}" font-size="10" text-anchor="middle" transform="rotate(-90,15,{height/2})">Parameters (log scale)</text>')
# Close SVG tag
svg.append('</svg>')

# Write SVG to file, one element per line. The document carries no XML
# declaration, so consumers read it as UTF-8; write it as UTF-8 explicitly
# rather than relying on the platform's default encoding.
with open('graph.svg', 'w', encoding='utf-8') as f_out:
    f_out.writelines(f'{line}\n' for line in svg)
print('SVG chart saved to graph.svg')


SVG chart saved to graph.svg
