In [1]:
import pandas as pd

# Row labels: one entry per evaluated model, in the same order as every score list below.
models = [
    "Idefics3 8B",
    "SmolVLM",
    "Qwen2-VL 2B",
    "InternVL2 2B",
    "PaliGemma 3B 448px",
    "moondream2",
    "MiniCPM-V-2",
    "MM1.5 1B",
]

# Benchmark scores keyed by benchmark name; position i in each list belongs to models[i].
data = {
    "MMMU (val)": [46.6, 38.8, 41.1, 34.3, 34.9, 32.4, 38.2, 35.8],
    "MathVista (testmini)": [58.4, 44.6, 47.8, 46.3, 28.7, 24.3, 39.8, 37.2],
    # NOTE(review): the trailing 0 (MM1.5 1B) looks like a placeholder for a
    # missing score rather than a measured value -- confirm.
    "MMStar (val)": [55.9, 42.1, 47.5, 49.8, 48.3, 40.3, 39.1, 0],
    "DocVQA (test)": [87.7, 81.6, 90.1, 86.9, 32.2, 70.5, 71.9, 81],
    "TextVQA (val)": [74.9, 72.7, 79.7, 73.4, 56.0, 65.2, 74.1, 72.5],
}

# Build the frame column-wise, then attach the model names as the row index.
df = pd.DataFrame(data)
df.index = models

In [2]:
# Convert every benchmark score into a normalized rank score per model and
# average those into a "Win Rate" percentage.
#
# The benchmark columns are taken from `data` (the dict `df` was built from)
# instead of from the current column layout, so re-running this cell -- even
# after later cells have added more columns -- overwrites the same
# "<benchmark> rank" columns rather than stacking "... rank rank" ones.
benchmark_cols = list(data)
size = len(benchmark_cols)
n_models = len(df)

rank_cols = []
for col in benchmark_cols:
    rank_col = col + " rank"
    # rank(ascending=False) assigns 1 to the highest score (ties share the
    # average rank). The mapping below turns rank 1 into 1.0 and rank
    # n_models into 1 / n_models.
    position = df[col].rank(ascending=False)
    df[rank_col] = (n_models - (position - 1)) / n_models
    rank_cols.append(rank_col)

# Scale to percent first and round afterwards: rounding to two decimals and
# then multiplying by 100 can leave binary floating-point residue
# (e.g. 0.57 * 100 == 56.99999999999999).
df["Win Rate"] = df[rank_cols].mean(axis=1).mul(100).round()

In [3]:
# RAM figures keyed by model name. Each mapping is wrapped in a Series so the
# assignment is aligned on the DataFrame's index labels explicitly; models that
# are absent from a mapping end up as NaN in that column.
#
# NOTE(review): "InternVL2 2B" and "moondream2" carry the same value in both
# columns -- confirm these are real measurements and not copy/paste leftovers.

# "MiniCPM-V-2" and "MM1.5 1B" have no entry here.
# NOTE(review): presumably the same unit as "RAM usage" below -- confirm.
df["RAM usage quantized"] = pd.Series({
    "Idefics3 8B": 10.79,
    "SmolVLM": 3.14,
    "Qwen2-VL 2B": 11.93,
    "InternVL2 2B": 10.52,
    "PaliGemma 3B 448px": 4.5,
    "moondream2": 3.87,
})

# Plotted later on an axis labelled "RAM Usage (GB)". "MM1.5 1B" has no entry here.
df["RAM usage"] = pd.Series({
    "Idefics3 8B": 17.67,
    "SmolVLM": 5.02,
    "Qwen2-VL 2B": 13.7,
    "InternVL2 2B": 10.52,
    "PaliGemma 3B 448px": 6.72,
    "moondream2": 3.87,
    "MiniCPM-V-2": 7.88,
})

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from matplotlib.patches import Ellipse

def getImage(path, image_target=32, alpha=0.8):
    """Read an image file and wrap it in an OffsetImage of normalized width.

    Args:
        path: Location of the image file, passed straight to ``plt.imread``.
        image_target: Desired width; the zoom factor is this value divided by
            the image's width in pixels.
        alpha: Opacity forwarded to the OffsetImage.

    Returns:
        An ``OffsetImage`` holding the loaded pixels with the computed zoom.
    """
    pixels = plt.imread(path)
    source_width = pixels.shape[1]  # second axis of the loaded array
    return OffsetImage(pixels, zoom=image_target / source_width, alpha=alpha)


# Logo file for each plotted model; all files live in the same directory.
_LOGO_DIR = "logos"
_LOGO_FILES = (
    ("Idefics3 8B", "idefics.png"),
    ("SmolVLM", "hf.png"),
    ("Qwen2-VL 2B", "qwen.png"),
    ("InternVL2 2B", "opengv.png"),
    ("PaliGemma 3B 448px", "google.png"),
    ("moondream2", "moondream.jpeg"),
    ("MiniCPM-V-2", "openbmb.png"),
)

# Model name -> relative path of its logo image
logo_paths = {
    model: _LOGO_DIR + "/" + filename
    for model, filename in _LOGO_FILES
}

# Create the figure and its single axes
fig, ax = plt.subplots(1, 1, figsize=(10, 6), dpi=300)

# Only the first seven rows are drawn; the remaining model ("MM1.5 1B") has no
# "RAM usage" value and no logo.
plotted = df.iloc[:7]

# Place every model at (RAM usage, Win Rate). Values are iterated directly
# instead of indexing the string-labelled Series with integer positions, which
# pandas has deprecated.
for model, x, y in zip(plotted.index, plotted['RAM usage'], plotted['Win Rate']):
    # Use the logo as the marker when one is available
    if model in logo_paths:
        # SmolVLM is drawn fully opaque; every other logo keeps getImage's default alpha
        alpha_override = {"alpha": 1} if model == "SmolVLM" else {}
        logo = getImage(logo_paths[model], image_target=32, **alpha_override)
        ax.add_artist(AnnotationBbox(logo, (x, y), frameon=False))

    # Caption under the marker: the first two words of the model name
    ax.annotate(" ".join(model.split(" ")[:2]),
                (x, y),
                xytext=(0, -20),
                textcoords='offset points',
                fontsize=8,
                ha='center',
                va='top')

# Guide arrows and their captions do not depend on the model, so they are drawn
# once here rather than once per loop iteration (which stacked seven identical
# artists on top of each other).
guide_arrow = dict(color="dimgray", arrowstyle='-|>', alpha=1, lw=5)

# Horizontal arrow pointing towards lower RAM usage.
# NOTE(review): the annotation text is empty, but fontsize is kept because it
# appears to act as the default scale of the arrow head -- confirm against the
# matplotlib annotate documentation before changing it.
ax.annotate('',
            xy=(4, 95),       # arrow head
            xytext=(12, 95),  # arrow tail
            fontsize=32,
            arrowprops=guide_arrow)

ax.text(8, 93, "cheaper/faster",
        color="dimgray",
        weight="bold",
        fontsize=12,
        rotation=0,
        ha='center',
        va='top')

# Vertical arrow pointing towards higher win rate
ax.annotate('',
            xy=(3.3, 90),      # arrow head
            xytext=(3.3, 30),  # arrow tail
            fontsize=48,
            arrowprops=guide_arrow)

ax.text(3.5, 60, "better",
        color="dimgray",
        weight="bold",
        fontsize=12,
        rotation=90,
        ha='left',
        va='center')
    

# Axis labelling
ax.set_xlabel('RAM Usage (GB)', fontsize=10)

# The y label is built from two rotated texts placed left of the axes (in axes
# coordinates): the main label and a smaller, grey sub-label next to it.
ylabel_main = 'Win Rate (%)'
ylabel_sub = 'Measured on 5 popular VLM benchmarks'
for x_offset, label, label_size, label_color in (
    (-0.098, ylabel_main, 10, 'black'),
    (-0.075, ylabel_sub, 8, 'dimgray'),
):
    ax.text(x_offset, 0.5, label,
            transform=ax.transAxes,
            rotation=90,
            fontsize=label_size,
            color=label_color,
            va='center')

ax.set_title('Smol Vision Model Ecosystem', fontsize=12, pad=15)

# Light dashed grid
ax.grid(True, linestyle='--', alpha=0.3)

# Extra bottom/left margin so the captions and the hand-made y label fit
fig.subplots_adjust(bottom=0.2, left=0.2)

# Axis limits derived from the first seven rows, with some padding
ram_values = df['RAM usage'].iloc[:7]
win_rates = df['Win Rate'].iloc[:7]
x_min, x_max = ram_values.min(), ram_values.max()
y_min, y_max = win_rates.min(), win_rates.max()
ax.set_xlim(x_min * 0.7, x_max * 1.1)
ax.set_ylim(y_min * 0.5, y_max * 1.1)


def _percent_tick(value, _position):
    """Render a y tick value as a whole-number percentage."""
    return '{:.0f}%'.format(value)


ax.yaxis.set_major_formatter(plt.FuncFormatter(_percent_tick))

# Translucent blue ellipse centred at (right x limit, y = 0)
width = 18*1.75    # ellipse width, in x data units
height = 100*1.9   # ellipse height, in y data units
ellipse = Ellipse(xy=(x_max * 1.1, 0), width=width, height=height,
                  facecolor="#78bff5", alpha=0.8)
ax.add_patch(ellipse)


plt.show()