In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import to_rgba
from scipy.spatial import ConvexHull

# Set the default matplotlib font size used by every figure created below.
plt.rcParams["font.size"] = 22

In [None]:
# Load the results table from disk and preview its first five rows.
result_df = pd.read_csv(filepath_or_buffer="results.csv")
result_df.head(n=5)

In [None]:
# Per-model bar charts: accuracy on each benchmark, one bar per quantization.
BAR_DATASETS = ["winogrande", "arc", "truthfulqa", "hellaswag"]
# Display order of the hue levels; "None" is the label for unquantized rows.
BAR_QUANTIZATIONS = ["None", "q4_0", "q4_k", "q3_k", "AWQ", "GPTQ"]

# savefig does not create missing directories, so make sure the target exists.
Path("figures").mkdir(parents=True, exist_ok=True)

models = result_df["model"].unique()

for model_name in models:
  keep_rows = (
      (result_df["model"] == model_name)
      & (result_df["framework"] != "paper")
      & result_df["dataset"].isin(BAR_DATASETS)
  )
  # assign() builds a new frame, so the missing-quantization label is never
  # written into a slice of result_df (avoids SettingWithCopyWarning).
  model_results = result_df[keep_rows].assign(
      quantization=lambda frame: frame["quantization"].fillna("None")
  )
  model_results = model_results[model_results["quantization"].isin(BAR_QUANTIZATIONS)]
  if model_results.empty:
    # Nothing left to draw for this model after filtering.
    continue

  # Only list the quantizations this model actually has, in the fixed order.
  present = set(model_results["quantization"])
  hue_order = [label for label in BAR_QUANTIZATIONS if label in present]

  fig, ax = plt.subplots()
  sns.barplot(x="accuracy", y="dataset", hue="quantization", hue_order=hue_order,
              data=model_results, orient='h', ax=ax)
  ax.set_title(f"{model_name} quantized accuracy")
  fig.savefig(f"figures/{model_name}_quantized_accuracy.png", dpi=300, bbox_inches='tight')
  plt.show()
  # Release the figure so a long list of models does not accumulate open figures.
  plt.close(fig)

In [None]:
# Load the model specs; a missing quantization is labelled with the string "None".
specs_df = pd.read_csv("model_specs.csv").fillna({"quantization": "None"})
specs_df.head()

In [None]:
from scipy.spatial import ConvexHull
import matplotlib.pyplot as plt
from matplotlib.colors import to_rgba

# Scatter plots of accuracy against in-memory model size, one figure per dataset.
datasets = ["winogrande", "arc", "truthfulqa", "hellaswag"]

# specs_df labels unquantized models with the string "None" while result_df
# still holds NaN here; without aligning the label, the inner merge below
# silently drops every unquantized row.
filtered_df = result_df[
    result_df["dataset"].isin(datasets) & (result_df["framework"] != "paper")
].assign(quantization=lambda frame: frame["quantization"].fillna("None"))

# savefig does not create missing directories, so make sure the target exists.
Path("figures").mkdir(parents=True, exist_ok=True)

# Create a separate plot for each dataset
for dataset in datasets:
  dataset_df = filtered_df[filtered_df["dataset"] == dataset]

  if dataset == "hellaswag":
    # The two gemma models are left out of the hellaswag figure.
    dataset_df = dataset_df[~dataset_df["model"].isin(["gemma-2b", "gemma-7b"])]
  print(dataset_df.model.unique())
  dataset_df = dataset_df.merge(specs_df, on=["model", "quantization"])

  # One figure per dataset: colour encodes the model, marker the quantization.
  fig, ax = plt.subplots()
  sns.scatterplot(x="size_in_memory", y="accuracy", hue="model", style="quantization",
                  data=dataset_df, ax=ax)

  ax.set_title(f"{dataset}: Performance vs Model Size")
  ax.set_xlabel("Model Size (GB)")
  ax.set_ylabel("Accuracy (%)")
  ax.legend(prop={'size': 6}, ncol=2)
  # NOTE(review): a later cell in this notebook saves to the same path and
  # therefore overwrites this file — confirm which version is wanted.
  fig.savefig(f"figures/{dataset}_performance_vs_size.pdf", dpi=300, bbox_inches='tight')
  plt.show()
  plt.close(fig)

In [None]:
# Replace every NaN in both frames (all columns, in place) with the string
# "None", then preview the first five rows of each.
for frame in (result_df, specs_df):
  frame.replace(np.nan, "None", inplace=True)
display(result_df.head(5), specs_df.head(5))

In [None]:
from scipy.spatial import ConvexHull
import numpy as np

# Accuracy vs. in-memory size per dataset, with a shaded convex hull around the
# points of each model.
datasets = ["winogrande", "arc", "truthfulqa", "hellaswag"]
filtered_df = result_df[result_df["dataset"].isin(datasets)]
filtered_df = filtered_df[filtered_df["framework"] != "paper"]

# savefig does not create missing directories, so make sure the target exists.
Path("figures").mkdir(parents=True, exist_ok=True)

xticks = [1, 2, 4, 8, 16, 32]
xticklabels = [str(tick) for tick in xticks]

# Create a separate plot for each dataset
for dataset in datasets:
  print(dataset)
  dataset_df = filtered_df[filtered_df["dataset"] == dataset]
  dataset_df = dataset_df.merge(specs_df, on=["model", "quantization"])
  dataset_df = dataset_df.sort_values(by=["model", "quantization"])
  if dataset_df.empty:
    # No merged rows for this dataset: nothing to plot or save.
    continue

  models = dataset_df["model"].unique()
  sorted_models = sorted(models)
  # One explicit colour per model, shared by the markers and the hulls so they
  # cannot drift apart; color_palette cycles when more colours are requested
  # than tab10 provides.
  model_colors = dict(zip(sorted_models, sns.color_palette("tab10", n_colors=len(sorted_models))))

  fig, ax = plt.subplots(figsize=(10, 8))
  sns.scatterplot(x="size_in_memory", y="accuracy", hue="model", hue_order=sorted_models,
                  palette=model_colors, style="quantization", data=dataset_df,
                  s=150, alpha=0.8, ax=ax)

  # Draw convex hulls for each model
  for model in sorted_models:
    model_df = dataset_df[dataset_df["model"] == model]
    # Use the same coordinates as the scatter markers so the hull encloses them.
    points = model_df[["size_in_memory", "accuracy"]].to_numpy(dtype=float)
    points = points[~np.isnan(points).any(axis=1)]
    # A 2-D hull needs at least three points that are not all on one line;
    # ConvexHull raises otherwise, so such models keep their markers only.
    if len(points) < 3 or np.linalg.matrix_rank(points - points[0]) < 2:
      continue
    hull_points = points[ConvexHull(points).vertices]
    # ax.fill closes the polygon itself, so the first vertex is not repeated.
    ax.fill(hull_points[:, 0], hull_points[:, 1], color=to_rgba(model_colors[model], 0.3))

  ax.set_xlabel("Model Size (GB)")
  ax.set_ylabel("Accuracy (%)")
  ax.set_xscale('log')
  ax.set_xticks(xticks)
  ax.set_xticklabels(xticklabels, rotation=0)
  if dataset == 'arc':
    # Only the arc figure carries the legend.
    ax.legend(prop={'size': 14}, ncol=2, loc="lower right",
              columnspacing=-0.3)
  else:
    legend = ax.get_legend()
    if legend is not None:
      legend.remove()
  ax.grid()
  fig.savefig(f"figures/{dataset}_performance_vs_size.pdf", dpi=300, bbox_inches='tight')
  plt.show()
  plt.close(fig)

In [None]:
# Configure pandas display: remove the column and row limits so frames are
# rendered in full.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

## Extract tables

In [None]:
# A cell renders only its last bare expression, so both arrays are passed to
# display() explicitly; otherwise the framework values would never be shown.
display(result_df["framework"].unique(), result_df["quantization"].unique())

In [None]:
# Distinct quantization labels present in the specs table.
specs_df["quantization"].unique()

In [None]:
# Map each quantization label to a framework name and store the result as a
# new "framework" column on specs_df.
mapping_quant_framework = {
    **dict.fromkeys(("q3_k", "q4_0", "q4_k"), "llamacpp"),
    **dict.fromkeys(("AWQ", "GPTQ", "None"), "pytorch"),
}
# Labels missing from the mapping become NaN in the new column.
specs_df["framework"] = specs_df["quantization"].map(mapping_quant_framework)
specs_df

In [None]:
# Attach the spec columns to every result row that shares a
# (model, quantization) pair with specs_df (inner join).
# NOTE(review): "framework" is not a join key, so when both frames carry that
# column pandas suffixes it (_x/_y) and rows of every framework, including
# "paper", stay in merged_df — confirm this is intended.
join_keys = ["model", "quantization"]
merged_df = pd.merge(result_df, specs_df, on=join_keys)
display(merged_df.head(5))

In [None]:
# Unquantized results whose framework is pytorch (which already rules out
# the "paper" rows).
is_unquantized = result_df["quantization"] == "None"
is_pytorch = result_df["framework"] == "pytorch"
result_df[is_unquantized & is_pytorch]

In [None]:
# Preview the unquantized, non-"paper" results next to the unquantized specs
# (first ten rows of each).
unquantized_results = result_df[result_df["quantization"].eq("None") & result_df["framework"].ne("paper")]
unquantized_specs = specs_df[specs_df["quantization"].eq("None")]
display(unquantized_results.head(10), unquantized_specs.head(10))

In [None]:
# Models that still have an unquantized row after the merge.
merged_df.loc[merged_df["quantization"] == "None", "model"].unique()

In [None]:
# Export accuracy tables to LaTeX: one file per model plus one combined file.
# NOTE(review): .sum() adds up the accuracy of every row sharing the same
# (model, quantization, size_in_memory, dataset) key; if merged_df holds more
# than one such row (e.g. several frameworks) the value is a total, not a
# single measurement — confirm the keys are unique.
acc_size_df_all = merged_df.groupby(["model", "quantization", "size_in_memory", "dataset"])[["accuracy"]].sum()

NUMERIC_TABLE_COLS = ["size_in_memory", "accuracy"]


def _format_two_decimals(value):
  """Return `value` with two decimals, or the string 'None' when it is missing."""
  # NaN is truthy and 0.0 is falsy, so a plain truth test would print "nan"
  # for missing values and hide genuine zeros; test for missingness instead.
  return "None" if pd.isna(value) else f"{value:.2f}"


for model, model_table in acc_size_df_all.groupby(level="model"):
  display(model)
  df_of_interest = model_table.reset_index()[["quantization", "size_in_memory", "dataset", "accuracy"]]
  for col in NUMERIC_TABLE_COLS:
    # The "None" placeholder string marks a missing value; turn it back into
    # NaN so the column can be cast to float before formatting.
    as_float = df_of_interest[col].mask(df_of_interest[col] == "None").astype(np.double)
    df_of_interest[col] = as_float.map(_format_two_decimals)
  df_of_interest = df_of_interest.set_index(["quantization", "size_in_memory", "dataset"])

  with open(f"acc_size_df_{model}.tex", "w") as f:
    # Raw string: a bare '\_' is an invalid escape sequence in a normal literal.
    f.write(df_of_interest.to_latex().replace('_', r'\_'))

with open("acc_size_df_all.tex", "w") as f:
  f.write(acc_size_df_all.to_latex().replace('_', r'\_'))