In [1]:
!wandb login 6c2c9e426d778cb5f8ebd597263a155a1c4bcb11

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
import wandb
import polars as pl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import re

# Client for the W&B public API, used below to look up the sweep and its runs.
api = wandb.Api()

# Coordinates of the sweep analysed in this notebook; they are combined into the
# "<entity>/<project>/<sweep id>" path when the sweep is fetched.
entity = "cezary17"
project = "alsatian-quantized"
sweep_id_1 = "unfvnxfk"

[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcezary[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
# Fetch the sweep by its "<entity>/<project>/<sweep id>" path.
sweep_1 = api.sweep(f"{entity}/{project}/{sweep_id_1}")
# Keep the first run of the sweep as a sample; the next cells prototype the log
# parsing on this single run.
run = sweep_1.runs[0]

In [4]:
# Read the two dataset settings stored in the sample run's config.
# Indexing with [] raises KeyError if a key is missing from the config.
# NOTE(review): presumably 'dataset' is the evaluation/training dataset and
# 'model_dataset' the dataset the model snapshots come from — confirm.
training_dataset = run.config['dataset']
model_dataset = run.config['model_dataset']

# Show both values as the cell output.
training_dataset, model_dataset

('imagenette', 'stanford-dogs')

In [5]:
# Fetch the sample run's "output.log" into the resnet152-outputs/ directory,
# replacing any copy left over from a previous download.
file = run.file("output.log")
# download() returns the downloaded file already opened for reading. The log is
# re-opened where it is parsed, so close this handle right away instead of
# leaving it open as the cell's output value.
file.download(root='resnet152-outputs', replace=True).close()

<_io.TextIOWrapper name='resnet152-outputs/output.log' mode='r' encoding='UTF-8'>

In [6]:
def get_model_names(line: str):
    """Extract the model snapshot names announced in a log line.

    Args:
        line: A single line of the run's ``output.log``.

    Returns:
        The Python literal that follows the ``"Model snapshots found: "``
        prefix (a list of snapshot-name strings in the logs seen so far), or
        ``None`` when the line does not start with that prefix.

    Raises:
        ValueError, SyntaxError: If the text after the prefix is not a valid
            Python literal.
    """
    # Function-scope import keeps this cell runnable on its own.
    import ast

    prefix = "Model snapshots found: "
    if not line.startswith(prefix):
        return None

    # The log file is external data. Parse the payload as a literal instead of
    # eval()-ing it, which would execute any expression embedded in the log.
    # Slicing (rather than split) keeps the whole payload even if the prefix
    # text happens to occur again further along the line.
    return ast.literal_eval(line[len(prefix):].strip())

In [7]:
# Buckets filled while scanning the sample run's log.
sanity_check = []            # every "Loading model:" line, in log order
result_lines = []            # 1st, 3rd, 5th, ... "Loss:" line (plus "ERROR" placeholders)
result_lines_quantized = []  # 2nd, 4th, 6th, ... "Loss:" line (plus "ERROR" placeholders)
model_names = None

# "Loss:" lines are assigned alternately; this flag says whether the next one
# belongs to the quantized bucket.
curr_quantized = False

with open('resnet152-outputs/output.log', 'r') as log_file:
    log_lines = log_file.readlines()

# The first line of the log is expected to carry the list of model names.
model_names = get_model_names(log_lines[0])
print(model_names)

for log_line in log_lines:
    if log_line.startswith("Loading model:"):
        sanity_check.append(log_line)
        continue

    if log_line.startswith("Error loading model"):
        # A model that failed to load gets a placeholder in both buckets so the
        # two lists stay the same length.
        result_lines.append("ERROR")
        result_lines_quantized.append("ERROR")
        continue

    if not log_line.startswith("Loss:"):
        continue

    bucket = result_lines_quantized if curr_quantized else result_lines
    bucket.append(log_line)
    curr_quantized = not curr_quantized

['resnet152-ri-10-id-eZvv', 'resnet152-ri-10-id-qv0l', 'resnet152-ri-13-id-B6Ih', 'resnet152-ri-18-id-VWDl', 'resnet152-ri-19-id-sAhw', 'resnet152-ri-21-id-4hsB', 'resnet152-ri-7-id-VEzm']


In [8]:
# Inspect the raw lines collected above as an (original, quantized) pair;
# "ERROR" entries mark models for which an "Error loading model" line was logged.
result_lines, result_lines_quantized

(['Loss: 0.2803, Acc: 0.9103\n',
  'Loss: 0.2704, Acc: 0.9162\n',
  'ERROR',
  'ERROR',
  'ERROR',
  'ERROR',
  'ERROR'],
 ['Loss: 0.2971, Acc: 0.9093\n',
  'Loss: 0.2853, Acc: 0.9136\n',
  'ERROR',
  'ERROR',
  'ERROR',
  'ERROR',
  'ERROR'])

In [9]:
# Cross-check: the n-th "Loading model:" line should mention the n-th model name.
for idx, loading_line in enumerate(sanity_check):
    if model_names[idx] in loading_line:
        continue
    print(f"Error in line {idx}: {loading_line}")


def _to_loss_acc_pairs(raw_lines):
    """Convert metric lines into (loss, acc) float tuples.

    Lines that do not match the "Loss: <x>, Acc: <y>" pattern (for example the
    "ERROR" placeholders) become (nan, nan).
    """
    pairs = []
    for raw in raw_lines:
        hit = re.search(r"Loss: ([\d.]+), Acc: ([\d.]+)", raw)
        if hit:
            pairs.append((float(hit.group(1)), float(hit.group(2))))
        else:
            pairs.append((np.nan, np.nan))
    return pairs


metrics_original = _to_loss_acc_pairs(result_lines)
metrics_quantized = _to_loss_acc_pairs(result_lines_quantized)

metrics_original, metrics_quantized

([(0.2803, 0.9103),
  (0.2704, 0.9162),
  (nan, nan),
  (nan, nan),
  (nan, nan),
  (nan, nan),
  (nan, nan)],
 [(0.2971, 0.9093),
  (0.2853, 0.9136),
  (nan, nan),
  (nan, nan),
  (nan, nan),
  (nan, nan),
  (nan, nan)])

In [10]:
# One tuple per model: (name, original (loss, acc), quantized (loss, acc)).
result_tups = list(zip(model_names, metrics_original, metrics_quantized))
result_tups

[('resnet152-ri-10-id-eZvv', (0.2803, 0.9103), (0.2971, 0.9093)),
 ('resnet152-ri-10-id-qv0l', (0.2704, 0.9162), (0.2853, 0.9136)),
 ('resnet152-ri-13-id-B6Ih', (nan, nan), (nan, nan)),
 ('resnet152-ri-18-id-VWDl', (nan, nan), (nan, nan)),
 ('resnet152-ri-19-id-sAhw', (nan, nan), (nan, nan)),
 ('resnet152-ri-21-id-4hsB', (nan, nan), (nan, nan)),
 ('resnet152-ri-7-id-VEzm', (nan, nan), (nan, nan))]

In [11]:
def _split_result_lines(lines):
    """Sort the metric lines of one log into (original, quantized) lists.

    "Loss:" lines are assigned alternately, starting with the original list.
    Every "Error loading model" line adds an "ERROR" placeholder to both lists.
    """
    original, quantized = [], []
    next_is_quantized = False
    for line in lines:
        if line.startswith("Loss:"):
            (quantized if next_is_quantized else original).append(line)
            next_is_quantized = not next_is_quantized
        elif line.startswith("Error loading model"):
            original.append("ERROR")
            quantized.append("ERROR")
    return original, quantized


def _parse_metrics(result_lines):
    """Turn metric lines into (loss, acc) float tuples; (nan, nan) when a line does not match."""
    pattern = re.compile(r"Loss: ([\d.]+), Acc: ([\d.]+)")
    parsed = []
    for line in result_lines:
        found = pattern.search(line)
        if found:
            parsed.append((float(found.group(1)), float(found.group(2))))
        else:
            parsed.append((np.nan, np.nan))
    return parsed


def _collect_run_rows(sweep_run, download_root='resnet152-outputs'):
    """Download one run's output.log and return one result dict per model.

    Returns an empty list (after printing a notice) when the log is empty or
    its first line does not carry the model names.
    """
    # download() hands back the downloaded file opened for reading; reading it
    # inside `with` closes the handle instead of leaking one per run.
    with sweep_run.file("output.log").download(root=download_root, replace=True) as log_file:
        lines = log_file.readlines()

    names = get_model_names(lines[0]) if lines else None
    if names is None:
        print(f"Skipping run {sweep_run.id}: no model names in the first line of output.log")
        return []

    original_lines, quantized_lines = _split_result_lines(lines)
    metrics_original = _parse_metrics(original_lines)
    metrics_quantized = _parse_metrics(quantized_lines)

    # zip() stops at the shortest input; report a mismatch instead of dropping
    # rows without a trace.
    if not (len(names) == len(metrics_original) == len(metrics_quantized)):
        print(
            f"Warning: run {sweep_run.id} lists {len(names)} models but has "
            f"{len(metrics_original)} original / {len(metrics_quantized)} quantized results; "
            "entries beyond the shortest list are dropped"
        )

    training_dataset = sweep_run.config.get('dataset')
    model_dataset = sweep_run.config.get('model_dataset')

    return [
        {
            "training_dataset": training_dataset,
            "model_dataset": model_dataset,
            "model_name": model_name,
            "original_accuracy": acc_orig,
            "quantized_accuracy": acc_quant
        }
        for model_name, (_, acc_orig), (_, acc_quant) in zip(names, metrics_original, metrics_quantized)
    ]


aggregated_data = []

# The loop variable is deliberately not called `run`, and all per-run state
# lives inside the helper, so the sample-run variables inspected in the earlier
# cells are not overwritten when this cell executes.
for sweep_run in sweep_1.runs:
    aggregated_data.extend(_collect_run_rows(sweep_run))

aggregated_df = pl.DataFrame(aggregated_data)

aggregated_df

training_dataset,model_dataset,model_name,original_accuracy,quantized_accuracy
str,str,str,f64,f64
"""imagenette""","""stanford-dogs""","""resnet152-ri-10-id-eZvv""",0.9103,0.9093
"""imagenette""","""stanford-dogs""","""resnet152-ri-10-id-qv0l""",0.9162,0.9136
"""imagenette""","""stanford-dogs""","""resnet152-ri-13-id-B6Ih""",,
"""imagenette""","""stanford-dogs""","""resnet152-ri-18-id-VWDl""",,
"""imagenette""","""stanford-dogs""","""resnet152-ri-19-id-sAhw""",,
…,…,…,…,…
"""image-woof""","""image-woof""","""resnet152-ri-14-id-KcHE""",0.932,0.8768
"""image-woof""","""image-woof""","""resnet152-ri-16-id-d8OP""",0.9219,0.8682
"""image-woof""","""image-woof""","""resnet152-ri-16-id-iY05""",0.9234,0.8697
"""image-woof""","""image-woof""","""resnet152-ri-3-id-ki6f""",0.9466,0.9005


In [12]:
# Persist the aggregated table in two formats. Both paths are relative, so the
# files are written to the kernel's current working directory.
aggregated_df.write_parquet("resnet152_sweep_data.parquet")
aggregated_df.write_csv("resnet152_sweep_data.csv")

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=7dd8a54b-b654-4e26-9ba8-1dc7a02da688' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>