In [1]:
import re
from pathlib import Path

from tensorboard.backend.event_processing import event_accumulator
import pandas as pd


baseline_avgs = {
    'ucf101': 73.85,
    'eurosat': 60.03,
    'oxford_flowers': 74.83,
    'dtd': 56.37,
    'fgvc_aircraft': 31.09,
    'caltech101': 95.40,
    'oxford_pets': 94.12,
    'stanford_cars': 68.65,
    'food101': 90.66,
    'sun397': 72.23,
    'imagenet': 70.22,
}

losses = dict()

from matplotlib import pyplot as plt

per_m_base = dict()
per_m_new = dict()

def analy(ds):
    def parse_tensorboard(path, scalars):
        """returns a dictionary of pandas dataframes for each requested scalar"""
        ea = event_accumulator.EventAccumulator(
            path,
            size_guidance={event_accumulator.SCALARS: 0},
        )
        _absorb_print = ea.Reload()
        # make sure the scalars are in the event accumulator tags
        assert all(
            s in ea.Tags()["scalars"] for s in scalars
        ), "some scalars were not found in the event accumulator"
        return {k: pd.DataFrame(ea.Scalars(k)) for k in scalars}

    # all dirs like data/fomo/output/CoPrompt_
    dirs = list(Path('data/fomo/output').glob('transferds*'))
    rs = []
    rsa = []
    for dir in dirs:
        seed1_train = dir / f'train_base/{ds}/shots_16/CoPrompt/coprompt/seed1/'
        tensorboard = seed1_train / 'tensorboard'
        clog = seed1_train / 'clog.txt'
        
        if not clog.exists():
            continue
            
        with open(clog, 'r') as f:
            lines = f.readlines()
            if len(lines) < 2:
                continue
            p = lines[-1][4:-5]
        
        # read from tensorboard)
        try:
            df = parse_tensorboard(tensorboard, ['test/accuracy'])
            test_acc = df['test/accuracy'].iloc[-1]['value']
        except Exception as e:
            continue
            
        rs.append((test_acc, p, str(dir)))
        
        seed1_test = dir / f'test_new/{ds}/shots_16/CoPrompt/seed1/'
        out = seed1_test / 'output.log'
# => result
# * total: 1,410
# * correct: 1,070
# * accuracy: 75.9%
# * error: 24.1%
# * macro_f1: 70.8%
        
        try:
            # use regex to match format above
            acc = re.search(r'accuracy: ([0-9.]+)%', out.read_text()).group(1)
            acc = float(acc)
            rsa.append((test_acc, p, acc, str(dir)))
        except Exception as e:
            pass
        
        
        
        
    print(len(rs), len(rsa))
    # for test_acc, p, m in list(sorted(rs, reverse=True))[:5]:
    #     print(f"[{test_acc}], {p}")
    #     print(m)
    # 
    print('---')
        # 
    #%%
    best = list(sorted(rsa, reverse=True, key=lambda x:  x[0] + x[2]))[0]
    best_dir = best[3]
    best_avg = (best[0] + best[2]) / 2
    seed1_train = Path(best_dir) / f'train_base/{ds}/shots_16/CoPrompt/coprompt/seed1/'
    tensorboard = seed1_train / 'tensorboard'
    df = parse_tensorboard(tensorboard, ['train/loss'])
    
    losses[ds] = df['train/loss']['value']
    
    
    for test_acc, p, acc, m in list(sorted(rsa, reverse=True, key=lambda x:  x[0] + x[2]))[:10]:
        avg = (test_acc + acc) / 2
        baseline_avg = baseline_avgs[ds]
        print(f"[{test_acc}], [{acc}], ({best_avg-avg}) [{avg-baseline_avg}] {p}")
        print(m)
        per_m_base.setdefault(m, []).append(test_acc)
        per_m_new.setdefault(m, []).append(acc)

    
    # plt.plot(df['train/loss']['step'], df['train/loss']['value'])
    
    
        

In [2]:
analy('ucf101')

2024-05-26 11:19:16.813108: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-26 11:19:16.849168: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


4 4
---
[75.64633178710938], [78.0], (0.0) [2.973165893554693] inspirational  
data/fomo/output/transferds
[74.81903076171875], [76.3], (1.2636505126953068) [1.7095153808593864] greatest  
data/fomo/output/transferds_nofilter
[74.81903076171875], [76.3], (1.2636505126953068) [1.7095153808593864] greatest  
data/fomo/output/transferds_noselect
[70.37228393554688], [77.3], (2.9870239257812443) [-0.013858032226551131] ironic  
data/fomo/output/transferds_noshared


In [3]:
analy('eurosat')

4 4
---
[89.21428680419922], [79.9], (0.0) [24.52714340209961] increased underdog gihc  
data/fomo/output/transferds
[82.57142639160156], [63.8], (11.371430206298825) [13.155713195800786] �fridaynightoceans carnival  
data/fomo/output/transferds_nofilter
[82.57142639160156], [63.8], (11.371430206298825) [13.155713195800786] �fridaynightoceans carnival  
data/fomo/output/transferds_noselect
[67.23809814453125], [76.6], (12.63809432983399) [11.889049072265621] increased underdog gihc  
data/fomo/output/transferds_noshared


In [4]:
# analy('dtd')

In [5]:
analy('oxford_pets') # 

4 4
---
[95.1089859008789], [98.2], (0.0) [2.534492950439443] catsofinstagram petday !  
data/fomo/output/transferds
[94.04572296142578], [97.0], (1.1316314697265568) [1.402861480712886] precise bonus handsome  
data/fomo/output/transferds_nofilter
[94.04572296142578], [97.0], (1.1316314697265568) [1.402861480712886] precise bonus handsome  
data/fomo/output/transferds_noselect
[90.37745666503906], [97.0], (2.965764617919916) [-0.4312716674804733] temporary petday vering  
data/fomo/output/transferds_noshared


In [6]:
for m in per_m_base:
    base_avg = sum(per_m_base[m]) / len(per_m_base[m])
    new_avg = sum(per_m_new[m]) / len(per_m_new[m])
    print(f"{m} {base_avg} {new_avg}")

data/fomo/output/transferds 86.65653483072917 85.36666666666667
data/fomo/output/transferds_nofilter 83.8120600382487 79.03333333333333
data/fomo/output/transferds_noselect 83.8120600382487 79.03333333333333
data/fomo/output/transferds_noshared 75.99594624837239 83.63333333333333


In [7]:
for ds, loss in losses.items():
    if len(loss) > 1000:
        steps = range(1000)
        # reduce resolution
        n_actual = len(loss)
        n_desired = 1000
        step_size = n_actual / n_desired
        loss = [loss[int(i * step_size)] for i in steps]
    print(ds, len(loss))
        
    plot = """
    \\begin{tikzpicture}
    \\begin{axis}[
        width=7cm,
        % title={Sparsity across the Network},
        % xlabel={Step},
        % ylabel={Train Loss},
        % xmin=0, xmax=5,
        % ymin=-0.05, ymax=1,
        % xtick=data,
        % ytick={0.0, 0.2,0.4,0.6,0.8,1},
        xticklabel={\pgfmathparse{\\tick}\pgfmathprintnumber{\pgfmathresult}\%},
        legend pos=south east,
        ymajorgrids=true,
        grid style=dashed,
    ]

        % weights
        \\addplot[
            % name path global=middle,
            % only marks,
            color=blue,
            % mark=square,
        ]
        coordinates {
             COORDINATES
        };
    \\end{axis}
    \\end{tikzpicture}
    """.replace('COORDINATES', '\n'.join([f'({x/len(loss)*100}, {y})' for x, y in zip(range(len(loss)), loss)]))
    with open(f'loss_{ds}.tex', 'w') as f:
        f.write(plot)

ucf101 1000
eurosat 320
oxford_pets 1000


In [8]:
loss_stanford_cars.tex

NameError: name 'loss_stanford_cars' is not defined