In [5]:
import pandas as pd
import plotly.express as px
import os

def create_path_if_not_existant(path):
    if not os.path.exists(path):
        os.makedirs(path)

### ZCCR Ablation Study

In [8]:
def text(representative, proj, clustering, recall):
    if proj=='pca' and representative=='centroid' and clustering=='agglomerative':
        return '<b>{}</b>'.format(str(recall))
    if representative=='centroid':
        return str(recall)
    return ''

create_path_if_not_existant('charts')

for dataset in ['coco', 'flickr30k']:
    for model in ['clip', 'albef']:
        for task in ['txt2img', 'img2txt', 'txt2txt', 'img2img']:

            # recall@10
            df = pd.read_csv('results/zccr_metrics.csv')
            df['n_points_per_cluster'] = df['n_points_per_cluster'].map(lambda e: str(e))
            df = df.rename(columns={'recall': 'R@10', 'n_points_per_cluster':'n_points_per_ImageNet_class',
                                    'n_seeds':'n_ImageNet_classes'})

            df = df[df['n_ImageNet_classes'] != 3]
            df = df[df['n_ImageNet_classes'] != 10]
            df = df[df['proj'] != 'tsne']
            df = df[df['dataset'] == dataset]
            df = df[df['model'] == model]
            df = df[df['task'] == task]
            df = df[df['cluster distance'] == 5]

            df['proj'] = df['proj'].map(lambda x: 'tsne' if x == 'tsne (pca)' else x)
            df['text'] = df.apply(lambda x: text(x['representative'], x['proj'], x['clustering'], x['R@10']), axis=1)
            # df = df[df['clustering'] == 'agglomerative']
            # df = df[df['representative'] == 'centroid']

            # df = df[df['n_points_per_cluster'] == 20]
            # df = df[df['cluster distance'] == 50]

            fig = px.line(df, x="n_points_per_ImageNet_class", y="R@10", text='text', color='clustering', line_dash='representative',
                        facet_row='proj', facet_col='n_ImageNet_classes', category_orders={"proj": ["None", "pca", "tsne"], "n_ImageNet_classes":[1,2,5]},
                        markers=True)
            fig.update_traces(textposition='bottom center')
            fig.update_layout(yaxis_range=[-0.2,1.2])
            fig.update_layout(
                autosize=False,
                width=1700,
                height=1200,
                showlegend=True,
                font=dict(
                        # family="Courier New, monospace",
                        size=20  # Set the font size here
                        # color="RebeccaPurple"
                )
            )

            fig.write_image('charts/{}_{}_{}_recall.png'.format(dataset,model,task))

### Comparison with Tag System

In [9]:
import pandas as pd
import plotly.express as px

In [19]:
def text(recall, row):
    if row == 'ZCCR (ALBEF)':
        return '<b>{}</b>'.format(str(recall))
    if row != 'ZCCR (CLIP)':
        return str(recall)
    return ''

for dataset in ['coco', 'flickr30k']:

    # BT
    df           = pd.read_csv('results/bt_metrics.csv')
    df           = df.rename(columns={'recall': 'R@10'})

    df           = df.loc[df['tokenization'] == '_lemma']
    df           = df[df['dataset']==dataset]
    df['Method'] = ['BT']*len(df)

    tagger       = df[['Method', 'task', 'dataset', 'n_seeds', 'n_points_per_class', 'pool size', 'R@10', 'map']]

    # BTBA
    df           = pd.read_csv('results/btba_metrics.csv')
    df           = df.rename(columns={'recall': 'R@10'})

    df           = df.loc[df['tokenization'] == '_lemma']
    df           = df[df['dataset']==dataset]
    df['Method'] = ['BTBA']*len(df)

    tagger_bert_2  = df[['Method', 'task', 'dataset', 'n_seeds', 'n_points_per_class', 'pool size', 'R@10', 'map']]

    # ZCCR recall@10
    df = pd.read_csv('results/zccr_metrics.csv')
    df = df.rename(columns={'recall': 'R@10', 'n_points_per_cluster':'n_points_per_class'})

    df = df[df['n_seeds'] != 3]
    df = df[df['proj']    == 'pca']
    df = df[df['dataset'] == dataset]
    df = df[df['clustering'] == 'agglomerative']
    df = df[df['representative'] == 'centroid']
    df = df[df['cluster distance'] == 5]
    df['Method'] = df['model'].map(lambda x: 'ZCCR ({})'.format(x.upper()))
    # df = df[df['model'] == model]

    CRS      = df[['Method', 'task', 'dataset', 'n_seeds', 'n_points_per_class', 'pool size', 'R@10', 'map']]
    df_merged = tagger.append(CRS, ignore_index=True)
    # df_merged = df_merged.append(tagger_bert, ignore_index=True)
    df_merged = df_merged.append(tagger_bert_2, ignore_index=True)

    df_merged['n_points_per_seed'] = df_merged['n_points_per_class'].map(lambda e: str(e))
    df_merged['text'] = df_merged.apply(lambda x: text(x['R@10'],x['Method']), axis=1)
    df_merged         = df_merged[df_merged['n_seeds'] != 10]

    df_merged         = df_merged.rename(columns={'n_points_per_seed':'n_points_per_ImageNet_class',
                            'n_seeds':'n_ImageNet_classes'})

    fig = px.line(df_merged, x="n_points_per_ImageNet_class", y="R@10", text='text', color='Method', #line_dash='representative', 
                facet_row='task', facet_col='n_ImageNet_classes', category_orders={"task": ["txt2img", "txt2txt", "img2txt", "img2img"]},
                color_discrete_map={
                "ZCCR (CLIP)": "#FF9DA6",
                "ZCCR (ALBEF)": "#EF553B",
                "BTBA": "#00CC99",
                "BT": "#636EFA"},
                markers=True)

    fig.update_traces(textposition='bottom center')
    fig.update_layout(yaxis_range=[-0.2,1.2])
    fig.update_layout(
        autosize=False,
        width=1550,
        height=1600,
        showlegend=True,
        font=dict(
                size=20
        )
    )

    fig.write_image('charts/{}_bt_comparison_recall.png'.format(dataset))


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

