In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd

from paths.paths import BASE_RESULTS_DIR
import dimen_generation.dimen_generation_updated as dg
import dimen_generation.bumpplot as bp

In [None]:
# Year whose results are analysed. It selects the results sub-directory below
# and is embedded in the file name of the figure saved by plot_bumpchart.
YEAR = "2013"
# Directory holding this year's inputs and outputs: <BASE_RESULTS_DIR>/<YEAR>.
RESULTS_DIR = os.path.join(BASE_RESULTS_DIR, YEAR)

In [None]:
# Load the embedding table for the selected year; the first CSV column is used
# as the row index.
embeddings_path = os.path.join(RESULTS_DIR, 'embeddings.csv')
df = pd.read_csv(embeddings_path, index_col=0)
df.head()

In [None]:
# A single seed pair is used, so exactly one dimension name is supplied below.
seed_pairs = [("democrats", "Conservative")]
dim = dg.DimenGenerator(df)
dimensions = dim.generate_dimensions_from_seeds(seed_pairs)

# Pair each generated dimension with the name of its score column.
# NOTE(review): zip() yields a one-shot iterator; this assumes score_embedding
# consumes it only once - confirm against dimen_generation_updated.
named_dimensions = zip(["dem_rep"], dimensions)
scores = dg.score_embedding(df, named_dimensions)
scores.head()

In [None]:
# Row labels of `scores`, ordered by ascending 'dem_rep' score.
scores_by_dem_rep = scores.sort_values('dem_rep')
fasttext = list(scores_by_dem_rep.index)
fasttext

In [None]:
# Reference ordering of subreddits. The position of a name in this list is what
# later cells use as its "waller" rank.
# NOTE(review): presumably taken from a published ordering - confirm the source.
waller_reference = [
    'democrats',
    'EnoughLibertarianSpam',
    'hillaryclinton',
    'progressive',
    'BlueMidterm2018',
    'Enough_Sanders_Spam',
    'badwomensanatomy',
    'racism',
    'GunsAreCool',
    'Christians',
    'The_Farage',
    'new_right',
    'conservatives',
    'metacanada',
    'NoFapChristians',
    'TrueChristian',
    'The_Donald',
    'Conservative'
]

# Keep only the subreddits that also appear in the fasttext ordering,
# preserving the reference order.
waller = [name for name in waller_reference if name in fasttext]

In [None]:
# Build one record per subreddit for the bump chart: its position in the
# `waller` ordering versus its position in the `fasttext` ordering.
#
# `fasttext` is the index of the whole `scores` table and can contain names
# that are not in `waller`, so `fasttext.index(name) + 1` would be a rank over
# a different (larger) population than the waller rank. To make the two columns
# comparable, the fasttext rank is computed among the `waller` subreddits only,
# so both ranks run from 1 to len(waller).
waller_members = set(waller)
# dict.fromkeys drops repeated labels while keeping first-occurrence order.
fasttext_order = list(
    dict.fromkeys(name for name in fasttext if name in waller_members)
)
# Name -> 1-based rank lookup; avoids a linear list search per subreddit.
fasttext_rank = {
    name: position for position, name in enumerate(fasttext_order, start=1)
}

rankings = [
    {
        "Model": ["waller", "fasttext"],
        "Rank": [waller_position, fasttext_rank[name]],
        "Subreddit": name,
    }
    for waller_position, name in enumerate(waller, start=1)
]

In [None]:
def plot_bumpchart(elements):
    """Draw a two-column bump chart of ranks and save it as a PNG.

    Parameters
    ----------
    elements : list of dict
        One entry per line drawn. Each dict provides:
        ``"Model"``     - x positions of the line's points,
        ``"Rank"``      - y values, in the same order as ``"Model"``;
                          index 0 is labelled at the "waller" side and
                          index 1 at the "fasttext" side,
        ``"Subreddit"`` - text written next to both ends of the line.

    Returns
    -------
    None

    Side effects
    ------------
    Saves the figure as ``rankings_comparison_{YEAR}.png`` inside
    ``RESULTS_DIR`` (both are notebook-level constants).
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    for element in elements:
        waller_rank = element["Rank"][0]
        fasttext_rank = element["Rank"][1]

        ax.plot(
            element["Model"],
            element["Rank"],
            "o-",  # circle markers joined by a solid line
            markerfacecolor="white",
            linewidth=3,
        )

        # Label to the right of the fasttext point.
        ax.annotate(
            element["Subreddit"],
            xy=("fasttext", fasttext_rank),
            xytext=(1.01, fasttext_rank),
        )

        # Label to the left of the waller point.
        ax.annotate(
            element["Subreddit"],
            xy=("waller", waller_rank),
            xytext=(-0.3, waller_rank),
        )

    # Rank 1 is drawn at the top.
    ax.invert_yaxis()
    # One tick per rank. Sized from the argument rather than from the global
    # `waller`, so the function does not depend on hidden notebook state.
    ax.set_yticks(list(range(1, len(elements) + 1)))

    # Axis labels and title.
    ax.set_xlabel('Model')
    ax.set_ylabel('Rank')
    ax.set_title('Comparison of Models on Subreddit Classification Task')

    # Remove the frame around the plot.
    for spine in ax.spines.values():
        spine.set_visible(False)

    fig.tight_layout()
    output_path = os.path.join(RESULTS_DIR, f'rankings_comparison_{YEAR}.png')
    fig.savefig(output_path, dpi=300, bbox_inches='tight')

In [None]:
# Draw the waller-vs-fasttext bump chart; the function also saves it as a PNG
# under RESULTS_DIR.
plot_bumpchart(rankings)