# Load frequency data

In [1]:
import pandas as pd

In [2]:
# Names of the frameworks processed by the cells below
frameworks = [
    "data-driven",
    "rdoc",
]

In [3]:
# Map each framework name to the frequency table read from its CSV file
df = dict()
for framework in frameworks:
    csv_path = "data/freq_{}.csv".format(framework)
    df[framework] = pd.read_csv(csv_path)

# Plot word clouds

In [4]:
from collections import OrderedDict
from wordcloud import WordCloud

In [5]:
def plot_wordclouds(framework, domains, lists, path="figures/", 
                    font="style/cmunbmr.ttf", print_fig=False, width=550,
                    palette=None):
    """Render and save one single-color word cloud image per domain.

    For every domain, the rows of ``lists`` whose ``DOMAIN`` equals that
    domain are turned into a term -> frequency mapping (underscores in terms
    are shown as spaces), laid out as a square word cloud, recolored with a
    single color, and written to ``"{path}cloud_{framework}_{domain}.png"``.

    Parameters
    ----------
    framework : str
        Framework name. Used in the output file name and, when ``palette``
        is not given, as the key into the notebook-level ``palettes`` dict.
    domains : iterable
        Domain labels in plotting order; the i-th domain is drawn in the
        i-th palette color.
    lists : pandas.DataFrame
        Table with ``DOMAIN``, ``TERM`` and ``FREQUENCY`` columns.
    path : str
        Prefix prepended verbatim to each output file name. Its directory
        part is created if it does not exist yet.
    font : str
        Font file handed to ``WordCloud`` as ``font_path``.
    print_fig : bool
        If true, print the domain label and show the figure after saving.
    width : int
        Used as both the width and the height of the word cloud canvas.
    palette : sequence, optional
        Colors indexed by domain position. Defaults to
        ``palettes[framework]`` looked up in the enclosing namespace.

    Raises
    ------
    KeyError
        If ``palette`` is omitted and ``framework`` is not in ``palettes``.
    IndexError
        If the palette has fewer colors than there are domains.
    """

    import os

    from wordcloud import WordCloud
    import matplotlib.pyplot as plt

    if palette is None:
        # Backward-compatible default: the notebook-level palette table.
        palette = palettes[framework]

    # `path` is a file-name prefix, so only its directory part is created.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    for i, dom in enumerate(domains):

        # Resolve the color before the layout so a short palette fails fast.
        color = palette[i]

        def color_func(word, font_size, position, orientation, 
                       random_state=None, **kwargs):

            # Adapted from https://amueller.github.io/word_cloud/auto_examples/a_new_hope.html

            return color

        rows = lists.loc[lists["DOMAIN"] == dom]
        tkns = [t.replace("_", " ") for t in rows["TERM"]]
        tkn2freq = dict(zip(tkns, rows["FREQUENCY"]))

        # NOTE(review): mode="RGB" has no alpha channel, so the zero alpha in
        # background_color presumably has no effect; the wordcloud docs describe
        # transparent backgrounds as mode="RGBA" with background_color=None.
        # Left unchanged here -- confirm which output was intended.
        cloud = WordCloud(background_color="rgba(255, 255, 255, 0)", mode="RGB", 
                          max_font_size=100, prefer_horizontal=1, scale=20, margin=3,
                          width=width, height=width, font_path=font, 
                          random_state=42).generate_from_frequencies(frequencies=tkn2freq)
        cloud = cloud.recolor(color_func=color_func, random_state=42)

        fig, ax = plt.subplots()
        try:
            ax.axis("off")
            ax.imshow(cloud)

            file_name = "{}cloud_{}_{}.png".format(path, framework, dom)
            fig.savefig(file_name, dpi=500, bbox_inches="tight")

            if print_fig:
                print(dom)
                plt.show()
        finally:
            # Close this figure even if drawing or saving failed, so figures
            # do not accumulate across domains.
            plt.close(fig)

In [6]:
# Named hex colors shared by all framework palettes
c = {
    "red": "#CE7D69",
    "orange": "#BA7E39",
    "yellow": "#CEBE6D",
    "chartreuse": "#AEC87C",
    "green": "#77B58A",
    "blue": "#7597D0",
    "magenta": "#B07EB6",
    "purple": "#7D74A3",
    "brown": "#846B43",
    "pink": "#CF7593",
}

# Per-framework palettes: ordered hex colors resolved from the color names above
palettes = {
    framework_name: [c[color_name] for color_name in color_names]
    for framework_name, color_names in (
        ("data-driven", ("blue", "magenta", "yellow", "green", "red", "purple",
                         "chartreuse", "orange", "pink", "brown")),
        ("rdoc", ("blue", "red", "green", "purple", "yellow", "orange")),
        ("dsm", ("purple", "chartreuse", "orange", "blue", "red", "magenta",
                 "yellow", "green", "brown")),
    )
}

In [7]:
# Draw the word clouds of every framework, one image per domain
for framework in frameworks:
    freq_table = df[framework]
    # OrderedDict keys keep each distinct domain once, in first-seen order
    domains = OrderedDict.fromkeys(freq_table["DOMAIN"])
    plot_wordclouds(framework, domains, freq_table, width=200)