In [44]:
import os
import csv

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from IPython.utils import io
import matplotlib
import pandas as pd
import numpy as np

In [45]:
# CSV of moral-foundation proportions that load_counts_data() reads.
COUNTS_DATA_PATH = os.path.join(
    "data",
    "moral_foundation_scores",
    "counts",
    "recursive_morality_proportions_bryson.csv",
)

# Directory that the generated plot images are written into.
PLOT_SAVE_PATH = os.path.join(
    "data",
    "moral_foundation_scores",
    "plots",
)

In [46]:
# PRES_TIME = {
#     "GroverCleveland": 1885,
#     "ChesterA.Arthur": 1881,
#     "UlyssesS.Grant": 1869,
#     "RutherfordB.Hayes": 1877,
#     "JamesA.Garfield": 1881,
#     "WilliamMcKinley": 1897,
#     "TheodoreRoosevelt": 1901,
#     "WilliamHowardTaft": 1909,
#     "HerbertHoover": 1929,
#     "WarrenG.Harding": 1921,
#     "WoodrowWilson": 1913,
#     "CalvinCoolidge": 1923,
#     "HarryS.Truman": 1945,
#     "FranklinD.Roosevelt": 1933,
#     "JohnF.Kennedy": 1961,
#     "DwightD.Eisenhower": 1953,
#     "LyndonB.Johnson": 1963,
#     "RichardNixon": 1969,
#     "GeraldR.Ford": 1974,
#     "GeorgeBush": 1989,
#     "JimmyCarter": 1977,
#     "RonaldReagan": 1981,
#     "WilliamJ.Clinton": 1993,
#     "GeorgeW.Bush": 2001,
#     "BarackObama": 2009,
#     "DonaldJ.Trump": 2017,
# }

In [47]:
# Year each president's (first) term began, keyed by the president's name.
# load_counts_data() looks names from the CSV header up in this table, so the
# keys must match the header spelling exactly (including spaces and periods).
# NOTE(review): Grover Cleveland served two non-consecutive terms; only the
# first start year is recorded here -- confirm that is what the plots should use.
PRES_TIME = {
    # Took office in March 1889; 1888 was the election year, which was
    # inconsistent with every other entry (all are term-start years).
    "Benjamin Harrison": 1889,
    "Grover Cleveland": 1885,
    "Chester A. Arthur": 1881,
    "Ulysses S. Grant": 1869,
    "Rutherford B. Hayes": 1877,
    "James A. Garfield": 1881,
    "William McKinley": 1897,
    "Theodore Roosevelt": 1901,
    "William Howard Taft": 1909,
    "Herbert Hoover": 1929,
    "Warren G. Harding": 1921,
    "Woodrow Wilson": 1913,
    "Calvin Coolidge": 1923,
    "Harry S. Truman": 1945,
    "Franklin D. Roosevelt": 1933,
    "John F. Kennedy": 1961,
    "Dwight D. Eisenhower": 1953,
    "Lyndon B. Johnson": 1963,
    "Richard Nixon": 1969,
    "Gerald R. Ford": 1974,
    "George Bush": 1989,
    "Jimmy Carter": 1977,
    "Ronald Reagan": 1981,
    "William J. Clinton": 1993,
    "George W. Bush": 2001,
    "Barack Obama": 2009,
    "Donald J. Trump": 2017,
}

In [48]:
def load_counts_data(counts_path=None, pres_time=None):
    """Load per-president score vectors from the counts CSV.

    The CSV is read column-wise: the first row is a header whose first cell is
    skipped and whose remaining cells are president names; in every following
    non-empty row the first cell is skipped and the remaining cells are parsed
    as floats, one per president column.

    Args:
        counts_path: Path of the CSV to read. Defaults to ``COUNTS_DATA_PATH``.
        pres_time: Mapping from president name to year. Defaults to
            ``PRES_TIME``.

    Returns:
        A dict with three parallel lists, in header order:
        ``"vectors"`` (one 1-D ``np.ndarray`` per president, one entry per
        data row), ``"names"`` (the header names) and ``"times"`` (the year
        looked up for each name).

    Raises:
        ValueError: If the file has no header row, or a data row has fewer
            score cells than the header has president names.
        KeyError: If a president name in the header is missing from
            ``pres_time``.
    """
    if counts_path is None:
        counts_path = COUNTS_DATA_PATH
    if pres_time is None:
        pres_time = PRES_TIME

    # newline="" lets the csv module do its own newline handling, as its
    # documentation requires for files passed to csv.reader.
    with open(counts_path, "r", newline="") as f:
        reader = csv.reader(f, delimiter=",")

        header = next(reader, None)
        if header is None:
            raise ValueError(f"{counts_path} is empty: expected a header row of president names")
        president_names = header[1:]

        scores_by_president = {pres_name: [] for pres_name in president_names}
        for row_number, row in enumerate(reader, start=2):
            if len(row) == 0:
                # csv.reader yields [] for blank lines; skip them.
                continue
            foundation_scores = row[1:]
            if len(foundation_scores) < len(president_names):
                raise ValueError(
                    f"{counts_path}: row {row_number} has {len(foundation_scores)} scores "
                    f"but the header lists {len(president_names)} presidents"
                )
            # zip stops at the header length, so extra trailing cells are ignored.
            for pres_name, score in zip(president_names, foundation_scores):
                scores_by_president[pres_name].append(float(score))

    # Fail with one readable message instead of a bare KeyError on the first miss.
    unknown_names = [pres_name for pres_name in president_names if pres_name not in pres_time]
    if unknown_names:
        raise KeyError(f"No start year configured for: {unknown_names}")

    # Convert to numpy arrays, keeping the three lists aligned in header order.
    formatted_data = {"vectors": [], "names": [], "times": []}
    for pres_name in president_names:
        formatted_data["vectors"].append(np.array(scores_by_president[pres_name]))
        formatted_data["names"].append(pres_name)
        formatted_data["times"].append(pres_time[pres_name])

    return formatted_data

In [49]:
def make_and_save_scatter_plot(plot_locations, labels, legend_categories, title, save_path, use_cmap=False, component_vector=None, individual_labels=None):
    """Draw a 2-D scatter plot coloured by ``labels`` and save it to ``save_path``.

    Args:
        plot_locations: 2-D array; column 0 is used as x and column 1 as y.
        labels: One hue value per point (same length as ``plot_locations``).
        legend_categories: Only its length is used, to size the categorical
            colour palette when ``use_cmap`` is False.
        title: Title placed on the axes.
        save_path: Destination passed to ``Figure.savefig``. When it is a
            path, its parent directory is created if missing.
        use_cmap: If True, colour points with the continuous "viridis"
            colormap instead of a categorical palette.
        component_vector: Optional pair ``(dx, dy)``; a red arrow is drawn
            from the origin to that offset.
        individual_labels: Optional sequence of text labels; entry ``i`` is
            annotated at point ``i``.

    Side effects:
        Sets the global seaborn style to "darkgrid", writes the image file and
        closes the figure (also when plotting or saving raises).
    """
    df = pd.DataFrame(dict(x=plot_locations[:, 0], y=plot_locations[:, 1], label=labels))

    if use_cmap:
        color_list = sns.color_palette("viridis", as_cmap=True)
    else:
        color_list = sns.color_palette(n_colors=len(legend_categories))

    sns.set_style("darkgrid")
    fig, ax = plt.subplots(figsize=(20, 10))
    try:
        sns.scatterplot(
            data=df,
            x="x",
            y="y",
            hue="label",
            ax=ax,
            s=40,
            legend=True,
            palette=color_list,
        )
        ax.set_title(title)

        if component_vector is not None:
            # Draw on the explicit axes rather than pyplot's "current" axes.
            ax.arrow(
                x=0,
                y=0,
                dx=component_vector[0],
                dy=component_vector[1],
                color="red",
                head_starts_at_zero=True,
                head_width=.03,
                head_length=.01,
            )

        if individual_labels is not None:
            for i, label in enumerate(individual_labels):
                ax.annotate(label, (plot_locations[i, 0], plot_locations[i, 1]))

        # savefig does not create missing directories; do it here so a fresh
        # checkout without the plots folder still works. File-like targets
        # are passed through untouched.
        if isinstance(save_path, (str, os.PathLike)):
            save_dir = os.path.dirname(save_path)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)

        fig.savefig(save_path, dpi=600)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)


In [50]:
def create_pca_plot(embeddings, labels, legend_categories, title_subject, save_path, individual_labels=None, img_name="pca.jpg"):
    """Reduce ``embeddings`` to two PCA components and save the scatter plot.

    The image is written to ``save_path``/``img_name`` through
    ``make_and_save_scatter_plot``, using the continuous colormap for
    ``labels`` and the title ``"PCA of <title_subject>"``.
    """
    output_file = os.path.join(save_path, img_name)

    # Fit on the embeddings (labels forwarded as the ``y`` argument), then
    # project the same embeddings into the fitted 2-D space.
    fitted_pca = PCA(n_components=2).fit(embeddings, labels)
    coords_2d = fitted_pca.transform(embeddings)

    make_and_save_scatter_plot(
        plot_locations=coords_2d,
        labels=labels,
        legend_categories=legend_categories,
        title=f"PCA of {title_subject}",
        save_path=output_file,
        use_cmap=True,
        individual_labels=individual_labels,
    )

def create_tsne_plot(embeddings, labels, legend_categories, title_subject, save_path, individual_labels=None, img_name="tsne.jpg", perplexity=None, random_state=None):
    """Embed ``embeddings`` in 2-D with t-SNE and save the scatter plot.

    Args:
        embeddings: 2-D array-like of shape (n_samples, n_features); a list of
            equal-length 1-D arrays is accepted as well.
        labels: One hue value per sample.
        legend_categories: Forwarded to ``make_and_save_scatter_plot``.
        title_subject: Inserted into the title ``"TSNE of <title_subject>"``.
        save_path: Directory the image is written into.
        individual_labels: Optional per-point text annotations.
        img_name: File name of the image inside ``save_path``.
        perplexity: t-SNE perplexity. When None, ``n_features - 1`` is used,
            clamped into ``[1, n_samples - 1]`` so TSNE does not reject it.
        random_state: Optional seed forwarded to TSNE so the layout can be
            reproduced; None keeps the unseeded behaviour.

    Output produced while fitting and plotting is captured and discarded.
    """
    # Accept the same list-of-arrays input that create_pca_plot accepts.
    embeddings = np.asarray(embeddings)
    n_samples, n_features = embeddings.shape

    if perplexity is None:
        # NOTE(review): the default is derived from the feature count, as in
        # the original code -- confirm this heuristic is intended. The clamp
        # only matters when n_features - 1 is 0 or >= n_samples, both of which
        # TSNE rejects.
        perplexity = max(1, min(n_features - 1, n_samples - 1))

    plot_save_path = os.path.join(save_path, img_name)

    with io.capture_output():
        tsne = TSNE(
            n_components=2,
            learning_rate='auto',
            perplexity=perplexity,
            random_state=random_state,
        )
        plot_locations = tsne.fit_transform(embeddings)

        make_and_save_scatter_plot(
            plot_locations,
            labels,
            legend_categories,
            f"TSNE of {title_subject}",
            plot_save_path,
            use_cmap=True,
            individual_labels=individual_labels
        )

In [56]:
# Read the per-president score vectors, names and start years.
data = load_counts_data()

# Stack the per-president vectors into one 2-D matrix (one row per president).
vectors = np.vstack(data["vectors"])
print(vectors.shape)

# Rescale with a single global min/max so every value lies in [0, 1].
max_val, min_val = vectors.max(), vectors.min()
print(max_val, min_val)
vectors = (vectors - min_val) / (max_val - min_val)

# Confirm the new range after rescaling.
max_val, min_val = vectors.max(), vectors.min()
print(max_val, min_val)

(27, 6)
0.008993119926141676 0.0
1.0 0.0


In [52]:
# Render and save the PCA scatter plot, annotated with president names.
# NOTE(review): this passes the raw data["vectors"], whereas the t-SNE cell
# passes the min-max normalised `vectors` -- confirm the difference is intended.
create_pca_plot(
    embeddings=data["vectors"],
    labels=data["times"],
    legend_categories=["Presidency Start Date"],
    title_subject="President Moral Foundations",
    save_path=PLOT_SAVE_PATH,
    individual_labels=data["names"],
    img_name="bryson_pca.jpg",
)

In [53]:
# Render and save the t-SNE scatter plot of the normalised vectors,
# annotated with president names.
create_tsne_plot(
    embeddings=vectors,
    labels=data["times"],
    legend_categories=["Presidency Start Date"],
    title_subject="President Moral Foundations",
    save_path=PLOT_SAVE_PATH,
    individual_labels=data["names"],
    img_name="bryson_tsne.jpg",
)