In [115]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt

# Lift Altair's default cap on the number of rows a chart may embed, so that
# charts built from the full merged frame render instead of raising MaxRowsError.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [116]:
# Columns kept from the users table: the owner id plus the personality scores.
USERS_ATTR = [
    "id_owner",
    "openness",
    "conscientiousness",
    "extraversion",
    "agreeableness",
    "neuroticism",
]

# Columns kept from the playlists table: the owner id plus playlist-level features.
# NOTE(review): "ratio_unpopulart_artists" is spelled as-is on purpose here;
# presumably it matches the CSV header -- confirm before renaming.
PLAYLISTS_ATTR = [
    "id_owner",
    "avg_valence",
    "avg_energy",
    "diversity_artists",
    "ratio_unpopulart_artists",
]

In [117]:
# Load the two raw CSV inputs (paths are relative to the notebook's working
# directory): the users table first, then the playlists table.
users, playlists = (
    pd.read_csv(csv_path)
    for csv_path in ("data/user_targets.csv", "data/df_playlists.csv")
)

In [118]:
# Build the genres frame: every column of `playlists` from "pop" through the
# last column. Must run before `playlists` is narrowed to PLAYLISTS_ATTR.
playlists_cols = playlists.columns.tolist()
genres_start = playlists_cols.index("pop")  # position of the first genre column
genres_cols = playlists_cols[genres_start:]
genres_df = playlists.loc[:, genres_cols]

In [119]:
# Keep only the columns needed for the analysis.
# `.copy()` makes each result an independent frame: a later cell adds a
# "dom_trait" column to `users`, and assigning into a bare column selection can
# raise pandas' SettingWithCopyWarning.
users = users[USERS_ATTR].copy()
playlists = playlists[PLAYLISTS_ATTR].copy()

In [120]:
# The personality-score columns compared to find each user's dominant trait.
OCEAN = [
    "openness",
    "conscientiousness",
    "extraversion",
    "agreeableness",
    "neuroticism",
]

def get_dom_trait(row):
    """Return the label of the highest value in ``row``.

    Parameters
    ----------
    row : pandas.Series
        Scores indexed by trait name (one row of the OCEAN columns when used
        with ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    The index label of the maximum value, or the string ``"multi-dominant"``
    when that maximum is shared by more than one entry.
    """
    top_score = row.max()
    # Count how many entries tie for the top score (exact equality).
    n_at_top = (row == top_score).sum()
    return row.idxmax() if n_at_top <= 1 else "multi-dominant"

# Label every user with their dominant trait; labels are title-cased
# (so "multi-dominant" becomes "Multi-Dominant").
users["dom_trait"] = (
    users[OCEAN]
    .apply(get_dom_trait, axis=1)  # evaluate one user (row) at a time
    .str.title()
)

In [121]:
# Merge users with their playlists.
# Each row of the result is a unique playlist (not a unique user), so a user's
# traits repeat once per playlist they own.
# The join key and join type are spelled out rather than left to pandas'
# default of joining on every column name the two frames share.
merged = pd.merge(users, playlists, on="id_owner", how="inner")

In [145]:
# Interval (rectangular brush) selection shared by both panels.
selection = alt.selection_interval()

# Left panel: one point per playlist, positioned by average valence and energy.
# Brushing here drives the bar chart on the right.
scatter = (
    alt.Chart(merged)
    .mark_circle()
    .encode(
        x=alt.X("avg_valence:Q", title="Average Valence"),
        # The y channel uses alt.Y so the channel class matches the encoding.
        y=alt.Y("avg_energy:Q", title="Average Energy"),
    )
    .add_params(selection)
    .properties(
        title="Playlist Mood",
        width=500,
        height=500,
    )
)

# Right panel: number of rows per dominant trait, restricted to the brushed
# region and excluding rows labeled 'Multi-Dominant'.
bars = (
    alt.Chart(merged)
    .mark_bar()
    .transform_filter(
        selection & (alt.datum.dom_trait != 'Multi-Dominant')
    )
    .encode(
        x=alt.X(
            "dom_trait:N",
            axis=alt.Axis(labelAngle=-45),
            title="Dominant Personality Trait",
        ),
        y=alt.Y("count():Q", title="Count"),
    )
    # NOTE(review): registering the brush on this chart as well makes the bars
    # themselves brushable. Linked-view examples usually attach the selection
    # only to the source chart -- confirm this is intended.
    .add_params(selection)
    .properties(
        title="Dominant Trait of Playlist Owner",
        width=250,
        height=500,
    )
)

# Show the two panels side by side.
viz1 = (scatter | bars)
viz1

*Note: playlist owners with a multi-dominant personality (two or more traits tied for the highest score) are not included in the bar chart.