In [1]:
import json
import pandas as pd

def length_in_seconds(length):
    """Convert a track-length string into a whole number of seconds.

    Parameters
    ----------
    length : str
        Colon-separated duration such as ``'3:45'`` (minutes:seconds) or
        ``'1:02:03'`` (hours:minutes:seconds). A string without any colon
        is read as a plain number of seconds. The literal ``'0'`` is mapped
        to a default of 180 seconds (presumably a stand-in for a length
        that was not recorded -- confirm against the data source).

    Returns
    -------
    int
        Duration in seconds.

    Raises
    ------
    ValueError
        If any colon-separated field is not an integer literal.
    """
    if length == '0':
        return 180

    # Fold the fields left to right in base 60: every field is worth sixty
    # times the one to its right, which covers "ss", "m:ss" and "h:mm:ss".
    total_seconds = 0
    for field in length.split(':'):
        total_seconds = total_seconds * 60 + int(field)
    return total_seconds

# Load the lyric data. The loop below indexes it by year string
# ("1960" ... "2018") and iterates a list of song dicts per year.
# JSON text is UTF-8 by specification, so the encoding is given explicitly
# instead of relying on the platform default.
with open('all_lyrics_cleaned_v3.json', 'r', encoding='utf-8') as file:
    lyric_data = json.load(file)

# Columns that are cast to float64 once the frame has been assembled.
NUMERIC_COLUMNS = [
    'length',
    'num_verses',
    'num_words',
    'year',
    'num_unique_words',
    'density',
    'similar_verses_score',
]

# Collect every song as a plain record and build the DataFrame once at the
# end; creating one single-row DataFrame per song and concatenating them is
# far slower and gives the same table.
song_records = []

for year in range(1960, 2019):
    for song in lyric_data[str(year)]:
        # The song dicts are annotated in place, so lyric_data itself also
        # carries the added/normalised fields afterwards.
        song['year'] = year
        if not song.get('genre'):
            # Missing or empty genre is bucketed as 'Other'.
            song['genre'] = 'Other'
        if song.get('length'):
            # Only truthy lengths are converted; a missing/empty length is
            # left untouched.
            song['length'] = length_in_seconds(song['length'])
        song_records.append(song)

df = pd.DataFrame.from_records(song_records)

# A single astype call with a column -> dtype mapping replaces the seven
# per-column casts; a missing column still raises KeyError.
df = df.astype({column: 'float64' for column in NUMERIC_COLUMNS})

In [18]:
import plotly.graph_objects as go

# Animated bubble chart set-up: data selection, axes, play/pause buttons and
# the slider template. Traces, frames and slider steps are added afterwards.

# dropna() with no arguments discards a song if ANY of its columns is
# missing, not just the columns that are plotted.
dataset = df.dropna()

# Years as strings: they are used as frame names and slider labels.
years = [str(i) for i in range(1960, 2019)]

# Genres that get a trace of their own.
genres = ['Pop', 'Rap', 'Rock', 'Other']

# Plain-dict figure description that is handed to go.Figure at the end.
fig_dict = {
    "data": [],
    "layout": {},
    "frames": []
}

# x axis: density, y axis: similar verses score
fig_dict["layout"]["xaxis"] = {"range": [0, 7], "title": "Song Density"}
# On a log axis Plotly reads "range" as log10 exponents, so [0, 3] spans
# 1 to 1000 in data units.
fig_dict["layout"]["yaxis"] = {"range": [0, 3], "title": "Similar Verses Score", "type": "log"}
fig_dict["layout"]["hovermode"] = "closest"

# layout["sliders"] is intentionally not set here: it is assigned as
# [sliders_dict] once the slider steps have been built, so a placeholder
# value at this point would only be overwritten.

# Play / Pause buttons, placed to the left of the slider.
fig_dict["layout"]["updatemenus"] = [
    {
        "buttons": [
            {
                # None as the first argument plays all frames.
                "args": [None, {"frame": {"duration": 500, "redraw": False},
                                "fromcurrent": True, "transition": {"duration": 300,
                                                                    "easing": "quadratic-in-out"}}],
                "label": "Play",
                "method": "animate"
            },
            {
                # [None] with mode "immediate" interrupts the running animation.
                "args": [[None], {"frame": {"duration": 0, "redraw": False},
                                  "mode": "immediate",
                                  "transition": {"duration": 0}}],
                "label": "Pause",
                "method": "animate"
            }
        ],
        "direction": "left",
        "pad": {"r": 10, "t": 87},
        "showactive": False,
        "type": "buttons",
        "x": 0.1,
        "xanchor": "right",
        "y": 0,
        "yanchor": "top"
    }
]

# Slider template; "steps" starts empty and receives one entry per year.
sliders_dict = {
    "active": 0,
    "yanchor": "top",
    "xanchor": "left",
    "currentvalue": {
        "font": {"size": 20},
        "prefix": "Year:",
        "visible": True,
        "xanchor": "right"
    },
    "transition": {"duration": 300, "easing": "cubic-in-out"},
    "pad": {"b": 10, "t": 50},
    "len": 0.9,
    "x": 0.1,
    "y": 0,
    "steps": []
}

def _year_traces(songs, year, genre_names, sizeref):
    """Build one scatter-trace dict per genre for the songs of a single year.

    Parameters
    ----------
    songs : pandas.DataFrame
        Song table. The columns read are ``year``, ``genre``, ``density``,
        ``similar_verses_score``, ``title``, ``artist`` and
        ``num_unique_words``.
    year : int or str
        Year to select; passed through ``int()`` before it is compared with
        the ``year`` column.
    genre_names : list of str
        Genres to emit, in trace order. A genre without songs in that year
        still produces a trace (with empty lists), so every year yields the
        same number of traces in the same order.
    sizeref : float
        Value stored as ``marker.sizeref`` on every trace.

    Returns
    -------
    list of dict
        Trace dicts: x = density, y = similar verses score, marker size =
        number of unique words, hover text = "title - artist".
    """
    songs_in_year = songs[songs["year"] == int(year)]
    traces = []
    for genre_name in genre_names:
        genre_songs = songs_in_year[songs_in_year["genre"] == genre_name]
        traces.append({
            "x": list(genre_songs["density"]),
            "y": list(genre_songs["similar_verses_score"]),
            "mode": "markers",
            # String concatenation assumes title and artist are both str.
            "text": [title + " - " + artist
                     for title, artist in zip(genre_songs["title"], genre_songs["artist"])],
            "marker": {
                "sizemode": "area",
                "sizeref": sizeref,
                "size": list(genre_songs["num_unique_words"])
            },
            "name": genre_name
        })
    return traces


# One shared sizeref for every trace, taken from the largest
# num_unique_words in the unfiltered df, so marker areas use the same scale
# in all frames.
marker_sizeref = df["num_unique_words"].max() / 15 ** 2

# make data: the figure initially shows the traces of the first year, which
# is also the slider's active step (index 0).
fig_dict["data"].extend(_year_traces(dataset, years[0], genres, marker_sizeref))

# make frames: one frame plus one matching slider step per year
for year in years:
    fig_dict["frames"].append({
        "data": _year_traces(dataset, year, genres, marker_sizeref),
        "name": year
    })
    sliders_dict["steps"].append({
        "args": [
            # Animate to the frame whose name equals this year.
            [year],
            {"frame": {"duration": 300, "redraw": False},
             "mode": "immediate",
             "transition": {"duration": 300}}
        ],
        "label": year,
        "method": "animate"
    })


fig_dict["layout"]["sliders"] = [sliders_dict]

fig = go.Figure(fig_dict)

fig.show()