In [1]:
import pandas as pd
import altair as alt
import os

In [2]:
# Read the cleaned dataset from disk into the working DataFrame
dataset_path = "../data/raw/cleaned_dataset.csv"
df = pd.read_csv(dataset_path)

In [3]:
# Create summary table grouped by Danceability

# Bin Danceability into three levels:
#   Low  : Danceability < 0.4
#   Mid  : 0.4 <= Danceability < 0.7
#   High : Danceability >= 0.7
# pd.cut leaves rows with a missing Danceability unbinned (NaN) instead of
# silently counting them as "Mid", so they are excluded from the group means.
bins = ["Low", "Mid", "High"]
bin_edges = [float("-inf"), 0.4, 0.7, float("inf")]
df["dance_bin"] = pd.cut(
    df["Danceability"],
    bins=bin_edges,
    labels=bins,
    right=False,    # intervals are [low, high), so 0.4 -> Mid and 0.7 -> High
    ordered=False,  # plain categorical; category order still follows `bins`
)

# Mean Likes and Views per level. observed=False keeps all three levels in the
# table and makes the groupby behaviour explicit for categorical keys.
summary = df.groupby("dance_bin", observed=False)[["Likes", "Views"]].mean()
# Round to whole numbers for display
summary = summary.round(0).astype(int).reset_index()

summary

Unnamed: 0,dance_bin,Likes,Views
0,Low,368803,52332879
1,Mid,566328,80829862
2,High,861425,121742251


In [4]:
# Create Altair Visualization

# Bar chart of the average Likes per danceability level, built from `summary`.
chart = alt.Chart(summary).mark_bar().encode(
    # Explicit sort fixes the bar order to Low, Mid, High
    alt.X("dance_bin:N", title="Danceability Level",
          sort=["Low", "Mid", "High"]),
    # Plot the Likes column so the data matches the axis and chart titles
    # (the Views column was encoded here previously, under a "Likes" label)
    alt.Y("Likes:Q", title="Average Likes", axis=alt.Axis(format="~s"))
).properties(
    title="Average Song Likes by Danceability Level",
    width=400,
    height=400
).configure_axis(
    labelFontSize=12,
    titleFontSize=16
).configure_title(
    fontSize=20
)

chart

In [5]:
# Save chart to img folder
output_path = "../img/danceability_eda.png"

# Make sure the target folder exists; otherwise the save fails with a
# missing-directory error (e.g. on a fresh checkout without ../img)
os.makedirs(os.path.dirname(output_path), exist_ok=True)

chart.save(output_path, ppi=300)