In [1]:
# Imports
import pandas as pd
import numpy as np
import altair as alt
import sympy as sp
import math
import csv

In [2]:
# Load the raw Spotify 2023 export.
spotify_data = pd.read_csv('data/spotify_2023.csv', delimiter=',')

# These features are not stored as numbers in the original data set, so we transform
# them into numeric columns here before creating visuals. With errors='coerce', any
# entry that cannot be parsed as a number becomes NaN instead of raising.
for numeric_col in ('streams', 'in_shazam_charts', 'in_deezer_playlists'):
    spotify_data[numeric_col] = pd.to_numeric(spotify_data[numeric_col], errors='coerce')

In [3]:
# NOTE: this binds a second name to the same DataFrame (no copy is made), so the
# columns added below also appear on `spotify_data`.
wrangled_data = spotify_data

# Bin edges and their display labels for the Spotify chart rank.
# The labels describe right-closed ranges (0-20, 21-40, ..., 81-100, 100+),
# so the intervals must be closed on the right to agree with them.
bins = [0, 20, 40, 60, 80, 100, float('inf')]
labels = ['0-20', '21-40', '41-60', '61-80', '81-100', '100+']

# Create a new column 'rank_bin' based on the bins.
# right=True makes each interval (lo, hi], so 20 -> '0-20', 40 -> '21-40', 100 -> '81-100';
# include_lowest=True keeps a value of exactly 0 inside the first bin.
# (Previously right=False produced [lo, hi) intervals, which pushed every boundary
# value into the next label, e.g. 20 was labelled '21-40'.)
wrangled_data['rank_bin'] = pd.cut(
    wrangled_data['in_spotify_charts'],
    bins=bins,
    labels=labels,
    right=True,
    include_lowest=True,
)

# Lookup from calendar month number (1-12) to season name.
month_to_season = {
    12: 'Winter',
    1: 'Winter',
    2: 'Winter',
    3: 'Spring',
    4: 'Spring',
    5: 'Spring',
    6: 'Summer',
    7: 'Summer',
    8: 'Summer',
    9: 'Fall',
    10: 'Fall',
    11: 'Fall',
}

# Create a new column 'season' based on the 'released_month' column.
# Months that are missing from the lookup (including NaN) map to NaN.
wrangled_data['season'] = wrangled_data['released_month'].map(month_to_season)

In [4]:
# Create an arc-angle column for each audio characteristic: the percentage is turned
# into a fraction, rounded to 2 decimals, and multiplied by pi, so an input of 0-100
# yields a value between 0 and pi.
# NOTE(review): multiplying by pi spans a half turn (180 degrees), not a full
# 360 degrees -- confirm that a half-circle scale is what the charts expect.
arc_sources = {
    'arc_danceability': 'danceability_%',
    'arc_acousticness': 'acousticness_%',
    'arc_energy': 'energy_%',
    'arc_valence': 'valence_%',
}
for arc_column, percent_column in arc_sources.items():
    wrangled_data[arc_column] = (wrangled_data[percent_column] / 100).round(2) * math.pi

In [5]:
# Write the wrangled frame to disk as UTF-8 CSV, leaving out the DataFrame index.
wrangled_data.to_csv(
    'data/wrangled_data.csv',
    index=False,
    encoding='utf-8',
)