In [2]:
import csv as csv_module  # aliased so a notebook variable named `csv` cannot shadow the module
import json
import math

In [9]:
def load_json(path):
    """Return the parsed JSON content of the file at `path`.

    The file is opened in a `with` block so the handle is released even when
    parsing fails, and it is decoded as UTF-8 (the encoding JSON interchange
    files are expected to use) instead of the platform default.
    """
    with open(path, "r", encoding="utf-8") as source:
        return json.load(source)


# Songs and per-song analyses for the covers, keyed the way later cells index them.
BLACKLIST_SONGS = load_json("all-songs.txt")
BLACKLIST_ANALYSIS = load_json("song_analyses.txt")

# Songs and per-song analyses for the originals.
METALLICA_SONGS = load_json("original_songs.txt")
METALLICA_ANALYSIS = load_json("original_song_analyses.txt")

# Artist records and per-genre information used by the genre cells.
ALL_ARTISTS = load_json("all-artists.txt")
GENRE_INFO = load_json("genre_info.txt")

In [10]:
# Create dictionary mapping song name to Spotify ID on Metallica (1991)

REMASTERED_SUFFIX = " (Remastered)"

# The suffix is removed only when it is actually present. Slicing off a fixed
# number of characters would silently mangle any title that lacks it.
# If two songs reduce to the same title, the later one wins.
SONG_IDS = {
    song["name"].removesuffix(REMASTERED_SUFFIX): song["id"]
    for song in METALLICA_SONGS
}

In [23]:
# Match each cover on the Blacklist to its original

# Covers whose title is not exactly a key of SONG_IDS, mapped to the title(s)
# of the original(s) they should be linked to.
SPECIAL_CASE_ORIGINALS = {
    "Sad But True (Live)": ["Sad But True"],
    "Don't Tread on Else Matters": ["Don't Tread on Me", "Nothing Else Matters"],
}

unmatched_covers = []
for song in BLACKLIST_SONGS:
    cover_title = song["name"]
    if cover_title in SONG_IDS:
        song["originals"] = [SONG_IDS[cover_title]]
    elif cover_title in SPECIAL_CASE_ORIGINALS:
        song["originals"] = [
            SONG_IDS[original_title]
            for original_title in SPECIAL_CASE_ORIGINALS[cover_title]
        ]
    elif "originals" not in song:
        # Nothing matched and no earlier run stored a value: later cells that
        # read song["originals"] would fail on this entry, so surface it now.
        unmatched_covers.append(cover_title)

if unmatched_covers:
    print(f"WARNING: no original found for: {unmatched_covers}")

# Serialize BEFORE opening the file: opening with "w" truncates it, so a
# serialization error after that point would wipe the notebook's input data.
serialized_songs = json.dumps(BLACKLIST_SONGS, indent=4)
with open("all-songs.txt", "w", encoding="utf-8") as f:
    f.write(serialized_songs)

In [14]:
# Compare features of original and cover

analysis_features = ["danceability", "energy", "speechiness", "acousticness", "instrumentalness", "liveness", "valence"]
other_features = ["key", "mode", "tempo", "loudness"]

# Column names are kept exactly as they were emitted before.
csv_header = 'Title,Artists,Danceability,dDanceability,Energy,dEnergy,Speechiness,dSpeechiness,Acousticness,dAcousticness,Instrumentalness,dInstrumentalness,Liveness,dLiveness,Valence,dValence,Overall Difference,Key,dKey,Mode,dMode,Tempo,dTempo,Loudness, dLoudness\n'

# One list per cover; each list has exactly one value per header column
# (no trailing empty field).
feature_difference_rows = []

for song in BLACKLIST_SONGS:
    artist_names = ", ".join(artist["name"] for artist in song["artists"])

    analysis = BLACKLIST_ANALYSIS[song["id"]]

    # Most songs are a cover of one song, so for now only the first
    # "original" is used for the comparison.
    original_analysis = METALLICA_ANALYSIS[song["originals"][0]]

    row = [song["name"], artist_names]

    # Euclidean distance between cover and original over analysis_features.
    squared_difference_sum = 0
    for feature in analysis_features:
        cover_feature = analysis[feature]
        difference = cover_feature - original_analysis[feature]
        squared_difference_sum += difference ** 2
        row += [cover_feature, difference]
    row.append(math.sqrt(squared_difference_sum))

    # These features are reported with their raw difference but are not part
    # of the overall distance.
    for feature in other_features:
        cover_feature = analysis[feature]
        row += [cover_feature, cover_feature - original_analysis[feature]]

    feature_difference_rows.append(row)

# Rows are built completely before the file is opened, so a lookup error above
# cannot leave a half-written file. csv.writer quotes/escapes titles and artist
# lists that contain commas or double quotes.
with open("feature_differences.csv", "w", newline="", encoding="utf-8") as f:
    f.write(csv_header)
    csv_module.writer(f, lineterminator="\n").writerows(feature_difference_rows)

In [18]:
# Compare features of original and cover and format data for Flourish

analysis_features = ["danceability", "energy", "speechiness", "acousticness", "instrumentalness", "liveness", "valence"]
other_features = ["key", "mode", "tempo", "loudness"]
feature_columns = analysis_features + other_features

csv_header = 'Title,Original Title,Artists,Danceability,Energy,Speechiness,Acousticness,Instrumentalness,Liveness,Valence,Key,Mode,Tempo,Loudness,Is Original\n'

# Covers whose title is not a key of SONG_IDS, mapped to the single original
# title they are grouped under in this export.
COVER_ORIGINAL_TITLES = {
    "Sad But True (Live)": "Sad But True",
    "Don't Tread on Else Matters": "Don't Tread on Me",
}

song_feature_rows = []

# One row per cover.
for song in BLACKLIST_SONGS:
    artist_names = ", ".join(artist["name"] for artist in song["artists"])

    analysis = BLACKLIST_ANALYSIS[song["id"]]

    cover_title = song["name"]
    if cover_title in SONG_IDS:
        original_title = cover_title
    else:
        # Covers with no known original keep an empty "Original Title".
        original_title = COVER_ORIGINAL_TITLES.get(cover_title, "")

    song_feature_rows.append(
        [cover_title, original_title, artist_names]
        + [analysis[feature] for feature in feature_columns]
        + ["Cover"]
    )

# One row per original.
for song in METALLICA_SONGS:
    analysis = METALLICA_ANALYSIS[song["id"]]

    # Drop the suffix only when present; a fixed-length slice would truncate
    # titles that do not carry it.
    original_title = song["name"].removesuffix(" (Remastered)")

    song_feature_rows.append(
        [song["name"], original_title, "Metallica"]
        + [analysis[feature] for feature in feature_columns]
        + ["Original"]
    )

# csv.writer quotes/escapes titles and artist lists containing commas or
# double quotes, which the hand-built rows did not do for titles.
with open("song_features.csv", "w", newline="", encoding="utf-8") as f:
    f.write(csv_header)
    csv_module.writer(f, lineterminator="\n").writerows(song_feature_rows)

In [38]:
# Count, per genre, the distinct non-Metallica artists tagged with it.
genre_occurrances = {}

for artist in ALL_ARTISTS:
    for genre in artist["genres"]:
        # Every genre gets an entry, even when the only artist carrying it is
        # Metallica (such a genre is reported with a count of 0).
        entry = genre_occurrances.setdefault(genre, {"artists": [], "count": 0})
        if artist["name"] != "Metallica" and artist["name"] not in entry["artists"]:
            entry["count"] += 1
            entry["artists"].append(artist["name"])

# Ordered (keywords, category) rules. The FIRST rule with a keyword contained
# in the genre name wins, so e.g. a genre containing both "punk" and "rock"
# is categorized as "Rock".
GENRE_CATEGORY_RULES = [
    (("rock",), "Rock"),
    (("metal", "thrash"), "Metal"),
    (("punk", "hardcore"), "Punk"),
    (("pop",), "Pop"),
    (("country", "folk", "americana"), "Country / Folk"),
    (("hip hop", "rap"), "Hip-Hop / Rap"),
    (("classical", "ensemble", "jazz"), "Classical / Jazz"),
    (("reggaeton", "dance"), "Electronic"),
    (("indie", "alternative", "alternativo", "grunge"), "Alternative / Indie"),
]


def categorize_genre(genre):
    """Return the broad category for `genre`, or "Misc." when no rule matches.

    Matching is by substring against GENRE_CATEGORY_RULES, in order.
    """
    for keywords, category in GENRE_CATEGORY_RULES:
        if any(keyword in genre for keyword in keywords):
            return category
    return "Misc."


genre_rows = [
    [
        genre,
        categorize_genre(genre),
        entry["count"],
        ", ".join(entry["artists"]),
        GENRE_INFO[genre]["percent_female"],
        GENRE_INFO[genre]["average_age"],
    ]
    for genre, entry in genre_occurrances.items()
]

# csv.writer handles quoting, so artist names containing commas or double
# quotes no longer corrupt the row, and no f-string has to reuse its own
# quote character (a SyntaxError before Python 3.12).
with open("genre_occurrances.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv_module.writer(f, lineterminator="\n")
    writer.writerow(["Genre", "Category", "Occurrances", "Artists", "Percent Female", "Average Age"])
    writer.writerows(genre_rows)

In [5]:
# Index the artist records by their "id" field for direct lookup.
# If two records share an id, the later one wins.
artist_dict = {artist["id"]: artist for artist in ALL_ARTISTS}

# Serialize before opening: "w" truncates the file, so a serialization error
# after that point would leave an empty artist-dict.txt behind.
serialized_artists = json.dumps(artist_dict, indent=4)
with open("artist-dict.txt", "w", encoding="utf-8") as f:
    f.write(serialized_artists)

In [25]:
# Per-artist genre demographics: one row for every (song, artist) pairing in
# BLACKLIST_SONGS, so an artist credited on several songs appears several times.

demographics_header = "Artist,Genre 1,Genre 1 Percent Female, Genre 1 Average Age,Genre 2,Genre 2 Percent Female, Genre 2 Average Age,Genre 3,Genre 3 Percent Female, Genre 3 Average Age,Genre 4,Genre 4 Percent Female, Genre 4 Average Age,Genre 5,Genre 5 Percent Female, Genre 5 Average Age,Genre 6,Genre 6 Percent Female, Genre 6 Average Age,Genre 7,Genre 7 Percent Female, Genre 7 Average Age,Genre 8,Genre 8 Percent Female, Genre 8 Average Age,Overall Percent Female, Overall Average Age\n"

# The header has room for this many (genre, percent female, average age) triples.
MAX_GENRE_COLUMNS = 8

demographics_rows = []

for song in BLACKLIST_SONGS:
    for artist in song["artists"]:
        artist_details = artist_dict[artist["id"]]

        genre_cells = []            # flattened triples, in header column order
        percent_female_values = []
        average_age_values = []

        for genre in artist_details["genres"]:
            genre_info = GENRE_INFO[genre]
            # Genres whose average_age is 0 are skipped entirely
            # (presumably "no demographic data" -- TODO confirm).
            if genre_info["average_age"] != 0:
                percent_female_values.append(genre_info["percent_female"])
                average_age_values.append(genre_info["average_age"])
                # Order follows the header: percent female first, then age.
                genre_cells += [genre, genre_info["percent_female"], genre_info["average_age"]]

        num_genres = len(average_age_values)
        # Artists without any usable genre report 0 for both overall values.
        percent_female = sum(percent_female_values) / num_genres if num_genres else 0
        average_age = sum(average_age_values) / num_genres if num_genres else 0

        # Only the first MAX_GENRE_COLUMNS genres fit in the header; further
        # genres still count toward the overall averages but are not listed.
        # Unused genre slots are left empty.
        listed_cells = genre_cells[:3 * MAX_GENRE_COLUMNS]
        padding = [""] * (3 * MAX_GENRE_COLUMNS - len(listed_cells))

        demographics_rows.append(
            [artist["name"], *listed_cells, *padding, percent_female, average_age]
        )

# Rows are complete before the file is opened, and csv.writer takes care of
# quoting artist or genre names that contain commas or double quotes.
with open("artist_average_demographics.csv", "w", newline="", encoding="utf-8") as f:
    f.write(demographics_header)
    csv_module.writer(f, lineterminator="\n").writerows(demographics_rows)
            