In [1]:
# Import dependencies

import pandas as pd
from matplotlib import pyplot as plt
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from config import client_id, secret, api_key

In [2]:
# Establish connection to Spotify

client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [3]:
# Create empty lists to store data from API calls

artist_name = []
track_name = []
track_id = []
popularity = []
artist_id = []

# Run a for loop to call the API multiple times for a list of tracks

# i represents each call of the Spotify search API, range is maximum results, step 50 represents 50 results per search
for i in range(0,2000,50):
    
    # Save results of each search as a variable. Limit search to 50 results, use offset to skip previous result of iteration
    track_results = sp.search(q='year:2019', type='track', limit=50,offset=i)
    
    # Loop through each result to find values for selected fields
    # Enumerate creates a tuple of each track result for iteration. Example: (0, J. Cole), (1, Ariana Grande), etc.
    for i, t in enumerate(track_results['tracks']['items']):
        artist_id.append(t['artists'][0]['id'])
        artist_name.append(t['artists'][0]['name'])
        track_name.append(t['name'])
        track_id.append(t['id'])
        popularity.append(t['popularity'])

In [4]:
genres = []

for artist in range(0,2000):
    artist_results = sp.artist(artist_id[artist])
    genres.append(artist_results["genres"])

In [5]:
# Create empty lists to store audio feature values
danceability = []
energy = []
loudness = []
valence = []
tempo = []

for track in range(0,2000):
    try:
        audio_features = sp.audio_features(track_id[track])
        danceability.append(audio_features[0]["danceability"])
        energy.append(audio_features[0]["energy"])
        loudness.append(audio_features[0]["loudness"])
        valence.append(audio_features[0]["valence"])
        tempo.append(audio_features[0]["tempo"])
    except TypeError:
        danceability.append("None")
        energy.append("None")
        loudness.append("None")
        valence.append("None")
        tempo.append("None")

In [29]:
features_df = pd.DataFrame({"Track ID": track_id, "Artist Name": artist_name, "Track Name":track_name,
                            "Danceability":danceability, "Energy":energy, "Loudness":loudness,
                            "Valence":valence, "Tempo": tempo})
features_df.to_csv("audio_features.csv", index=False)
features_df.head()

Unnamed: 0,Track ID,Artist Name,Track Name,Danceability,Energy,Loudness,Valence,Tempo
0,2JvzF1RMd7lE3KmFlsyZD8,J. Cole,MIDDLE CHILD,0.837,0.364,-11.713,0.463,123.984
1,53CJANUxooaqGOtdsBTh7O,Lil Nas X,Old Town Road,0.908,0.517,-6.063,0.471,136.019
2,2Fxmhks0bxGSBdJ92vM42m,Billie Eilish,bad guy,0.708,0.418,-10.998,0.578,131.926
3,4kV4N9D1iKVxx1KLvtTpjS,Ariana Grande,"break up with your girlfriend, i'm bored",0.726,0.554,-5.29,0.335,169.999
4,4y3OI86AEP6PQoDE6olYhO,Jonas Brothers,Sucker,0.842,0.734,-5.065,0.952,137.958


In [30]:
genre_df = pd.DataFrame({"Artist ID": artist_id, "Artist Name": artist_name, "Genres": genres})
genre_df.to_csv("genres.csv", index=False)
genre_df.head()

Unnamed: 0,Artist ID,Artist Name,Genres
0,6l3HvQ5sa6mXTsMTB19rO5,J. Cole,"[conscious hip hop, nc hip hop, pop rap, rap]"
1,7jVv8c5Fj3E9VhNjxT4snq,Lil Nas X,[]
2,6qqNVTkY8uBg9cP3Jd7DAH,Billie Eilish,[electropop]
3,66CXWjxzNUsdJxJ2JdwvnR,Ariana Grande,"[dance pop, pop, post-teen pop]"
4,7gOdHgIoIKoe4i9Tta6qdD,Jonas Brothers,"[boy band, dance pop, pop, post-teen pop]"


In [32]:
popularity_df = pd.DataFrame({"Artist ID": artist_id, "Artist Name": artist_name, "Track ID": track_id,
                              "Track Name": track_name, "Popularity Score": popularity})
popularity_df.to_csv("popularity.csv", index=False)
popularity_df.head()

Unnamed: 0,Artist ID,Artist Name,Track ID,Track Name,Popularity Score
0,6l3HvQ5sa6mXTsMTB19rO5,J. Cole,2JvzF1RMd7lE3KmFlsyZD8,MIDDLE CHILD,96
1,7jVv8c5Fj3E9VhNjxT4snq,Lil Nas X,53CJANUxooaqGOtdsBTh7O,Old Town Road,95
2,6qqNVTkY8uBg9cP3Jd7DAH,Billie Eilish,2Fxmhks0bxGSBdJ92vM42m,bad guy,98
3,66CXWjxzNUsdJxJ2JdwvnR,Ariana Grande,4kV4N9D1iKVxx1KLvtTpjS,"break up with your girlfriend, i'm bored",98
4,7gOdHgIoIKoe4i9Tta6qdD,Jonas Brothers,4y3OI86AEP6PQoDE6olYhO,Sucker,98


In [21]:
split_genres = []
split_artists = []
split_track = []
split_popularity = []

for x in range(len(genres)):
    print(genres[x])
    for y in genres[x]:
        #print(y, artist_name[x])
        split_genres.append(y)
        split_artists.append(artist_name[x])
        split_track.append(track_name[x])
        split_popularity.append(popularity[x])

['conscious hip hop', 'nc hip hop', 'pop rap', 'rap']
[]
['electropop']
['dance pop', 'pop', 'post-teen pop']
['boy band', 'dance pop', 'pop', 'post-teen pop']
['cali rap', 'hip hop', 'pop', 'pop rap', 'rap', 'southern hip hop', 'trap music']
['rap', 'trap music']
['chicago rap']
['pop']
['dance pop', 'pop', 'post-teen pop']
['pop', 'post-teen pop']
[]
['dance pop', 'pop', 'post-teen pop']
['pop', 'rap']
['dance pop', 'pop', 'post-teen pop']
['electropop']
['dance pop', 'pop', 'post-teen pop']
['trap music']
['brostep', 'egyptian pop', 'progressive electro house']
['pop']
['dance pop', 'pop', 'post-teen pop']
['electropop']
['electropop']
['electropop']
['electropop']
['electropop']
['pop']
['dance pop', 'pop', 'post-teen pop']
['chicago rap']
['latin', 'latin hip hop', 'pop', 'reggaeton', 'tropical']
['electropop']
['dance pop', 'pop', 'post-teen pop']
['electropop']
['electropop']
['chicago rap']
['chicago rap']
['pop']
['dance pop', 'electropop', 'pop']
['electropop']
['pop']
['danc

[]
['modern rock']
['melodic metalcore', 'metalcore', 'rock', 'screamo']
['latin', 'latin pop', 'salsa', 'tropical']
['dance pop', 'pop', 'post-teen pop', 'r&b', 'tropical house', 'uk pop']
['alternative hip hop', 'dirty south rap', 'gangster rap', 'hip hop', 'new orleans rap', 'pop rap', 'rap', 'southern hip hop', 'underground hip hop']
['dance pop', 'pop', 'post-teen pop', 'r&b', 'tropical house', 'uk pop']
[]
['atmosphere']
['big room', 'brostep', 'dance pop', 'edm', 'electro house', 'house', 'pop', 'trance', 'tropical house']
['dance pop', 'electropop', 'pop', 'post-teen pop', 'r&b']
['big room', 'brostep', 'dance pop', 'edm', 'electro house', 'house', 'pop', 'trance', 'tropical house']
['dark trap', 'drill', 'hip hop', 'miami hip hop', 'pop rap', 'rap', 'trap music', 'underground hip hop']
['deep tropical house', 'edm', 'electro house', 'indie poptimism', 'pop edm', 'tropical house']
['big room', 'brostep', 'dance pop', 'edm', 'electro house', 'house', 'pop', 'trance', 'tropical h

In [34]:
split_df = pd.DataFrame({"Artist Name": split_artists, "Track Name": split_track,
                         "Genre": split_genres, "Popularity Score": split_popularity})
split_df.to_csv("split_genre.csv", index=False)
split_df.head()

Unnamed: 0,Artist Name,Track Name,Genre,Popularity Score
0,J. Cole,MIDDLE CHILD,conscious hip hop,96
1,J. Cole,MIDDLE CHILD,nc hip hop,96
2,J. Cole,MIDDLE CHILD,pop rap,96
3,J. Cole,MIDDLE CHILD,rap,96
4,Billie Eilish,bad guy,electropop,98


In [24]:
avg_pop_df = split_df.groupby("Genre").mean()

In [25]:
avg_pop_df.head()

Unnamed: 0_level_0,Popularity Score
Genre,Unnamed: 1_level_1
a cappella,66.0
acoustic pop,59.928571
adult standards,41.333333
album rock,49.0
alternative dance,61.833333
