# Genre Extraction from Spotify API

### Objective: Access the Spotify API and extract the genres of each artist

## 1) Installation of `spotipy` package
- Only for first-time installation



In [0]:
#pip install spotipy

## 2) Importing essential libraries in Python 3
Libraries imported:
1. `spotipy`
2. `itertools`
3. `pandas`
4. `time`
5. `requests`

In [0]:
#importing packages

import spotipy
import itertools
import pandas as pd
from time import sleep
import requests



## 3) Accessing Spotify API
- Extract track audio features

In [0]:
# Build the Spotify API client that the cells below use (updated June 5, 2017).
# Replace the two placeholder strings with the client ID and client secret
# obtained by registering with the Spotify API.
from spotipy.oauth2 import SpotifyClientCredentials

client_credentials_manager = SpotifyClientCredentials(
    client_secret='enter_your_client_secret_here',
    client_id='enter_your_client_id_here',
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
sp.trace = False  # leave spotipy's trace flag switched off

## 4) Removing Confounders
- Christmas songs are known to confound the clusters
- The function `RemoveChristmas` removes every genre label that contains "christmas" from an artist's genre list

In [0]:
    #function to remove christmas-related songs\n",
    def RemoveChristmas(y):\n",
        y = [x for x in y if not ('christmas' in x)]\n",
        y = [x for x in y if not ('pop christmas' in x)]\n",
        y = [x for x in y if not ('soul christmas' in x)]\n",
        y = [x for x in y if not ('jazz christmas' in x)]\n",
        y = [x for x in y if not ('classical christmas' in x)]\n",
        y = [x for x in y if not ('world christmas' in x)]\n",
        y = [x for x in y if not ('folk christmas' in x)]\n",
        y = [x for x in y if not ('country christmas' in x)]\n",
        y = [x for x in y if not ('celtic christmas' in x)]\n",
        y = [x for x in y if not ('christmas product' in x)]\n",
        y = [x for x in y if not ('latin christmas' in x)]\n",
        y = [x for x in y if not (\"children's christmas\" in x)]\n",
        y = [x for x in y if not ('christian christmas' in x)]\n",
        y = [x for x in y if not ('heavy christmas' in x)]\n",
        y = [x for x in y if not ('indie christmas' in x)]\n",
        y = [x for x in y if not ('punk christmas' in x)]\n",
        return y"

## 5) Genre finder function
- `find_genres` searches Spotify for an artist and returns that artist's genres

In [0]:
# Look up the genre list of a single artist on Spotify
def find_genres(x):
    """Return the genres of the first Spotify search hit for artist name *x*.

    The lookup goes through the module-level client ``sp``, and the result is
    passed through ``RemoveChristmas`` before being returned.

    Args:
        x: artist name to search for.

    Returns:
        A list of genre strings. ``['Index Error None']`` when the search
        returns no artists, and ``['None']`` when no genres are left after
        filtering.
    """
    if '+' in x:
        # "+" characters were breaking the search, so drop them from the name
        x = x.replace('+', '')
    search_response = sp.search(q='artist:' + x, type='artist')

    try:
        genres = search_response['artists']['items'][0]['genres']
    except IndexError:
        # the search produced an empty list of artists
        genres = ['Index Error None']

    genres = RemoveChristmas(genres)
    return ['None'] if genres == [] else genres

## 6) Loading audio features data extracted from Spotify API
- reading the previously extracted data from a csv file into a dataframe for further analysis
- first 5 rows of dataset printed below

In [0]:
# Load the audio-features CSV; its first column is used as the row index.
audio_set = pd.read_csv(
    'SpotifyAudioFeaturesApril2019.csv',
    index_col=[0],
    sep=',',
)
audio_set.head()  # preview the first five rows

artist_name,track_id,track_name,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,popularity
YG,2RM4jf1Xa9zPgMGRDiht8O,"Big Bank feat. 2 Chainz, Big Sean, Nicki Minaj",0.00582,0.743,238373,0.339,0.0,1,0.0812,-7.678,1,0.409,203.927,4,0.118,15
YG,1tHDG53xJNGsItRA3vfVgs,BAND DRUM (feat. A$AP Rocky),0.0244,0.846,214800,0.557,0.0,8,0.286,-7.259,1,0.457,159.009,4,0.371,0
R3HAB,6Wosx2euFPMT14UXiWudMy,Radio Silence,0.025,0.603,138913,0.723,0.0,9,0.0824,-5.89,0,0.0454,114.966,4,0.382,56
Chris Cooq,3J2Jpw61sO7l6Hc7qdYV91,Lactose,0.0294,0.8,125381,0.579,0.912,5,0.0994,-12.118,0,0.0701,123.003,4,0.641,0
Chris Cooq,2jbYvQCyPgX3CdmAzeVeuS,Same - Original mix,3.5e-05,0.783,124016,0.792,0.878,7,0.0332,-10.277,1,0.0661,120.047,4,0.928,0


## 7) Finding genres according to *artist_name* 

- Initializing an empty list for storage
- Iterating through the dataframe using a *for* loop
- The loop goes through the *artist_name* values in the dataframe and appends the genres found for each artist


In [0]:
# Look up the genres for the artist on every row of the dataframe.
#
# NOTE(review): this cell used to iterate `comparison_df`, which is not defined
# anywhere in the visible notebook; the results are attached to `audio_set`,
# so that is the frame iterated here -- confirm this is the intended source.
# `artist_name` appears to be the index (the CSV is read with index_col=[0]),
# so reset_index() is used to expose it as a regular column.
MAX_ATTEMPTS = 5      # give up on an artist after this many connection failures
RETRY_DELAY_SECS = 1  # base pause between attempts, scaled by the attempt number

genre_cache = {}  # artist name -> genre list, so each artist is queried only once
new_list = []
for artist in audio_set.reset_index()['artist_name']:
    if artist not in genre_cache:
        for attempt in range(1, MAX_ATTEMPTS + 1):
            # Only the connection error that shows up when Spotify's endpoint
            # is hit too often is retried. Any other exception propagates,
            # instead of being swallowed and retried forever.
            try:
                genre_cache[artist] = find_genres(artist)
            except requests.exceptions.ConnectionError:
                if attempt == MAX_ATTEMPTS:
                    raise
                sleep(RETRY_DELAY_SECS * attempt)
            else:
                break
    # copy so that rows sharing an artist do not share one list object
    new_list.append(list(genre_cache[artist]))

retrying ...1secs
retrying ...1secs
retrying ...1secs
retrying ...1secs
retrying ...1secs
retrying ...1secs
retrying ...1secs


## 8) Combining newly generated list of genre features to main dataframe

In [0]:
# Add the collected genre lists to the dataframe as a new 'genre' column
# (one entry of new_list per row).
audio_set['genre'] = new_list

## 9) Saving the extracted dataset as a csv file

In [0]:
# Write the dataframe, including its index, to 'audioset_g.csv'.
audio_set.to_csv(
    'audioset_g.csv',
    index=True,
)