# Querying Genius API for Song Credits

In [1]:
import json
import numpy as np
import pandas as pd
import pickle
import requests
import time

In [2]:
with open('../data/genius_ids.json', 'r') as f:
    genius_ids = json.load(f)

In [20]:
df = pd.read_csv('../data/spotify_artists_clean.csv', index_col=0)

In [23]:
df['name'].iloc[1]

'"Weird Al" Yankovic'

#### Gathering Artist IDs from `genius_ids`

In [2]:
# genius_ids[0]

In [12]:
# genius_ids[0] - for a given artist & their associated search details
# genius_ids[0]['*NSYNC']['response']['hits'] - the actual search results
# genius_ids[0]['*NSYNC']['response']['hits'][0]['result']['primary_artist'] - information for the artist
# genius_ids[0]['*NSYNC']['response']['hits'][0]['result']['primary_artist']['id'] - Genius Artist ID
# genius_ids[0]['*NSYNC']['response']['hits'][0]['result']['primary_artist']['name'] - Genius Artist name

{'api_path': '/artists/8625',
 'header_image_url': 'https://s3.amazonaws.com/rapgenius/1362260617_51058047.png',
 'id': 8625,
 'image_url': 'https://s3.amazonaws.com/rapgenius/1362260617_51058047.png',
 'is_meme_verified': False,
 'is_verified': False,
 'name': "'N Sync",
 'url': 'https://genius.com/artists/N-sync'}

In [120]:
genius_artist_ids = []
i = -1

for artist in genius_ids:
    ar = {}
    i += 1
    for result in artist[df['name'].iloc[i]]['response']['hits']:
        ar[result['result']['primary_artist']['name']] = result['result']['primary_artist']['id']
    genius_artist_ids.append(dict({df['name'].iloc[i] : ar}))

In [121]:
genius_artist_ids[0]

{'*NSYNC': {"'N Sync": 8625}}

In [63]:
len(list(genius_artist_ids[2].values())[0].keys())

4

##### Dumping `genius_artist_ids` into json file

In [35]:
with open('../data/genius_artist_ids.json', 'w') as f:
    json.dump(genius_artist_ids, f)

I'm really not sure what the best strategy for retrieving all of the songs I need, though I'm considering a scorched earth routine, where I just retrieve every song corresponding to every artist id I've retrieved.

In [150]:
len(genius_artist_ids)

2184

##### Looking at Artists -w- More Than One Result

In [172]:
multiple_ids = []

for artist in genius_artist_ids:
    if len(list(artist.values())[0].keys()) > 1:
        multiple_ids.append(artist)

There are a shit ton of multiple results. I wonder what is the best way to filter through all of these.

In [113]:
multiple_ids[:5]

[{'10 Years': {'10 Years': 89600,
   'Tennis': 11394,
   'Bullet For My Valentine': 37666,
   'Travis Mendes': 346989}},
 {'112': {'112': 504, 'Jay Rock': 1403, 'Slim of 112': 665236}},
 {'2 Chainz': {'2 Chainz': 14325,
   '2 Chainz & Wiz Khalifa': 51112,
   'ScHoolboy Q, 2 Chainz & Saudi': 1357232}},
 {'21 Savage': {'21 Savage': 430404,
   '21 Savage & Metro Boomin': 980465,
   '21 Savage, Offset & Metro Boomin': 1249916}},
 {'311': {'311': 10440, 'Emily Dickinson': 717, 'Wale': 396}}]

In [114]:
multiple_ids_2 = []

for artist in multiple_ids:
    for k, v in artist.items():
        for key, value in v.items():
            if k == key:
                multiple_ids_2.append(dict({k : dict({key:value})}))

In [116]:
len(multiple_ids_2)

793

In [117]:
multiple_ids_2[:5]

[{'10 Years': {'10 Years': 89600}},
 {'112': {'112': 504}},
 {'2 Chainz': {'2 Chainz': 14325}},
 {'21 Savage': {'21 Savage': 430404}},
 {'311': {'311': 10440}}]

##### Looking at Length of all the Genius ID files

In [125]:
len(genius_artist_ids), len(multiple_ids), len(multiple_ids_2)

(2449, 1058, 793)

##### Removing Result from `genius_artist_ids` if it's part of `multiple_ids`

In [128]:
for artist in multiple_ids:
    if artist in genius_artist_ids:
        genius_artist_ids.remove(artist)

In [130]:
len(genius_artist_ids)

1391

##### Adding the Filtered `multiple_ids_2` results back into `genius_artist_ids`

In [131]:
for artist in multiple_ids_2:
    genius_artist_ids.append(artist)

In [133]:
len(genius_artist_ids)

2184

##### Dumping singluar `genius_artist_ids` into json

In [152]:
with open('../data/genius_artist_ids_single.json', 'w') as f:
    json.dump(genius_artist_ids, f)

##### Retrieving Old `genius_artist_ids` listing to Filter Down `multiple_ids` Results

In [153]:
with open('../data/genius_artist_ids.json', 'r') as f:
    genius_artist_ids = json.load(f)

In [154]:
len(genius_artist_ids)

2449

In [176]:
len(multiple_ids)

1058

In [195]:
filtered_ids = [list(row.keys())[0] for row in multiple_ids_2]
filtered_ids[3]

'21 Savage'

In [188]:
for artist in multiple_ids:
    if list(artist.keys())[0] in filtered_ids:
        multiple_ids.remove(artist)

In [196]:
list(multiple_ids[0].keys())[0]

'21 Savage'

##### Verdict On Multple ID entries

I can't seem to accurately remove all of the entries I need to...therefore, I'll run through the single entry IDs first, and go scorched earth on the other ones afterwards.

##### Retrieving `genius_artist_ids_single` to Query Genius API

In [3]:
with open('../data/genius_artist_ids_single.json', 'r') as f:
    genius_artist_ids_single = json.load(f)

In [20]:
len(genius_artist_ids_single)

2184

My first pass-through of the Genius API song ids will be for this many songs

### Querying Genius API with all artist_ids

In [207]:
list(list(genius_artist_ids_single[0].values())[0].values())[0]

8625

In [4]:
access_token = '&access_token=IwvfcN2fO8fNm5ufyApn1c8Z9X3Nud-Z6wJhTeKocdLDR_Jtj6bW6POwfT81u6Of'

In [5]:
with open('../data/genius_song_ids.json', 'w') as f:
    json.dump([], f)

In [30]:
for artist in genius_artist_ids_single[353:]:
    try:
        url = 'https://api.genius.com/artists/' + str(list(list(artist.values())[0].values())[0]) + '/songs/?sortsort=popularity&per_page=50' + access_token
    except:
        continue
    try:
        r = requests.get(url)
        song_ids.append(r.json())
    except:
        time.sleep(1)
        r = requests.get(url)
        song_ids.append(r.json())
with open('../data/genius_song_ids.json', mode='w', encoding='utf-8') as f:
    json.dump(song_ids, f)    

I'm including the first `try / except` because some of my singular id entries don't actually have an id. This seems strange. I'll have to investigate this after my loop is done processing.

In [29]:
genius_artist_ids_single[353]

{'DJ Magic Mike and DJ Magic Mike and the Royal Possethe Royal Posse': {}}

In [31]:
len(song_ids)

2178

##### Grabbing Remaining Song IDs

After going through the list of artist ids, I lost about 6 entries. I'm going to forego looking further into this until I actually grab all the songs I need.

#### Testing Song ID Against Genius Song Search

In [13]:
url_test = 'https://api.genius.com/songs/1352728'
headers = {'Authorization':'Bearer IwvfcN2fO8fNm5ufyApn1c8Z9X3Nud-Z6wJhTeKocdLDR_Jtj6bW6POwfT81u6Of'}

In [14]:
r_test = requests.get(url_test,
                      headers=headers)

In [19]:
# r_test.json()