### Getting Songs
This notebook fetches songs from the Genius API, stores them in MongoDB, and then updates the songs already in the database that reference them.

In [1]:
import requests
import cnfg
import json
import pickle
import time
from datetime import datetime
from collections import defaultdict
from urlparse import urlparse
from pymongo import MongoClient

In [2]:
# All Genius credentials are read from the untracked `.genius_config` file so
# that no secret is stored in the notebook itself.
config = cnfg.load(".genius_config")
client_key = config["client_key"]
client_secret = config["client_secret"]
# SECURITY: a bearer token used to be pasted here as a string literal. Treat
# that token as leaked: revoke it and store the replacement under an
# "access_token" entry in `.genius_config`.
access_token = config["access_token"]
auth = 'Bearer ' + access_token
# Base URL that every API path in this notebook is appended to.
req_start = 'https://api.genius.com'

In [78]:
# Handles used by every cell below: `s` is the `songs` collection of the
# `music` database, reached through a client built with default settings.
client = MongoClient()
db = client['music']
s = db['songs']

In [4]:
def pickleLoad(filename):
    """Return the object stored in the pickle file at ``filename``.

    Unpickling can execute arbitrary code, so only use this on files that
    this notebook wrote itself.
    """
    handle = open(filename, 'rb')
    try:
        return pickle.load(handle)
    finally:
        handle.close()

def pickleDump(data, filename):
    """Serialise ``data`` with pickle and write it to ``filename``.

    An existing file at that path is overwritten.
    """
    with open(filename, 'wb') as sink:
        sink.write(pickle.dumps(data))

In [5]:
def api_call(validurl):
    call = validurl
    r = requests.get(call, headers={'Authorization':auth})
    if r.status_code==200:
        return json.loads(r.content)
    else:
        print r.content
        return r.content

In [6]:
def get_annotations(song):
    """Collect the Genius API URLs linked from a song's annotations.

    Parameters
    ----------
    song : int or dict
        An int is used as a song id and its referents are fetched from
        ``/referents?song_id=<id>``. A dict is used as an already-decoded
        referents response.

    Returns
    -------
    dict
        Lists of UTF-8 encoded API URLs under the keys ``'albums'``,
        ``'artists'``, ``'songs'`` and ``'other'``. A song link is filed under
        ``'songs'`` only when the last path segment of its href ends in
        ``lyrics``; every other song link goes to ``'other'``. Keys with no
        links are absent.

    Raises
    ------
    TypeError
        If ``song`` is neither an int nor a dict.
    """
    referenced = defaultdict(list)
    if isinstance(song, dict):
        refs_json = song
    elif isinstance(song, int):
        myrequest = req_start + '/referents?song_id=' + str(song)
        r = requests.get(myrequest, headers={'Authorization': auth})
        refs_json = json.loads(r.content)
    else:
        raise TypeError("song must be a song id (int) or a referents response "
                        "(dict), got %s" % type(song).__name__)

    for ref in refs_json['response']['referents']:
        for note in ref['annotations']:
            for child in note['body']['dom']['children']:
                # DOM children can be plain text nodes; `in` on a string is a
                # substring test, so only dict nodes are inspected.
                if not isinstance(child, dict):
                    continue
                for element in child.get('children', ()):
                    if not isinstance(element, dict):
                        continue
                    attributes = element.get('attributes')
                    if not isinstance(attributes, dict) or 'href' not in attributes:
                        continue
                    parsed = urlparse(attributes['href'])
                    if parsed.netloc != "genius.com":
                        continue
                    try:
                        apiurl = req_start + element['data']['api_path']
                        annotype = urlparse(apiurl).path.split('/')[1]
                        if annotype == 'albums':
                            referenced['albums'].append(apiurl.encode('utf-8'))
                        elif annotype == 'artists':
                            referenced['artists'].append(apiurl.encode('utf-8'))
                        elif annotype == 'songs':
                            if parsed.path.split('/')[-1].endswith('lyrics'):
                                referenced['songs'].append(apiurl.encode('utf-8'))
                            else:
                                referenced['other'].append(apiurl.encode('utf-8'))
                    except Exception:
                        # Best effort: skip links without usable API metadata,
                        # but let KeyboardInterrupt/SystemExit through.
                        continue

    return dict(referenced)

In [7]:
def description_refs(song):
    """Collect the Genius API URLs linked from a song's description.

    Parameters
    ----------
    song : int or dict
        An int is used as a song id and the song is fetched from
        ``/songs/<id>``. A dict is used as an already-decoded song response.

    Returns
    -------
    dict
        Lists of UTF-8 encoded API URLs under the keys ``'albums'``,
        ``'artists'``, ``'songs'`` and ``'other'``. A song link is filed under
        ``'songs'`` only when the last path segment of its href ends in
        ``lyrics``; every other song link goes to ``'other'``. Keys with no
        links are absent.

    Raises
    ------
    TypeError
        If ``song`` is neither an int nor a dict.
    """
    description = defaultdict(list)
    if isinstance(song, dict):
        refs_json = song
    elif isinstance(song, int):
        myrequest = req_start + '/songs/' + str(song)
        r = requests.get(myrequest, headers={'Authorization': auth})
        refs_json = json.loads(r.content)
    else:
        raise TypeError("song must be a song id (int) or a song response "
                        "(dict), got %s" % type(song).__name__)

    for paragraph in refs_json['response']['song']['description']['dom']['children']:
        # Paragraphs and their children can be plain text nodes; `in` on a
        # string is a substring test, so only dict nodes are inspected.
        if not isinstance(paragraph, dict):
            continue
        for child in paragraph.get('children', ()):
            if not isinstance(child, dict):
                continue
            attributes = child.get('attributes')
            if not isinstance(attributes, dict) or 'href' not in attributes:
                continue
            parsed = urlparse(attributes['href'])
            if parsed.netloc != 'genius.com':
                continue
            try:
                apiurl = req_start + child['data']['api_path']
                annotype = urlparse(apiurl).path.split('/')[1]
                if annotype == 'albums':
                    description['albums'].append(apiurl.encode('utf-8'))
                elif annotype == 'artists':
                    description['artists'].append(apiurl.encode('utf-8'))
                elif annotype == 'songs':
                    if parsed.path.split('/')[-1].endswith('lyrics'):
                        description['songs'].append(apiurl.encode('utf-8'))
                    else:
                        description['other'].append(apiurl.encode('utf-8'))
            except Exception:
                # Best effort: skip links without usable API metadata, but let
                # KeyboardInterrupt/SystemExit through.
                continue
    return dict(description)

In [8]:
# pass one of the two following:
# song['references'] for referents or
# song['description_refs'] for references in description

def _artist_name_from_db(artist_id):
    """Look up an artist's name in the songs already stored in Mongo.

    Checks featured artists, writers, producers and finally primary artists,
    in that order. Returns ``(True, name)`` for the first match and
    ``(False, None)`` when no stored song mentions the artist.
    """
    for field in ('featured_artists', 'writers', 'producers'):
        match = {field: {'$elemMatch': {'id': artist_id}}}
        # The same $elemMatch is used as the projection, so the returned
        # array holds only the matching credit.
        doc = s.find_one(match, dict(match))
        if doc is not None:
            return True, doc[field][0]['artist']
    doc = s.find_one({'artist_id': artist_id}, {'artist': 1})
    if doc is not None:
        return True, doc['artist']
    return False, None


def get_neighbors(song):
    """Resolve API URLs grouped by kind into name/title records.

    Parameters
    ----------
    song : dict
        Output of ``get_annotations`` or ``description_refs``: lists of API
        URLs under ``'artists'``, ``'albums'``, ``'songs'`` and ``'other'``.
        Any other key is ignored.

    Returns
    -------
    collections.defaultdict
        Lists of records under ``'artist'``, ``'album'``, ``'song'``,
        ``'song_unannotated'`` and ``'other'``. Ids are kept as the string
        taken from the end of each URL. Songs not yet in Mongo are only
        recorded by id under ``'song_unannotated'``; artists and albums not
        in Mongo are looked up through the API.
    """
    neighbors = defaultdict(list)
    for item, vals in song.items():
        if item == 'artists':
            for artist_url in vals:
                artistid = artist_url.rpartition('/')[2]
                found, artistname = _artist_name_from_db(int(artistid))
                if not found:
                    time.sleep(7)  # pause before each API request
                    artistname = api_call(artist_url)['response']['artist']['name']
                neighbors['artist'].append({'name': artistname, 'id': artistid})
        elif item == 'albums':
            for album_url in vals:
                albumid = album_url.rpartition('/')[2]
                doc = s.find_one({'album.id': int(albumid)}, {'album.name': 1})
                if doc is not None:
                    albumname = doc['album']['name']
                else:
                    time.sleep(7)  # pause before each API request
                    albumname = api_call(album_url)['response']['album']['name']
                neighbors['album'].append({'name': albumname, 'id': albumid})
        elif item == 'songs':
            for song_url in vals:
                songid = song_url.rpartition('/')[2]
                doc = s.find_one({'id': int(songid)}, {'title': 1, 'artist': 1})
                if doc is not None:
                    neighbors['song'].append({'title': doc['title'],
                                              'artist': doc['artist'],
                                              'id': songid})
                else:
                    neighbors['song_unannotated'].append({'id': songid})
        elif item == 'other':
            for other_url in vals:
                otherid = other_url.rpartition('/')[2]
                # Same pause as the artist and album branches; this branch
                # always goes to the API.
                time.sleep(7)
                name = api_call(other_url)['response']['song']['title']
                neighbors['other'].append({'title': name, 'id': otherid})

    return neighbors

In [9]:
def annotate_song(songid):
    """Return the annotated record for a Genius song, building it if needed.

    If a document with this ``id`` is already in Mongo it is returned as is.
    Otherwise the song is fetched from ``/songs/<id>`` and a record is built
    with its title, primary artist, album, credits, sample relations, and the
    neighbours found in its annotations (``references``) and description
    (``description_refs``). The raw response and the finished record are both
    pickled to the working directory. The record is returned but NOT inserted
    into Mongo; that is left to the caller.

    Credit lists omit the primary artist and are only present when non-empty.
    """
    existing = s.find_one({'id': songid})
    if existing is not None:
        return existing

    myrequest = req_start + '/songs/' + str(songid)
    r = requests.get(myrequest, headers={'Authorization': auth})
    song_json = json.loads(r.content)
    song = song_json['response']['song']

    song_info = {}
    # Keep names as decoded text: wrapping them in str() raised
    # UnicodeEncodeError under Python 2 for any non-ASCII title or name.
    song_info['title'] = song['title']
    song_info['artist'] = song['primary_artist']['name']
    song_info['artist_id'] = song['primary_artist']['id']

    # A '/' in the title or artist would be read as a directory separator
    # when the pickle file is opened, so it is replaced.
    pickle_title = ('_'.join(song_info['title'].split()) + "_" +
                    '_'.join(song_info['artist'].split()) + ".pkl").replace('/', '_')
    pickleDump(song_json, pickle_title)

    album = song.get('album')
    if album is not None:
        song_info['album'] = {'name': album['name'], 'id': album['id']}
    song_info['id'] = song['id']

    # Sample relations are copied over only when the API reports any.
    for key in ('sampled_songs', 'sampling_songs'):
        if song.get(key):
            song_info[key] = song[key]

    # (key in our record, key in the API response)
    credit_sources = (('featured_artists', 'featured_artists'),
                      ('producers', 'producer_artists'),
                      ('writers', 'writer_artists'))
    for target, source in credit_sources:
        credited = [{'artist': person['name'], 'id': person['id']}
                    for person in (song.get(source) or [])
                    if person['name'] != song_info['artist']]
        if credited:
            song_info[target] = credited

    # references!!
    time.sleep(7)  # pause before each API request
    song_info['references'] = dict(get_neighbors(get_annotations(songid)))
    time.sleep(7)
    song_info['description_refs'] = dict(get_neighbors(description_refs(song_json)))

    pickled_annotation = pickle_title[0:-4] + "_annotated.pkl"
    pickleDump(song_info, pickled_annotation)
    return song_info

In [10]:
newsongs = [1531904,
 550920,
 620555,
 2396178,
 2284206,
 1839127,
 51227,
 526365,
 508933,
 2218017,
 2082,
 2310181,
 245798,
 520237,
 2416699,
 1953852,
 61,
 38978,
 730057,
 68,
 2283589,
 414018,
 1966157,
 1378382,
 92856,
 4186,
 2214586,
 2263139,
 69732,
 51301,
 835688,
 108,
 206963,
 215157,
 690295,
 2171,
 118483,
 1200259,
 897159,
 436363,
 1317005,
 446608,
 1824913,
 2196,
 1978519,
 2204,
 2437280,
 88225,
 65701,
 2275494,
 168,
 4270,
 1742877,
 102576,
 181,
 1128630,
 2368201,
 51388,
 2339009,
 192545,
 235725,
 911574,
 2001112,
 30941,
 2177246,
 32995,
 233,
 2286,
 2308338,
 156373,
 592901,
 72405,
 20525,
 1478928,
 405779,
 2414874,
 2255131,
 4381,
 1548580,
 1394985,
 440618,
 33067,
 379183,
 2423091,
 2138429,
 4417,
 1820994,
 28995,
 2435401,
 2390346,
 829090,
 737615,
 518484,
 387414,
 176475,
 2277244,
 958524,
 672108,
 192879,
 375,
 70012,
 1935742,
 2275721,
 29066,
 2443,
 396,
 622989,
 414099,
 47508,
 65947,
 35229,
 2275742,
 29087,
 41376,
 2177091,
 672155,
 1778090,
 399792,
 1778100,
 436662,
 444,
 2431426,
 688579,
 452,
 170443,
 2374092,
 82339,
 35293,
 680415,
 57824,
 733666,
 35301,
 1847784,
 2402796,
 207343,
 725500,
 4609,
 518,
 713548,
 78347,
 526,
 395791,
 2284049,
 16914,
 4636,
 2433565,
 1114654,
 2435615,
 4640,
 2400811,
 2609,
 2620,
 2433598,
 2302528,
 45634,
 2302535,
 45643,
 2430733,
 35414,
 2042461,
 210364,
 1790573,
 2275954,
 709240,
 634,
 515519,
 2112517,
 2421373,
 2329215,
 131712,
 731783,
 696428,
 51652,
 2210470,
 393899,
 131758,
 690,
 2124472,
 416444,
 92862,
 2142915,
 2396871,
 2423497,
 2409162,
 32546,
 718,
 694992,
 2057145,
 101077,
 58072,
 2388703,
 2155243,
 115451,
 725757,
 361214,
 2433799,
 338701,
 506641,
 1415961,
 66330,
 121627,
 117534,
 1891103,
 33573,
 1995564,
 33581,
 2423601,
 912179,
 822,
 826,
 1696577,
 174916,
 2892,
 446605,
 27472,
 160600,
 29529,
 47967,
 185191,
 47982,
 437103,
 880,
 2327412,
 2171992,
 1989637,
 246656,
 123873,
 2325382,
 2028423,
 279437,
 1850256,
 1702804,
 158615,
 1465241,
 2275787,
 66467,
 2397096,
 1392455,
 82860,
 1172403,
 27574,
 1782713,
 76731,
 2328394,
 209861,
 2413513,
 433098,
 293841,
 667130,
 2325471,
 848,
 2311139,
 1182694,
 115688,
 58346,
 2163879,
 50156,
 431093,
 49680,
 60408,
 2270213,
 610479,
 6320,
 1060,
 2081829,
 637994,
 349229,
 1629231,
 502834,
 230455,
 578626,
 1420355,
 1971271,
 302157,
 547918,
 2296914,
 1674324,
 1555542,
 494680,
 1113,
 668175,
 2292829,
 746593,
 390244,
 720401,
 2417511,
 466028,
 709741,
 279669,
 15479,
 312444,
 2413699,
 93317,
 1940620,
 56477,
 1954979,
 3239,
 2329769,
 11435,
 459948,
 1799343,
 23070,
 640182,
 1973431,
 361656,
 31934,
 2311363,
 2311373,
 373966,
 1240,
 2307291,
 1559777,
 2286821,
 38118,
 468201,
 13556,
 1942775,
 1277,
 2426112,
 380162,
 19715,
 410884,
 722193,
 2295060,
 44311,
 376091,
 1860893,
 707870,
 2153759,
 658724,
 1318,
 30258,
 521525,
 2297738,
 392510,
 357695,
 48453,
 2315590,
 699959,
 94434,
 13648,
 378195,
 13656,
 2409818,
 1779037,
 1944935,
 533873,
 389694,
 451958,
 722296,
 19348,
 38270,
 720259,
 433542,
 62862,
 2332048,
 23955,
 583,
 310705,
 290230,
 110010,
 2119099,
 2262460,
 329151,
 38340,
 476615,
 56776,
 112079,
 417235,
 1791444,
 1493,
 591,
 2086372,
 114153,
 1517,
 703741,
 1620904,
 372211,
 2018804,
 556538,
 9723,
 245249,
 403031,
 2262544,
 914961,
 1283,
 2117152,
 513574,
 7722,
 3627,
 226866,
 130616,
 1637950,
 56902,
 2055751,
 1931490,
 1612,
 52814,
 8120,
 156955,
 742999,
 720473,
 1869407,
 1980001,
 394854,
 2295400,
 1574514,
 2145913,
 118395,
 210558,
 491142,
 89736,
 2434697,
 693900,
 2024045,
 2416274,
 2100885,
 2176665,
 2123421,
 607906,
 1365667,
 1242795,
 122542,
 54967,
 523961,
 2408122,
 2176703,
 124611,
 2178762,
 5835,
 700112,
 20179,
 218837,
 601817,
 607964,
 743136,
 1666786,
 124643,
 2373349,
 428668,
 2178795,
 2275965,
 319225,
 114429,
 2228864,
 1560323,
 452357,
 716553,
 1805,
 1820438,
 983001,
 2342690,
 2332455,
 2420521,
 1541932,
 2324269,
 698675,
 2268248,
 51003,
 734517,
 1943366,
 2414561,
 118602,
 87884,
 2287448,
 2414425,
 2408283,
 200546,
 61287,
 282475,
 409461,
 24440,
 741241,
 186250,
 729410,
 3984,
 2254750,
 307105,
 427939,
 206760,
 2068393,
 1996715,
 2334638,
 530356,
 2308425,
 51128,
 1800122,
 92490,
 2181057,
 2379715,
 33782,
 1993,
 1698,
 73037,
 1935314,
 2205656,
 2424793,
 1978332,
 71649,
 1986530,
 57315,
 1402857,
 2215917]

In [11]:
# Song ids for which annotating or inserting raised an error in the loop below.
unable_to_insert = []

In [12]:
for songid in newsongs:
    if s.find({'id':songid}).count()>0:
        print "song id ", songid, " in Mongo already"
        continue
    else:
        try:
            annotated = annotate_song(songid)
            s.insert_one(annotated)
            print "!!!inserted ", songid, " to Mongo!!!!"
            time.sleep(7)
        except:
            unable_to_insert.append(songid)
            continue

!!!inserted  680415  to Mongo!!!!
!!!inserted  57824  to Mongo!!!!
!!!inserted  733666  to Mongo!!!!
!!!inserted  35301  to Mongo!!!!
!!!inserted  1847784  to Mongo!!!!
!!!inserted  2402796  to Mongo!!!!
!!!inserted  207343  to Mongo!!!!
!!!inserted  725500  to Mongo!!!!
!!!inserted  4609  to Mongo!!!!
!!!inserted  518  to Mongo!!!!
!!!inserted  78347  to Mongo!!!!
!!!inserted  526  to Mongo!!!!
!!!inserted  395791  to Mongo!!!!
!!!inserted  2284049  to Mongo!!!!
!!!inserted  16914  to Mongo!!!!
!!!inserted  4636  to Mongo!!!!
!!!inserted  2433565  to Mongo!!!!
!!!inserted  1114654  to Mongo!!!!
!!!inserted  2435615  to Mongo!!!!
!!!inserted  4640  to Mongo!!!!
!!!inserted  2400811  to Mongo!!!!
!!!inserted  2609  to Mongo!!!!
!!!inserted  2620  to Mongo!!!!
!!!inserted  2433598  to Mongo!!!!
!!!inserted  2302528  to Mongo!!!!
!!!inserted  45634  to Mongo!!!!
!!!inserted  2302535  to Mongo!!!!
!!!inserted  45643  to Mongo!!!!
!!!inserted  2430733  to Mongo!!!!
!!!inserted  35414  to Mo

#### Before updating

#### After updating

In [21]:
# Show the stored document for song 2415289 as it looks after updating.
next(s.find({'id': 2415289}))

{u'_id': ObjectId('56ef4e178957421b672a9613'),
 u'album': {u'id': 132506, u'name': u'Views From The 6'},
 u'artist': u'Drake',
 u'artist_id': 130,
 u'description_refs': {u'album': [{u'id': u'132506',
    u'name': u'Views From The 6'}],
  u'other': [{u'id': u'2415314', u'title': u'War Pain'}],
  u'song': [{u'artist': u'Drake',
    u'id': u'2413886',
    u'title': u'OVO Sound Radio Episode 14 Tracklist'}]},
 u'id': 2415289,
 u'producers': [{u'artist': u'Noah \u201c40\u201d Shebib', u'id': 27612},
  {u'artist': u'Boi-1da', u'id': 662},
  {u'artist': u'Brian Bennett', u'id': 634949},
  {u'artist': u'Cubeatz', u'id': 32429}],
 u'references': {u'album': [{u'id': u'120604', u'name': u'The Life of Pablo'}],
  u'artist': [{u'id': u'1632', u'name': u'Tory Lanez'},
   {u'id': u'1632', u'name': u'Tory Lanez'},
   {u'id': u'27638', u'name': u'KeY Wane'},
   {u'id': u'1319', u'name': u'Meek Mill'}],
  u'other': [{u'id': u'1791', u'title': u'Gorgeous'}],
  u'song': [{u'artist': u'Tory Lanez',
    u'i

In [20]:
def insert_neighbors(neighbors):
    cant_insert = []
    for songid in neighbors:
        if s.find({'id':songid}).count()>0:
            print "song id ", songid, " in Mongo already"
            continue
        else:
            try:
                annotated = annotate_song(songid)
                s.insert_one(annotated)
                print "inserted ", songid, " to Mongo!!"
                to_insert = {'id':songid, 'title':annotated['title'], 'artist':annotated['artist']}
                s.update({'references.song_unannotated':{'$elemMatch':{'id':str(songid)}}},
                         {'$push': {'references.song': to_insert},
                          '$unset': {'references.song_unannotated': {'$elemMatch':{'id':str(songid)}}}
                         })
                s.update({'description_refs.song_unannotated':{'$elemMatch':{'id':str(songid)}}},
                 {'$push': {'description_refs.song': to_insert
                            },
                  '$unset': {'description_refs.song_unannotated': {'$elemMatch':{'id':str(songid)}}}
                 })
                time.sleep(7)
            except:
                cant_insert.append(songid)
                continue
    return cant_insert

In [118]:
# NOTE(review): `more_songs` is not defined in any cell visible in this
# notebook, so this call fails on a fresh kernel. Presumably it is a list of
# Genius song ids; confirm and define it before this cell.
insert_neighbors(more_songs)

calling artistid:  211094
artist name:  Remy Boyz
calling artistid:  216609
artist name:  Fetty Wap
calling artistid:  19635
artist name:  Monty
calling artistid:  324091
artist name:  P-Dice
inserted 



 496445  to Mongo!!
calling albumid:  132506
album name:  Views From The 6
inserted  2413886  to Mongo!!
calling other:  https://api.genius.com/songs/672
inserted  1812  to Mongo!!
inserted  58344  to Mongo!!
inserted  525473  to Mongo!!
calling artistid:  1630
artist name:  Ciara
inserted  2213836  to Mongo!!
inserted  583542  to Mongo!!
calling artistid:  338
artist name:  Busta Rhymes
calling artistid:  338
artist name:  Busta Rhymes
inserted  539654  to Mongo!!
song id  539654  in Mongo already
inserted  2274243  to Mongo!!
calling albumid:  143267
album name:  EVOL
calling albumid:  120604
album name:  The Life of Pablo
calling artistid:  626678
artist name:  Desiigner
calling artistid:  19139
artist name:  Shy Glizzy
inserted  2422513  to Mongo!!
calling albumid:  129455
album name:  T R A P S O U L
inserted  2315263  to Mongo!!
calling artistid:  2197
artist name:  Future
inserted  2172126  to Mongo!!
calling artistid:  59
artist name:  2Pac
inserted  2311370  to Mongo!!
song id

In [43]:
# Ids of every stored song that still has unannotated song references, in
# either its annotations or its description. The second query used to repeat
# the `references` filter, so description-only songs were never picked up.
unannotated_filter = {'$or': [
    {'references.song_unannotated': {'$exists': 1}},
    {'description_refs.song_unannotated': {'$exists': 1}},
]}
updatethis = list(set(doc['id'] for doc in s.find(unannotated_filter, {'_id': 0, 'id': 1})))

In [80]:
def update_annotations(songid):
    songstoget = []
    
    try:
        song_info = s.find({'id':songid}).next()
        ref_songs = []
        failed_songs = []
        for element in song_info['references']['song_unannotated']:
            for k, v in element.iteritems():
                if s.find({'id':int(v)}).count() > 0:
                    vals = s.find({'id':int(v)}, {'artist':1, 'title':1, '_id':0}).next()
                    to_insert = {'id':v, 'artist':vals['artist'], 'title':vals['title']}
                    ref_songs.append(to_insert)
                else:
                    failed_songs.append({'id': v})
        if ref_songs and failed_songs:
            s.update_one({'id':songid}, {'$unset': {'references.song': ""}})
            s.update_one({'id':songid}, {'$unset': {'references.song_unannotated':""}})
            s.update_one({'id':songid}, {'$addToSet': {'references.song': {'$each': ref_songs}}}) 
            s.update_one({'id':songid}, {'$addToSet': {'references.song_unannotated': {'$each': failed_songs}}})
            song_info['references']['song'] = ref_songs
            song_info['references']['song_unannotated'] = failed_songs
        elif ref_songs and not failed_songs:
            s.update_one({'id':songid}, {'$unset': {'references.song': ""}})
            s.update_one({'id':songid}, {'$unset': {'references.song_unannotated':""}})
            s.update_one({'id':songid}, {'$addToSet': {'references.song': {'$each': ref_songs}}})
            song_info['references']['song'] = ref_songs
            del song_info['references']['song_unannotated']
        elif failed_songs and not ref_songs:
            s.update_one({'id':songid}, {'$unset': {'references.song': ""}})
            s.update_one({'id':songid}, {'$unset': {'references.song_unannotated':""}})
            s.update_one({'id':songid}, {'$addToSet': {'references.song_unannotated': {'$each': failed_songs}}})
            del song_info['references']['song']
            song_info['references']['song_unannotated'] = failed_songs
        
        if failed_songs:
            songstoget.extend([f['id'] for f in failed_songs])
    except:
        print "unable to update referent annotations: ", songid
    
    try: 
        song_info = s.find({'id':songid}).next()
        desc_songs = []
        failed_songs = []
        for element in song_info['description_refs']['song_unannotated']:
            for k, v in element.iteritems():
                if s.find({'id':int(v)}).count() > 0:
                    vals = s.find({'id':int(v)}, {'artist':1, 'title':1, '_id':0}).next()
                    to_insert = {'id':v, 'artist':vals['artist'], 'title':vals['title']}
                    desc_songs.append(to_insert)
                else:
                    failed_songs.append({'id': v})
        if desc_songs and failed_songs:
            s.update_one({'id':songid}, {'$unset': {'description_refs.song': ""}})
            s.update_one({'id':songid}, {'$unset': {'description_refs.song_unannotated':""}})
            s.update_one({'id':songid}, {'$addToSet': {'description_refs.song': {'$each': desc_songs}}})
            s.update_one({'id':songid}, {'$addToSet': {'description_refs.song_unannotated': {'$each': failed_songs}}})
            song_info['description_refs']['song'] = desc_songs
            song_info['description_refs']['song_unannotated'] = failed_songs
        elif desc_songs and not failed_songs:
            s.update_one({'id':songid}, {'$unset': {'description_refs.song': ""}})
            s.update_one({'id':songid}, {'$unset': {'description_refs.song_unannotated':""}})
            s.update_one({'id':songid}, {'$addToSet': {'description_refs.song': {'$each': desc_songs}}})
            song_info['description_refs']['song'] = desc_songs
            del song_info['description_refs']['song_unannotated']
        elif failed_songs and not desc_songs:
            s.update_one({'id':songid}, {'$unset': {'description_refs.song': ""}})
            s.update_one({'id':songid}, {'$unset': {'description_refs.song_unannotated':""}})
            s.update_one({'id':songid}, {'$addToSet': {'description_refs.song_unannotated': {'$each': failed_songs}}})
            del song_info['description_refs']['song']
            song_info['description_refs']['song_unannotated'] = failed_songs
        
        if failed_songs:
            songstoget.extend([f['id'] for f in failed_songs])
    except:
        print "unable to update description refs: ", songid
    
    return songstoget

In [79]:
# Scratch check of the lookup logic on one song (id 40840): split its
# unannotated annotation references into those now in Mongo and those not.
song_info = s.find({'id':40840}).next()
ref_songs = []    # appended to below; was never initialised in this cell
desc_songs = []
failed_songs = []
for element in song_info['references']['song_unannotated']:
    for k, v in element.iteritems():
        if s.find({'id':int(v)}).count() > 0:
            vals = s.find({'id':int(v)}, {'artist':1, 'title':1, '_id':0}).next()
            to_insert = {'id':v, 'artist':vals['artist'], 'title':vals['title']}
            ref_songs.append(to_insert)
        else:
            failed_songs.append({'id': v})

In [84]:
# Show the referenced ids that the scratch lookup could not find in Mongo.
failed_songs

[{'id': u'2424'}, {'id': u'40852'}]

In [83]:
# Scratch run of the "only failures" branch for description references.
# NOTE(review): `songid` is not assigned in this cell; it is whatever an
# earlier cell left bound, while `song_info` was loaded for id 40840.
# Confirm which document these updates are meant to touch.
# NOTE(review): `failed_songs` was built from song_info['references'] in the
# scratch cell above but is written to `description_refs` here. Verify intent.
if failed_songs and not desc_songs:
    s.update_one({'id':songid}, {'$unset': {'description_refs.song': ""}})
    s.update_one({'id':songid}, {'$unset': {'description_refs.song_unannotated':""}})
    s.update_one({'id':songid}, {'$addToSet': {'description_refs.song_unannotated': {'$each': failed_songs}}})
    # Raises KeyError when the loaded document has no description_refs.song.
    del song_info['description_refs']['song']
    song_info['description_refs']['song_unannotated'] = failed_songs

TRUE


In [49]:
# One entry per song in `updatethis`: the list returned by update_annotations
# for that song (so this is a list of lists).
songstoget = [update_annotations(sid) for sid in updatethis]

unable to update referent annotations:  393899
unable to update description refs:  393899
unable to update description refs:  2398213
unable to update referent annotations:  32546
unable to update description refs:  32546
unable to update referent annotations:  668175
unable to update description refs:  668175
unable to update description refs:  2163216
unable to update referent annotations:  2396178
unable to update description refs:  2396178
unable to update referent annotations:  51227
unable to update description refs:  51227
unable to update referent annotations:  4636
unable to update description refs:  4636
unable to update description refs:  1742877
unable to update referent annotations:  517
unable to update description refs:  517
unable to update referent annotations:  4640
unable to update description refs:  4640
unable to update referent annotations:  192545
unable to update description refs:  192545
unable to update referent annotations:  726052
unable to update descriptio

In [50]:
# `songstoget` holds one list of string ids per updated song, so the int 2424
# can never be a direct member; look for the string id inside each list.
any('2424' in ids for ids in songstoget)

False

In [74]:
# Inspect the per-song results collected by the update loop.
songstoget

[[{'id': u'2292582'}, {'id': u'672664'}],
 [{'id': u'3280'}],
 [{'id': u'52561'}, {'id': u'161090'}, {'id': u'168190'}],
 [{'id': u'215155'}],
 [{'id': u'32795'}],
 [{'id': u'1780118'}, {'id': u'672'}, {'id': u'675638'}],
 [{'id': u'448558'},
  {'id': u'415593'},
  {'id': u'2355205'},
  {'id': u'2346582'},
  {'id': u'2348856'},
  {'id': u'297237'},
  {'id': u'2437479'},
  {'id': u'741987'},
  {'id': u'2355205'},
  {'id': u'2382690'},
  {'id': u'2346582'}],
 [{'id': u'1185'}, {'id': u'5342'}],
 [{'id': u'1267'}],
 [{'id': u'62635'}, {'id': u'93457'}, {'id': u'5737'}, {'id': u'54471'}],
 [{'id': u'139184'}, {'id': u'90472'}, {'id': u'90478'}, {'id': u'90477'}],
 [{'id': u'156640'}],
 [{'id': u'89'}, {'id': u'730'}],
 [{'id': u'196894'}],
 [{'id': u'1360'}],
 [{'id': u'58634'}],
 [{'id': u'1866016'}, {'id': u'117558'}],
 [{'id': u'156640'}],
 [{'id': u'466897'}, {'id': u'110325'}, {'id': u'380538'}],
 [{'id': u'2276633'}, {'id': u'2376081'}],
 [{'id': u'49616'}, {'id': u'2135788'}],
 [{'i