# Recommend Artists Based on Embeddings

In [1]:
%run ./wikipedia_setup.ipynb

In [37]:
import gensim
from gensim.models.doc2vec import TaggedDocument, Doc2Vec
from sklearn.neighbors import KDTree
from sklearn.cluster import AgglomerativeClustering

In [59]:
# Let pandas render up to 100 rows before truncating a displayed frame.
pd.options.display.max_rows = 100

## Configure DB & Load Model

In [3]:
# Open the SQLite database that holds the artist tables.
# sqlite3.connect() is the documented way to obtain a Connection object.
# The connection stays open for the rest of the notebook; call db.close() when finished.
db = sqlite3.connect("var/data/music.db")

In [4]:
# Load the saved Doc2Vec model from disk.
# NOTE(review): doc_model is not referenced by any later cell visible in this notebook — confirm it is still needed.
doc_model = Doc2Vec.load("var/models/artist_detail_model.d2v")

2021-10-30 19:36:42,190 - gensim.utils - INFO - loading Doc2Vec object from var/models/artist_detail_model.d2v
2021-10-30 19:36:42,264 - gensim.utils - INFO - loading dv recursively from var/models/artist_detail_model.d2v.dv.* with mmap=None
2021-10-30 19:36:42,264 - gensim.utils - INFO - loading wv recursively from var/models/artist_detail_model.d2v.wv.* with mmap=None
2021-10-30 19:36:42,266 - gensim.utils - INFO - setting ignored attribute cum_table to None
2021-10-30 19:36:43,135 - gensim.utils - INFO - Doc2Vec lifecycle event {'fname': 'var/models/artist_detail_model.d2v', 'datetime': '2021-10-30T19:36:43.135033', 'gensim': '4.1.2', 'python': '3.7.6 (tags/v3.7.6:43364a7ae0, Dec 19 2019, 00:42:30) [MSC v.1916 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.19041-SP0', 'event': 'loaded'}


## Load Artists + Embeddings

In [18]:
# Read every row of the artist_embedding table, then convert the
# comma-separated DetailEmbedding text into one NumPy float vector per artist.
artists = pd.read_sql("""
    select *
    from artist_embedding
""", db)
artists['DetailEmbedding'] = artists['DetailEmbedding'].apply(
    lambda serialized: np.array([float(component) for component in serialized.split(",")])
)

## Find Neighbors For Artists

Use a KDTree to find the artists whose embeddings are closest to a given artist.

In [19]:
# Stack the per-artist vectors into a single 2-D array (one row per artist,
# in the same order as `artists`) and index it with a KDTree for
# nearest-neighbour lookups.
ad_emb = np.vstack(artists['DetailEmbedding'].tolist())
ad_neighbors = KDTree(ad_emb)

In [20]:
def get_similar_artists(artist_pattern, n, regex=True):
    """Return the artists whose embeddings are nearest to a matched artist.

    The first row of ``artists`` whose ``Artist`` name contains
    ``artist_pattern`` is used as the query point; its embedding is looked up
    in the ``ad_neighbors`` KDTree.

    Parameters
    ----------
    artist_pattern : str
        Pattern searched for inside the artist name. With ``regex=True``
        (the default) it is a regular expression, so characters such as
        ``.`` or ``(`` are special and inline flags like ``(?i)`` work.
    n : int
        Number of neighbours to return. The query artist itself is normally
        the first row (distance 0). Values larger than the number of artists
        are clamped to the number of artists.
    regex : bool, default True
        Pass ``False`` to treat ``artist_pattern`` as a literal substring,
        e.g. for names that contain regex metacharacters.

    Returns
    -------
    pandas.DataFrame
        A copy of the neighbour rows from ``artists``, in the order returned
        by the KDTree query, with an added ``Distance`` column holding the
        distances reported by that query.

    Raises
    ------
    IndexError
        If no artist name matches ``artist_pattern``.
    """
    # na=False keeps missing artist names out of the mask instead of
    # propagating NaN, which cannot be used for boolean indexing.
    matches = artists[artists.Artist.str.contains(artist_pattern, regex=regex, na=False)]
    if matches.empty:
        raise IndexError(f"No artist matches pattern {artist_pattern!r}")
    s_artist = matches.iloc[0]

    # KDTree.query rejects k larger than the number of indexed points.
    k = min(n, len(artists))
    # query() expects a 2-D array of query points, hence the reshape to (1, dim).
    n_distances, neighbors = ad_neighbors.query(s_artist.DetailEmbedding.reshape(1, -1), k=k)

    # Copy so that adding the Distance column never touches `artists` itself.
    neighbor_rows = artists.iloc[neighbors[0]].copy()
    neighbor_rows['Distance'] = n_distances[0]
    return neighbor_rows

In [21]:
# Ten nearest artists to Dr. Dre (the pattern is a regex, so "." matches any character).
get_similar_artists(artist_pattern="Dr. Dre", n=10)

Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Distance
2817,Dr. Dre,https://en.wikipedia.org/wiki/Dr._Dre,"[1.2855527, -0.0500333, -1.283109, -3.730988, ...",0.0
6721,N.W.A,https://en.wikipedia.org/wiki/N.W.A,"[0.9409786, -1.3837426, -2.867902, -1.1560463,...",8.350523
3095,Eminem,https://en.wikipedia.org/wiki/Eminem,"[-0.5083133, -2.3587565, -2.728328, -2.3337731...",8.480308
6034,Makaveli,https://en.wikipedia.org/wiki/Tupac_Shakur,"[1.4039732, -1.9706135, -1.3985945, -1.7411282...",8.913099
45,2Pac,https://en.wikipedia.org/wiki/Tupac_Shakur,"[1.367418, -1.822259, -1.4461393, -1.6682366, ...",8.965055
9797,The Notorious B.I.G.,https://en.wikipedia.org/wiki/The_Notorious_B....,"[1.0675318, 0.08797085, -0.092092246, -2.70204...",8.974415
65,50 Cent,https://en.wikipedia.org/wiki/50_Cent,"[0.34396002, -0.42005056, -2.005829, -2.337675...",9.007605
2938,Eazy-E,https://en.wikipedia.org/wiki/Eazy-E,"[1.8762194, 0.7717415, -3.0955117, -0.19510518...",9.010661
6982,Notorious B.I.G.,https://en.wikipedia.org/wiki/Notorious_B.I.G.,"[1.0067303, 0.14022663, -0.1228546, -2.7223122...",9.014412
9442,The D.O.C.,https://en.wikipedia.org/wiki/The_D.O.C.,"[0.89909476, 0.50968736, -3.1806424, -0.389132...",9.015563


In [22]:
# Ten nearest artists to the first artist whose name contains "Nirvana".
get_similar_artists(artist_pattern="Nirvana", n=10)

Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Distance
6936,Nirvana,https://en.wikipedia.org/wiki/Nirvana_(band),"[0.28319836, 1.0070648, -1.67811, -1.3276137, ...",0.0
5425,Kurt Cobain,https://en.wikipedia.org/wiki/Kurt_Cobain,"[-0.6089347, 0.43105546, 0.28856748, -0.839133...",8.385896
9015,Sweet 75,https://en.wikipedia.org/wiki/Sweet_75,"[-0.7353841, 1.6106955, -2.2669494, 0.27626446...",8.97028
1970,Ciccone Youth,https://en.wikipedia.org/wiki/Ciccone_Youth,"[0.65132445, 0.0708135, 0.13343593, 0.49504712...",9.272674
8669,Sonic Youth,https://en.wikipedia.org/wiki/Sonic_Youth,"[0.64971334, -0.0028599463, 0.1993272, 0.41892...",9.420928
4549,Jane's Addiction,https://en.wikipedia.org/wiki/Jane%27s_Addiction,"[0.2790259, -0.31124517, -0.7799071, 0.9336407...",9.713855
3414,Foo Fighters,https://en.wikipedia.org/wiki/Foo_Fighters,"[-0.9212452, -0.05392002, -1.9442375, -0.02863...",10.033896
2553,Dee Gees,https://en.wikipedia.org/wiki/Foo_Fighters,"[-1.1625684, 0.15792744, -2.187276, 0.2859022,...",10.043255
10647,Violent Femmes,https://en.wikipedia.org/wiki/Violent_Femmes,"[-0.8325488, 0.5266511, -0.4793426, 0.6328272,...",10.084339
4244,Hüsker Dü,https://en.wikipedia.org/wiki/H%C3%BCsker_D%C3%BC,"[-0.12512562, -0.945129, -1.4524004, 0.6408224...",10.131278


In [23]:
# Ten nearest artists to the first artist whose name contains "Nine Inch Nails".
get_similar_artists(artist_pattern="Nine Inch Nails", n=10)

Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Distance
6928,Nine Inch Nails,https://en.wikipedia.org/wiki/Nine_Inch_Nails,"[1.060734, -0.25217286, -1.725567, -0.23710454...",0.0
6929,"Nine Inch Nails, Jane's Addiction and Street S...",https://en.wikipedia.org/wiki/Nine_Inch_Nails,"[1.1367074, -0.12394637, -1.5993673, -0.210905...",0.909239
10322,Tool,https://en.wikipedia.org/wiki/Tool_(band),"[2.425879, 0.058206216, -2.363124, 0.30551565,...",8.75967
8531,Skinny Puppy,https://en.wikipedia.org/wiki/Skinny_Puppy,"[0.34352, 1.5137974, -1.1172683, -0.07200697, ...",9.132811
6116,Marilyn Manson,https://en.wikipedia.org/wiki/Marilyn_Manson_(...,"[-0.73967016, 0.0073882705, -1.2480562, -0.800...",10.029504
6487,Ministry,https://en.wikipedia.org/wiki/Ministry_(band),"[1.0117749, -0.5220631, -1.9405884, 0.31129205...",10.683318
7585,Prototype,https://en.wikipedia.org/wiki/Prototype_(band),"[0.16111772, -0.26046318, -1.1244873, 2.593899...",10.848428
7693,Rage Against the Machine,https://en.wikipedia.org/wiki/Rage_Against_the...,"[-0.862439, -2.2870255, -0.6182816, 1.4990528,...",10.878702
9953,The Smashing Pumpkins,https://en.wikipedia.org/wiki/The_Smashing_Pum...,"[3.0714421, -1.4383739, -2.0018141, -0.4146463...",10.880068
4203,How to Destroy Angels,https://en.wikipedia.org/wiki/How_to_Destroy_A...,"[0.14115435, 0.21004239, -0.5088485, 0.1839790...",10.909624


In [24]:
# Ten nearest artists to the first artist whose name contains "The Doors".
get_similar_artists(artist_pattern="The Doors", n=10)

Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Distance
9489,The Doors,https://en.wikipedia.org/wiki/The_Doors,"[1.2109855, -1.7594427, -0.42090923, -1.716139...",0.0
10712,Warren Zevon,https://en.wikipedia.org/wiki/Warren_Zevon,"[0.51454633, 0.5419159, -1.0498676, -1.7367202...",9.56146
10206,Tim Hardin,https://en.wikipedia.org/wiki/Tim_Hardin,"[0.92015576, -1.6389208, -1.1367968, -0.104717...",10.045448
2924,Eagles,https://en.wikipedia.org/wiki/Eagles_(band),"[-0.16062242, 0.7036986, -1.9971437, -0.999429...",10.173196
9874,The Ramones,https://en.wikipedia.org/wiki/The_Ramones,"[0.3320279, 0.053347073, -1.1003983, 1.2281626...",10.305949
5849,Lou Reed,https://en.wikipedia.org/wiki/Lou_Reed,"[0.35972795, 0.43413144, 1.1458242, -1.1807466...",10.340633
7712,Ramones,https://en.wikipedia.org/wiki/Ramones,"[0.43494982, 0.1777931, -1.073549, 1.2050525, ...",10.410459
10275,Tom Petty and the Heartbreakers,https://en.wikipedia.org/wiki/Tom_Petty_and_th...,"[0.5346684, 1.3130541, -2.0048978, -1.4241321,...",10.453282
10058,"The Velvet Underground, Nico",https://en.wikipedia.org/wiki/The_Velvet_Under...,"[0.30003092, 0.52220464, 2.135478, -0.05985224...",10.469569
2772,Don Felder,https://en.wikipedia.org/wiki/Don_Felder,"[0.40788862, 1.1514858, -0.8904536, -1.0150033...",10.471077


In [34]:
# Ten nearest artists to the first artist whose name contains "Tyler, the Creator".
get_similar_artists(artist_pattern="Tyler, the Creator", n=10)

Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Distance
10478,"Tyler, the Creator","https://en.wikipedia.org/wiki/Tyler,_the_Creator","[0.44178477, -0.27782884, -1.7205541, 0.415640...",0.0
7024,Odd Future,https://en.wikipedia.org/wiki/Odd_Future,"[2.068722, -0.87144643, -2.714225, 1.0274805, ...",8.636011
4435,JPEGMafia,https://en.wikipedia.org/wiki/JPEGMafia,"[-0.3574364, -0.4912752, -0.68514496, 1.046391...",9.161615
5293,Kid Cudi,https://en.wikipedia.org/wiki/Kid_Cudi,"[-0.9032663, -0.8860003, -0.3372873, -0.594543...",9.512351
7839,Rich Homie Quan,https://en.wikipedia.org/wiki/Rich_Homie_Quan,"[-0.46195683, -0.72853684, -1.3761784, 0.07195...",9.730589
2359,Danny Brown,https://en.wikipedia.org/wiki/Danny_Brown,"[0.57242614, -1.2740331, -2.0439444, -0.868464...",9.881786
10339,Tory Lanez,https://en.wikipedia.org/wiki/Tory_Lanez,"[-1.1888053, -0.614222, -0.81849134, 1.3312767...",9.917665
139,ASAP Ferg,https://en.wikipedia.org/wiki/ASAP_Ferg,"[-0.7289018, -0.41083354, -3.4215078, 0.279719...",9.950637
9642,The Internet,https://en.wikipedia.org/wiki/The_Internet_(band),"[0.615246, -1.2516159, -0.41293225, -0.5307643...",10.029289
8063,Run the Jewels,https://en.wikipedia.org/wiki/Run_the_Jewels,"[-2.042726, -2.1163864, -1.8778237, 0.26470992...",10.242963


In [36]:
# Ten nearest artists to the first artist whose name contains "King Gizzard".
get_similar_artists(artist_pattern="King Gizzard", n=10)

Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Distance
5331,King Gizzard & the Lizard Wizard,https://en.wikipedia.org/wiki/King_Gizzard_%26...,"[1.0206289, -1.4176286, -2.7226553, 2.827317, ...",0.0
5332,King Gizzard & the Lizard Wizard and Mild High...,https://en.wikipedia.org/wiki/King_Gizzard_%26...,"[0.97872293, -1.4257667, -2.8579824, 2.8810258...",1.000115
3931,Gyroscope,https://en.wikipedia.org/wiki/Gyroscope_(band),"[-0.004045004, 0.8720964, -2.5695677, 2.626075...",9.728286
9350,The Brave,https://en.wikipedia.org/wiki/The_Brave_(band),"[-2.8741932, 0.19269842, -3.5078435, 1.8769646...",9.852444
3128,Enter Shikari,https://en.wikipedia.org/wiki/Enter_Shikari,"[-0.6809488, -2.0099018, -0.3973276, 2.392696,...",10.124831
6974,Northlane,https://en.wikipedia.org/wiki/Northlane,"[-1.9955171, -0.7661425, -2.2066023, 4.764893,...",10.568205
7021,Oceansize,https://en.wikipedia.org/wiki/Oceansize,"[1.4254204, -2.0988803, 0.21671934, 4.6193886,...",10.89553
8463,Silent Planet,https://en.wikipedia.org/wiki/Silent_Planet,"[-0.81666434, -0.2288976, -1.894825, 3.4789212...",11.101444
9612,The Herd,https://en.wikipedia.org/wiki/The_Herd_(Austra...,"[-1.713963, -1.160503, -2.2674818, 2.2935603, ...",11.129957
8879,Stonefield,https://en.wikipedia.org/wiki/Stonefield_(band),"[-1.7262988, 1.3099293, -1.963079, 2.0165188, ...",11.173995


In [80]:
# Forty nearest artists to the first artist whose name contains "Good Charlotte".
# NOTE(review): the saved output includes a Cluster column, which is only added
# in the "Cluster Artists" section further down — this cell was last run out of order.
get_similar_artists(artist_pattern="Good Charlotte", n=40)

Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
3796,Good Charlotte,https://en.wikipedia.org/wiki/Good_Charlotte,"[-2.2257192, -0.048755243, -2.3326244, 1.13214...",24,0.0
423,Amber Pacific,https://en.wikipedia.org/wiki/Amber_Pacific,"[-0.48234248, -0.27544785, -1.6196232, 1.99607...",52,7.785864
372,All Time Low,https://en.wikipedia.org/wiki/All_Time_Low,"[-1.7697186, -0.020244442, -2.568141, 1.032781...",24,8.049939
386,Allstar Weekend,https://en.wikipedia.org/wiki/Allstar_Weekend,"[-1.7306136, 0.03933968, -0.6582081, 2.1119907...",24,8.118259
9726,The Madden Brothers,https://en.wikipedia.org/wiki/The_Madden_Brothers,"[-0.15325317, -0.89423406, -1.7196397, -0.1960...",0,8.38877
6975,Northstar,https://en.wikipedia.org/wiki/Northstar_(band),"[-0.3678329, -0.086863816, -3.2799904, 2.21992...",52,8.420219
9446,The Dangerous Summer,https://en.wikipedia.org/wiki/The_Dangerous_Su...,"[-2.3299773, -0.36187315, -2.3552477, 1.692115...",52,8.504901
6681,MxPx,https://en.wikipedia.org/wiki/MxPx,"[0.551973, -0.31943566, -1.9139434, 2.0851002,...",52,8.579223
819,Bad Omens,https://en.wikipedia.org/wiki/Bad_Omens,"[-2.0120165, 0.34331718, -2.8623607, 2.1697276...",52,8.603495
7213,Paramore,https://en.wikipedia.org/wiki/Paramore,"[-1.9288765, -0.6675234, -2.8751972, 0.4874572...",24,8.656491


In [77]:
# Twenty nearest artists to Fall Out Boy; the inline (?i) flag makes the regex case-insensitive.
# NOTE(review): the saved output includes a Cluster column, which is only added
# in the "Cluster Artists" section further down — this cell was last run out of order.
get_similar_artists(artist_pattern="(?i)fall out boy", n=20)

Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
3256,Fall Out Boy,https://en.wikipedia.org/wiki/Fall_Out_Boy,"[-0.9527551, -1.9406043, -1.6220739, -0.639152...",24,0.0
7242,Patrick Stump,https://en.wikipedia.org/wiki/Patrick_Stump,"[-0.16210628, -2.6460173, -0.29198265, -1.4116...",24,8.223794
7213,Paramore,https://en.wikipedia.org/wiki/Paramore,"[-1.9288765, -0.6675234, -2.8751972, 0.4874572...",24,8.788439
7197,Panic! at the Disco,https://en.wikipedia.org/wiki/Panic!_at_the_Disco,"[-0.32114434, 0.111790664, 0.4643855, -0.77588...",24,9.024956
1200,Blink-182,https://en.wikipedia.org/wiki/Blink-182,"[1.8337098, -0.18554549, -2.2480779, -0.267367...",12,9.128789
1130,Black Kids,https://en.wikipedia.org/wiki/Black_Kids,"[-1.9656986, -0.38459587, -2.3543031, -0.06821...",0,9.166206
11083,blink-182,https://en.wikipedia.org/wiki/Blink-182,"[1.8423167, -0.10939487, -2.3524585, -0.149461...",12,9.201703
3796,Good Charlotte,https://en.wikipedia.org/wiki/Good_Charlotte,"[-2.2257192, -0.048755243, -2.3326244, 1.13214...",24,9.280091
372,All Time Low,https://en.wikipedia.org/wiki/All_Time_Low,"[-1.7697186, -0.020244442, -2.568141, 1.032781...",24,9.323557
9564,The Friday Night Boys,https://en.wikipedia.org/wiki/The_Friday_Night...,"[-1.1513455, -0.8268586, -1.6216462, 0.9671634...",52,9.420861


## Cluster Artists

In [47]:
# No fixed cluster count is requested (n_clusters=None); the number of
# clusters is determined by the distance threshold instead.
cm = AgglomerativeClustering(
    n_clusters=None,
    distance_threshold=50,
)

In [48]:
# Fit the clustering on the stacked embeddings and store each artist's cluster label.
# ad_emb was built from `artists` row by row, so the labels line up with the frame's rows.
# This adds the column to `artists` in place, so any cell run afterwards displays a Cluster column.
artists['Cluster'] = cm.fit_predict(ad_emb)

In [52]:
# Number of artists assigned to each cluster, largest cluster first.
artist_clusters = (
    artists.groupby('Cluster')
    .size()
    .reset_index(name='Count')
    .sort_values('Count', ascending=False)
)

In [60]:
# Show up to 10 example artist names per cluster so each cluster can be eyeballed.
# Only the Artist column is grouped, so the lambda receives just the names, and
# the sample is seeded so re-running the cell displays the same examples.
# Clusters with 10 or fewer artists are listed in full.
(artists
    .groupby('Cluster')['Artist']
    .apply(lambda names: list(names.sample(10, random_state=42) if len(names) > 10 else names))
    .reset_index(name='SampleArtists')
)

Unnamed: 0,Cluster,SampleArtists
0,0,"[The Color Fred, Vulfpeck, All-4-One, Balance ..."
1,1,"[Emmelie de Forest, Agnes, Nazia and Zoheb, Da..."
2,2,"[Portugal. The Man, Carnifex, Bane, Chase Atla..."
3,3,"[Yoko Ono, Kim Gordon, and Thurston Moore, The..."
4,4,"[Ayaka, Faye Wong, Nana Mizuki, Nogizaka46, Ma..."
5,5,"[Caitlin Cary, Travis Morrison Hellfighters, T..."
6,6,"[Lena Katina, 2 Unlimited, Nervo, Ace of Base,..."
7,7,"[Neil Cicierega, David Byrne & Fatboy Slim, Ro..."
8,8,"[Twiztid, Bizarre, Cassidy, Dark Lotus, Kotton..."
9,9,"[Betty Johnson, Grace Slick / Paul Kantner / J..."


In [90]:
# Look up the row for Pink Floyd (exact name match) to see which cluster it was assigned to.
artists.loc[artists["Artist"] == "Pink Floyd"]

Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster
7428,Pink Floyd,https://en.wikipedia.org/wiki/Pink_Floyd,"[0.041599546, -4.0036306, 2.5162368, -1.459433...",11


In [91]:
# Inspect up to 100 members of cluster 11 (the cluster shown for Pink Floyd in
# the recorded output of the previous cell).
# The sample size is capped at the cluster size because DataFrame.sample raises
# ValueError when asked for more rows than exist; the seed keeps the displayed
# rows stable across re-runs.
artists[artists.Cluster == 11].pipe(
    lambda members: members.sample(min(100, len(members)), random_state=42)
)

Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster
504,Andy Stewart,https://en.wikipedia.org/wiki/Andy_Stewart_(mu...,"[-1.4166086, -2.2193916, 0.27560267, 1.1297065...",11
9655,The Jeff Beck Group,https://en.wikipedia.org/wiki/The_Jeff_Beck_Group,"[-0.068981744, -2.5164418, -0.70135754, 0.3847...",11
3152,Eric Clapton,https://en.wikipedia.org/wiki/Eric_Clapton,"[-0.6286452, -3.7599585, -1.0668907, -0.079960...",11
813,Bad Company,https://en.wikipedia.org/wiki/Bad_Company,"[0.4348427, -4.1125865, -2.5312629, 0.6347459,...",11
4273,Ian Hunter,https://en.wikipedia.org/wiki/Ian_Hunter_(singer),"[-0.32886884, -1.3227445, 0.60036594, 1.548126...",11
7345,Pete Townshend,https://en.wikipedia.org/wiki/Pete_Townshend,"[1.9168913, -2.2830653, 2.5661125, -0.28090262...",11
3145,Eric Burdon & Jimmy Witherspoon,https://en.wikipedia.org/wiki/Eric_Burdon,"[-1.1035532, -2.6647227, -0.7674295, 1.090619,...",11
4441,Jack Bruce,https://en.wikipedia.org/wiki/Jack_Bruce,"[0.9137891, -3.296326, 0.7257387, 2.5910025, -...",11
4879,John Mayall & the Bluesbreakers,https://en.wikipedia.org/wiki/John_Mayall_%26_...,"[0.43612406, -2.896659, -2.9226446, 1.7336658,...",11
4013,Harry Secombe,https://en.wikipedia.org/wiki/Harry_Secombe,"[0.13529943, -1.8080721, -0.66634995, 1.346176...",11
