# Recommend Artists Based on Embeddings

In [1]:
# Execute the shared setup notebook in this kernel's namespace.
# NOTE(review): this notebook uses pd, np, sqlite3, display and HTML without
# importing them, so they are presumably defined by the setup notebook — confirm.
%run ./wikipedia_setup.ipynb

In [2]:
import gensim
from gensim.models.doc2vec import TaggedDocument, Doc2Vec
from sklearn.neighbors import KDTree
from sklearn.cluster import AgglomerativeClustering

In [3]:
# Let pandas render up to 100 rows before truncating displayed DataFrames.
pd.set_option('display.max_rows', 100)

## Configure DB & Load Model

In [4]:
# Open the SQLite database used by every query below. The path is relative,
# so it is resolved against the kernel's current working directory.
db = sqlite3.Connection("var/data/music.db")

In [5]:
# Load the saved Doc2Vec model from disk.
# NOTE(review): doc_model is not referenced again in the visible part of this
# notebook; the embeddings used below are read from the database instead.
doc_model = Doc2Vec.load("var/models/artist_detail_model.d2v")

2021-10-31 10:24:01,893 - gensim.utils - INFO - loading Doc2Vec object from var/models/artist_detail_model.d2v
2021-10-31 10:24:02,119 - gensim.utils - INFO - loading dv recursively from var/models/artist_detail_model.d2v.dv.* with mmap=None
2021-10-31 10:24:02,120 - gensim.utils - INFO - loading wv recursively from var/models/artist_detail_model.d2v.wv.* with mmap=None
2021-10-31 10:24:02,123 - gensim.utils - INFO - setting ignored attribute cum_table to None
2021-10-31 10:24:06,116 - gensim.utils - INFO - Doc2Vec lifecycle event {'fname': 'var/models/artist_detail_model.d2v', 'datetime': '2021-10-31T10:24:06.116360', 'gensim': '4.1.2', 'python': '3.7.6 (tags/v3.7.6:43364a7ae0, Dec 19 2019, 00:42:30) [MSC v.1916 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.19041-SP0', 'event': 'loaded'}


## Load Artists + Embeddings

In [6]:
# Pull every column of the artist_embedding table into a DataFrame.
artists = pd.read_sql("""
    select *
    from artist_embedding
""", db)

# DetailEmbedding arrives as comma-separated text; parse each cell into a
# NumPy array of floats so the vectors can be stacked into a matrix later.
artists['DetailEmbedding'] = artists.DetailEmbedding.apply(
    lambda de: np.array([float(value) for value in de.split(",")])
)

## Find Neighbors For Artists

Use a KDTree to find the artists whose embeddings are closest to a given artist's.

In [7]:
# Stack the per-artist embedding arrays into one 2-D matrix (one row per
# artist, in the same positional order as `artists`) and index it with a
# KDTree. The indices returned by KDTree queries are therefore positional row
# numbers, which is why lookups below use `artists.iloc[...]`.
ad_emb = np.vstack(artists.DetailEmbedding)
ad_neighbors = KDTree(ad_emb)

In [71]:
def get_similar_artists(artist_pattern, n):
    """Return the `n` artists whose embeddings are nearest to one artist.

    Parameters
    ----------
    artist_pattern : str
        Artist name to look up. Despite the parameter name, it is compared
        for exact equality against `artists.Artist` (no wildcard or regex
        matching). If several rows share the name, the first one is used.
    n : int
        Number of neighbors to return. Capped at the number of rows in
        `artists`, because the KDTree query fails when asked for more
        neighbors than it holds points.

    Returns
    -------
    pandas.DataFrame
        Copy of the neighboring rows of `artists`, nearest first, with an
        added `Distance` column holding the distance reported by the KDTree.
        The queried artist is itself in the tree, so it typically appears as
        the first row with distance 0.

    Raises
    ------
    IndexError
        If no row of `artists` has the requested name.
    """
    matches = artists[artists.Artist == artist_pattern]
    if matches.empty:
        # Same exception type the bare `.iloc[0]` raised before, but with a
        # message that names the artist that could not be found.
        raise IndexError(f"No artist named {artist_pattern!r} found in `artists`")
    s_artist = matches.iloc[0]

    k = min(n, len(artists))
    # query() expects a 2-D array of query points, hence the reshape to (1, dim).
    n_distances, neighbors = ad_neighbors.query(s_artist.DetailEmbedding.reshape(1, -1), k=k)

    # `neighbors` holds positional indices into the matrix the tree was built from.
    neighbor_rows = artists.iloc[neighbors[0]].copy()
    neighbor_rows['Distance'] = n_distances[0]
    return neighbor_rows

In [109]:
# Artists used to spot-check the nearest-neighbor results. Each name must
# match a value of `artists.Artist` exactly, since the lookup is an equality test.
test_artists = [
    "Dr. Dre",
    "Nirvana",
    "Nine Inch Nails",
    "The Doors",
    "Tyler, the Creator",
    "King Gizzard & the Lizard Wizard",
    "Snarky Puppy",
    "Pink Floyd",
    "Metallica",
    "Animals as Leaders",
    "Johnny Cash",
    "Thelonious Monk",
    "Frank Zappa",
    "Crystal Castles",
    "Aesop Rock"
]

In [110]:
# For each test artist, render a heading followed by its 10 nearest neighbors.
# NOTE(review): the name is interpolated into the HTML unescaped (e.g. the "&"
# in "King Gizzard & the Lizard Wizard"); acceptable for this fixed list only.
for ta in test_artists:
    display(HTML(f"<h2>{ta}</h2>"))
    display(get_similar_artists(ta, 10))

Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
2817,Dr. Dre,https://en.wikipedia.org/wiki/Dr._Dre,"[1.2855527, -0.0500333, -1.283109, -3.730988, ...",64,0.0
6721,N.W.A,https://en.wikipedia.org/wiki/N.W.A,"[0.9409786, -1.3837426, -2.867902, -1.1560463,...",8,8.350523
3095,Eminem,https://en.wikipedia.org/wiki/Eminem,"[-0.5083133, -2.3587565, -2.728328, -2.3337731...",64,8.480308
6034,Makaveli,https://en.wikipedia.org/wiki/Tupac_Shakur,"[1.4039732, -1.9706135, -1.3985945, -1.7411282...",8,8.913099
45,2Pac,https://en.wikipedia.org/wiki/Tupac_Shakur,"[1.367418, -1.822259, -1.4461393, -1.6682366, ...",8,8.965055
9797,The Notorious B.I.G.,https://en.wikipedia.org/wiki/The_Notorious_B....,"[1.0675318, 0.08797085, -0.092092246, -2.70204...",8,8.974415
65,50 Cent,https://en.wikipedia.org/wiki/50_Cent,"[0.34396002, -0.42005056, -2.005829, -2.337675...",64,9.007605
2938,Eazy-E,https://en.wikipedia.org/wiki/Eazy-E,"[1.8762194, 0.7717415, -3.0955117, -0.19510518...",8,9.010661
6982,Notorious B.I.G.,https://en.wikipedia.org/wiki/Notorious_B.I.G.,"[1.0067303, 0.14022663, -0.1228546, -2.7223122...",8,9.014412
9442,The D.O.C.,https://en.wikipedia.org/wiki/The_D.O.C.,"[0.89909476, 0.50968736, -3.1806424, -0.389132...",8,9.015563


Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
6936,Nirvana,https://en.wikipedia.org/wiki/Nirvana_(band),"[0.28319836, 1.0070648, -1.67811, -1.3276137, ...",44,0.0
5425,Kurt Cobain,https://en.wikipedia.org/wiki/Kurt_Cobain,"[-0.6089347, 0.43105546, 0.28856748, -0.839133...",44,8.385896
9015,Sweet 75,https://en.wikipedia.org/wiki/Sweet_75,"[-0.7353841, 1.6106955, -2.2669494, 0.27626446...",27,8.97028
1970,Ciccone Youth,https://en.wikipedia.org/wiki/Ciccone_Youth,"[0.65132445, 0.0708135, 0.13343593, 0.49504712...",44,9.272674
8669,Sonic Youth,https://en.wikipedia.org/wiki/Sonic_Youth,"[0.64971334, -0.0028599463, 0.1993272, 0.41892...",44,9.420928
4549,Jane's Addiction,https://en.wikipedia.org/wiki/Jane%27s_Addiction,"[0.2790259, -0.31124517, -0.7799071, 0.9336407...",12,9.713855
3414,Foo Fighters,https://en.wikipedia.org/wiki/Foo_Fighters,"[-0.9212452, -0.05392002, -1.9442375, -0.02863...",12,10.033896
2553,Dee Gees,https://en.wikipedia.org/wiki/Foo_Fighters,"[-1.1625684, 0.15792744, -2.187276, 0.2859022,...",12,10.043255
10647,Violent Femmes,https://en.wikipedia.org/wiki/Violent_Femmes,"[-0.8325488, 0.5266511, -0.4793426, 0.6328272,...",34,10.084339
4244,Hüsker Dü,https://en.wikipedia.org/wiki/H%C3%BCsker_D%C3%BC,"[-0.12512562, -0.945129, -1.4524004, 0.6408224...",0,10.131278


Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
6928,Nine Inch Nails,https://en.wikipedia.org/wiki/Nine_Inch_Nails,"[1.060734, -0.25217286, -1.725567, -0.23710454...",31,0.0
6929,"Nine Inch Nails, Jane's Addiction and Street S...",https://en.wikipedia.org/wiki/Nine_Inch_Nails,"[1.1367074, -0.12394637, -1.5993673, -0.210905...",31,0.909239
10322,Tool,https://en.wikipedia.org/wiki/Tool_(band),"[2.425879, 0.058206216, -2.363124, 0.30551565,...",31,8.75967
8531,Skinny Puppy,https://en.wikipedia.org/wiki/Skinny_Puppy,"[0.34352, 1.5137974, -1.1172683, -0.07200697, ...",31,9.132811
6116,Marilyn Manson,https://en.wikipedia.org/wiki/Marilyn_Manson_(...,"[-0.73967016, 0.0073882705, -1.2480562, -0.800...",31,10.029504
6487,Ministry,https://en.wikipedia.org/wiki/Ministry_(band),"[1.0117749, -0.5220631, -1.9405884, 0.31129205...",31,10.683318
7585,Prototype,https://en.wikipedia.org/wiki/Prototype_(band),"[0.16111772, -0.26046318, -1.1244873, 2.593899...",46,10.848428
7693,Rage Against the Machine,https://en.wikipedia.org/wiki/Rage_Against_the...,"[-0.862439, -2.2870255, -0.6182816, 1.4990528,...",31,10.878702
9953,The Smashing Pumpkins,https://en.wikipedia.org/wiki/The_Smashing_Pum...,"[3.0714421, -1.4383739, -2.0018141, -0.4146463...",12,10.880068
4203,How to Destroy Angels,https://en.wikipedia.org/wiki/How_to_Destroy_A...,"[0.14115435, 0.21004239, -0.5088485, 0.1839790...",27,10.909624


Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
9489,The Doors,https://en.wikipedia.org/wiki/The_Doors,"[1.2109855, -1.7594427, -0.42090923, -1.716139...",44,0.0
10712,Warren Zevon,https://en.wikipedia.org/wiki/Warren_Zevon,"[0.51454633, 0.5419159, -1.0498676, -1.7367202...",9,9.56146
10206,Tim Hardin,https://en.wikipedia.org/wiki/Tim_Hardin,"[0.92015576, -1.6389208, -1.1367968, -0.104717...",66,10.045448
2924,Eagles,https://en.wikipedia.org/wiki/Eagles_(band),"[-0.16062242, 0.7036986, -1.9971437, -0.999429...",38,10.173196
9874,The Ramones,https://en.wikipedia.org/wiki/The_Ramones,"[0.3320279, 0.053347073, -1.1003983, 1.2281626...",44,10.305949
5849,Lou Reed,https://en.wikipedia.org/wiki/Lou_Reed,"[0.35972795, 0.43413144, 1.1458242, -1.1807466...",3,10.340633
7712,Ramones,https://en.wikipedia.org/wiki/Ramones,"[0.43494982, 0.1777931, -1.073549, 1.2050525, ...",44,10.410459
10275,Tom Petty and the Heartbreakers,https://en.wikipedia.org/wiki/Tom_Petty_and_th...,"[0.5346684, 1.3130541, -2.0048978, -1.4241321,...",38,10.453282
10058,"The Velvet Underground, Nico",https://en.wikipedia.org/wiki/The_Velvet_Under...,"[0.30003092, 0.52220464, 2.135478, -0.05985224...",3,10.469569
2772,Don Felder,https://en.wikipedia.org/wiki/Don_Felder,"[0.40788862, 1.1514858, -0.8904536, -1.0150033...",38,10.471077


Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
10478,"Tyler, the Creator","https://en.wikipedia.org/wiki/Tyler,_the_Creator","[0.44178477, -0.27782884, -1.7205541, 0.415640...",70,0.0
7024,Odd Future,https://en.wikipedia.org/wiki/Odd_Future,"[2.068722, -0.87144643, -2.714225, 1.0274805, ...",70,8.636011
4435,JPEGMafia,https://en.wikipedia.org/wiki/JPEGMafia,"[-0.3574364, -0.4912752, -0.68514496, 1.046391...",70,9.161615
5293,Kid Cudi,https://en.wikipedia.org/wiki/Kid_Cudi,"[-0.9032663, -0.8860003, -0.3372873, -0.594543...",70,9.512351
7839,Rich Homie Quan,https://en.wikipedia.org/wiki/Rich_Homie_Quan,"[-0.46195683, -0.72853684, -1.3761784, 0.07195...",37,9.730589
2359,Danny Brown,https://en.wikipedia.org/wiki/Danny_Brown,"[0.57242614, -1.2740331, -2.0439444, -0.868464...",70,9.881786
10339,Tory Lanez,https://en.wikipedia.org/wiki/Tory_Lanez,"[-1.1888053, -0.614222, -0.81849134, 1.3312767...",70,9.917665
139,ASAP Ferg,https://en.wikipedia.org/wiki/ASAP_Ferg,"[-0.7289018, -0.41083354, -3.4215078, 0.279719...",70,9.950637
9642,The Internet,https://en.wikipedia.org/wiki/The_Internet_(band),"[0.615246, -1.2516159, -0.41293225, -0.5307643...",39,10.029289
8063,Run the Jewels,https://en.wikipedia.org/wiki/Run_the_Jewels,"[-2.042726, -2.1163864, -1.8778237, 0.26470992...",70,10.242963


Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
5331,King Gizzard & the Lizard Wizard,https://en.wikipedia.org/wiki/King_Gizzard_%26...,"[1.0206289, -1.4176286, -2.7226553, 2.827317, ...",48,0.0
5332,King Gizzard & the Lizard Wizard and Mild High...,https://en.wikipedia.org/wiki/King_Gizzard_%26...,"[0.97872293, -1.4257667, -2.8579824, 2.8810258...",48,1.000115
3931,Gyroscope,https://en.wikipedia.org/wiki/Gyroscope_(band),"[-0.004045004, 0.8720964, -2.5695677, 2.626075...",48,9.728286
9350,The Brave,https://en.wikipedia.org/wiki/The_Brave_(band),"[-2.8741932, 0.19269842, -3.5078435, 1.8769646...",2,9.852444
3128,Enter Shikari,https://en.wikipedia.org/wiki/Enter_Shikari,"[-0.6809488, -2.0099018, -0.3973276, 2.392696,...",2,10.124831
6974,Northlane,https://en.wikipedia.org/wiki/Northlane,"[-1.9955171, -0.7661425, -2.2066023, 4.764893,...",2,10.568205
7021,Oceansize,https://en.wikipedia.org/wiki/Oceansize,"[1.4254204, -2.0988803, 0.21671934, 4.6193886,...",26,10.89553
8463,Silent Planet,https://en.wikipedia.org/wiki/Silent_Planet,"[-0.81666434, -0.2288976, -1.894825, 3.4789212...",52,11.101444
9612,The Herd,https://en.wikipedia.org/wiki/The_Herd_(Austra...,"[-1.713963, -1.160503, -2.2674818, 2.2935603, ...",48,11.129957
8879,Stonefield,https://en.wikipedia.org/wiki/Stonefield_(band),"[-1.7262988, 1.3099293, -1.963079, 2.0165188, ...",48,11.173995


Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
8620,Snarky Puppy,https://en.wikipedia.org/wiki/Snarky_Puppy,"[-0.39370465, -0.9710802, -0.3010775, 1.362210...",0,0.0
6389,Michael League,https://en.wikipedia.org/wiki/Michael_League,"[-1.9903436, -1.3119059, 0.45480385, 0.9035634...",0,6.352669
1821,Charlie Hunter Trio,https://en.wikipedia.org/wiki/Charlie_Hunter,"[0.24391803, -1.1963154, 0.7925854, 0.4195682,...",0,6.977246
4664,Jeremy Pelt,https://en.wikipedia.org/wiki/Jeremy_Pelt,"[-0.7404448, -0.21755597, -0.30944538, 0.71368...",55,8.002839
5038,Julian Lage Trio,https://en.wikipedia.org/wiki/Julian_Lage,"[-0.9192201, -0.76052547, 0.5242695, 0.0459178...",0,8.099003
1870,Chicago Underground Trio,https://en.wikipedia.org/wiki/Chicago_Undergro...,"[-1.0360185, 0.05067508, -0.07860727, 1.172295...",0,8.117764
6132,Mark Guiliana,https://en.wikipedia.org/wiki/Mark_Guiliana,"[0.2553318, -0.78311366, 0.04803361, 0.731321,...",0,8.136742
3179,Ethan Iverson Quartet,https://en.wikipedia.org/wiki/Ethan_Iverson,"[-0.1736509, -0.41735303, 0.03144392, -0.02570...",0,8.210391
6866,Nicholas Payton,https://en.wikipedia.org/wiki/Nicholas_Payton,"[-0.06784756, -1.3772978, 0.22162648, 0.522334...",0,8.295596
4597,Jay Gonzalez,https://en.wikipedia.org/wiki/Jay_Gonzalez_(mu...,"[0.6028329, -0.14308384, -0.84398174, -0.03113...",0,8.296318


Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
7428,Pink Floyd,https://en.wikipedia.org/wiki/Pink_Floyd,"[0.041599546, -4.0036306, 2.5162368, -1.459433...",11,0.0
9028,Syd Barrett,https://en.wikipedia.org/wiki/Syd_Barrett,"[0.91263825, -3.1036685, 2.2536554, 0.2122785,...",11,8.054463
7868,Rick Wright,https://en.wikipedia.org/wiki/Rick_Wright,"[0.71265364, -2.9450555, 1.0054953, -1.0867952...",11,8.727247
7855,Richard Wright,https://en.wikipedia.org/wiki/Richard_Wright_(...,"[0.4487919, -2.7181158, 1.1199858, -1.1038822,...",11,8.964512
7988,Roger Waters,https://en.wikipedia.org/wiki/Roger_Waters,"[-1.0401227, -3.395724, 1.2671676, 0.048215635...",11,9.383799
2454,David Gilmour,https://en.wikipedia.org/wiki/David_Gilmour,"[0.63639015, -1.7787879, 0.495369, -0.5057037,...",11,10.193016
6879,Nick Mason and Rick Fenn,https://en.wikipedia.org/wiki/Nick_Mason,"[1.6614282, -3.162382, -0.25954843, -0.7243272...",11,11.286024
6880,Nick Mason's Fictitious Sports,https://en.wikipedia.org/wiki/Nick_Mason,"[1.5449176, -3.1382313, -0.24004008, -0.698771...",11,11.359037
5565,Led Zeppelin,https://en.wikipedia.org/wiki/Led_Zeppelin,"[0.646523, -2.9011726, 0.033950884, 1.2880193,...",11,11.88391
10576,Van der Graaf Generator,https://en.wikipedia.org/wiki/Van_der_Graaf_Ge...,"[0.5818087, -0.32095703, 2.5187292, 1.8784775,...",56,12.482024


Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
6358,Metallica,https://en.wikipedia.org/wiki/Metallica,"[0.021730565, -2.77789, -1.8969055, 1.7310175,...",31,0.0
6359,Metallica with the San Francisco Symphony,https://en.wikipedia.org/wiki/Metallica,"[0.0037658217, -2.7255077, -1.8805038, 1.79077...",31,0.886606
6298,Megadeth,https://en.wikipedia.org/wiki/Megadeth,"[1.4083267, -0.3182869, -3.092479, 3.811962, -...",31,8.36034
9035,System of a Down,https://en.wikipedia.org/wiki/System_of_a_Down,"[0.88598394, -0.6166408, -2.1424818, 1.9970595...",31,9.426858
8561,Slayer,https://en.wikipedia.org/wiki/Slayer,"[-0.22797705, -0.66055524, -3.217932, 4.278247...",31,9.534418
570,Anthrax,https://en.wikipedia.org/wiki/Anthrax_(America...,"[-0.49768257, -0.33564985, -2.3104866, 3.56090...",31,10.078333
7693,Rage Against the Machine,https://en.wikipedia.org/wiki/Rage_Against_the...,"[-0.862439, -2.2870255, -0.6182816, 1.4990528,...",31,10.253137
2735,Disturbed,https://en.wikipedia.org/wiki/Disturbed_(band),"[0.32168218, -0.15548263, -2.2631006, 5.072659...",31,10.765455
10322,Tool,https://en.wikipedia.org/wiki/Tool_(band),"[2.425879, 0.058206216, -2.363124, 0.30551565,...",31,10.815683
3915,Guns N' Roses,https://en.wikipedia.org/wiki/Guns_N%27_Roses,"[-0.12638988, 0.09558749, -3.6174486, 0.407881...",54,10.823952


Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
537,Animals as Leaders,https://en.wikipedia.org/wiki/Animals_as_Leaders,"[-0.35485828, 0.101559095, -2.1269197, 2.75426...",2,0.0
9964,The Sound of Animals Fighting,https://en.wikipedia.org/wiki/The_Sound_of_Ani...,"[-1.0798234, -0.43699715, -1.684137, 1.5604655...",27,7.655813
7019,Oceano,https://en.wikipedia.org/wiki/Oceano_(band),"[-2.5284464, 0.6100266, -2.6459894, 3.671027, ...",2,7.775487
9215,Thank You Scientist,https://en.wikipedia.org/wiki/Thank_You_Scientist,"[-1.8664856, -0.63849837, -1.2175362, 2.050434...",52,7.806024
10600,Vektor,https://en.wikipedia.org/wiki/Vektor_(band),"[-0.7956045, 0.31125832, -1.8441229, 2.5985105...",46,7.824387
629,Arkaea,https://en.wikipedia.org/wiki/Arkaea,"[-0.6553504, 0.7150783, -1.8626635, 1.8232595,...",80,7.871587
510,Anew Revolution,https://en.wikipedia.org/wiki/Anew_Revolution,"[-1.1828316, -0.84064597, -2.3287814, 1.790817...",46,7.904165
10896,Wretched,https://en.wikipedia.org/wiki/Wretched_(metal_...,"[-2.088224, -0.18624419, -2.0847876, 2.525825,...",2,7.913867
8560,Slaves on Dope,https://en.wikipedia.org/wiki/Slaves_on_Dope,"[-0.43333918, -0.30965996, -2.3862603, 2.54786...",12,7.993073
648,Arsis,https://en.wikipedia.org/wiki/Arsis,"[-1.5795884, 0.63016796, -2.6634214, 3.1741107...",31,8.011841


Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
4909,Johnny Cash,https://en.wikipedia.org/wiki/Johnny_Cash,"[1.4957788, -0.7835535, -0.96012264, 0.0758837...",35,0.0
4911,Johnny Cash and June Carter Cash,https://en.wikipedia.org/wiki/Johnny_Cash,"[1.6099122, -0.55471706, -1.0327983, 0.0464005...",35,0.855492
4910,Johnny Cash and June Carter,https://en.wikipedia.org/wiki/Johnny_Cash,"[1.4803188, -0.88242537, -1.1252922, 0.1017749...",35,0.96826
6350,Merle Haggard and George Jones,https://en.wikipedia.org/wiki/Merle_Haggard,"[2.1653876, 0.68343365, -1.9021673, 0.2865666,...",35,7.472667
6349,Merle Haggard,https://en.wikipedia.org/wiki/Merle_Haggard,"[1.9765968, 0.39040262, -2.2545104, 0.25365907...",35,7.490056
6351,Merle Haggard and the Strangers,https://en.wikipedia.org/wiki/Merle_Haggard,"[2.138545, 0.5211568, -2.2202218, 0.26090285, ...",35,7.618388
8019,Rosanne Cash,https://en.wikipedia.org/wiki/Rosanne_Cash,"[1.8095064, 0.22206005, -0.99992055, -1.717415...",84,8.931758
5059,June Carter Cash,https://en.wikipedia.org/wiki/June_Carter_Cash,"[0.523592, -0.014059163, -3.3669198, 0.0268521...",84,9.027341
3979,Hank Williams,https://en.wikipedia.org/wiki/Hank_Williams,"[0.48297596, -1.4262364, -2.1268718, -0.019925...",84,9.280165
9168,Tennessee Ernie Ford,https://en.wikipedia.org/wiki/Tennessee_Ernie_...,"[1.2066958, 0.19580296, -2.362876, 0.35580596,...",84,9.698516


Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
10131,Thelonious Monk,https://en.wikipedia.org/wiki/Thelonious_Monk,"[1.3236248, -1.4610785, 2.1854656, 0.8591331, ...",81,0.0
1529,Bud Powell,https://en.wikipedia.org/wiki/Bud_Powell,"[-0.12226143, -1.0133735, 2.7336912, 0.3095159...",81,6.868418
1815,Charles Mingus,https://en.wikipedia.org/wiki/Charles_Mingus,"[0.4318651, -0.20629476, 1.2453475, 1.299351, ...",81,7.600607
1822,Charlie Parker,https://en.wikipedia.org/wiki/Charlie_Parker,"[0.90149647, -1.4503442, 2.5635028, 0.65616006...",81,7.768442
6467,Miles Davis,https://en.wikipedia.org/wiki/Miles_Davis,"[1.8203194, -1.3783013, 1.4133562, -0.08504401...",81,7.88445
6036,Mal Waldron,https://en.wikipedia.org/wiki/Mal_Waldron,"[0.09390967, -0.32788667, 2.3756695, 1.045799,...",81,8.229007
8673,Sonny Rollins,https://en.wikipedia.org/wiki/Sonny_Rollins,"[0.48749408, -1.9948779, 0.63102, 1.153534, -3...",55,8.235637
3051,Elmo Hope,https://en.wikipedia.org/wiki/Elmo_Hope,"[1.1894351, 0.117529616, 0.7056927, 1.8157315,...",81,8.317331
4424,J. J. Johnson,https://en.wikipedia.org/wiki/J._J._Johnson,"[1.0776122, -1.0214149, 0.26703256, -0.1398547...",55,8.491367
977,Benny Goodman,https://en.wikipedia.org/wiki/Benny_Goodman,"[0.749154, -2.868556, 0.46563745, -0.08983608,...",81,8.529526


Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
3466,Frank Zappa,https://en.wikipedia.org/wiki/Frank_Zappa,"[1.3201275, -1.6736283, 0.49244577, 1.7891124,...",3,0.0
3468,Frank Zappa and The Mothers,https://en.wikipedia.org/wiki/Frank_Zappa,"[1.2510762, -1.4596589, 0.5700625, 1.8608723, ...",3,0.742593
3469,Frank Zappa and The Mothers of Invention,https://en.wikipedia.org/wiki/Frank_Zappa,"[1.233183, -1.4448514, 0.64113045, 1.8178601, ...",3,0.870435
3467,Frank Zappa & The Mothers of Invention with Ca...,https://en.wikipedia.org/wiki/Frank_Zappa,"[1.2147509, -1.4146931, 0.5676594, 1.9262733, ...",3,0.877467
9768,The Mothers of Invention,https://en.wikipedia.org/wiki/The_Mothers_of_I...,"[1.1942087, -0.80340695, 0.6805217, 1.8576227,...",3,8.336503
1681,Captain Beefheart,https://en.wikipedia.org/wiki/Captain_Beefheart,"[3.2096143, -1.3189019, 3.1504781, -0.00977150...",3,11.386404
1682,Captain Beefheart & his Magic Band,https://en.wikipedia.org/wiki/Captain_Beefheart,"[3.0575578, -1.3231854, 3.06541, -0.04069842, ...",3,11.397961
1683,Captain Beefheart & the Magic Band,https://en.wikipedia.org/wiki/Captain_Beefheart,"[2.9553223, -1.1626133, 3.130786, 0.15107195, ...",3,11.512735
6446,Mike Bloomfield/Al Kooper/Stephen Stills,https://en.wikipedia.org/wiki/Mike_Bloomfield,"[3.100735, -0.67464507, 0.01797964, 0.2652832,...",63,12.477339
6445,Mike Bloomfield & Al Kooper,https://en.wikipedia.org/wiki/Mike_Bloomfield,"[3.3074453, -0.54840726, 0.083469346, 0.289399...",63,12.595344


Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
2206,Crystal Castles,https://en.wikipedia.org/wiki/Crystal_Castles,"[-2.3786213, 1.720056, -0.6421294, 0.97738504,...",45,0.0
347,Alice Glass,https://en.wikipedia.org/wiki/Alice_Glass,"[-1.6071162, 2.9547224, -0.7794519, -0.3689876...",45,6.572272
9466,The Departure,https://en.wikipedia.org/wiki/The_Departure,"[-0.79209393, 0.72695607, -0.7511203, 0.550726...",26,8.496254
9918,The Safety Fire,https://en.wikipedia.org/wiki/The_Safety_Fire,"[-2.9172258, 0.049108624, -1.7587222, 3.059865...",2,8.608218
1144,Black Peaks,https://en.wikipedia.org/wiki/Black_Peaks_(band),"[-0.24953727, -0.7948105, -1.5882419, 0.258613...",26,8.638066
10363,Trailer Trash Tracys,https://en.wikipedia.org/wiki/Trailer_Trash_Tr...,"[-1.5663157, -0.12509324, -0.90989083, 0.38639...",0,8.678374
1178,Blanck Mass,https://en.wikipedia.org/wiki/Blanck_Mass,"[-0.7644815, -0.15446673, 0.27153748, 0.812709...",27,8.679305
1188,Bleached,https://en.wikipedia.org/wiki/Bleached,"[-1.1339824, 0.21663594, -1.3985174, -0.196395...",27,8.725605
10783,White Rose Movement,https://en.wikipedia.org/wiki/White_Rose_Movement,"[-1.6042454, -0.73409456, -1.1034228, 0.423407...",26,8.811384
1966,Chvrches,https://en.wikipedia.org/wiki/Chvrches,"[-2.0698385, 0.9466864, 1.8596033, -0.27650324...",26,8.869539


Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,Distance
217,Aesop Rock,https://en.wikipedia.org/wiki/Aesop_Rock,"[1.5709599, -2.3995297, -0.96275586, 1.1011693...",8,0.0
218,Aesop Rock and Blockhead,https://en.wikipedia.org/wiki/Aesop_Rock,"[1.5408995, -2.2402227, -0.89001673, 1.0210788...",8,1.09393
2999,El-P,https://en.wikipedia.org/wiki/El-P,"[1.6139878, -1.9222982, -1.3806736, 0.9058491,...",39,8.592907
1206,Blockhead,https://en.wikipedia.org/wiki/Blockhead_(music...,"[0.17684881, -0.9057878, 0.0033948997, -0.8360...",39,9.582348
4960,Jonwayne,https://en.wikipedia.org/wiki/Jonwayne,"[1.32209, -0.45423606, 0.4005551, -0.27543113,...",39,9.650269
7093,Open Mike Eagle,https://en.wikipedia.org/wiki/Open_Mike_Eagle,"[-0.2557377, -0.70402896, 1.1051924, -0.410294...",39,9.755743
4158,Homeboy Sandman,https://en.wikipedia.org/wiki/Homeboy_Sandman,"[1.5858854, -0.054282162, -0.8565406, -0.73803...",39,9.844695
7659,R.A. the Rugged Man,https://en.wikipedia.org/wiki/R.A._the_Rugged_Man,"[0.46239978, 0.12132466, -0.3845071, 1.0211447...",39,10.024866
8698,Souls of Mischief,https://en.wikipedia.org/wiki/Souls_of_Mischief,"[0.2850436, -2.064198, -1.1176888, 0.7820858, ...",39,10.050105
7167,P.O.S,https://en.wikipedia.org/wiki/P.O.S_(rapper),"[1.0501184, 1.230038, -0.6957069, 1.671424, 0....",39,10.057933


## Try Mixing Artists To Find References

In [131]:
def find_close_mixed(artist_names, n=20):
    """Find the artists nearest to the mean embedding of several artists.

    Parameters
    ----------
    artist_names : iterable of str
        Names matched exactly against `artists.Artist`. The embeddings of all
        matching rows are averaged into a single query point.
    n : int, default 20
        Number of neighbors to return. Capped at the number of rows in
        `artists`, because the KDTree query fails when asked for more
        neighbors than it holds points.

    Returns
    -------
    pandas.DataFrame
        Copy of the neighboring rows of `artists` with an added `SimScore`
        column, sorted ascending. Despite its name, `SimScore` is the
        distance reported by the KDTree, so smaller means closer.

    Raises
    ------
    ValueError
        If none of `artist_names` is present in `artists`.

    Warns
    -----
    UserWarning
        If some (but not all) names are missing; they are left out of the mean.
    """
    import warnings

    artist_names = list(artist_names)
    ref_artists = artists[artists.Artist.isin(artist_names)]
    if ref_artists.empty:
        # np.vstack on an empty selection would raise a ValueError as well,
        # but with a message that says nothing about the missing artists.
        raise ValueError(f"None of the requested artists were found: {artist_names!r}")

    missing = sorted(set(artist_names) - set(ref_artists.Artist))
    if missing:
        # Averaging fewer artists than requested silently shifts the query
        # point, so make the omission visible.
        warnings.warn(f"Artists not found and ignored: {missing!r}")

    # Mean of the reference embeddings is the point to search around.
    ref_emb = np.vstack(ref_artists.DetailEmbedding).mean(axis=0)

    k = min(n, len(artists))
    scores, ids = ad_neighbors.query(ref_emb.reshape(1, -1), k=k)
    close_artists = artists.iloc[ids[0]].copy()
    close_artists['SimScore'] = scores[0]
    # Explicit sort kept from the original so the nearest-first order is guaranteed.
    close_artists = close_artists.sort_values(by='SimScore', ascending=True)

    return close_artists

In [140]:
# Example: blend three artists and list the artists nearest to their mean embedding.
find_close_mixed(['The Cure', 'Animals as Leaders', 'System of a Down'])

Unnamed: 0,Artist,ArtistLink,DetailEmbedding,Cluster,SimScore
629,Arkaea,https://en.wikipedia.org/wiki/Arkaea,"[-0.6553504, 0.7150783, -1.8626635, 1.8232595,...",80,6.36411
4339,In Case of Fire,https://en.wikipedia.org/wiki/In_Case_of_Fire,"[-2.0683682, -0.13448018, -1.4897661, 0.505864...",26,6.47947
10753,Wellwater Conspiracy,https://en.wikipedia.org/wiki/Wellwater_Conspi...,"[-0.11677661, -0.7548648, -1.8598219, 1.609214...",46,6.548231
754,AxeWound,https://en.wikipedia.org/wiki/AxeWound,"[-1.7212716, -0.46619728, -1.6760972, 1.593793...",46,6.574576
6529,Mob Rules,https://en.wikipedia.org/wiki/Mob_Rules_(band),"[-1.0012428, -1.2749624, -1.7759936, 1.1756712...",46,6.839533
3328,Fight,https://en.wikipedia.org/wiki/Fight_(band),"[-0.97717875, -0.4489926, -1.7863394, 2.228654...",46,6.873934
7081,One Minute Silence,https://en.wikipedia.org/wiki/One_Minute_Silence,"[-0.53409046, -0.51835215, 0.45480764, 2.85845...",26,6.899177
9777,The Mute Gods,https://en.wikipedia.org/wiki/The_Mute_Gods,"[-0.5455548, -0.12845905, -0.7078378, 1.512511...",46,6.954371
6846,New Device,https://en.wikipedia.org/wiki/New_Device,"[-1.2462641, -0.26549223, -1.3422089, 1.258723...",46,6.96079
8716,Spawn of Possession,https://en.wikipedia.org/wiki/Spawn_of_Possession,"[-1.0086046, 0.27147156, -1.0598925, 3.3937385...",46,7.072101


## Cluster Artists

In [105]:
# With n_clusters=None the number of clusters is not fixed up front; it is
# determined by distance_threshold instead.
# NOTE(review): 50 is an unexplained magic number, presumably tuned by hand —
# TODO confirm and consider a named constant.
cm = AgglomerativeClustering(distance_threshold=50, n_clusters=None)

In [106]:
# Fit the clustering on the embedding matrix and store one label per artist.
# ad_emb was stacked from `artists` row by row, so the labels line up with
# the DataFrame rows. Note that this mutates the shared `artists` frame in place.
artists['Cluster'] = cm.fit_predict(ad_emb)

In [107]:
# Number of artists in each cluster, largest clusters first.
artist_clusters = (
    artists
    .groupby('Cluster')
    .size()
    .reset_index(name='Count')
    .sort_values(by='Count', ascending=False)
)

In [142]:
# Attach up to 10 example artist names to every cluster row.
# - random_state pins the sample, so re-running the notebook shows the same
#   example artists instead of a fresh random draw each time.
# - Any SampleArtists column left over from a previous run of this cell is
#   dropped first; otherwise re-running would merge it a second time and
#   produce SampleArtists_x / SampleArtists_y columns.
artist_clusters = pd.merge(
    artist_clusters.drop(columns='SampleArtists', errors='ignore'),
    (artists
        .groupby('Cluster')
        # Clusters with 10 or fewer artists are listed in full.
        .apply(lambda c_df: list(c_df.sample(10, random_state=0).Artist if len(c_df) > 10 else c_df.Artist))
        .reset_index()
        .rename(columns={0: 'SampleArtists'})
    ),
    on = 'Cluster'
)

In [148]:
# Load the artist, album title, genre and year of every row in the
# wp_albums_year_lists table.
albums = pd.read_sql("select Artist, Album, Genre, Year from wp_albums_year_lists", db)

In [157]:
def top_genres(df, n=5):
    """Return the most frequent genres in `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a `Genre` column. Rows whose genre is null or the empty
        string are ignored.
    n : int, default 5
        Maximum number of genres to return.

    Returns
    -------
    pandas.DataFrame
        Columns `Genre` and `Count`, sorted by `Count` descending, with at
        most `n` rows. Each distinct `Genre` string is counted as its own
        genre; no normalization or splitting is applied.
    """
    has_genre = df.Genre.notnull() & (df.Genre != '')
    return (df[has_genre]
        .groupby('Genre')
        .size()
        .reset_index(name='Count')
        .sort_values(by='Count', ascending=False)
        .head(n)
    )

# Sanity check: the most frequent genres across all loaded albums.
top_genres(albums)

Unnamed: 0,Genre,Count
1627,Hip hop,527
1861,Indie rock,346
727,Country,311
2401,Pop,278
3068,Rock,266


In [161]:
# Tag every album with its artist's cluster. The default inner join keeps
# only albums whose Artist value also appears in `artists`.
album_artist_cluster = albums.merge(artists[['Artist', 'Cluster']], on='Artist')

def get_album_cluster_info(df):
    """Summarize the albums of one cluster as a single labelled row.

    `df` is the slice of albums belonging to one cluster; it needs `Year`
    and `Genre` columns. The result is a Series with the earliest, latest
    and mean release year (each passed through int(), so the mean is
    truncated), the number of albums, and the list of the cluster's most
    frequent genres as returned by `top_genres`.
    """
    genre_names = list(top_genres(df).Genre.values)
    years = df.Year

    summary = {
        'Year_min': int(years.min()),
        'Year_max': int(years.max()),
        'Year_mean': int(years.mean()),
        'Album Count': len(df),
        'Genres': genre_names,
    }
    return pd.Series(summary)

# One summary row per cluster (year range, album count, top genres), joined
# onto the cluster sizes / sample artists and ordered newest-first by the
# mean release year.
clusters_w_album_info = (
    artist_clusters
    .merge(
        album_artist_cluster
            .groupby('Cluster')
            .apply(get_album_cluster_info)
            .reset_index(),
        on='Cluster',
    )
    .sort_values(by='Year_mean', ascending=False)
)
clusters_w_album_info

Unnamed: 0,Cluster,Count,SampleArtists,Year_min,Year_max,Year_mean,Album Count,Genres
19,19,166,"[Wendy, Brown Eyed Girls, Jay B, Woodz, AOA, B...",1999,2021,2017,318,"[K-pop, Pop, K-pop, dance, J-pop, Hip hop, EDM..."
77,18,42,"[Digga D, Kano, Wizkid, Olamide, Headie One, P...",2003,2021,2017,71,"[Hip hop, British hip hop, Soul, R&B, hip hop,..."
35,79,120,"[Santigold, Yung Baby Tate, New Politics, Jung...",2000,2022,2017,188,"[Hip hop, Indie rock, R&B, Alternative R&B, Pop]"
20,70,158,"[Macklemore & Ryan Lewis, Mustard, Free Nation...",1993,2022,2016,421,"[Hip hop, Hip hop, trap, R&B, Alternative hip ..."
10,65,205,"[Young Ejecta, Boy Harsher, Meiko, Emilíana To...",1990,2022,2016,280,"[Indie rock, Pop, Indie pop, Synth-pop, Indie ..."
9,15,216,"[Hannah Grace, Powfu, Josh Kumra, Sinéad Harne...",1992,2022,2015,376,"[Pop, Indie rock, Indie pop, Electronic, Indie..."
86,47,18,"[Alexandra Stan, Margaret, Rita Ora & Imanbek,...",1988,2021,2014,28,"[Pop, Alternative R&B, trap, Dance, Dance, tec..."
46,72,95,"[Clarence Clarity, John Maus, Micachu and the ...",1995,2022,2014,215,"[Experimental pop, Synth-pop, Electronic, Elec..."
47,45,95,"[Troye Sivan, Charlotte Cardin, Grimes, Karen ...",1995,2021,2014,179,"[Pop, Pop, R&B, Electropop, Electronic, Electr..."
82,83,28,"[J-Ax and Fedez, Francesca Michielin, Il Volo,...",1976,2021,2013,47,"[Pop, Pop, rock, Latin pop, Blues rock, Pop ro..."
