# PRACTICA 4b - CARLOS ARRANZ HERRERO
## PARTE 1 - El objetivo es usar el modelo word2vec para encontrar canciones similares utilizando un corpus en español

In [1]:
import pandas as pd
import numpy as np
import gensim.models.word2vec as w2v
import multiprocessing
import os
import re
import pprint
import sklearn.manifold
import matplotlib.pyplot as plt

unable to import 'smart_open.gcs', disabling that module


1. Cargamos el csv con las canciones en español

In [2]:
songs = pd.read_csv("hhgroups_merge_28_05.csv", header=0)
#songs.head()
songs.head(10)

Unnamed: 0,id,artista,cancion,album,letra,anyo,visitas
0,0,Denom,Machete (con Jarfaiter y Gente jodida),Medicina,"Para su nuevo disco ""Medicina"", Denom ha vuelt...",2019,126
1,1,Denom,Vacío (con Ivo Incuerdo),Medicina,"[Denom]\nYo que quería, yo que pedía vida,\nSe...",2019,361
2,2,Denom,El orgullo es fiel (con Juancho Marqués y Elio...,Medicina,"""El orgullo es fiel"" es uno de los cortes incl...",2019,262
3,3,Denom,Mueve mueve (con Fernandocosta),Medicina,"[Estribillo: Denom] (x2)\nMueve, mueve, mueve,...",2019,578
4,4,Jaro Desperdizio,Insomnia,"Sin álbum, es un vídeo suelto","[Estribillo]\nY en esta noche, ¿Quién me arrop...",2019,219
5,5,Khan,Lobo,"Sin álbum, es un vídeo suelto",Que nadie creía en leyends hasta que llegué yo...,2019,683
6,6,Dyo,Claridad [Productor desconocido],"Sin álbum, es un tema suelto","Me olvidé tomarme la pastilla otra vez,\nEstar...",2019,58
7,7,Sheldrack,Especial agradecimiento,Titiritero,Hace tiempo dejé de buscar el sentido de la vi...,2018,35
8,8,Arce,Huella (con Pitizion),Pedigrí,[Estribillo: Pitizon]\nNo te voy a negar que a...,2019,119
9,9,Arce,Boxer,Pedigrí,"Se preguntaron dónde está, me perdí como Wally...",2019,843


2. Cargamos la biblia como corpus

In [11]:
with open("biblia.txt", "r") as f:
    data = f.readlines()

biblia = ""
for i in data:
    biblia = biblia + i
corpus_biblia = biblia.split("\n")
corpus_biblia[1:20]

['',
 'Parte # 1 (INCLUYE LA LEY), los 10 primeros libros del AT: Gn, Ex, Lv, Nm, Dt, Jos, Jue, Rt, 1 S y 2 S',
 '',
 '',
 'LIBRO PRIMERO DE MOISÉS',
 '',
 'GÉNESIS',
 '',
 'La creación',
 '',
 'Génesis 1',
 '',
 'Génesis 1:1',
 '          En el principio creó Dios los cielos y la tierra.',
 '',
 ' Génesis 1:2',
 '          Y la tierra estaba desordenada y vacía, y las tinieblas',
 '          estaban sobre la faz del abismo, y el Espíritu de Dios se',
 '          movía sobre la faz de las aguas.']

## Construimos el vocabulario primero con el corpus de la biblia y luego con las canciones

In [40]:
corpus_biblia = []
 
for line in data:
    words = line.lower().split()
    corpus_biblia.append(words)

text_corpus = []
# for song in songs['text']: # ENGLISH
for song in songs['letra']: # SPANISH
    words = song.lower().split()
    text_corpus.append(words)


    # Dimensionality of the resulting word vectors.
# more dimensions, more computationally expensive to train
# but also more accurate
# more dimensions = more generalized
# num_features = 50
num_features = 100

# Minimum word count threshold.
min_word_count = 1

# Number of threads to run in parallel.
#more workers, faster we train
num_workers = multiprocessing.cpu_count()

# Context window length.
context_size = 9

downsampling = 1e-1

# Seed for the RNG, to make the results reproducible.
# random number generator
# deterministic, good for debugging
seed = 1

songs2vec_biblia = w2v.Word2Vec(
    sg=1,
    seed=seed,
    workers=num_workers,
    size=num_features,
    min_count=min_word_count,
    window=context_size,
    sample=downsampling
)

songs2vec = w2v.Word2Vec(
    sg=1,
    seed=seed,
    workers=num_workers,
    size=num_features,
    min_count=min_word_count,
    window=context_size,
    sample=downsampling
)

songs2vec_biblia.build_vocab(corpus_biblia)
songs2vec.build_vocab(text_corpus)

print ("corpus de la biblia: ",len(corpus_biblia))
print ("corpus de las canciones: ",len(text_corpus))

corpus de la biblia:  35186
corpus de las canciones:  9325


## ENTRENAMOS EL MODELO CON LAS CANCIONES

In [41]:
import time

# CANCIONES
start_time = time.time()

songs2vec.train(text_corpus, total_examples=songs2vec.corpus_count, epochs=10)

if not os.path.exists("trained"):
    os.makedirs("trained")

songs2vec.save(os.path.join("trained", "songs2vec.w2v"))

print("--- %s seconds ---" % (time.time() - start_time))

--- 91.6035692691803 seconds ---


## ENTRENAMOS EL MODELO CON LA BIBLIA

In [42]:
# BIBLIA
start_time = time.time()

songs2vec_biblia.train(corpus_biblia, total_examples=songs2vec_biblia.corpus_count, epochs=10)

if not os.path.exists("trained"):
    os.makedirs("trained")

songs2vec_biblia.save(os.path.join("trained", "songs2vec_biblia.w2v"))

print("--- %s seconds ---" % (time.time() - start_time))

--- 12.900712728500366 seconds ---


In [44]:
songs2vec = w2v.Word2Vec.load(os.path.join("trained", "songs2vec.w2v"))
songs2vec_biblia = w2v.Word2Vec.load(os.path.join("trained", "songs2vec_biblia.w2v"))

### Let's explore our model

#### Como estamos analizando palabras de canciones, es probable que salgan palabras quie rimen entre ellas "amor", "rencor", "dolor", y además tienen que ver

### CORPUS CANCIONES

In [45]:
songs2vec.wv.most_similar("amor")

[('amor,', 0.8003445863723755),
 ('platónico,', 0.7525262832641602),
 ('dolor', 0.7465886473655701),
 ('platónico...', 0.7451823949813843),
 ('platónico', 0.7396997213363647),
 ('odio,', 0.735668957233429),
 ('fónicos,', 0.7285569310188293),
 ('rencor,', 0.71053147315979),
 ('odio', 0.697514533996582),
 ('¿amor', 0.6955718398094177)]

In [46]:
songs2vec.wv.most_similar("muerte")

[('muerte,', 0.7952715158462524),
 ('suerte', 0.7297278642654419),
 ('vencida.', 0.7029434442520142),
 ('decidirá', 0.6993062496185303),
 ('dolorosa', 0.6943186521530151),
 ('dolorosa,', 0.6892216205596924),
 ('bebere', 0.6889551877975464),
 ('separe.', 0.6842770576477051),
 ('lenta.', 0.6833838224411011),
 ('incumplida,', 0.6825197339057922)]

In [47]:
songs2vec.wv.most_similar("vida")

[('vida,', 0.7875282764434814),
 ('complicada,', 0.6742469668388367),
 ('vida.', 0.6729293465614319),
 ('vida....', 0.6709336042404175),
 ('láminas,', 0.6671640872955322),
 ('muerte', 0.6626427173614502),
 ('saberla', 0.6601601839065552),
 ('semo', 0.6598530411720276),
 ('corta.', 0.659111499786377),
 ('divertida,', 0.6589784026145935)]

In [60]:
songs2vec.wv.most_similar("quiero")

[('puedo', 0.7458089590072632),
 ('escaparme,', 0.706802487373352),
 ('mí)', 0.6960821747779846),
 ('quiero,', 0.6954433917999268),
 ('viajar,', 0.6893172264099121),
 ('prefiero', 0.68848717212677),
 ('oirte,', 0.6872514486312866),
 ('besarte,', 0.6827871799468994),
 ('centrarme,', 0.6806821823120117),
 ('migre,', 0.6772598028182983)]

In [62]:
songs2vec.wv.most_similar(positive=['dinero'])

[('dinero,', 0.7281863689422607),
 ('¡money!', 0.695968747138977),
 ('ingresa', 0.6924116015434265),
 ('gastándose', 0.6904134750366211),
 ('innecesario,', 0.689932107925415),
 ('cobre.', 0.6898140907287598),
 ('parné,', 0.682694673538208),
 ('pasta', 0.6821595430374146),
 ('shitto,', 0.6790845990180969),
 ('rico', 0.6759052872657776)]

In [63]:
songs2vec.wv.most_similar(negative=['dinero'], positive=["te","quiero","vente"])

[('escucharme,', 0.6686912775039673),
 ('"egotripping"', 0.6678745746612549),
 ('consigues', 0.6581234931945801),
 ('triste!...', 0.6580321788787842),
 ('babe,', 0.6574562191963196),
 ('selmouni]', 0.653918981552124),
 ('fuiste)', 0.6535998582839966),
 ('decir;', 0.6508377194404602),
 ('túmbate,', 0.6506816148757935),
 ('despiertes,', 0.6490801572799683)]

### CORPUS BIBLIA

In [49]:
songs2vec_biblia.wv.most_similar("amor")

[('amor,', 0.7578495740890503),
 ('tito,', 0.7438755035400391),
 ('cristo;', 0.7399457693099976),
 ('ministro', 0.7332652807235718),
 ('creyentes', 0.7306680083274841),
 ('porque:', 0.7295238971710205),
 ('entrañable', 0.7286202311515808),
 ('justificados', 0.7276186943054199),
 ('epafras,', 0.7276151180267334),
 ('mansedumbre,', 0.7254500389099121)]

In [50]:
songs2vec_biblia.wv.most_similar("muerte")

[('digna', 0.7293793559074402),
 ('condenación,', 0.7277209758758545),
 ('justificados', 0.7143285870552063),
 ('transgresión', 0.7132445573806763),
 ('muerte.', 0.7062108516693115),
 ('condenado', 0.7044743895530701),
 ('persecución', 0.6962323188781738),
 ('fuésemos', 0.6957705020904541),
 ('fe.', 0.6936288475990295),
 ('haberlo', 0.691423773765564)]

In [51]:
songs2vec_biblia.wv.most_similar("vida")

[('eterna.', 0.8197063207626343),
 ('vida,', 0.7713834047317505),
 ('eterna;', 0.7343639135360718),
 ('eterna', 0.7116934657096863),
 ('eterna,', 0.7034258842468262),
 ('creer', 0.6934571266174316),
 ('vida.', 0.6912249326705933),
 ('incircuncisión', 0.6911702752113342),
 ('justificados', 0.6860959529876709),
 ('cuál', 0.682608962059021)]

In [52]:
songs2vec_biblia.wv.most_similar("quiero")

[('quisiera', 0.8435990810394287),
 ('queréis', 0.8229110240936279),
 ('soportar', 0.8122639656066895),
 ('quiero,', 0.8078714609146118),
 ('creéis,', 0.8034185171127319),
 ('digo.', 0.8012316226959229),
 ('hago.', 0.8007820844650269),
 ('aborrezco,', 0.7984345555305481),
 ('hijitos', 0.7981651425361633),
 ('queráis', 0.7978537082672119)]

In [56]:
songs2vec_biblia.wv.most_similar(positive=['dinero'])

[('oficio', 0.7043678760528564),
 ('rescate', 0.6941173076629639),
 ('cuenta', 0.6844251155853271),
 ('melquisedec.', 0.6637261509895325),
 ('costales,', 0.662918210029602),
 ('devolverá', 0.6509934067726135),
 ('recogió', 0.6508060693740845),
 ('poniéndolo', 0.6486629247665405),
 ('pueda.', 0.6463227272033691),
 ('pida', 0.6442655324935913)]

In [59]:
songs2vec_biblia.wv.most_similar(negative=['dinero'], positive=["te","quiero","vente"])

[('deslealtad,', 0.7425546646118164),
 ('copa;', 0.7396420836448669),
 ('saqueado;', 0.7384157180786133),
 ('mismo?', 0.7368906140327454),
 ('negaré.', 0.7336001992225647),
 ('perdonaré.', 0.7290297746658325),
 ('fueres,', 0.7289489507675171),
 ('quimam,', 0.7281951904296875),
 ('olvidaste', 0.727904200553894),
 ('iré;', 0.7272760272026062)]

## Words out of context
### CANCIONES

In [64]:
songs2vec.wv.doesnt_match("felicidad amar gozar odiar".split())

'felicidad'

In [67]:
songs2vec.wv.doesnt_match("dinero pasta gozar molar".split())

'gozar'

In [66]:
songs2vec.wv.doesnt_match("padre odio terrible rencor".split())

'terrible'

### BIBLIA

In [71]:
songs2vec_biblia.wv.doesnt_match("felicidad amar gozar odiar".split())

'amar'

In [72]:
songs2vec_biblia.wv.doesnt_match("dinero pasta gozar molar".split())

'gozar'

In [73]:
songs2vec_biblia.wv.doesnt_match("padre odio terrible rencor".split())

'padre'

### Semantic distance between words

In [74]:
def nearest_similarity_cosmul_songs(start1, end1, start2):
    similarities = songs2vec.wv.most_similar_cosmul(
        positive=[start2, start1],
        negative=[end1]
    )
    end2 = similarities[0][0]
    print("corpus songs --> {0} es a {1}, lo que {2} es a {3}".format(start1, end1, start2, end2))

def nearest_similarity_cosmul_biblia(start1, end1, start2):
    similarities = songs2vec_biblia.wv.most_similar_cosmul(
        positive=[start2, start1],
        negative=[end1]
    )
    end2 = similarities[0][0]
    print("corpus biblia --> {0} es a {1}, lo que {2} es a {3}".format(start1, end1, start2, end2))

In [78]:
nearest_similarity_cosmul_songs("amor", "eterno", "odio")
nearest_similarity_cosmul_biblia("amor", "eterno", "odio")

corpus songs --> amor es a eterno, lo que odio es a odio,
corpus biblia --> amor es a eterno, lo que odio es a azotes,


In [79]:
nearest_similarity_cosmul_songs("comer", "beber", "hablar")
nearest_similarity_cosmul_biblia("comer", "beber", "hablar")

corpus songs --> comer es a beber, lo que hablar es a hablar,
corpus biblia --> comer es a beber, lo que hablar es a lengua,


In [87]:
nearest_similarity_cosmul_songs("dormir", "casa", "comer")
nearest_similarity_cosmul_biblia("dormir", "casa", "comer")

corpus songs --> dormir es a casa, lo que comer es a dormirme
corpus biblia --> dormir es a casa, lo que comer es a agua,


#### With the word vector embeddings in place, it is now time to calculate the normalised vector sum of each song. This process can take some time since it has to be done for each of 57,000 songs.

In [90]:
print(songs2vec["amor"])

print(songs2vec_biblia["amor"])

[-0.5254424   0.37319165 -0.30702472 -0.20934884  0.1124659   0.2293098
  0.21506673  0.08879013 -0.35972598  0.28139934  0.19584037  0.11563857
 -0.08671127  0.39319503  0.32346365  0.21683618 -0.12912312  0.44109458
  0.32289228 -0.4623951   0.14719552 -0.32484028 -0.04854123  0.18578264
 -0.3030897  -0.6424029  -0.11447746  0.53604436 -0.35013852  0.28324503
 -0.40645504  0.33406526 -0.07561712  0.3132752   0.05100819 -0.1606493
 -0.44252422  0.2354863  -0.07696358 -0.0360834  -0.06698675  0.3004768
  0.13214628 -0.06089914  0.07311619 -0.01583848 -0.13704272 -0.6386071
 -0.06714108  0.2894227   0.02802522  0.00905222 -0.6399158   0.26712278
  0.09376041  0.32280967  0.17270052  0.12815373  0.32821867 -0.10069174
 -0.8761195   0.21279721  0.03650244  0.32115927 -0.2939463  -0.5381572
 -0.5345278  -0.4178409   0.00618707 -0.68029636  0.44012982 -0.2319916
  0.08290929  0.21455806  0.05366224  0.22050846  0.11611177  0.35100594
 -0.28166425  0.32877386 -0.10310304 -0.36666328 -0.11708

In [99]:

def songVector(row):
    vector_sum = 0.0
    words = row.lower().split()
    for word in words:
        vector_sum = vector_sum + songs2vec[word]
    vector_sum = vector_sum.reshape(1,-1)
    normalised_vector_sum = sklearn.preprocessing.normalize(vector_sum)
    return normalised_vector_sum

def bibliaVector(row):
    vector_sum = 0.0
    words = row.lower().split()
    for word in words:
        vector_sum = vector_sum + songs2vec_biblia[word]
    vector_sum = vector_sum.reshape(1,-1)
    normalised_vector_sum = sklearn.preprocessing.normalize(vector_sum)
    return normalised_vector_sum


import time
start_time = time.time()

songs['song_vector'] = songs['letra'].apply(songVector)

ning: Call to deprecated `__getitem__` (Method will be removed in 4.0.0, use self.wv.__getitem__() instead).
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """


In [102]:
#biblia_vector =  list(map(bibliaVector, data))

**t-sne and random song selection** 

The songs have 50 dimensions each. Application of t-sne is memory intensive and hence it is slightly easier on the computer to use a random sample of the 57,000 songs.

In [103]:
song_vectors = []
from sklearn.model_selection import train_test_split

train, test = train_test_split(songs, test_size = 0.9)

for song_vector in train['song_vector']:
    song_vectors.append(song_vector)

train.head(10)

Unnamed: 0,id,artista,cancion,album,letra,anyo,visitas,song_vector
7347,2,Dzano,Lágrimas de niño y corazón de hombre,Lágrimas de niño y corazón de hombre,"Si cada derrota es una nueva lección,\nel exam...",2015,468,"[[-0.08757594, 0.094053656, 0.071278654, 0.000..."
2623,38,Genioh y Cadierno,Coitos interruptus,Punto G,[ Genioh ]\n\n\nEntro y os penetro no quiero a...,2008,2906,"[[-0.042032324, 0.09648913, 0.055291865, 0.028..."
6807,12,MC Aese,Beso a beso,Munchies,[Estribillo]\nTu tan bruta yo parlante yo tan ...,2016,767,"[[-0.035372846, 0.08116135, 0.088149786, 0.014..."
6434,24,Maikel de la Calle,Or nah (Remix español),"Sin álbum, es un vídeo suelto",A ella le gusta todo lo que le doy yo\nempezam...,2012,27120,"[[-0.04307834, 0.082311735, 0.078367114, 0.045..."
374,44,5mentarios,Corazon de banana (con Shinoflow),Bootleg Vol. 1,"Vaya noche\ntequeron go,\n5comentario producci...",2009,1707,"[[-0.046998248, 0.054868694, 0.06501387, -0.01..."
3914,9,Sondkalle,Raúl Ramírez (con Al Compás),Niños robar,"[Estribillo]\nIdentidad,\ncrónicas de una vida...",2006,1829,"[[-0.041884754, 0.07781396, 0.059423115, 0.014..."
3333,33,Demo,La ciudad de los niños perdidos,Más allá,En este mar de maldad mi dedo anular te señala...,2005,2441,"[[-0.076037705, 0.06355412, 0.058231983, -0.00..."
8634,24,Akrez,Rosa mistica,"Sin álbum, es un tema suelto",Entro a mi corazón y de ahí no habrá salida\np...,2013,2678,"[[-0.070330754, 0.09607172, 0.057319473, 0.018..."
137,27,Soge Culebra,La pieza del puzzle (con Ambkor),Mar de cristal,"[Ambkor]\nSentado en el rellano,\nRayado, pens...",2018,2996,"[[-0.08005507, 0.08891444, 0.059647404, 0.0217..."
7318,28,Richy Galo,Tu valor,Decisión,[Estribillo]\nValórate de una vez creen ti ten...,2015,401,"[[-0.06011723, 0.10457453, 0.07881246, 0.03873..."


In [105]:
#biblia_vectors = []
#from sklearn.model_selection import train_test_split

#train_biblia, test_biblia = train_test_split(data, test_size = 0.9)


#for biblia_vector in train['song_vector']:
#    biblia_vectors.append(biblia_vector)

#train_biblia.head(10)

I had a fairly measly 4gb machine and wasn't able to generate a more accurate model. However, one can play around with the number of iterations, learning rate and other factors to fit the model better. If you have too many dimensions (~300+), it might make sense to use PCA first and then t-sne.

In [106]:
X = np.array(song_vectors).reshape((1864, 50))

start_time = time.time()
tsne = sklearn.manifold.TSNE(n_components=2, n_iter=250, random_state=0, verbose=2)

all_word_vectors_matrix_2d = tsne.fit_transform(X)

print("--- %s seconds ---" % (time.time() - start_time))

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 1864 samples in 0.006s...
[t-SNE] Computed neighbors for 1864 samples in 0.124s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1864
[t-SNE] Computed conditional probabilities for sample 1864 / 1864
[t-SNE] Mean sigma: 0.038290
[t-SNE] Computed conditional probabilities in 0.063s
[t-SNE] Iteration 50: error = 70.5571899, gradient norm = 0.1885397 (50 iterations in 0.709s)
[t-SNE] Iteration 100: error = 69.5248260, gradient norm = 0.1818851 (50 iterations in 0.605s)
[t-SNE] Iteration 150: error = 70.2356339, gradient norm = 0.1549153 (50 iterations in 0.613s)
[t-SNE] Iteration 200: error = 70.4157333, gradient norm = 0.1602388 (50 iterations in 0.678s)
[t-SNE] Iteration 250: error = 69.5551300, gradient norm = 0.1546763 (50 iterations in 0.762s)
[t-SNE] KL divergence after 250 iterations with early exaggeration: 69.555130
[t-SNE] KL divergence after 251 iterations: 1797693134862315708145274237317043567980705675258

In [107]:
df=pd.DataFrame(all_word_vectors_matrix_2d,columns=['X','Y'])

df.head(10)

train.head()

df.reset_index(drop=True, inplace=True)
train.reset_index(drop=True, inplace=True)

Joining two dataframes to obtain each song's corresponding X,Y co-ordinate.

In [108]:
two_dimensional_songs = pd.concat([train, df], axis=1)

two_dimensional_songs.head()

Unnamed: 0,id,artista,cancion,album,letra,anyo,visitas,song_vector,X,Y
0,2.0,Dzano,Lágrimas de niño y corazón de hombre,Lágrimas de niño y corazón de hombre,"Si cada derrota es una nueva lección,\nel exam...",2015.0,468.0,"[[-0.08757594, 0.094053656, 0.071278654, 0.000...",-2.227787,-0.053145
1,38.0,Genioh y Cadierno,Coitos interruptus,Punto G,[ Genioh ]\n\n\nEntro y os penetro no quiero a...,2008.0,2906.0,"[[-0.042032324, 0.09648913, 0.055291865, 0.028...",2.345802,-0.086516
2,12.0,MC Aese,Beso a beso,Munchies,[Estribillo]\nTu tan bruta yo parlante yo tan ...,2016.0,767.0,"[[-0.035372846, 0.08116135, 0.088149786, 0.014...",-2.199507,0.073037
3,24.0,Maikel de la Calle,Or nah (Remix español),"Sin álbum, es un vídeo suelto",A ella le gusta todo lo que le doy yo\nempezam...,2012.0,27120.0,"[[-0.04307834, 0.082311735, 0.078367114, 0.045...",2.176128,-0.008516
4,44.0,5mentarios,Corazon de banana (con Shinoflow),Bootleg Vol. 1,"Vaya noche\ntequeron go,\n5comentario producci...",2009.0,1707.0,"[[-0.046998248, 0.054868694, 0.06501387, -0.01...",-2.179562,-0.010528


**Plotting the results**

Using plotly, I plotted the results so that it becomes easier to explore similar songs based on their colors and clusters.

In [111]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
#ma
init_notebook_mode(connected=True)

import plotly.graph_objs as go

trace1 = go.Scatter(
    y = two_dimensional_songs['Y'],
    x = two_dimensional_songs['X'],
    text = two_dimensional_songs['cancion'],
    mode='markers',
    marker=dict(
        size= 10,#'7',
        color = np.random.randn(5717), #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    )
)
data = [trace1]

iplot(data)

LA SIGUIENTE TIENE SENTIDO, SON ARTISTAS DE RAP POR LO QUE ESTA HACIENDO BIEN EL MODELO, EL CHOJIN, XHELAZ, R DE RUMBA, EL HINCHU

In [112]:
import plotly.graph_objects as go
import numpy as np

fig = go.Figure(data=go.Scatter(
    y = two_dimensional_songs['Y'],
    x = two_dimensional_songs['X'],
    text = two_dimensional_songs['artista']+ "_"+two_dimensional_songs['cancion'] ,
    mode='markers',
    marker=dict(
        size= 10,#'7',
        color = np.random.randn(5717), #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    )
))

fig.show()