In [12]:
import os
import re
import string
import random

NEWLINE_TOKEN = 'newlinetoken'

In [13]:
class Vertex(object):
  """One distinct word in the graph, plus weighted edges to the words seen after it."""

  def __init__(self, value):
    # The word this vertex stands for.
    self.value = value
    # Maps a following Vertex -> number of times that transition was recorded.
    self.adjacent = {}

  @property
  def neighbors(self):
    """Successor vertices, in the order their edges were first recorded."""
    return list(self.adjacent.keys())

  @property
  def neighbors_weights(self):
    """Transition counts, positionally aligned with `neighbors`."""
    return list(self.adjacent.values())

  def increment_edge(self, vertex):
    """Record one more transition from this vertex to `vertex`."""
    seen_so_far = self.adjacent.get(vertex, 0)
    self.adjacent[vertex] = seen_so_far + 1

  def next_word(self):
    """Return a successor Vertex drawn at random, weighted by transition count.

    With no recorded successors the underlying random.choices call fails
    (IndexError on CPython).
    """
    candidates = self.neighbors
    counts = self.neighbors_weights
    picked = random.choices(candidates, weights=counts)
    return picked[0]

  def __str__(self):
    return f'Vertex: {self.value}'

In [14]:
class Graph(object):
  """Registry of Vertex objects keyed by the word each one represents."""

  def __init__(self):
    # word value -> Vertex
    self.vertices = {}

  def get_vertex(self, value):
    """Return the vertex stored under `value`, creating and storing it on first use."""
    try:
      return self.vertices[value]
    except KeyError:
      created = Vertex(value)
      self.vertices[value] = created
      return created

  def get_next_word(self, vertex):
    """Look up the stored vertex with `vertex.value` and return its randomly chosen successor."""
    stored = self.vertices[vertex.value]
    return stored.next_word()

In [15]:
def get_words_from_text(text_path):
  """Read a UTF-8 text file and return its normalized word tokens.

  Processing order:
    1. Each bracketed annotation (``[...]`` on a single line) is replaced by
       a space.
    2. Every line feed is replaced by the standalone token NEWLINE_TOKEN so
       line structure survives tokenization.
    3. The text is lower-cased and the ASCII punctuation listed in
       string.punctuation is removed.
    4. The result is split on whitespace.

  Args:
    text_path: path of the file to read.

  Returns:
    A list of str tokens; line breaks appear as NEWLINE_TOKEN entries.

  Raises:
    OSError: if the file cannot be opened.
    UnicodeDecodeError: if the file content is not valid UTF-8.
  """
  with open(text_path, 'rb') as f:
    text = f.read().decode('utf-8')

  # Match each bracketed tag separately. A greedy `.+` would run from the
  # first '[' to the LAST ']' on a line and delete any words between two tags.
  text = re.sub(r'\[[^\]\n]+\]', ' ', text)
  text = text.replace('\n', f' {NEWLINE_TOKEN} ')
  text = text.lower()
  # NOTE(review): only ASCII punctuation is stripped; characters such as
  # '¿' or '¡' would stay attached to words if the corpus contains them.
  text = text.translate(str.maketrans('', '', string.punctuation))
  # split() with no argument already collapses runs of whitespace.
  return text.split()

In [16]:
def make_graph(words):
  """Build a Graph in which every pair of consecutive words adds one to an edge count.

  Args:
    words: iterable of word tokens, in corpus order.

  Returns:
    A Graph holding one vertex per distinct word; each vertex's `adjacent`
    map counts how often each other word immediately followed it.
  """
  graph = Graph()

  # Resolve every token to its (shared) vertex first, preserving corpus order.
  sequence = [graph.get_vertex(token) for token in words]

  # Walk consecutive pairs: (w0, w1), (w1, w2), ...
  for source, target in zip(sequence, sequence[1:]):
    source.increment_edge(target)

  return graph

In [17]:
def compose(graph, initial_word, length=50):
  """Generate a word sequence by a weighted random walk over `graph`.

  Args:
    graph: object providing get_vertex(value) and get_next_word(vertex);
      the vertices it returns expose `value` and an `adjacent` mapping.
    initial_word: word to start from. It is looked up with get_vertex, so an
      unseen word is added to the graph as a vertex with no successors.
    length: maximum number of words to produce.

  Returns:
    A list of at most `length` word strings beginning with `initial_word`.
    The list is shorter when the walk reaches a word with no recorded
    successor.
  """
  composition = []

  word = graph.get_vertex(initial_word)
  for _ in range(length):
    composition.append(word.value)
    if not word.adjacent:
      # Dead end: nothing ever followed this word (unseen start word, or a
      # word that only occurs as the very last token). Stop here instead of
      # letting the random draw fail on an empty population.
      break
    word = graph.get_next_word(word)

  return composition

In [25]:
words = []

# os.listdir returns entries in arbitrary order. Sorting keeps the token
# sequence, and therefore the transitions across song boundaries, identical
# on every run.
for artist in sorted(os.listdir('songs')):
    artist_dir = os.path.join('songs', artist)
    if not os.path.isdir(artist_dir):
        # A stray file directly under songs/ is not an artist folder.
        continue
    for song in sorted(os.listdir(artist_dir)):
        words.extend(get_words_from_text(os.path.join(artist_dir, song)))

In [31]:
# Total number of tokens loaded from all songs (NEWLINE_TOKEN entries included).
len(words)

3176

In [32]:
# Number of distinct tokens, i.e. how many vertices make_graph will create.
len(set(words))

486

In [26]:
# Build the word-transition graph from the full token sequence.
graph = make_graph(words)

In [27]:
# Generate up to 100 tokens starting from 'quiero' and render them as lines:
# a NEWLINE_TOKEN ends the current line, any other token is followed by a space.
composition = compose(graph, 'quiero', length=100)
for word in composition:
    if word != NEWLINE_TOKEN:
        print(word, end=' ')
        continue
    print()

quiero andar tan solo 
nadie sabe de la ciudad no encontras momento 


cuanto te llene algo que valga la hora azul 

pero no quiero un poco mas alla 
la hora azul 
todos guardamos algo que perderse 
no te quiero quise asi como lo que fui 

y sonreir 

no toda su error se mentir y me ves 
tarde comprendi 
lo unico que hacer 

nadie quiere andar 
hoy es medianoche y sonreir 
tarde comprendi 
no quererte 
pensacola pensacola 
ya lo 