In [15]:
import json
import os
import re
import sys

import pandas as pd

import repetition_matrix

In [16]:
from lyricsgenius import Genius

# The Genius API token is read from the GENIUS_ACCESS_TOKEN environment
# variable so that it never lands in the notebook or in version control.
# A missing variable raises KeyError here rather than failing later.
# NOTE(review): the token that was previously hard-coded in this cell has been
# exposed and should be revoked.
genius = Genius(
    os.environ['GENIUS_ACCESS_TOKEN'],
    retries=50
)

In [17]:
# Alternation matching any section-header tag such as "[Intro]" or
# "[Verse 2: Artist]". get_chorus() uses it to detect where a chorus ends.
# "Verse" and "Hook" are matched by prefix so that un-numbered "[Verse]" and
# singular "[Hook]" headers are recognised as section boundaries as well as
# "[Verse 1]" / "[Hooks]".
song_parts_regex = "|".join(
    rf"\[{tag}.*?\]"
    for tag in (
        "Intro",
        "Verse",
        "Refrain",
        "Pre-Chorus",
        "Chorus",
        "Post-Chorus",
        "Hook",
        "Riffs/Basslines",
        "Scratches",
        "Bridge",
        "Interlude",
        "Break",
        "Skit",
        "Collision",
        "Instrumental or Solo",
        "Ad lib",
        "Segue",
        "Outro",
    )
)

In [18]:
def get_chorus(song_object, section_pattern=None):
    """Return the lines of the first chorus found in a song's lyrics.

    The lyrics are split on newlines and empty lines are skipped. Collection
    starts on the line after the first one matching ``[Chorus...]`` and stops
    at the next line that matches a section header.

    Args:
        song_object: Mapping with a ``'lyrics'`` entry holding the lyrics as a
            single newline-separated string. A missing or ``None`` entry is
            treated as empty lyrics.
        section_pattern: Regex recognising any section-header line. Defaults
            to the module-level ``song_parts_regex``.

    Returns:
        A list of chorus lines without the ``[Chorus]`` header, or an empty
        list when the lyrics contain no chorus header.
    """
    if section_pattern is None:
        # Resolved at call time so a redefined song_parts_regex is picked up.
        section_pattern = song_parts_regex
    # Compile once per call instead of re-parsing the pattern for every line.
    section_header = re.compile(section_pattern)
    chorus_header = re.compile(r"\[Chorus.*?\]")

    lyrics = song_object.get('lyrics') or ''
    chorus = []
    in_chorus = False
    for line in lyrics.split('\n'):
        if line == '':
            continue
        if in_chorus:
            if section_header.match(line):
                # Next section reached: the first chorus is complete.
                break
            chorus.append(line)
        elif chorus_header.match(line):
            in_chorus = True
    return chorus

In [32]:
def load_jsons(start=0, stop=23500, step=100, data_dir='test_data'):
    """Load and concatenate the batched song JSON files.

    Batch files are named ``song_jsons_{i}_to_{i + step}.json`` for every
    ``i`` in ``range(start, stop, step)``. With the defaults this reads
    ``test_data/song_jsons_0_to_100.json`` through
    ``test_data/song_jsons_23400_to_23500.json``.

    Args:
        start: First batch offset (inclusive).
        stop: End of the offset range (exclusive).
        step: Batch size used in the file names.
        data_dir: Directory containing the batch files.

    Returns:
        A single list with the contents of every batch file, in file order.

    Raises:
        FileNotFoundError: If an expected batch file is missing.
    """
    song_objects = []
    for i in range(start, stop, step):
        path = f'{data_dir}/song_jsons_{i}_to_{i + step}.json'
        # Explicit encoding: do not depend on the platform default.
        with open(path, encoding='utf-8') as json_file:
            song_objects.extend(json.load(json_file))
    return song_objects

In [33]:
# Read every batch file under test_data/ into one flat list of song objects.
song_objects = load_jsons()

In [34]:
# Spot-check chorus extraction on a single song picked by index.
get_chorus(song_objects[7000])

["I'm hooked on a feelin'",
 "I'm high on believin'",
 "That you're in love with me"]

In [39]:
# Despite its name, this list holds plain (title, artist, chorus) tuples; no
# repetition matrix is attached here. Songs with an empty chorus are dropped.
choruses_with_repetition_matrices = [
    entry
    for entry in (
        (item['full_title'], item['artist_names'], get_chorus(item))
        for item in song_objects
    )
    if entry[2]
]

# The same records as dicts keyed by song / artist / chorus.
songs = [
    {'song': title, 'artist': artist_names, 'chorus': chorus_lines}
    for title, artist_names, chorus_lines in choruses_with_repetition_matrices
]

In [42]:
# Keep only songs whose chorus has at least this many lines.
MIN_CHORUS_LINES = 4
songs = [song for song in songs if len(song['chorus']) >= MIN_CHORUS_LINES]
# Preview a few records instead of dumping the entire list into the output.
songs[:5]

[{'song': 'Blinding Lights by\xa0The\xa0Weeknd',
  'artist': 'The Weeknd',
  'chorus': ["I said, ooh, I'm blinded by the lights",
   "No, I can't sleep until I feel your touch",
   "I said, ooh, I'm drowning in the night",
   "Oh, when I'm like this, you're the one I trust",
   'Hey, hey, hey']},
 {'song': 'Radioactive by\xa0Imagine\xa0Dragons',
  'artist': 'Imagine Dragons',
  'chorus': ['Whoa-oh, whoa',
   "I'm radioactive, radioactive",
   'Whoa-oh, whoa',
   "I'm radioactive, radioactive"]},
 {'song': 'Sail by\xa0AWOLNATION',
  'artist': 'AWOLNATION',
  'chorus': ['Sail', 'Sail', 'Sail', 'Sail', 'Sail']},
 {'song': 'How Do I Live by\xa0LeAnn\xa0Rimes',
  'artist': 'LeAnn Rimes',
  'chorus': ['How do I live without you? I want to know',
   'How do I breathe without you if you ever go?',
   'How do I ever, ever survive?',
   'How do I, how do I, oh, how do I live?']},
 {'song': 'Counting Stars by\xa0OneRepublic',
  'artist': 'OneRepublic',
  'chorus': ["Lately, I've been, I've been l

In [43]:
# Tabulate the filtered records; each dict in `songs` becomes one row.
df = pd.DataFrame(songs)

In [45]:
# Persist the chorus table as JSON using pandas' default orientation.
# NOTE(review): the ValueError recorded under this cell complains about
# 'index=False', which this call does not pass — the output looks stale from
# an earlier version of the cell; re-run to confirm the export succeeds.
df.to_json('matrix_23500.json')

ValueError: 'index=False' is only valid when 'orient' is 'split' or 'table'

In [85]:
# Number of loaded songs for which a non-empty chorus could be extracted.
sum(1 for candidate in song_objects if get_chorus(candidate))

6565

In [10]:
def create_chorus_repetition_matrix(text, low_color='1DB954', high_color='E3FAEB'):
    """Render the repetition matrix of a chorus as SVG.

    The input goes through the ``repetition_matrix`` helpers in order:
    ``normalized_lines`` -> ``vectors_from_lines`` -> ``generate_matrix`` ->
    ``svg_for_matrix``.

    Args:
        text: Chorus to analyse, passed unchanged to
            ``repetition_matrix.normalized_lines``. This notebook calls the
            function with the list of lines returned by ``get_chorus``.
        low_color: Colour forwarded to ``svg_for_matrix`` as ``low_color``
            (hex string without a leading ``#``).
        high_color: Colour forwarded to ``svg_for_matrix`` as ``high_color``
            (hex string without a leading ``#``).

    Returns:
        Whatever ``repetition_matrix.svg_for_matrix`` returns; the recorded
        notebook output shows an SVG document string.
    """
    lines = repetition_matrix.normalized_lines(text)
    vectors = repetition_matrix.vectors_from_lines(lines)
    matrix = repetition_matrix.generate_matrix(vectors)
    return repetition_matrix.svg_for_matrix(
        matrix, low_color=low_color, high_color=high_color
    )

In [11]:
# Demo: build the repetition-matrix SVG for the first loaded song's chorus.
create_chorus_repetition_matrix(get_chorus(song_objects[0]))

'<?xml version="1.0" encoding="utf-8"?><svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" width="512.0" height="512.0" version="1.0"><rect style="fill: #e3faeb" width="104.4" height="104.4" x="0" y="0" /><rect style="fill: #47c774" width="104.4" height="104.4" x="102.4" y="0" /><rect style="fill: #99e2b2" width="104.4" height="104.4" x="204.8" y="0" /><rect style="fill: #5fcf87" width="104.4" height="104.4" x="307.20000000000005" y="0" /><rect style="fill: #1db954" width="104.4" height="104.4" x="409.6" y="0" /><rect style="fill: #47c774" width="104.4" height="104.4" x="0" y="102.4" /><rect style="fill: #e3faeb" width="104.4" height="104.4" x="102.4" y="102.4" /><rect style="fill: #47c774" width="104.4" height="104.4" x="204.8" y="102.4" /><rect style="fill: #43c571" width="104.4" height="104.4" x="307.20000000000005" y="102.4" /><rect style="fill: #1db954" width="104.4" height="104.4" x="409.6" y="102.4" /><rect style="fill: #99e2b2" width="104.4" height="1