In [70]:
import pandas as pd
import numpy as np
import json
import ast
import os
import re

In [2]:
def format_lyrics(lyrics):
    """Normalise a lyrics string for comparison.

    Removes the punctuation characters . , ! : ? " ' « », replaces every run
    of hyphens, dashes and spaces with a single space, trims leading and
    trailing whitespace, and lower-cases the result.
    """
    without_punctuation = re.sub('[.,!:?\"\'«»]', '', lyrics)
    single_spaced = re.sub('[-–—— ]+', ' ', without_punctuation)
    return single_spaced.strip().lower()

In [3]:
def get_part_lyrics(part):
    """Return the lyric lines of a song part joined with newline characters.

    The first element of ``part`` is skipped; the caller uses it for the
    bracketed section header.
    """
    return '\n'.join(part[1:])

In [4]:
def get_part_info(part, all_singers, author, album, song):
    """Build one output row describing a single section of a song.

    Parameters
    ----------
    part : list of str
        Lines of the section. The first line is the header, written as
        ``[Part name]`` or ``[Part name: Singer A & Singer B, Singer C]``;
        the remaining lines are the lyrics.
    all_singers : list of str
        Singers accepted for this song. Its first entry is used when the
        header names no singer.
    author, album, song
        Values copied unchanged into the returned row.

    Returns
    -------
    list or None
        ``[author, album, song, singer, part_name, part_lyrics]``, or
        ``None`` when the header is missing or malformed, the section has no
        lyrics, or none of the singers named in the header is in
        ``all_singers``.
    """
    if not part:
        return None

    # Strip first: a trailing space or a stray '\r' after the closing bracket
    # would otherwise leave the ']' inside the parsed header text.
    header = part[0].strip()
    closing = header.rfind(']')
    if not header.startswith('[') or closing == -1:
        return None

    header_fields = header[1:closing].split(':')
    part_name = header_fields[0]

    if len(header_fields) == 1:
        # No singer named in the header: fall back to the first known singer.
        singer = all_singers[0] if all_singers else None
    else:
        # Several singers may share a section, separated by ',' or '&';
        # keep the first one that is a known singer of this song.
        candidates = map(str.strip, re.split(',|&', header_fields[1]))
        singer = next((name for name in candidates if name in all_singers), None)

    part_lyrics = get_part_lyrics(part)
    if part_lyrics == '' or not singer:
        return None
    return [author, album, song, singer, part_name, part_lyrics]

In [92]:
# Load the songs table from CSV (the first CSV column becomes the index)
# and preview the first rows.
songs = pd.read_csv('songs_dataset.csv', index_col=0)
songs.head()

Unnamed: 0,Author,Album,Song,Featuring,Lyrics
0,$uicideBoy$,I Want to Die in New Orleans,King Tulip,[],"\n\n[Interlude: Max Beck]\n""They changed so mu..."
1,$uicideBoy$,I Want to Die in New Orleans,Bring Out Your Dead,[],\n\n[Intro: Koopsta Knicca & YUNG MUTT]\nYou g...
2,$uicideBoy$,I Want to Die in New Orleans,Nicotine Patches,[],\n\n[Intro (Distorted)]\nTriple 6 'till death\...
3,$uicideBoy$,I Want to Die in New Orleans,"10,000 Degrees",[],"\n\n[Intro: Lil Gin]\nLoad up my big nine, now..."
4,$uicideBoy$,I Want to Die in New Orleans,122 Days,[],"\n\n[Verse 1: LORD OF LONELINESS]\nUh, local b..."


In [93]:
# Dimensions of the loaded table as (rows, columns).
songs.shape

(34753, 5)

In [197]:
def get_parts_from_song(song, verbose=True):
    """Split one song's lyrics into sections and return them as a DataFrame.

    Parameters
    ----------
    song : pandas.Series
        One row of the songs table providing the 'Author', 'Album', 'Song',
        'Featuring' and 'Lyrics' fields. 'Featuring' is parsed with
        ``ast.literal_eval`` and is expected to hold a list literal.
    verbose : bool, default True
        Print the author, album, title and singer lists while parsing.

    Returns
    -------
    pandas.DataFrame
        One row per section kept by ``get_part_info``, with the columns
        'Author', 'Album', 'Song', 'Singer', 'Song Part' and 'Lyrics'.
        Empty when no section qualifies.
    """
    columns = ['Author', 'Album', 'Song', 'Singer', 'Song Part', 'Lyrics']

    # Read fields by name so the function does not depend on column order.
    author, album, title = song['Author'], song['Album'], song['Song']
    featuring, lyrics = song['Featuring'], song['Lyrics']

    # A missing value (NaN) is not a string and cannot be parsed as a literal.
    feats = ast.literal_eval(featuring) if isinstance(featuring, str) else []
    all_singers = [author] + feats
    if verbose:
        print(author, album, title, feats, all_singers)

    if not isinstance(lyrics, str):
        return pd.DataFrame([], columns=columns)

    # Only a '[' that opens a line starts a section; brackets in the middle
    # of a lyric line (e.g. an inline "[?]") must not split the section.
    starts = [match.end() - 1
              for match in re.finditer(r'^[ \t]*\[', lyrics, flags=re.MULTILINE)]
    bounds = starts + [len(lyrics)]

    rows = []
    for begin, end in zip(bounds, bounds[1:]):
        part_lines = lyrics[begin:end].strip().split('\n')
        info = get_part_info(part_lines, all_singers, author, album, title)
        if info:
            rows.append(info)

    return pd.DataFrame(rows, columns=columns)

In [198]:
# Parse the rows at positions 153-158 one song at a time and stack the
# per-song frames into a single DataFrame. The dict keys (each song's index
# label) become the outer index level, so `parts.loc[<song label>]` returns
# the parts of that song.
parts = pd.concat(
    {label: get_parts_from_song(row) for label, row in songs.iloc[153:159].iterrows()}
)

$uicideBoy$ Gray/Grey Loot ['Ramirez', 'Black Smurf'] ['$uicideBoy$', 'Ramirez', 'Black Smurf']
$uicideBoy$ Gray/Grey Loot ['Ramirez', 'Black Smurf'] ['$uicideBoy$', 'Ramirez', 'Black Smurf']
$uicideBoy$ Gray/Grey Pontiac $unfire [] ['$uicideBoy$']
$uicideBoy$ Gray/Grey Magnolia ['KirbLaGoop'] ['$uicideBoy$', 'KirbLaGoop']
$uicideBoy$ Gray/Grey Gabapentin Getaway [] ['$uicideBoy$']
$uicideBoy$ Gray/Grey $hrimp Poboy ['Supa Sortahuman'] ['$uicideBoy$', 'Supa Sortahuman']
$uicideBoy$ Gray/Grey Prettyleaf [] ['$uicideBoy$']


In [203]:
# Show the parsed parts for the song with index label 158.
parts.loc[158]

Unnamed: 0,Author,Album,Song,Singer,Song Part,Lyrics
0,$uicideBoy$,Gray/Grey,$hrimp Poboy,Supa Sortahuman,Verse 3,"$uicidal tendencies, we are not the same\nTryn..."


In [55]:
# Keep only songs that list at least one featured artist. 'Featuring' holds
# the text '[]' for songs without guests, which `notna` alone would keep,
# so both the missing-value and the empty-list encodings are excluded.
has_featuring = songs['Featuring'].notna() & (songs['Featuring'] != '[]')
songs[has_featuring]

Unnamed: 0,Author,Album,Song,Featuring,Lyrics
31,$uicideBoy$,KILL YOURSELF Part XIV: The Vulture Saga,Venom,['Shakewell'],"\n\n[Intro: D.J. Live Wire]\nRatatatatatata, b..."
41,$uicideBoy$,KILL YOURSELF Part XV: The Coast of Ashes Saga,Pictures,['Maxo Kream'],"\n\n[Chorus: Lil Noid]\nTry me, try me if you ..."
47,$uicideBoy$,Eternal Grey,BREAKDALAW2K16,['Pouya'],\n\n[Chorus: Three 6 Mafia & 7TH WARD CHARIZAR...
52,$uicideBoy$,Eternal Grey,Uglier,['Da$H'],\n\n[Verse 1: $LICK $LOTH]\nPale faces that su...
53,$uicideBoy$,Eternal Grey,Water $uicide,['Chris Travis'],\n\n[Verse 1: YUNG $NOW]\nFucking with the cli...
55,$uicideBoy$,Eternal Grey,275 $uicide,['Yung Simmie'],\n\n[Verse 1: BIG GRIEVE]\nCreepin' out the mo...
59,$uicideBoy$,Eternal Grey,Ultimate $uicide,['Denzel Curry'],"\n\n[Verse 1: YUNG PLAGUE]\nBitch, I'm Yung Pl..."
79,$uicideBoy$,Dark Side of the Clouds,Marlboros & White Widow,['Germ'],"\n\n[Intro: Crime Boss]\nHit, hit, hit—\nHits ..."
80,$uicideBoy$,Dark Side of the Clouds,Golden Calf,"['Fat Nick', 'Eddy Baker']",\n\n[Intro: Lord Infamous]\nBullets knocking n...
82,$uicideBoy$,Dark Side of the Clouds,Grayscale,"['Ramirez', 'Black Smurf', 'JGRXXN']",\n\n[Hook: Project Pat & Juicy J]\nNiggas ain'...
