In [335]:
import base64
from bs4 import BeautifulSoup, NavigableString, Tag
from copy import copy
from diff_match_patch import *
import functools
from google.protobuf.json_format import ParseDict
from google.protobuf.message import Message as ProtobufMessageType
from mscxyz import utils
from music21 import *
import nltk.data
from nltk.tokenize import word_tokenize
import os
from pathlib import Path
from Pro7_File_API_Python import presentation_pb2  # Used to decode *.pro files
import proto
import pyphen
from random import randint
import re
import shutil
from striprtf.striprtf import rtf_to_text
import sys
import uuid
from xmldiff import *
import copy

sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
dic = pyphen.Pyphen(lang='en_US')

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

home_dir = Path.expanduser(Path.home())

In [336]:
hymn_info = {}
def make_hymn_info(hymnal, number, music_type):
    hymn_info={'hymnal':hymnal, 'number':hymn_number, 'music_type':music_type}

    print("Collecting hymn info...")
    return hymn_info
def convert_protobuf_to_proto_plus(message):
    """Converts a protobuf message to a proto-plus message.

    Args:
        message: an instance of google.protobuf.message.Message

    Returns:
        A proto_plus version of the protobuf proto.
    """
    if isinstance(message, ProtobufMessageType):
        return proto.Message.wrap(message)
    elif isinstance(message, proto.Message):
        return message
    else:
        raise TypeError(
            f"Cannot convert type {type(message)} to a proto_plus protobuf."
        )
def get_hymn_media(hymn_info, media_type, hymnal_parent_dir="/Users/chaddorsey/dev/python/web_scraping/sundays_seasons_scrape/completed-hymns/"):
    #Find hymn media files
    print("Gathering hymn media...")

    music_type = hymn_info['music_type']
    number = hymn_info['number']
    hymnal = hymn_info['hymnal']
    #title = hymn_info['title']

    hymnal_dirs_path = hymnal_parent_dir #"/Users/chaddorsey/dev/python/web_scraping/sundays_seasons_scrape/completed-hymns/"
    hymnal_hymn_folders = os.listdir(hymnal_dirs_path + str(hymnal) + "/")

    print(hymnal_hymn_folders)

    #Find the directory that contains the hymn number
    for dir in hymnal_hymn_folders:
        if str(hymnal+"_"+number) in dir:
            hymn_dir = dir

    hymn_folder = str(hymnal_dirs_path) + str(hymnal) + "/" + str(hymn_dir) + "/"
    #hymn_folder = hymn_folder_raw.replace("(","\(").replace(")", "\)")
    #Record hymn folder in hymn_info
    hymn_info['hymn_folder'] = hymn_folder


    hymn_files = os.listdir(hymn_folder)
    print(hymn_files)

    #print(hymn_files)
    
    #If xml files specified, return list of paths to xml files
    if media_type == "musicxml":
        xml_file_paths = []

        harmony_found = False
        for file in hymn_files:
            
            if music_type == "harmony":
                    
                if "Harmony" in file and file.endswith(".musicxml"):
                    hymn_musicxml = file
                    hymn_musicxml_path = hymn_folder + hymn_musicxml
                    xml_file_paths.append([hymn_musicxml, hymn_musicxml_path])
                    harmony_found = True
                    print("Harmony file found")
        
        if harmony_found == False:
            if music_type == "harmony":
                print("No harmony file found. Using melody instead.")
                hymn_info['music_type'] = "melody"
                music_type = "melody"
            for file in hymn_files:
                if music_type == "melody":
                    if "Melody" in file and file.endswith(".musicxml"):
                        print("here")
                        hymn_musicxml = file
                        hymn_musicxml_path = hymn_folder + hymn_musicxml
                        xml_file_paths.append([hymn_musicxml, hymn_musicxml_path])

        return xml_file_paths
    
    
    #If png files specified, return list of paths to png files
    if media_type == "png":
        hymn_pngs = []
        png_file_paths = []

        for file in hymn_files:
            if file.endswith(".png"):
                hymn_pngs.append(file)
        hymn_pngs = sorted(hymn_pngs, key=lambda x: int(x.split("-")[-1].split(".")[0]))
        
        #Construct and return list of file paths for png
        file_path_list = []
        for png in hymn_pngs:
            png_file_paths.append([str(png), str(hymn_folder + png)])
    
        return png_file_paths


    if media_type == "text":
        text_file_paths = []
        for file in hymn_files:
            if file.endswith(".txt") or file.endswith(".rtf"):
                if "PermissionsForm" not in file:
                    text_file_paths.append([str(file), str(hymn_folder + file)])
        
        return text_file_paths
        
def unroll_and_clean(musicxml_file_path, music_info):
    #TODO — Identify final measure of the stanza and add a new system break to the following measure for each appended section during unrolling process
    #Read in and unroll musicxml file
    print("Unrolling and cleaning musicxml file...")
    filename = musicxml_file_path
    with open(filename, "r") as file:
        music_xml = file.read()
    soup = BeautifulSoup(music_xml, features='xml')

    music_type = music_info['music_type']

    #Remove title and attribution text
    try:
        for el in soup.find_all('words'):
            el.parent.parent.decompose()
    except:
        pass

    treble_part = soup.part.extract()
    #bass_part = soup.part.extract()

    #extract attributes and save for later
    try:
        treble_attributes = treble_part.attributes.extract()
    except AttributeError:
        pass

    verse_count = len(treble_part.find('lyric').find_next_siblings("lyric"))+1
    print(verse_count)

    holding_tag_treble = soup.new_tag("holding-tag-treble")
    print_tag = soup.new_tag("print-tag")

    #Add new-system = yes to the first measure's print tag
    treble_part.find('measure').print['new-system'] = "yes"

    for n in range(1,verse_count+1):
        match_string = "[^" + str(n) + "]"
        treble_temp = copy.copy(treble_part)
        
        for lyric in treble_temp.find_all('lyric',{'number':re.compile(match_string)}):
            lyric.decompose()
        for lyric in treble_temp.find_all('lyric'):
            lyric['number']=1
        
        holding_tag_treble.append(treble_temp)

        #If n = 1, pull the print tag out from the first measure into a saved tag
        if n == 1:
            print_tag = treble_temp.find('measure').print.extract()
            
        #Insert the print tag into the first measure of the verse
        holding_tag_treble.find('measure').insert(0,print_tag)
        part_tag = holding_tag_treble.find('part').unwrap()
        
    treble_complete = part_tag
    treble_complete.append(holding_tag_treble)
    treble_complete.find('holding-tag-treble').unwrap()

    #Add attributes back
    treble_complete.find('measure').print.insert_after(treble_attributes)
    #print(treble_complete)
    soup.find('score-partwise').append(treble_complete)


    #Do bass clef too if needed

    if music_type == "harmony":
        bass_part = soup.find('part').extract()
        try:
            bass_attributes = bass_part.attributes.extract()
        except AttributeError:
            pass

        #Add new-system = yes to the first measure's print tag
        bass_part.find('measure').print['new-system'] = "yes"
        
        holding_tag_bass = soup.new_tag("holding-tag-bass")

        for n in range(1,verse_count+1):
            match_string = "[^" + str(n) + "]"
            bass_temp = copy.copy(bass_part)
            
            for lyric in bass_temp.find_all('lyric',{'number':re.compile(match_string)}):
                lyric.decompose()
            for lyric in bass_temp.find_all('lyric'):
                lyric['number']=1
            
            holding_tag_bass.append(bass_temp)
            part_tag = holding_tag_bass.find('part').unwrap()
        
        bass_complete = part_tag
        bass_complete.append(holding_tag_bass)
        bass_complete.find('holding-tag-bass').unwrap()

        #Add attributes back
        bass_complete.find('measure').print.insert_after(bass_attributes)
        #print(bass_complete)
        soup.find('score-partwise').append(bass_complete)


    #Try removing extraneous voice 2 notes
    for el in soup.find_all('voice', text = re.compile('[2]')):
        el.parent.decompose()

    #Remove extra double barlines
    for el in soup.find_all('barline'):
        #el.string.replace_with("regular")
        el.decompose()

    #Create double barline tag for end
    barline_tag = soup.new_tag("barline")
    bar_style_tag = soup.new_tag("bar-style")
    bar_style_tag.string = "light-heavy"
    barline_tag.append(bar_style_tag)

    #Add back double barline tag at end TODO: CHECK THIS CODE
    if music_type == "harmony":
        for p_num in ['P1','P2']:
            if soup.find('part',{'id':p_num}).find_all('measure')[-1].find('barline') == None:
                soup.find('part',{'id':p_num}).find_all('measure')[-1].append(barline_tag)
            else:
                soup.find('part',{'id':p_num}).find_all('measure')[-1].find('barline').find('bar-style').string = "light-heavy"
    else:
        if soup.find('part',{'id':'P1'}).find_all('measure')[-1].find('barline') == None:
            soup.find('part',{'id':'P1'}).find_all('measure')[-1].append(barline_tag)
        else:
            soup.find('part',{'id':'P1'}).find_all('measure')[-1].find('barline').find('bar-style').string = "light-heavy"


    #Write unrolled and cleaned musicxml file out to disk

    filename_save = musicxml_file_path.replace(".musicxml","-unrolled.musicxml")

    f = open(filename_save, "w")
    f.write(str(soup))
    f.close()
    return filename_save


def parse_words_file(words_file_path, hymn_info):

    #Open and parse official lyrics file
    print("Parsing official lyrics file...")

    words_filename_load = words_file_path

    #words_filename_load = hymnal + "-" + hymn_number + "-"+ suffix +".rtf"
    with open(words_filename_load, "r") as file_words:
        music_words = file_words.read()

    music_words_txt = rtf_to_text(music_words).strip()

    hymn_info['title'] = music_words_txt.split("\n")[0].strip()

    verses_raw = music_words_txt.split("Text:")[0].strip().split("\n\n")[1:]

    #print(verses_raw[-1]) #.split("\n")[0])
    #verses_raw = verses_raw[0].split("\n\n")[0]
    #print(verses_raw)

    def onespace(string):
        splits = string.split()
        for spl in splits:
            if spl == '':
                splits.remove(spl)
        cleanline = " ".join(splits)
        return cleanline

    verses_newlines = []
    verses_text = []
    official_text_verses = []
    for verse in verses_raw:
        verses_newlines.append(re.sub(r"[\t]*", "", verse[1:]).strip())
        verses_text.append(re.sub(r"[\n]", " ", re.sub(r"[\t]*", "", verse[1:])).strip())


    for line in verses_text:
        line_clean = onespace(line)
        #splits = line.split()
        #for spl in splits:
        #    if spl == '':
        #        splits.remove(spl)
        #official_text_verses.append(" ".join(splits))
        official_text_verses.append(line_clean)
            #if (spl =='') or (spl == ' '):
            #splits.remove(spl)
    #print(new_verses_text[0])

    official_text_string = official_text_verses[0].strip()
    for i in range(1, len(official_text_verses)):
        official_text_verses[i].strip()
        add_string = " " + official_text_verses[i]
        official_text_string += add_string
    #official_text_string = " ".join(official_text_verses)

    print(official_text_string)
    return official_text_string
def musicxml_lyrics_extract(musicxml_file):
    #Open musicxml file and parse with music21
    print("Extracting OCR'ed and syllable-parsed lyrics from musicxml file...")

    s = converter.parse(musicxml_file)

    #Test for file success
    #s.analyze('key')

    #Get indexed lyrics
    all_lyrics = search.lyrics.LyricSearcher(s).index()

    #Parse indexed lyrics into a single-line string
    m21_lyrics_by_word = []
    for i in range(len(all_lyrics)):
        if i==0:
            m21_lyrics_by_word.append(all_lyrics[i][5])
        else:
            if all_lyrics[i][1] != all_lyrics[i-1][2]:
                entry = " "+all_lyrics[i][5]
            else:
                entry = all_lyrics[i][5]
            m21_lyrics_by_word.append(entry)
        m21_lyrics_line = "".join(m21_lyrics_by_word)

    return m21_lyrics_line

def compare_and_patch_xml_lyrics(official_lyrics, musicxml_file):
    #Compare and patch xml lyrics
    print("Comparing and patching OCR'ed lyrics...")

    s = converter.parse(musicxml_file)

    #Test for file success
    #s.analyze('key')

    #Get indexed lyrics
    all_lyrics = search.lyrics.LyricSearcher(s).index()

    #Parse indexed lyrics into a single-line string
    m21_lyrics_by_word = []
    for i in range(len(all_lyrics)):
        if i==0:
            m21_lyrics_by_word.append(all_lyrics[i][5])
        else:
            if all_lyrics[i][1] != all_lyrics[i-1][2]:
                entry = " "+all_lyrics[i][5]
            else:
                entry = all_lyrics[i][5]
            m21_lyrics_by_word.append(entry)
        m21_lyrics_line = "".join(m21_lyrics_by_word)
    
    #Using lyrics list from extract function, create reference list of lyrics with start and end indices
    m21_lyrics_list = []
    for lyric in search.lyrics.LyricSearcher(s).index():
        entry = []
        entry = [lyric[1],lyric[2],lyric[5],lyric[4]]
        m21_lyrics_list.append(entry)

    #print(m21_lyrics_list[0])


    #Creating diffs and patches
    from diff_match_patch import diff_match_patch
    dmp = diff_match_patch()

    patches = dmp.patch_make(m21_lyrics_line,official_lyrics)
    diffs = dmp.diff_main(m21_lyrics_line,official_lyrics)
    

    #for patch in patches[0:3]:
    #    print(patch)
    #for diff in diffs[0:3]:
    #    print(diff)

    ##To test location reassignment by diffs
    #dmp.diff_xIndex(diffs,15)


    m21_lyrics_list_corrected = []

    #Adjust indices for start and end points of corrected lyrics
    for lyric in m21_lyrics_list:
        entry=[]
        new_start = dmp.diff_xIndex(diffs,lyric[0])
        new_end = dmp.diff_xIndex(diffs,lyric[1])
        entry = [new_start,new_end,lyric[2],lyric[3]]
        m21_lyrics_list_corrected.append(entry)

    #for correction in m21_lyrics_list_corrected[0:10]:
    #print(m21_lyrics_list_corrected)


    #Apply patches to correct m21 lyrics line
    m21_lyrics_line_corrected = dmp.patch_apply(patches, m21_lyrics_line)[0]

    print("corrected: ", m21_lyrics_line_corrected)


    m21_lyrics_updates = {}
    i=0
    for correction in m21_lyrics_list_corrected:
        m21_lyrics_updates[i] = {}
        m21_lyrics_updates[i]['start'] = correction[0]
        m21_lyrics_updates[i]['end'] = correction[1]
        m21_lyrics_updates[i]['text'] = m21_lyrics_line_corrected[correction[0]:correction[1]]
        #print(m21_lyrics_updates[i]['text'],end=" ")
        i+=1


    i=0
    for i in range(len(search.lyrics.LyricSearcher(s).index())):
        #all_lyrics[i].modify(start = m21_lyrics_updates[i]['start'], end = m21_lyrics_updates[i]['end'], text = m21_lyrics_updates[i]['text'])
        #all_lyrics[i][2] = m21_lyrics_updates[i]['end']
        #print(i, search.lyrics.LyricSearcher(s).index()[i])

        #TODO: Figure out why all caps "SOT TOW caused issues in ELW 382"

        print(search.lyrics.LyricSearcher(s).index()[i][4].text)
        print(m21_lyrics_updates[i]['text'])
        print("\n\n")

        search.lyrics.LyricSearcher(s).index()[i][4].text = m21_lyrics_updates[i]['text']
        search.lyrics.LyricSearcher(s).index()[i].modify(start = m21_lyrics_updates[i]['start'], end = m21_lyrics_updates[i]['end'])


    filename_save_lyricscorr = musicxml_file.replace(".musicxml","-lyricscorr.musicxml") #hymnal + "-" + hymn_number + "-"+ music_type + "-m21" + ".musicxml"
    s.write('musicxml', fp=filename_save_lyricscorr)
    return filename_save_lyricscorr

def newTag(tag, soup,attrs={}, tstr="",):
    n = soup.new_tag(tag)
    if (len(attrs) > 0):
        for k, v in attrs.items():
            n[k] = v
    if (len(tstr) > 0):
        n.string = tstr
    return n

def wrapTag(newTag, fromTagInclusive, toTagExclusive):
    fromTagInclusive.wrap(newTag)
    #p(fromTagInclusive.parent, "fromTag.parent")
    n = fromTagInclusive.parent
    #n = fromTagInclusive
    c = 0
    while 1:
        c += 1
        x = n.next_sibling
        if (x is None):
            break
        n.append(x)
        #print(c, x, n.next_sibling, isinstance(n.next_sibling, Tag), n.next_sibling.name if isinstance(n.next_sibling, Tag) else "~Tag", n.next_sibling.attrs if isinstance(n.next_sibling, Tag) else "~Tag")
        #if isinstance(n.next_sibling, Tag) and (n.next_sibling.name == 'p') and ('class' in n.next_sibling.attrs) and ('y' in n.next_sibling['class']):
        if (n.next_sibling == toTagExclusive):
            break
    return n, toTagExclusive
    
def extract_lyrics(multipage_musicxml_file,individual_lyrics=False):
    #from bs4 import BeautifulSoup
    print("Extracting lyrics from formatted musicxml file...")
    
    print(multipage_musicxml_file)
    with open(multipage_musicxml_file, "r") as file:
        music_xml = file.read()

    soup = BeautifulSoup(music_xml, 'xml')
    
    #Add new page tags to first measure
    new_page_tags = soup.find('part',{'id':'P1'}).find_all('print', {'new-page':'yes'})
    measures = soup.find('part',{'id':'P1'}).find_all('measure')

    first_measures = [soup.find('measure')]
    for tag in new_page_tags:
        first_measures.append(tag.parent)

    for i in range(0,len(first_measures)):
        n = newTag('slide-line',soup)
        if i == len(first_measures)-1:
            n, _ = wrapTag(n, first_measures[i], measures[-1])
            n.append(measures[-1])
        else:
            n, _ = wrapTag(n, first_measures[i], first_measures[i+1])


    slides = soup.find_all('slide-line')
    #print(len(slides), "slides found")

    lyrics_output=[]
    ln = 1
    lyrics_line=[]
    lyrics_slides = []

    for line in slides:
        #Clear lyrics list
        measures = []
        measure_widths=[]
        running_offset =int(float(0))
        measures = line.find_all('measure')

        
        lyrics_string = ""

        #print("***Line", ln, end = "***\n\n")
        #print("Measures:", len(measures))
        

        
        i=0
        for measure in measures:
            measure_widths.append(measure['width'])
            #current_measure_width =int(float(measure['width'])
            if i==0:
                running_offset =int(float(0)) 
            else:
                running_offset +=int(float(measure_widths[i-1]))

            i+=1

            lyrics=[]
            lyrics = measure.find_all('lyric')


            for lyric in lyrics:
                try:
                    if (note_x := lyric.parent['default-x']) != None:
                        #print(note_x)
                        note_x =int(float(note_x))
                except KeyError:
                    #print("No default-x on note")
                    note_x =int(float(0))
                try:
                    if (lyric_x := lyric['default-x']) != None:
                        #print(lyric_x)
                        lyric_x =int(float(lyric_x))
                except KeyError:
                    #print("No default-x on lyric")
                    lyric_x =int(float(0))
                
                #tab_offset = note_x + lyric_x
                #print("Offset from left edge of measure: ", tab_offset)
                #absolute_tab_stop = running_offset + tab_offset

                lyric_text = lyric.find('text').text
                lyric_syllabic = lyric.find('syllabic').text

                #print("\""+lyric_text + "\" (" + lyric_syllabic + ") at tab stop: ", absolute_tab_stop,"\n")
                if lyric_syllabic == "end" or lyric_syllabic == "single":
                    lyrics_string += lyric_text + " "
                    #print(lyric_text, end=' ')
                else:
                    #print(lyric_text,end='')
                    lyrics_string += lyric_text
                
                #print(lyric_text + " ",end = "")
                #Add text and tab stop value to running lyrics list
                lyrics_line_entry = [lyric_text,lyric_syllabic] #,absolute_tab_stop]
                lyrics_line.append(lyrics_line_entry)
            #print("Lyrics_line", lyrics_line)
        
        #print(lyrics_string, end="\n")
        lyrics_slides.append(lyrics_string)

        #print("\n")

        #Add slide lyrics info to master slide list
        lyrics_output.append(lyrics_line)
        lyrics_line = []
        ln+=1

    if individual_lyrics == True:
        return lyrics_output
    else:
        return lyrics_slides

    #print(len(slide_output), "slides")
    #print("Slide", n := randint(0,len(slide_output-1),), "lyric", l:= randint(0,len(slide_output[n-1])), ":", slide_output[n][l][0])

def create_cue_index_dict(pro7_file_obj,element_number=1):
    print("Indexing slide cues...")
    if element_number == None:
        el = 1
    else:
        el = element_number
    
    slide_order_list_raw = []
    slide_order_list = []
    cue_list = []
    #cue_UUID_list = []
    cue_dict = {}
    cue_index_dict = {}

    slide_order_list_raw = pro7_file_obj.cue_groups[0].cue_identifiers

    for sl in slide_order_list_raw:
        UUID=str(sl).split("\"")[1]
        print(UUID,end="\n")
        slide_order_list.append(UUID)
        
    #print(len(slide_order_list),end="\n")


    cue_list = pro7_file_obj.cues

    i=0
    for c in cue_list:
        try:
            escapes = ''.join([chr(char) for char in range(1, 32)])
            translator = str.maketrans('', '', escapes)
            cue_UUID = str(c.uuid).split("\"")[1]

            print(cue_UUID,end="\n")
            
            #tab_stops = c.actions[0].slide.presentation.base_slide.elements[el].element.text.attributes.paragraph_style.tab_stops
            #text_element = c.actions[0].slide.presentation.base_slide.elements[1].element.text.rtf_data.decode(encoding='cp1252')

            ###text_line_64 = proto_pres_obj_test_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][1]['element']['text']['rtf_data']

            #print(text_element,end="\n")
            #print("here")

            #text_line = text_element.split("0\n")[1].split("\n")[1].translate(translator)[:-1]

            #cue_dict[cue_UUID]=text_line
            cue_index_dict[slide_order_list.index(cue_UUID)] = i
            i+=1

        except IndexError:
            print("oops – Index Error")
            print("Cue number:",i)
            i+=1

    #ordered_cue_list = []
    #print("Reconstructed slide text:")
    #for UUID in slide_order_list:
    #    print(cue_dict[UUID])

    cue_index_dict = dict(sorted(cue_index_dict.items()))

    return cue_index_dict

def open_reading_template(filename):
    #filename = "Reading_template_slides"
    print("Reading slide template file...")

    filename_in = "./"+filename+".pro"

    open_file = filename_in
    if open_file != "":  # if folder dialog was not cancelled
        open_file = Path(open_file)
        print("Selected File: " + str(open_file))

        if str(open_file).split(".")[-1] == "pro":
            print("Presentation!")
            pro7_file_obj = presentation_pb2.Presentation()

            file1 = open(open_file, mode='rb')

            parse_error = False
            try:
                pro7_file_obj.ParseFromString(file1.read())
                
            except BaseException as err:
                #write_file_line(log_file, 'ERROR: ' + repr(err) + ' occurred trying to parse ' + file1.name)
                parse_error = True
            file1.close()
    return pro7_file_obj

def make_hymn_presentation(hymn_lines,hymn_info):
    print("Assembling hymn slides and lyrics...")
    #import get_hymn_media
    
    music_type = hymn_info['music_type']
    number = hymn_info['number']
    hymnal = hymn_info['hymnal']
    title = hymn_info['title']


    
        
    pro7_file_obj = open_reading_template("Hymn_template_slides")
    preface_slides = 1 #TODO – Fix this to use only one preface slide
    #starting_offset = 1
    cue_index_dict = create_cue_index_dict(pro7_file_obj,1)

    #Set location of primary substition text element
    el = 1
    



    #if reading_source['title'] == "Psalm":
    print(cue_index_dict)

    #Convert protobuf to proto-plus
    proto_pres_obj = convert_protobuf_to_proto_plus(pro7_file_obj)
    proto_pres_obj_dict = proto.Message.to_dict(proto_pres_obj)


    hymn_pngs = get_hymn_media(hymn_info, "png")
    
    
    media_path = "Media/Assets/"
    absolute_string_prefix = "file:///Users/chaddorsey/Documents/ProPresenter/"


    for slide_number in range(0,preface_slides):

        #Psalm workaround – TODO – Fix this weirdness! Likely an issue with element numbering
        #if (reading_source['type'] == "psalm-chanted") or (reading_source['type'] == "psalm-spoken"):
        #    slide_number = slide_number
        #    print(cue_index_dict[slide_number+starting_offset])
            

        ####TODO – fix reference to "n" in text lines below and substitute working text lines


        n=cue_index_dict[slide_number] #+starting_offset]
       
        preface_text = ""
        text_line_64 = proto_pres_obj_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['text']['rtf_data'] #.decode(encoding='cp1252')
        text_line = base64.b64decode(text_line_64).decode(encoding='cp1252')
        text_line_mod = text_line.replace("@@@",title)
        text_line_mod = text_line_mod.replace("$$$",hymnal).replace("&&&",number)
        text_line_mod_reencode = base64.b64encode(text_line_mod.encode(encoding='cp1252'))
        proto_pres_obj_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['text']['rtf_data'] = text_line_mod_reencode

    for slide_number in range(preface_slides,len(hymn_lines)+preface_slides,1):

        n=cue_index_dict[slide_number] #+starting_offset]
        
        #Read and modify text line
        text_to_insert = hymn_lines[slide_number-preface_slides]
        text_line_64 = proto_pres_obj_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][el]['element']['text']['rtf_data'] #.decode(encoding='cp1252')
        text_line = base64.b64decode(text_line_64).decode(encoding='cp1252')
        #print(n," ")
        #print(text_line,"\n")
        text_line_mod = text_line.replace("%%%"," "+text_to_insert)
        text_line_mod_reencode = base64.b64encode(text_line_mod.encode(encoding='cp1252'))

        #Update slide object with modified text line
        print("Adding lyrics text...")
        proto_pres_obj_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][el]['element']['text']['rtf_data'] = text_line_mod_reencode
    
        ##Put media into slide

        img_num = slide_number-preface_slides
        png_filename = hymn_pngs[img_num][0]
        png_filepath = hymn_pngs[img_num][1]


        #Copy file from hymn_pngs to ProPresenter Media folder
        shutil.copyfile(png_filepath, "/Users/chaddorsey/Documents/ProPresenter/" + media_path + png_filename)

        #Replace media path and UUID in protobuf object
        print("Inserting "+str(len(hymn_pngs))+" music images...")

        uuid_string = str(uuid.uuid4())
        pro_filepath = media_path + png_filename
        absolute_string = absolute_string_prefix + media_path + png_filename

        proto_pres_obj_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['fill']['media']['url']['absolute_string'] = absolute_string
        proto_pres_obj_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['fill']['media']['url']['local']['path'] = pro_filepath #['local']['path'] = pro_filepath
        proto_pres_obj_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['fill']['media']['uuid']['string'] = uuid_string

        #proto_pres_obj_test_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['fill']['media']['image']['drawing']['natural_size']['height']=130
        

    '''
    #Now that we've made all the slides we need for the reading, delete extraneous slides from the template
    uuids_to_delete = []
    for slide_number in range(len(reading_slides)+2,len(cue_index_dict),1):
        uuids_to_delete.append(slide_order_list[cue_index_dict[slide_number]])


    print("Deleting extra slides:") 
    for uuid in uuids_to_delete:
        for i in range(len(proto_pres_obj_dict['cues'])):
            if proto_pres_obj_dict['cues'][i]['uuid']['string'] == uuid:
                print("Deleting slide with UUID: "+uuid)
                del proto_pres_obj_dict['cues'][i]
                break
        for i in range(len(proto_pres_obj_dict['cue_groups'][0]['cue_identifiers'])):
            if proto_pres_obj_dict['cue_groups'][0]['cue_identifiers'][i]['string'] == uuid:
                print("Deleting slide reference with UUID: "+uuid)
                del proto_pres_obj_dict['cue_groups'][0]['cue_identifiers'][i]
                break


        #print(proto_pres_obj_dict['cues'][uuid])
        #del proto_pres_obj_dict['cues'][uuid]
    '''

    #When we're all done, use the ParseDict function to convert our dictionary back into a protobuf object using the Presentation class
    print("Converting Python dictionary back to protobuf object...")
    proto_pres_obj_mod = ParseDict(proto_pres_obj_dict,presentation_pb2.Presentation())

    #Write output file
    print("Writing final hymn presentation file...")
    filename_out = "./" + hymn_info['hymnal'] + "-" +hymn_info['number'] + "-final" + "_out.pro" #TODD consider + hymn_info['title']
    with open(filename_out, "wb") as fd:
        fd.write(proto_pres_obj_mod.SerializeToString())
    #return proto_pres_obj_mod
def delete_vbox_and_clean(mscx_file):
    #Read in and unroll musicxml file
    print("Removing title and header container...")
    filename = mscx_file

    with open(filename, "r") as file:
        mscx = file.read()

    soup = BeautifulSoup(mscx, features='xml')

    #Remove title and attribution text
    for el in soup.find_all('VBox'):
        el.decompose()

    for el in soup.find_all('Instrument'):
        el.decompose()

    for el in soup.find_all('Tempo'):
        el.decompose()
    
    #Write output file
    
    filename_out = "./" + filename.replace(".mscx","_clean.mscx")
    with open(filename_out, "w") as file:
        file.write(str(soup))
    
    return filename_out

In [337]:
##### BEGIN Program below this line #####

In [346]:
hymnal = "ELW"
hymn_number = "482"
music_type = "melody"

hymn_info = make_hymn_info(hymnal, hymn_number, music_type)

print(hymn_info)

files = get_hymn_media(hymn_info, "musicxml")
print(files[0][1])

unroll_file = unroll_and_clean(files[0][1],hymn_info)

words_file_path = get_hymn_media(hymn_info, "text")[0][1]
official_lyrics = parse_words_file(words_file_path, hymn_info)
patched_file = compare_and_patch_xml_lyrics(official_lyrics, unroll_file)



Collecting hymn info...
{'hymnal': 'ELW', 'number': '482', 'music_type': 'melody'}
Gathering hymn media...
['ELW_855_-_Crown_Him_with_Many_Crowns+atom8467', 'ELW_162_-_Glory_to_God_(Hurd)+atom9072', 'ELW_691_-_Accept,_O_Lord,_the_Gifts_We_Bring_WOV_759+atom8347', 'ELW_585_-_Hear_Us_Now,_Our_God_and_Father+atom8542', 'ELW_421_-_By_All_Your_Saints_(Thomas,_Apostle)_(KUORTANE)+atom359735', 'ELW_421_-_By_All_Your_Saints_(Mathias,_Apostle)_(KUORTANE)+atom359738', 'ELW_784_-_Grant_Peace,_We_Pray,_in_Mercy,_Lord_LBW_471+atom8527', 'ELW_351_-_O_Sacred_Head,_Now_Wounded_(isometric)+atom8736', 'ELW_508_-_As_Rain_from_the_Clouds+atom8377', 'ELW_281_-_Silent_Night,_Holy_Night!_LBW_65_LS_26+atom8814', 'ELW_678_-_God,_Whose_Giving_Knows_No_Ending_LBW_408+atom8520', 'ELW_620_-_How_Sweet_the_Name_of_Jesus_Sounds_LBW_345+atom8560', 'ELW_372_-_Christ_Is_Arisen_LBW_136+atom8426', 'ELW_475_-_Lord,_Enthroned_in_Heavenly_Splendor_LBW_172+atom8644', 'ELW_299_-_Cold_December_Flies_Away_LBW_53+atom8439', 'ELW_

  for el in soup.find_all('voice', text = re.compile('[2]')):


Gathering hymn media...
['ELW_855_-_Crown_Him_with_Many_Crowns+atom8467', 'ELW_162_-_Glory_to_God_(Hurd)+atom9072', 'ELW_691_-_Accept,_O_Lord,_the_Gifts_We_Bring_WOV_759+atom8347', 'ELW_585_-_Hear_Us_Now,_Our_God_and_Father+atom8542', 'ELW_421_-_By_All_Your_Saints_(Thomas,_Apostle)_(KUORTANE)+atom359735', 'ELW_421_-_By_All_Your_Saints_(Mathias,_Apostle)_(KUORTANE)+atom359738', 'ELW_784_-_Grant_Peace,_We_Pray,_in_Mercy,_Lord_LBW_471+atom8527', 'ELW_351_-_O_Sacred_Head,_Now_Wounded_(isometric)+atom8736', 'ELW_508_-_As_Rain_from_the_Clouds+atom8377', 'ELW_281_-_Silent_Night,_Holy_Night!_LBW_65_LS_26+atom8814', 'ELW_678_-_God,_Whose_Giving_Knows_No_Ending_LBW_408+atom8520', 'ELW_620_-_How_Sweet_the_Name_of_Jesus_Sounds_LBW_345+atom8560', 'ELW_372_-_Christ_Is_Arisen_LBW_136+atom8426', 'ELW_475_-_Lord,_Enthroned_in_Heavenly_Splendor_LBW_172+atom8644', 'ELW_299_-_Cold_December_Flies_Away_LBW_53+atom8439', 'ELW_695_-_As_Saints_of_Old_(FOREST_GREEN)+atom8378', 'ELW_445_-_Wash,_O_God,_Our_Sons_a

In [299]:
words_file_path = get_hymn_media(hymn_info, "text")[0][1]
official_lyrics = parse_words_file(words_file_path, hymn_info)
patched_file = compare_and_patch_xml_lyrics(official_lyrics, unroll_file)

Gathering hymn media...
['ELW_855_-_Crown_Him_with_Many_Crowns+atom8467', 'ELW_162_-_Glory_to_God_(Hurd)+atom9072', 'ELW_691_-_Accept,_O_Lord,_the_Gifts_We_Bring_WOV_759+atom8347', 'ELW_585_-_Hear_Us_Now,_Our_God_and_Father+atom8542', 'ELW_421_-_By_All_Your_Saints_(Thomas,_Apostle)_(KUORTANE)+atom359735', 'ELW_421_-_By_All_Your_Saints_(Mathias,_Apostle)_(KUORTANE)+atom359738', 'ELW_784_-_Grant_Peace,_We_Pray,_in_Mercy,_Lord_LBW_471+atom8527', 'ELW_351_-_O_Sacred_Head,_Now_Wounded_(isometric)+atom8736', 'ELW_508_-_As_Rain_from_the_Clouds+atom8377', 'ELW_281_-_Silent_Night,_Holy_Night!_LBW_65_LS_26+atom8814', 'ELW_678_-_God,_Whose_Giving_Knows_No_Ending_LBW_408+atom8520', 'ELW_620_-_How_Sweet_the_Name_of_Jesus_Sounds_LBW_345+atom8560', 'ELW_372_-_Christ_Is_Arisen_LBW_136+atom8426', 'ELW_475_-_Lord,_Enthroned_in_Heavenly_Splendor_LBW_172+atom8644', 'ELW_299_-_Cold_December_Flies_Away_LBW_53+atom8439', 'ELW_695_-_As_Saints_of_Old_(FOREST_GREEN)+atom8378', 'ELW_445_-_Wash,_O_God,_Our_Sons_a

IndexError: list index out of range

In [347]:
music_type = hymn_info['music_type']
command = '/Applications/MuseScore\ 3.app/Contents/MacOS/mscore -S Unrolled'+music_type+'.mss --export-to ./tmp-score.mscx ' + patched_file.replace("(","\(").replace(")","\)") #./ELW-346-test.musicxml"
os.system(command)

dlopen error : dlopen(libjack.0.dylib, 0x0001): tried: '/Applications/MuseScore 3.app/Contents/MacOS/../Frameworks/libjack.0.dylib' (no such file), '/Applications/MuseScore 3.app/Contents/MacOS/../Frameworks/libjack.0.dylib' (no such file), 'libjack.0.dylib' (no such file), '/usr/lib/libjack.0.dylib' (no such file), '/Users/chaddorsey/Dropbox/dev/ProPresenter7/Pro7_Media_Sweeper/libjack.0.dylib' (no such file) 
dlopen error : dlopen(/usr/local/lib/libjack.0.dylib, 0x0001): tried: '/usr/local/lib/libjack.0.dylib' (no such file) 
Creating main window…
ZoomBox::setLogicalZoom(): Formatting logical zoom level as 100% (rounded from 1.000000)
Reading translations…
convert </Users/chaddorsey/dev/python/web_scraping/sundays_seasons_scrape/completed-hymns/ELW/ELW_482_-_I_Come_with_Joy+atom8564/ELW_482_-_I_Come_with_Joy+atom8564+Melody-unrolled-lyricscorr.musicxml>...
JIT is disabled for QML. Property bindings and animations will be very slow. Visit https://wiki.qt.io/V4 to learn about possible 

0

In [348]:
##Style text and create line breaks

#TODO – Create functions:
#1. Remove vertical frames

#def mscore_command(hymn_info)
#input_filename = hymn_info['hymnal'] + "-" +hymn_info['number'] + ".musicxml"

music_type = hymn_info['music_type']
command = '/Applications/MuseScore\ 3.app/Contents/MacOS/mscore -S Unrolled'+music_type+'.mss --export-to ./tmp-score.mscx ' + patched_file.replace("(","\(").replace(")","\)") #./ELW-346-test.musicxml"
os.system(command)


dlopen error : dlopen(libjack.0.dylib, 0x0001): tried: '/Applications/MuseScore 3.app/Contents/MacOS/../Frameworks/libjack.0.dylib' (no such file), '/Applications/MuseScore 3.app/Contents/MacOS/../Frameworks/libjack.0.dylib' (no such file), 'libjack.0.dylib' (no such file), '/usr/lib/libjack.0.dylib' (no such file), '/Users/chaddorsey/Dropbox/dev/ProPresenter7/Pro7_Media_Sweeper/libjack.0.dylib' (no such file) 
dlopen error : dlopen(/usr/local/lib/libjack.0.dylib, 0x0001): tried: '/usr/local/lib/libjack.0.dylib' (no such file) 
Creating main window…
ZoomBox::setLogicalZoom(): Formatting logical zoom level as 100% (rounded from 1.000000)
Reading translations…
convert </Users/chaddorsey/dev/python/web_scraping/sundays_seasons_scrape/completed-hymns/ELW/ELW_482_-_I_Come_with_Joy+atom8564/ELW_482_-_I_Come_with_Joy+atom8564+Melody-unrolled-lyricscorr.musicxml>...
JIT is disabled for QML. Property bindings and animations will be very slow. Visit https://wiki.qt.io/V4 to learn about possible 

0

In [349]:
#Remove title and header container and extraneous markup
clean_mscx = delete_vbox_and_clean("./tmp-score.mscx")

Removing title and header container...


In [350]:
#Create paginated MuseScore file
music_type = hymn_info['music_type']
command = '/Applications/MuseScore\ 3.app/Contents/MacOS/mscore -S Unrolled-' + music_type + '.mss --export-to ./tmp-paginated.mscx ' + clean_mscx
os.system(command)

dlopen error : dlopen(libjack.0.dylib, 0x0001): tried: '/Applications/MuseScore 3.app/Contents/MacOS/../Frameworks/libjack.0.dylib' (no such file), '/Applications/MuseScore 3.app/Contents/MacOS/../Frameworks/libjack.0.dylib' (no such file), 'libjack.0.dylib' (no such file), '/usr/lib/libjack.0.dylib' (no such file), '/Users/chaddorsey/Dropbox/dev/ProPresenter7/Pro7_Media_Sweeper/libjack.0.dylib' (no such file) 
dlopen error : dlopen(/usr/local/lib/libjack.0.dylib, 0x0001): tried: '/usr/local/lib/libjack.0.dylib' (no such file) 
Creating main window…
ZoomBox::setLogicalZoom(): Formatting logical zoom level as 100% (rounded from 1.000000)
Reading translations…
convert <././tmp-score_clean.mscx>...
JIT is disabled for QML. Property bindings and animations will be very slow. Visit https://wiki.qt.io/V4 to learn about possible solutions for your platform.
	using style <Unrolled-melody.mss>
	to <./tmp-paginated.mscx>
... success!


0

In [351]:
##Export slide images to hymn folder

#TODO – Create function
#2. Style to lines and export images 

import os
#img_dir = "/Users/chaddorsey/Dropbox/dev/python/web_scraping/sundays_seasons_scrape/completed-hymns/ELW/ELW_346_-_Ride_On\,_Ride_On_in_Majesty\!_\(THE_KING\'S_MAJESTY\)_\[LBW_121\]+atom8794"

img_dir = hymn_info['hymn_folder']
print(img_dir)
png_output_filename_base = hymn_info['hymnal'] + "-" +hymn_info['number']
command_exp = "/Applications/MuseScore\ 3.app/Contents/MacOS/mscore -S Unrolled-"+music_type+".mss --export-to "+ img_dir.replace("(","\(").replace(")","\)") + "/" + png_output_filename_base + ".png" + " ./tmp-paginated.mscx"
#os.system(command_mkdir)
os.system(command_exp)

/Users/chaddorsey/dev/python/web_scraping/sundays_seasons_scrape/completed-hymns/ELW/ELW_482_-_I_Come_with_Joy+atom8564/


dlopen error : dlopen(libjack.0.dylib, 0x0001): tried: '/Applications/MuseScore 3.app/Contents/MacOS/../Frameworks/libjack.0.dylib' (no such file), '/Applications/MuseScore 3.app/Contents/MacOS/../Frameworks/libjack.0.dylib' (no such file), 'libjack.0.dylib' (no such file), '/usr/lib/libjack.0.dylib' (no such file), '/Users/chaddorsey/Dropbox/dev/ProPresenter7/Pro7_Media_Sweeper/libjack.0.dylib' (no such file) 
dlopen error : dlopen(/usr/local/lib/libjack.0.dylib, 0x0001): tried: '/usr/local/lib/libjack.0.dylib' (no such file) 
Creating main window…
ZoomBox::setLogicalZoom(): Formatting logical zoom level as 100% (rounded from 1.000000)
Reading translations…
convert <./tmp-paginated.mscx>...
restHeight < 0.0: -1.966607

restHeight < 0.0: -148.067812

restHeight < 0.0: -169.150124

restHeight < 0.0: -0.157639

restHeight < 0.0: -170.243874

restHeight < 0.0: -0.435357

restHeight < 0.0: -166.872770

restHeight < 0.0: -1.095139

restHeight < 0.0: -147.567812

JIT is disabled for QML. Pro

0

In [352]:
#Export musicxml file with final paginated lyrics
#TODO – Create function
#3. Create lyrics slides

xml_final_lyrics = "./test_final_for_lyrics.musicxml"
command = "/Applications/MuseScore\ 3.app/Contents/MacOS/mscore -S Unrolled-" + music_type + ".mss --export-to "+ xml_final_lyrics + " ./tmp-paginated.mscx"
os.system(command)
lyrics_slides = (extract_lyrics(xml_final_lyrics))

print(lyrics_slides)

dlopen error : dlopen(libjack.0.dylib, 0x0001): tried: '/Applications/MuseScore 3.app/Contents/MacOS/../Frameworks/libjack.0.dylib' (no such file), '/Applications/MuseScore 3.app/Contents/MacOS/../Frameworks/libjack.0.dylib' (no such file), 'libjack.0.dylib' (no such file), '/usr/lib/libjack.0.dylib' (no such file), '/Users/chaddorsey/Dropbox/dev/ProPresenter7/Pro7_Media_Sweeper/libjack.0.dylib' (no such file) 
dlopen error : dlopen(/usr/local/lib/libjack.0.dylib, 0x0001): tried: '/usr/local/lib/libjack.0.dylib' (no such file) 
Creating main window…
ZoomBox::setLogicalZoom(): Formatting logical zoom level as 100% (rounded from 1.000000)
Reading translations…
convert <./tmp-paginated.mscx>...
restHeight < 0.0: -1.966607

restHeight < 0.0: -148.067812

restHeight < 0.0: -169.150124

restHeight < 0.0: -0.157639

restHeight < 0.0: -170.243874

restHeight < 0.0: -0.435357

restHeight < 0.0: -166.872770

restHeight < 0.0: -1.095139

restHeight < 0.0: -147.567812

JIT is disabled for QML. Pro

Extracting lyrics from formatted musicxml file...
./test_final_for_lyrics.musicxml
['I come with joy, a child of God, forgiven, loved, and ', 'free, the life of Jesus to recall in ', 'love laid down for me, in love laid down for me. ', 'I come with Christians far and near to find, as all are ', 'fed, the new community of love in ', "Christ's communion bread, in Christ's communion bread. ", 'As Christ breaks bread and bids us share, each proud division ', 'ends. The love that made us, makes us one, and ', 'strangers now are friends, and strangers now are friends. ', 'The Spirit of the risen Christ, unseen, but ever ', 'near, is in such friendship better known, a', 'live among us here, alive among us here. ', 'Together met, together bound by all that God has ', "done, we'll go with joy, to give the world the ", 'love that makes us one, the love that makes us one. ']


In [353]:
make_hymn_presentation(lyrics_slides,hymn_info)

Assembling hymn slides and lyrics...
Reading slide template file...
Selected File: Hymn_template_slides.pro
Presentation!
Indexing slide cues...
59F168C3-80EF-44B4-9661-C3D044F811F7
0B8A36B8-B0DC-4483-8B67-1BF133F00334
DCA646CD-FAA4-40D6-8A00-5BCE267DCBD9
4F0EACBC-3459-45DD-920C-83D2A8D5D297
8E535D66-F97E-4107-9BC3-598E53C63810
5517BE16-02E0-4F45-A3A6-CF9981A7B1CA
217BF505-AEA0-4077-B605-B6167EF8885E
0D8179F2-A598-4283-B328-7C14EAEEBC8E
29D60096-5E42-4788-AA75-6F876EEC5272
E612E68D-4C39-4462-95E0-380C61997EED
633FD085-DB02-42F1-AF51-9F7D622559D4
0E0558D2-29BB-4B7B-B537-B6335A1499F9
8D61916E-E4E7-40D4-8A0D-35C48D363CEA
E156350E-F851-4967-A1AB-59C53D0C245F
897B6479-1688-4AC1-8BA8-21ABA54B2254
BA0D0432-4D9B-4C6D-8A16-03B5CDAE6AEB
F0120AC2-4A7F-4E59-BCB2-45D42554847A
A7A1DEFE-088B-4C87-A80D-1C1C6D349C68
D03ECFE2-9B73-4E20-8204-9B06278A19BC
FFCDCAEE-7F97-42B4-A0FC-B86783462868
42E6D093-190F-45CC-9D1A-F21F2F8E8157
9CBA9B12-44D9-4B6D-95B3-CF4EEF30A542
5164B2FB-D2D8-4C1C-9606-02924067AAB1
8B39

In [818]:
##TESTING AND LEFTOVERS BELOW HERE \/\/\/\/

pro7_file_obj_test = open_reading_template("Hymn_template_slides")
proto_pres_obj_test = convert_protobuf_to_proto_plus(pro7_file_obj_test)
proto_pres_obj_test_dict = proto.Message.to_dict(proto_pres_obj_test)




Reading slide template file...
Selected File: Hymn_template_slides.pro
Presentation!


In [819]:
##TESTING GROUND
preface_slides = 1 #TODO – Fix this to use only one preface slide
        #starting_offset = 1
cue_index_dict = create_cue_index_dict(pro7_file_obj_test,1)
print(cue_index_dict)

Indexing slide cues...
59F168C3-80EF-44B4-9661-C3D044F811F7
0B8A36B8-B0DC-4483-8B67-1BF133F00334
DCA646CD-FAA4-40D6-8A00-5BCE267DCBD9
4F0EACBC-3459-45DD-920C-83D2A8D5D297
8E535D66-F97E-4107-9BC3-598E53C63810
5517BE16-02E0-4F45-A3A6-CF9981A7B1CA
217BF505-AEA0-4077-B605-B6167EF8885E
0D8179F2-A598-4283-B328-7C14EAEEBC8E
29D60096-5E42-4788-AA75-6F876EEC5272
E612E68D-4C39-4462-95E0-380C61997EED
633FD085-DB02-42F1-AF51-9F7D622559D4
0E0558D2-29BB-4B7B-B537-B6335A1499F9
8D61916E-E4E7-40D4-8A0D-35C48D363CEA
E156350E-F851-4967-A1AB-59C53D0C245F
897B6479-1688-4AC1-8BA8-21ABA54B2254
BA0D0432-4D9B-4C6D-8A16-03B5CDAE6AEB
F0120AC2-4A7F-4E59-BCB2-45D42554847A
A7A1DEFE-088B-4C87-A80D-1C1C6D349C68
D03ECFE2-9B73-4E20-8204-9B06278A19BC
FFCDCAEE-7F97-42B4-A0FC-B86783462868
42E6D093-190F-45CC-9D1A-F21F2F8E8157
9CBA9B12-44D9-4B6D-95B3-CF4EEF30A542
5164B2FB-D2D8-4C1C-9606-02924067AAB1
8B3974E4-3953-48E1-9658-BED5F79803E6
B8A26502-AECB-4797-BBA4-2B9E343D1941
897B6479-1688-4AC1-8BA8-21ABA54B2254
9CBA9B12-44D9-4

In [820]:
n=18
text_line64 = proto_pres_obj_test_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['text']['rtf_data']
print(proto_pres_obj_test_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['fill']['media']['image']['drawing']['natural_size']['height'])#['custom_image_bounds']) #= 200 # ['relative_path'] #.decode(encoding='cp1252')
media_info = proto_pres_obj_test_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['fill']['media']['url'] # ['relative_path'] #.decode(encoding='cp1252')
text_line = base64.b64decode(text_line64).decode(encoding='cp1252')

print(media_info)
print(text_line)


155.0
{'absolute_string': 'file:///Users/chaddorsey/Documents/ProPresenter/Media/Assets/ELW-344-harmony-01.png', 'platform': 1, 'local': {'root': 10, 'path': 'Media/Assets/ELW-344-harmony-01.png'}}
{\rtf1\ansi\ansicpg1252\cocoartf2639
\cocoatextscaling0\cocoaplatform0{\fonttbl}
{\colortbl;\red255\green255\blue255;}
{\*\expandedcolortbl;;}
}


In [None]:
n=2
print(search.lyrics.LyricSearcher(s).index()[n].start)
print(search.lyrics.LyricSearcher(s).index()[n].end)
print((len(search.lyrics.LyricSearcher(s).index())))

#Potentially problematic but right direction

###print(all_lyrics[n][4].text)
###print(m21_lyrics_updates[n]['text'])

for i in range(len(search.lyrics.LyricSearcher(s).index())):
    #all_lyrics[i].modify(start = m21_lyrics_updates[i]['start'], end = m21_lyrics_updates[i]['end'], text = m21_lyrics_updates[i]['text'])
    #all_lyrics[i][2] = m21_lyrics_updates[i]['end']
    search.lyrics.LyricSearcher(s).index()[i][4].text = m21_lyrics_updates[i]['text']
    search.lyrics.LyricSearcher(s).index()[i].modify(start = m21_lyrics_updates[i]['start'], end = m21_lyrics_updates[i]['end'])


##Line that worked in the past \/ \/ \/
#search.lyrics.LyricSearcher(s).index()[6][4].text = "O"
#print(search.lyrics.LyricSearcher(s).index()[6])
#n=9 
#print(search.lyrics.LyricSearcher(s).index()[n].start)
#print(search.lyrics.LyricSearcher(s).index()[n].end)
#print(search.lyrics.LyricSearcher(s).index()[n][4].text)

for n in range(len(search.lyrics.LyricSearcher(s).index()[0:10])):
    print(search.lyrics.LyricSearcher(s).index()[n])

7
10
285


In [None]:
#Saving just in case
#Open musicxml file and parse with music21

s = converter.parse(filename_mod_load)

#Test for file success
s.analyze('key')

#Get indexed lyrics
all_lyrics = search.lyrics.LyricSearcher(s).index()

#Parse indexed lyrics into a single-line string
m21_lyrics_by_word = []
for i in range(len(all_lyrics)):
    if i==0:
        m21_lyrics_by_word.append(all_lyrics[i][5])
    else:
        if all_lyrics[i][1] != all_lyrics[i-1][2]:
            entry = " "+all_lyrics[i][5]
        else:
            entry = all_lyrics[i][5]
        m21_lyrics_by_word.append(entry)
    m21_lyrics_line = "".join(m21_lyrics_by_word)
print("Official:",len(official_text_line))
print(official_text_line,end="\n\n")
print("M211:",len(m21_lyrics_line))
print(m21_lyrics_line)



##Other way to do this
#lyrics_in_file_list = []
#
#for i in range(num_verses):
#    lyrics_in_file_list.append(text.assembleLyrics(s,i+1))
#
#print(lyrics_in_file_list[0])
#print(len(text.assembleLyrics(s,1)))
#
#lyrics_in_file_line = " ".join(lyrics_in_file_list)


In [307]:
hymn_dirs = "/Users/chaddorsey/dev/python/web_scraping/sundays_seasons_scrape/completed-hymns/"

hymnal_dirs = os.listdir(hymn_dirs + str(hymnal) + "/")


#Find the directory that contains the hymn number
for dir in hymnal_dirs:
    if str(hymn_number) in dir:
        print(dir)
        hymn_dir = dir

hymn_folder = hymn_dirs + str(hymnal) + "/" + hymn_dir + "/"

hymn_files = os.listdir(hymn_folder)
print(hymn_files)

if music_type == "harmony":
    for file in hymn_files:
        if "Harmony" in file and file.endswith(".musicxml"):
            hymn_musicxml = file
elif music_type == "melody":
    for file in hymn_files:
        if "Melody" in file and file.endswith(".musicxml"):
            hymn_musicxml = file

hymn_musicxml_path = hymn_folder + hymn_musicxml
print(hymn_musicxml_path)



ELW_344_-_All_Glory,_Laud,_and_Honor+atom8354
['ELW_344_-_All_Glory,_Laud,_and_Honor+atom8354+Harmony.tif', 'ELW_344_-_All_Glory,_Laud,_and_Honor+atom8354+Harmony.musicxml', 'ELW-344-harmony-18.png', 'ELW-344-harmony-19.png', 'ELW-344-harmony-09.png', 'ELW-344-harmony-21.png', 'ELW-344-harmony-m21.mscz', 'ELW-344-harmony-20.png', 'ELW-344-harmony-08.png', 'ELW-344-harmony-22.png', 'ELW_344_-_All_Glory,_Laud,_and_Honor+atom8354+Melody.tif', 'ELW-344-harmony-12.png', 'ELW-344-harmony-06.png', 'ELW_344_-_All_Glory,_Laud,_and_Honor+atom8354+Words.rtf', '.mscbackup', 'ELW_344_-_All_Glory,_Laud,_and_Honor+atom8354+Braille.brf', 'ELW-344-harmony-07.png', 'ELW-344-harmony-13.png', 'ELW-344-harmony-05.png', 'ELW-344-harmony-11.png', 'ELW_344_-_All_Glory,_Laud,_and_Honor+atom8354+Melody.musicxml', 'ELW-344-harmony-10.png', 'ELW-344-harmony-04.png', 'ELW-344-harmony-14.png', 'ELW-344-harmony-15.png', 'ELW-344-harmony-01.png', 'ELW-344-harmony-17.png', 'ELW-344-harmony-03.png', 'ELW-344-harmony-02

In [308]:
#Identify and set up list of relevant png files (WARNING: Will include all PNG files in directory)
hymn_pngs = []
for file in hymn_files:
    if file.endswith(".png"):
        hymn_pngs.append(file)
#print(hymn_pngs)
hymn_pngs = sorted(hymn_pngs, key=lambda x: int(x.split("-")[-1].split(".")[0]))
print(hymn_pngs)

media_path = "Media/Assets/"
absolute_string_prefix = "file:///Users/chaddorsey/Documents/ProPresenter/"

['ELW-344-harmony-01.png', 'ELW-344-harmony-02.png', 'ELW-344-harmony-03.png', 'ELW-344-harmony-04.png', 'ELW-344-harmony-05.png', 'ELW-344-harmony-06.png', 'ELW-344-harmony-07.png', 'ELW-344-harmony-08.png', 'ELW-344-harmony-09.png', 'ELW-344-harmony-10.png', 'ELW-344-harmony-11.png', 'ELW-344-harmony-12.png', 'ELW-344-harmony-13.png', 'ELW-344-harmony-14.png', 'ELW-344-harmony-15.png', 'ELW-344-harmony-16.png', 'ELW-344-harmony-17.png', 'ELW-344-harmony-18.png', 'ELW-344-harmony-19.png', 'ELW-344-harmony-20.png', 'ELW-344-harmony-21.png', 'ELW-344-harmony-22.png']


In [None]:
##Put media into slide

#Get relevant image file name
img_file = hymn_folder+hymn_png[n]

#Copy file from hymn_pngs to ProPresenter Media folder
shutil.copyfile(img_file, "/Users/chaddorsey/Documents/ProPresenter/" + media_path)

#Replace media path and UUID in protobuf object
uuid_string = str(uuid.uuid4())
absolute_string = absolute_string_prefix + media_path + img_file
proto_pres_obj_test_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['fill']['media']['url']['absolute_string'] = absolute_string
proto_pres_obj_test_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['fill']['media']['url']['local']['path'] = path
proto_pres_obj_test_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['fill']['media']['uuid']['string'] = uuid_string


In [283]:
 #Create proto object and Write to output file as presentation
 proto_pres_obj_test = ParseDict(proto_pres_obj_test_dict,presentation_pb2.Presentation())
 

filename_out = "./" + hymn_info['hymnal'] + "-" +hymn_info['number'] + "-" + hymn_info['title'] + "_img__out.pro"
with open(filename_out, "wb") as fd:
    fd.write(proto_pres_obj_test.SerializeToString())

Media/Assets/ELW-344-harmony-m21-02.png


In [115]:
#Syllabification experiments

first_verse_test = verses_text[0]
first_verse_words = word_tokenize(first_verse_test)
#first_verse_syllables = sltk.tokenize(first_verse_test)
first_verse_syllables = [dic.inserted(word) for word in first_verse_words]
print(first_verse_syllables)
verse_words = []
verse_syllables = []
for verse in verses_text:
    verse_words = word_tokenize(verse)
    verse_syllables = [dic.inserted(word) for word in verse_words]
    #if a list item in verse_syllables is a punctiation mark, join it with the previous item
    for i in range(len(verse_syllables)):
        if verse_syllables[i] in [".", ",", ";", ":", "!", "?"]:
            verse_syllables[i-1] = verse_syllables[i-1] + verse_syllables[i]
            verse_syllables[i] = ""
    verse_syllables = [x for x in verse_syllables if x != ""]
    print(verse_syllables[0])
    #Turn verse_syllables into a string
    

    #join elements in verse_syllables[0] into a string separated by spaces
    #verse_syllables_string = " ".join(verse_syllables[0])
    #print(verse_syllables_string)
    
    

    

['All', 'glo-ry', ',', 'laud', ',', 'and', 'hon-or', 'to', 'you', ',', 're-deemer', ',', 'king', ',', 'to', 'whom', 'the', 'lips', 'of', 'chil-dren', 'made', 'sweet', 'hosan-nas', 'ring', '.']
All
You
All
All
The
All
3The
All
4To
All
All
Their


In [279]:
#Load media files into presentation


number = 3

if number > 10:
    n = str("0"+str(number))
else:
    n = str(number)

uuid_string = str(uuid.uuid4())
media_path = "Media/Assets/"
filename_by_number = "ELW-344-harmony-m21-0" + n + ".png"
absolute_string_prefix = "file:///Users/chaddorsey/Documents/ProPresenter/"
absolute_string = absolute_string_prefix + media_path + filename_by_number




In [280]:
proto_pres_obj_test_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['fill']['media']['url']['absolute_string'] = absolute_string
proto_pres_obj_test_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['fill']['media']['url']['local']['path'] = path
proto_pres_obj_test_dict['cues'][n]['actions'][0]['slide']['presentation']['base_slide']['elements'][0]['element']['fill']['media']['uuid']['string'] = uuid_string

In [651]:
make_hymn_presentation(lyrics_slides,hymn_info)

Assembling hymn slides and lyrics...
Reading slide template file...
Selected File: Hymn_template_harmony.pro
Presentation!
Indexing slide cues...
A3BEF52A-0FC5-4A3D-A2C6-AC7D1BFF8ECA
56D284B1-1532-486D-B186-DABBBF71F401
5FD2F76F-9B0F-4411-8E5D-EB1DA34CB359
43955E85-8D12-4366-9C47-9AD61830E978
12216170-61F6-47CE-8B8B-53F211543F51
7C4A3386-EA50-4830-8E91-429212121587
FDE790F0-B369-41EB-BB40-2307D10DCDC5
93155145-2C63-47C1-9F4B-2B88C2E52341
120E636A-735F-4DD4-88A5-99DFFFC5F5BB
D9B0F316-603C-4266-B5FC-5558D189BBF5
BEA3FD0C-66DE-4686-B482-3E63D769CFDA
B08876A5-BE5D-4DB8-B912-226D0F6E1DA7
37B6D68E-6716-454B-96A6-3522C1135F0C
A749914C-B388-4AB8-9EC6-42E81161BB94
030CE50A-8487-48E3-9FD1-66D5EFCFE8A5
D0CD242D-1A2D-44FA-902A-FFE9F5AD6A68
26C3FBBE-6487-459D-A460-231A7BC360F5
2AF1778E-CBBC-4890-978F-CEC6C3CB14CD
CF667ED1-113F-495D-9629-56127AED36CA
D5B1A0E5-A2E0-42C2-8895-AAA661E0CA5A
D870F617-86C7-4655-AC4E-ADB3197EBEE9
69415EC9-F87D-4473-AE1B-E77381CA5BE9
701B9126-2D92-4A03-89BD-EAA7427326BE
84B

2172caa989874e2299fbdcb6c272c878


In [None]:
###PROBABLY NOT NEEDED

i=0
lyrics_tmpline = []
lyrics_lines = []
lyrics_out = []
for line in slide_output:
    lyrics_tmpline=[]
    for lyric in line:
        lyrics_tmpline.append(line[i][0])
        #if i==0:
        #    lyrics_tmpline += line[i][0]
        #else:
        #    lyrics_tmpline += " " + line[i][0]
        i+=1
    i=0
    lyrics_lines.append(lyrics_tmpline)


for line in lyrics_lines:
    print("\t"+"\t".join(line),end="\n")
    lyrics_out.append("\t"+"\t".join(line))

	Love	di	vine,	all	loves	ex	cel	ling,	Joy	of	heav’n,	to
	earth	come	down!	Fix	in	us	thy	hum	ble	dwell	ing,	all	thy
	faith	ful	mer	cies	crown.	Je	sus,	thou	art	all	com	pas	sion,
	pure,	un	bound	ed	love	thou	art;	vis	it	us	with
	thy	sal	va	tion,	en	ter	ev	trem	bling	heart.
	Breathe,	oh,	breathe	thy	lov	ing	Spir	it	in	to	ev	'ry
	trou	bled	breast;	Jet	us	all	in	thee	in	her	it;	Jet	us
	find	thy	prom	ised	rest.	Take	a	way	the	love	of	sin	ning;
	Al	pha	and	oo	me	ga	be;	end	of	faith,	as
	its	be	gin	ning,	set	our	hearts	at	lib	er	ty.
	Come,	Al	might	y,	to	de	liv	er;	Jet	us	all	thy
	life	te	ceive;	sud	den	ly	te	turn,	and	nev	er,	nev	er
	more	thy	tem	ples	leave.	Thee	we	would	be	al	ways	bless	ing,
	serve	thee	as	thy	hosts	a	bove,	pray,	and	ptaise	thee
	with	out	ceas	ing,	glo	ry	in	thy	per	fect	love.
	Fin	ish	then	thy	new	cre	a	tion,	pure	and	spot	less
	Jet	us	be;	Jet	us	see	thy	great	sal	va	tion	per	fect
	ly	te	stored	in	thee!	Changed	from	glo	ry	in	to	glo	ty,
	till	in	heav’n	we	take	our	place,	t

In [None]:
#SKIPPING FOR NOW – Get indexed lyrics
all_lyrics = search.lyrics.LyricSearcher(s).index()

#Parse indexed lyrics into a single-line string
m21_lyrics_by_word = []
for i in range(len(all_lyrics)):
    if i==0:
        m21_lyrics_by_word.append(all_lyrics[i][5])
    else:
        if all_lyrics[i][1] != all_lyrics[i-1][2]:
            entry = " "+all_lyrics[i][5]
        else:
            entry = all_lyrics[i][5]
        m21_lyrics_by_word.append(entry)
    m21_lyrics_line = "".join(m21_lyrics_by_word)

print("Official:",len(official_text_line))
print(official_text_line,end="\n\n")
print("M211:",len(m21_lyrics_line))
print(m21_lyrics_line)


In [12]:
soup.prettify

<bound method Tag.prettify of <?xml version="1.0" encoding="utf-8"?>
<!--MusicXML 2.0 file created by CapToMusic.py CapXML to MusicXML converter version 2.17--><!DOCTYPE score-partwise PUBLIC "-//Recordare//DTD MusicXML 2.0 Partwise//EN" "http://www.musicxml.org/dtds/partwise.dtd">
<score-partwise version="2.0">
<identification>
<encoding>
<encoding-date>2022-12-28</encoding-date>
<software>CapToMusic.py CapXML to MusicXML converter version 2.17</software>
<encoding-description>Options: FinaleDolet33=1, ChordCaseMatters=1, ExportToSibelius=0</encoding-description>
</encoding>
</identification>
<defaults>
<scaling>
<millimeters>1.6</millimeters>
<tenths>10.0</tenths>
</scaling>
<page-layout>
<page-height>1856</page-height>
<page-width>1312</page-width>
<page-margins type="both">
<left-margin>93</left-margin>
<right-margin>93</right-margin>
<top-margin>93</top-margin>
<bottom-margin>93</bottom-margin>
</page-margins>
</page-layout>
<system-layout>
<system-margins>
<left-margin>0</left-ma

In [10]:
print(treble_complete)

<part id="P1">
<measure number="0">
<direction>
<direction-type>
<other-direction/>
</direction-type>
<sound tempo="120"/>
</direction>
<print>
<system-layout>
<system-margins>
<left-margin>30</left-margin>
<right-margin>0</right-margin>
</system-margins>
<top-system-distance>90</top-system-distance>
</system-layout>
<staff-layout number="1">
<staff-distance>50</staff-distance>
</staff-layout>
</print>
<attributes>
<divisions>480</divisions>
<key>
<fifths>-1</fifths>
<mode>major</mode>
</key>
<time>
<beats>6</beats>
<beat-type>4</beat-type>
</time>
<clef>
<sign>G</sign>
<line>2</line>
</clef>
</attributes>
<note>
<pitch>
<step>C</step>
<octave>4</octave>
</pitch>
<duration>480</duration>
<voice>1</voice>
<type>quarter</type>
<stem>up</stem>
<lyric default-y="-95" number="1">
<syllabic>single</syllabic>
<text>This</text>
</lyric>



</note>
<backup>
<duration>480</duration>
</backup>
<note>
<pitch>
<step>B</step>
<alter>-1</alter>
<octave>3</octave>
</pitch>
<duration>480</duration>
<vo

In [9]:
print(bass_complete)

<part id="P2">
<measure number="0">
<direction>
<direction-type>
<other-direction/>
</direction-type>
<sound tempo="120"/>
</direction>
<print>
<system-layout>
<system-margins>
<left-margin>30</left-margin>
<right-margin>0</right-margin>
</system-margins>
<top-system-distance>90</top-system-distance>
</system-layout>
<staff-layout number="1">
<staff-distance>170</staff-distance>
</staff-layout>
</print>
<attributes>
<divisions>480</divisions>
<key>
<fifths>-1</fifths>
<mode>major</mode>
</key>
<time>
<beats>6</beats>
<beat-type>4</beat-type>
</time>
<clef>
<sign>F</sign>
<line>4</line>
</clef>
</attributes>
<note>
<pitch>
<step>C</step>
<octave>3</octave>
</pitch>
<duration>480</duration>
<voice>1</voice>
<type>quarter</type>
<stem>down</stem>
</note>
<note>
<chord/>
<pitch>
<step>G</step>
<octave>3</octave>
</pitch>
<duration>480</duration>
<voice>1</voice>
<type>quarter</type>
<stem>down</stem>
</note>
<backup>
<duration>480</duration>
</backup>
<note print-object="no">
<rest/>
<dura

In [24]:
##EXTRA Section

last_part_tag = holding_tag.find('part').unwrap()
print(last_part_tag)
treble_contents = holding_tag
print(treble_contents)

<holding-tag>
<measure number="0">
<direction>
<direction-type>
<other-direction/>
</direction-type>
<sound tempo="120"/>
</direction>
<print>
<system-layout>
<system-margins>
<left-margin>30</left-margin>
<right-margin>0</right-margin>
</system-margins>
<top-system-distance>90</top-system-distance>
</system-layout>
<staff-layout number="1">
<staff-distance>50</staff-distance>
</staff-layout>
</print>
<attributes>
<divisions>480</divisions>
<key>
<fifths>-1</fifths>
<mode>major</mode>
</key>
<time>
<beats>6</beats>
<beat-type>4</beat-type>
</time>
<clef>
<sign>G</sign>
<line>2</line>
</clef>
</attributes>
<note>
<pitch>
<step>C</step>
<octave>4</octave>
</pitch>
<duration>480</duration>
<voice>1</voice>
<type>quarter</type>
<stem>up</stem>



<lyric default-y="-155" number="1">
<syllabic>begin</syllabic>
<text>Re</text>
</lyric>
</note>
<backup>
<duration>480</duration>
</backup>
<note>
<pitch>
<step>B</step>
<alter>-1</alter>
<octave>3</octave>
</pitch>
<duration>480</duration>
<voice

In [223]:
#Tests and experiments with diff_match_patch
test_text1 = "The quik brovvnfoox"
test_text2 = "The quick brown fox"
patches_test = dmp.patch_make(test_text1,test_text2)
diff_test = dmp.diff_main(test_text1, test_text2)

for patch in patches_test:
    print(patch)
for diff in diff_test:
    print(diff)
dmp.diff_xIndex(diffs,m21_lyrics_list[1][1])
m21_lyrics_list[-1]



def convert_musicxml(input_file: str):
    """
    Convert a MusicXML file into a MuseScore file.

    :param input_file: The path (relative or absolute) of a MusicXML file.
    """
    output_file = input_file.replace('.musicxml', '.mscx')
    utils.mscore(['-o', output_file, input_file])
    os.remove(input_file)
convert_musicxml("ELW-629-melody-mod-test.musicxml")

@@ -4,16 +4,16 @@
  qui
+c
 k bro
-vvnfo
+wn f
 ox

(0, 'The qui')
(1, 'c')
(0, 'k bro')
(-1, 'vv')
(1, 'w')
(0, 'n')
(1, ' ')
(0, 'f')
(-1, 'o')
(0, 'ox')


In [None]:
holding_tag_treble = soup.new_tag("holding-tag-treble")

for n in range(1,verse_count+1):
    match_string = "[^" + str(n) + "]"
    treble_temp = copy.copy(treble_part)
    
    for lyric in treble_temp.find_all('lyric',{'number':re.compile(match_string)}):
        lyric.decompose()
    for lyric in treble_temp.find_all('lyric'):
        lyric['number']=1
    
    holding_tag_treble.append(treble_temp)
   
holding_tag_treble.find('part').unwrap()
holding_tag_treble.find('part').unwrap()
holding_tag_treble.find('part').unwrap()
#print(holding_tag)
treble_complete = holding_tag_treble.find('part').unwrap()
treble_complete.append(holding_tag_treble)
treble_complete.find('holding-tag-treble').unwrap()
print(holding_tag_treble)
print(treble_complete)


In [None]:
## SKIP
#treble_test = soup.find('part', {'id':'P1'})
#treble_test_soup = copy.copy(soup.find('part',{'id':'P1'}))

treble = soup.find('part', {'id':'P1'})
bass = soup.find('part', {'id':'P2'})

#treble_test_p_tag = treble_test_soup.find('part',{'id':'P1'}).unwrap()
treble_contents=[]
#print(treble_test_p_tag)
#treble_contents=treble_test_soup.find_all()

for tag in soup.find('part', {'id':'P1'}).find_all():
    snippet = tag.extract()
    treble_contents.append(snippet)
#bass_contents = bass.find()
for el in treble_contents:
    if el != "\n":


In [525]:
#SKIP
##holding_tag = soup.new_tag("holding-tag")
##holding_tag.append(treble_verses)

In [None]:
##SKIP
#treble_soup = copy.copy(soup)
#bass_soup = copy.copy(soup)

In [526]:
#SKIP
##print(holding_tag)

<holding-tag><part id="P1"><holding-tag><part id="P1">
<measure number="0">
<direction>
<direction-type>
<other-direction/>
</direction-type>
<sound tempo="120"/>
</direction>
<print>
<system-layout>
<system-margins>
<left-margin>30</left-margin>
<right-margin>0</right-margin>
</system-margins>
<top-system-distance>90</top-system-distance>
</system-layout>
<staff-layout number="1">
<staff-distance>50</staff-distance>
</staff-layout>
</print>
<attributes>
<divisions>480</divisions>
<key>
<fifths>-1</fifths>
<mode>major</mode>
</key>
<time>
<beats>6</beats>
<beat-type>4</beat-type>
</time>
<clef>
<sign>G</sign>
<line>2</line>
</clef>
</attributes>
<note>
<pitch>
<step>C</step>
<octave>4</octave>
</pitch>
<duration>480</duration>
<voice>1</voice>
<type>quarter</type>
<stem>up</stem>
<lyric default-y="-95" number="1">
<syllabic>single</syllabic>
<text>This</text>
</lyric>



</note>
<backup>
<duration>480</duration>
</backup>
<note>
<pitch>
<step>B</step>
<alter>-1</alter>
<octave>3</octav

In [183]:
#treble_part = treble_verses.find_all()
for lyric in treble_verses.find_all('lyric'):
    lyric['number']="1"

print(treble_verses)

AttributeError: ResultSet object has no attribute 'find_all'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?

In [13]:
p1_tag = parts[0].find('part')
p2_tag = parts[1].find('part')
#bass = parts[1].descendants()

#len(bass)

None


In [None]:
#slide_line = soup.new_tag('slide-line')
new_page_tags = soup.find_all('print', {'new-page':'yes'})
print(len(new_page_tags))
measures = soup.find_all('measure')
print(len(measures))

first_measures = [soup.find('measure')]
for tag in new_page_tags:
    first_measures.append(tag.parent)

def is_child_new_page(x):
    if isinstance(x, NavigableString):
        pass
    if isinstance(x, Tag):
        child_tag=x.findChild()
        False if child_tag.has_attr('new-page') else True

14
80


In [None]:
def newTag(tag, attrs={}, tstr=""):
    n = soup.new_tag(tag)
    if (len(attrs) > 0):
        for k, v in attrs.items():
            n[k] = v
    if (len(tstr) > 0):
        n.string = tstr
    return n

def wrapTag(newTag, fromTagInclusive, toTagExclusive):
    fromTagInclusive.wrap(newTag)
    #p(fromTagInclusive.parent, "fromTag.parent")
    n = fromTagInclusive.parent
    c = 0
    while 1:
        c += 1
        x = n.next_sibling
        if (x is None):
            break
        n.append(x)
        #print(c, x, n.next_sibling, isinstance(n.next_sibling, Tag), n.next_sibling.name if isinstance(n.next_sibling, Tag) else "~Tag", n.next_sibling.attrs if isinstance(n.next_sibling, Tag) else "~Tag")
        #if isinstance(n.next_sibling, Tag) and (n.next_sibling.name == 'p') and ('class' in n.next_sibling.attrs) and ('y' in n.next_sibling['class']):
        if (n.next_sibling == toTagExclusive):
            break
    return n, toTagExclusive

for i in range(0,len(first_measures)-1):
    n = newTag('slide-line')
    n, _ = wrapTag(n, first_measures[i], first_measures[i+1])
    #print(first_measures[i].attrs)
    #print("i=",i)
    #print(n.findNext('measure').attrs, "wrapped div" + "\n")


In [None]:
slides = soup.find_all('slide-line')
print(len(slides))
print(slides[0].find_all('lyric'))

14
[<lyric default-x="6.50" default-y="-86.26" number="1" relative-y="-30.00">
<syllabic>single</syllabic>
<text font-family="Nexa Text" font-weight="bold">This</text>
</lyric>, <lyric default-x="6.50" default-y="-86.26" number="1" relative-y="-30.00">
<syllabic>end</syllabic>
<text font-family="Nexa Text" font-weight="bold">is</text>
</lyric>, <lyric default-x="6.50" default-y="-86.26" number="1" relative-y="-30.00">
<syllabic>end</syllabic>
<text font-family="Nexa Text" font-weight="bold">the</text>
</lyric>, <lyric default-y="-86.26" number="1" relative-y="-30.00">
<syllabic>begin</syllabic>
<text font-family="Nexa Text" font-weight="bold">Spir</text>
</lyric>, <lyric default-x="6.50" default-y="-86.26" number="1" relative-y="-30.00">
<syllabic>end</syllabic>
<text font-family="Nexa Text" font-weight="bold">it’s</text>
</lyric>, <lyric default-x="6.50" default-y="-86.26" number="1" relative-y="-30.00">
<syllabic>begin</syllabic>
<text font-family="Nexa Text" font-weight="bold">en</t

In [None]:


slide_output=[]
ln = 1
for line in slides:
    #Clear lyrics list
    measures = []
    measure_widths=[]
    running_offset =int(float(0))
    measures = line.find_all('measure')
    print("***Line", ln, end = "***\n\n")
    

    
    i=0
    for measure in measures:
        measure_widths.append(measure['width'])
        #current_measure_width =int(float(measure['width'])
        if i==0:
            running_offset =int(float(0)) 
        else:
            running_offset +=int(float(measure_widths[i-1]))

        i+=1

        lyrics=[]
        lyrics = measure.find_all('lyric')

        for lyric in lyrics:
            try:
                if (note_x := lyric.parent['default-x']) != None:
                    print(note_x)
                    note_x =int(float(note_x))
            except KeyError:
                print("No default-x on note")
                note_x =int(float(0))
            try:
                if (lyric_x := lyric['default-x']) != None:
                    print(lyric_x)
                    lyric_x =int(float(lyric_x))
            except KeyError:
                print("No default-x on lyric")
                lyric_x =int(float(0))
            
            tab_offset = note_x + lyric_x
            print("Offset from left edge of measure: ", tab_offset)
            absolute_tab_stop = running_offset + tab_offset

            lyric_text = lyric.find('text').text
            print("\""+lyric_text + "\" at tab stop: ", absolute_tab_stop,"\n")
            #Add text and tab stop value to running lyrics list
            lyrics_line.append([lyric_text,absolute_tab_stop])

    #Add slide lyrics info to master slide list
    slide_output.append(lyrics_line)
    ln+=1

print(len(slide_output), "slides")
print("Slide", n := randint(0,len(slide_output),), "lyric", l:= randint(0,len(slide_output[n])), ":", slide_output[n][l][0])


    
    

***Line 1***

82.98
6.50
Offset from left edge of measure:  88
"This" at tab stop:  88 

20.89
6.50
Offset from left edge of measure:  26
"is" at tab stop:  201 

157.14
6.50
Offset from left edge of measure:  163
"the" at tab stop:  338 

242.29
No default-x on lyric
Offset from left edge of measure:  242
"Spir" at tab stop:  417 

412.60
6.50
Offset from left edge of measure:  418
"it’s" at tab stop:  593 

29.29
6.50
Offset from left edge of measure:  35
"en" at tab stop:  709 

172.32
6.50
Offset from left edge of measure:  178
"try" at tab stop:  852 

261.72
22.15
Offset from left edge of measure:  283
"now:" at tab stop:  957 

440.52
6.50
Offset from left edge of measure:  446
"the" at tab stop:  1120 

33.80
6.50
Offset from left edge of measure:  39
"wa" at tab stop:  1244 

171.67
6.50
Offset from left edge of measure:  177
"ter" at tab stop:  1382 

257.84
17.26
Offset from left edge of measure:  274
"and" at tab stop:  1479 

430.18
6.50
Offset from left edge of measure:  