In [39]:
from pydantic import BaseModel, parse_obj_as, Field, constr
from lmformatenforcer import JsonSchemaParser
from lmformatenforcer.integrations.transformers import build_transformers_prefix_allowed_tokens_fn
from transformers import pipeline
import json

"""
NOTE: IF WANT TO CHANGE MUSIC PROMPT LOOK FOR **MUSIC_PROMPT** IN THE CODE
Short term attributes and long term attributes are captured for every text chunk passed in
flush attribute empties stored info and long_term_prompt
long term attributes is collected from the first text chunk passed in and is used as a prefix for all the short term attributes
short term attributes used in the prompt for a given chunk
"""

'\nNOTE: IF WANT TO CHANGE MUSIC PROMPT LOOK FOR **MUSIC_PROMPT** IN THE CODE\nShort term attributes and long term attributes are captured for every text chunk passed in\nflush attribute empties stored info and long_term_prompt\nlong term attributes is collected from the first text chunk passed in and is used as a prefix for all the short term attributes\nshort term attributes used in the prompt for a given chunk\n'

In [40]:



class DetailedInfo():
    def __init__(self, text, info, prompt=None): #, info, prompt, duration=None, timestamps=None):
        if isinstance(text, dict):
            self.text = text['text']
            self.duration = text['duration']
            self.timestamps = text['timestamp']
        else:
            self.text = text
            self.duration = None
            self.timestamps = None
        # self.info = info
        self.short_term = info.short_term if info is not None else None
        self.long_term = info.long_term if info is not None else None
        self.prompt = prompt

    def __call__(self):
        return self.prompt
    
    def to_dict(self):
        return {
            "text": self.text,
            "duration": self.duration,
            "timestamps": self.timestamps,
            "short_term": self.short_term.__dict__ if self.short_term is not None else None,
            "long_term": self.long_term.__dict__ if self.long_term is not None else None,
            "prompt": self.prompt
        }
       



In [41]:
class MusicGenInfo(BaseModel):
    class ShortTermAttributes(BaseModel):
        """
        Attributes that are collected for every text chunk passed in
        Note: the variable name influences what the LLM attempts to extract
        """
        tone: str
        intensity: str
        is_crescendo: bool
        volume: str
        
        def __str__(self):
            # **MUSIC_PROMPT**
            # formatting for the short term attribute prompt
            ret = [f"{self.tone} tone",
                   f"{self.intensity} intensity",
                   f"and {self.volume} volume",
                #    "with a crescendo" if self.is_crescendo else "with no crescendo"
                   ]
            ret = " ".join(ret)
            return ret
        
    class LongTermAttributes(BaseModel):
        """
        Attributes that are collected from the first text chunk passed in and is used as a prefix for all the short term attributes
        Technically these are collected for all chunks, but only the first chunk's attributes are used in prompts
        Note: the variable name influences what the LLM attempts to extract
        """
        instrumentation: str
        # short_ambient_music_setting: str
        # short_music_genre_setting: str
        # short_music_setting: str
        short_background_ambient_setting: str
        short_music_descriptors: str
        pitch: str
        beat: str
        is_major_key: bool
            
    
        def __str__(self):
            # **MUSIC_PROMPT**
            # Formatting for the long term attribute prefix
            ret = [
            # f"Ambient music with",
            # f"{self.short_music_setting} ambient music setting",
            f"{self.short_background_ambient_setting} ambient music setting",
            f"{self.instrumentation} instrumentals",
            # f"({self.short_music_descriptors})",
            # f"{self.pitch} pitch",
            # f"{self.beat} beat",
            "in a major key." if self.is_major_key else "in a minor key."
            ]
            ret = " ".join(ret)
            return ret
    short_term: ShortTermAttributes
    long_term: LongTermAttributes

In [42]:
class LLMPromptGenerator:
    def __init__(self, model_name="TheBloke/Llama-2-7b-Chat-GPTQ"): # device="cpu"
        self.model_name = model_name
        self.hf_pipeline = pipeline('text-generation', model=model_name, device_map='auto') # device=device)
        self.info = []
        self.long_term_prompt = None
        
    def generate_llm_prompt(self, text, music_gen_info=MusicGenInfo, prompt=None):
        # NOTE: Prompt expects two input locations for formatting purposes.
        if isinstance(text, list):
            return [self.generate_llm_prompt(t, music_gen_info, prompt=prompt) for t in text]
        
        if prompt is None:
            prompt = """
        {}\n\n In the following format {}, A piece music generated as background ambience for the above text would have these qualities: \n
        """
        prompt = prompt.format(text, music_gen_info.schema_json())
        
        
        # prompt = f"""
        # {text}\n\n In the following format {music_gen_info.schema_json()}, A piece music generated as background ambience for the above text would have these qualities: \n
        # """
        return prompt
    
    def extract_info(self, text, music_gen_info=MusicGenInfo, prompt=None, flush=False):
        ret_first = False
        
        if not isinstance(text, list):
            text = [text]
            ret_first = True
        if len(text) == 0:
            return
        
        text_raw = text
        # durations = [None for t in text]
        # timestamps = [None for t in text]
        
        if isinstance(text[0], dict):
            text_raw = [t['text'] for t in text]
            # durations = [t['duration'] for t in text]
            # timestamps = [t['timestamp'] for t in text]
        
        
        llm_prompts = self.generate_llm_prompt(text_raw, music_gen_info, prompt=prompt)
        parser = JsonSchemaParser(music_gen_info.schema())
        prefix_function = build_transformers_prefix_allowed_tokens_fn(self.hf_pipeline.tokenizer, parser)
        output_dict = self.hf_pipeline(llm_prompts, prefix_allowed_tokens_fn=prefix_function)
        
        if flush: # Empties the info list and long_term_prompt
            self.info = []
            self.long_term_prompt = None
            
        results = []
        # if isinstance(text, list):
        for i, p in enumerate(llm_prompts):
            results.append(json.loads(output_dict[i][0]['generated_text'][len(p):]))
            
        # info_formatted = parse_obj_as(list[MusicGenInfo], [r for r in results])
        info_formatted = parse_obj_as(list[music_gen_info], [r for r in results])
        
        info_detailed = [DetailedInfo(t, i) for t, i in zip(text, info_formatted)]
        
        self.info.extend(info_detailed)
        
        if ret_first:
            return results[0]
        
        return results
        # result = json.loads(output_dict[0]['generated_text'][len(llm_prompts):])
        # self.info.append(parse_obj_as(MusicGenInfo, result))
        # return result

    def prompts(self):
        if self.long_term_prompt is None:
            self.long_term_prompt = str(self.info[0].long_term)
        
        prompts = []
        for i in self.info:
            # **MUSIC_PROMPT**
            # Combining the long and short term portions
            new_prompt = f"{self.long_term_prompt} {str(i.short_term)}"
            i.prompt = new_prompt
            prompts.append(new_prompt)
        
        
        return prompts, self.info

In [43]:
# texts = ['Harry looked down in time to see a tiny, wrinkled, newborn bird poke its head out of the ashes. It was quite as ugly as the old one. "\'It\'s a shame you had to see him on a burning day,\' said Dumbledore, seating himself behind his desk. "\'He\'s really very handsome most of the time, wonderful red and gold plumage—facinating creatures, Phoenixes.', 
#          ' They can carry immensely heavy loads, their tears have healing powers, and they make highly faithful pets. In the shock of forks catching fire, Harry had forgotten what he was there for, but it all came back to him as Dumbledore settled himself in the high-backed chair behind the desk, and fixed Harry with his penetrating light blue stare.']

chunks = [
    {'timestamp': [0.0, 24.52], 
    'text': ' Harry looked down in time to see a tiny, wrinkled, newborn bird poke its head out of the ashes. It was quite as ugly as the old one. "\'It\'s a shame you had to see him on a burning day,\' said Dumbledore, seating himself behind his desk. "\'He\'s really very handsome most of the time, wonderful red and gold plumage—facinating creatures, Phoenixes.', 
    'duration': 24.52}, 
    {'timestamp': [24.52, 47.64], 
    'text': ' They can carry immensely heavy loads, their tears have healing powers, and they make highly faithful pets. In the shock of forks catching fire, Harry had forgotten what he was there for, but it all came back to him as Dumbledore settled himself in the high-backed chair behind the desk, and fixed Harry with his penetrating light blue stare.', 
    'duration': 23.12
    }
    ]


flush_extractor = True 

extractor = LLMPromptGenerator()


In [50]:
category_list = ["Dazzling",
"Dark",
"Awe-inspiring",
"Creepy",
"Complicated",
"Amusing",
"Addictive",
"Witty",
"Unique",
"Unexpected",
"Gripping",
"Astonishing"]
category_list = '|'.join(category_list)


genres = ["Fantasy",
"Adventure",
"Romance",
"Contemporary",
"Dystopian",
"Mystery",
"Horror",
"Thriller",
"Paranormal",
"Historical fiction",
"Science Fiction",
"Childrens"]
genres = '|'.join(genres)


feelings = ["love", "joy", "surprise", "anger", "sadness", "fear"]
feelings = '|'.join(feelings)


class MusicGenInfoModded(BaseModel):
    class ShortTermAttributes(BaseModel):
        """
        Attributes that are collected for every text chunk passed in
        Note: the variable name influences what the LLM attempts to extract
        """
        # tone: str
        # intensity: str
        # is_crescendo: bool
        # volume: str
        
        # genre: str
        one_sentence_description: str
        music_setting: str
        feeling: constr(pattern=feelings)
        is_setting_described: bool
        setting_description: str
        
        def __str__(self):
            # **MUSIC_PROMPT**
            # formatting for the short term attribute prompt
            # ret = [f"{self.tone} tone",
            #        f"{self.intensity} intensity",
            #        f"and {self.volume} volume",
            #     #    "with a crescendo" if self.is_crescendo else "with no crescendo"
            #        ]
            # ret = " ".join(ret)
            ret = str(vars(self))
            
            # ret = [f'Invoking a feeling of {self.feeling}']
            # ret = " ".join(ret)
            
            return ret
        
    class LongTermAttributes(BaseModel):
        """
        Attributes that are collected from the first text chunk passed in and is used as a prefix for all the short term attributes
        Technically these are collected for all chunks, but only the first chunk's attributes are used in prompts
        Note: the variable name influences what the LLM attempts to extract
        """
        instruments: str
        # short_ambient_music_setting: str
        # short_music_genre_setting: str
        # short_music_setting: str
        # short_background_ambient_setting: str
        # short_music_descriptors: str
        # pitch: str
        # beat: str
        # is_major_key: bool
        book_genre: constr(pattern=genres)
        # sentiment: str
        music_type: str
        # categories: ['Dark', 'Dazzling', 'Other']
        # categories: constr(regex='^(Dark|Dazzling|Other)$')
        # categories: constr(pattern=r'(Dark|Dazzling|Other)')
        descriptor: constr(pattern=category_list)
        one_sentence_soundscape: str
        
        
        # is_mystical: bool
#         is_dazzling
# Dark
# Awe-inspiring
# Creepy
# Complicated
# Amusing
# Addictive
# Witty
# Unique
# Unexpected
# Gripping
# Astonishing
            
    
        def __str__(self):
            # **MUSIC_PROMPT**
            # Formatting for the long term attribute prefix
            # ret = [
            # # f"Ambient music with",
            # # f"{self.short_music_setting} ambient music setting",
            # f"{self.short_background_ambient_setting} ambient music setting",
            # f"{self.instrumentation} instrumentals",
            # # f"({self.short_music_descriptors})",
            # # f"{self.pitch} pitch",
            # # f"{self.beat} beat",
            # "in a major key." if self.is_major_key else "in a minor key."
            # ]
            # ret = " ".join(ret)
            
            ret = str(vars(self))
            # ret = [f'{self.descriptor}',
            #        f'{self.book_genre}',
            #        f'{self.music_type}'
            #     #    f'{self.instruments}']
            # ]
            # ret = " ".join(ret)
            
            
            return ret
    short_term: ShortTermAttributes
    long_term: LongTermAttributes

In [51]:
prompt = """
    {}\n\n In this format {}, describe this book as it relates to these categories. Be creative and use interesting words.
    """

In [52]:
music_gen_info = MusicGenInfoModded

In [53]:
extracted_json = extractor.extract_info(chunks, flush=flush_extractor, music_gen_info=music_gen_info, prompt=prompt)
# Generate Prompts
prompts, info = extractor.prompts()

/tmp/ipykernel_4102509/4118056173.py:59: PydanticDeprecatedSince20: `parse_obj_as` is deprecated. Use `pydantic.TypeAdapter.validate_python` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/
  info_formatted = parse_obj_as(list[music_gen_info], [r for r in results])


In [54]:
for prompt in prompts:
    print(prompt)

{'instruments': 'flute', 'book_genre': 'Fantasy', 'music_type': 'Orchestral', 'descriptor': 'Dazzling', 'one_sentence_soundscape': 'A symphony of magic and wonder'} {'one_sentence_description': "The flute's melody soared through the forest, weaving a spell of enchantment around the listeners", 'music_setting': 'A clearing in a mystical forest', 'feeling': 'joy', 'is_setting_described': True, 'setting_description': 'The trees rustled with excitement as the music filled the air'}
{'instruments': 'flute', 'book_genre': 'Fantasy', 'music_type': 'Orchestral', 'descriptor': 'Dazzling', 'one_sentence_soundscape': 'A symphony of magic and wonder'} {'one_sentence_description': 'A young apprentice embarks on a quest to master their craft', 'music_setting': 'A mystical forest filled with ancient magic', 'feeling': 'joy', 'is_setting_described': True, 'setting_description': 'A place of ancient magic and wonder'}


In [55]:
# {'instruments': 'piano', 'book_genre': 'Fantasy', 'music_type': 'Orchestral', 'descriptor': 'Dazzling'} {'one_sentence_description': 'A young boy discovers he is the chosen one, destined to save the wizarding world from darkness', 'music_setting': 'In a dimly lit, misty forest', 'feeling': 'love'}
# {'instruments': 'piano', 'book_genre': 'Fantasy', 'music_type': 'Orchestral', 'descriptor': 'Dazzling'} {'one_sentence_description': 'A young boy discovers he can play the piano with incredible skill, transporting himself and those around him to magical worlds', 'music_setting': 'In a dimly lit, cozy room filled with the soft glow of candles and the rich scent of old books', 'feeling': 'joy'}