In [None]:
from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain.pydantic_v1 import BaseModel, Field, validator
# Decorator for automatic retry requests
@retry(
    # Give up after ten attempts in total.
    stop=stop_after_attempt(10),
    # Random exponential backoff between attempts, capped at 60 seconds.
    wait=wait_random_exponential(multiplier=1, max=60),
    # Only these OpenAI client errors trigger a retry; anything else propagates.
    retry=retry_if_exception_type(
        (
            openai.error.APIError,
            openai.error.APIConnectionError,
            openai.error.RateLimitError,
            openai.error.ServiceUnavailableError,
            openai.error.Timeout,
        )
    ),
)
def run_llm_chain(hub_chain, user_input):
    """Run ``hub_chain`` on ``user_input``, echo the raw result and return it.

    Args:
        hub_chain: object exposing ``run(input=...)`` (the callers in this
            notebook pass an ``LLMChain``).
        user_input: value forwarded to the chain as its ``input`` variable.

    Returns:
        Whatever ``hub_chain.run`` returns.
    """
    result = hub_chain.run(input=user_input)
    print(result)
    return result

In [None]:
from pydantic import BaseModel, Field, conlist
from typing import List, Optional, Tuple
class OutputResult(BaseModel):
    # Structured form of one generated dataset row: the parameter key plus the
    # story written for it.
    # Documented with comments rather than a docstring on purpose: pydantic
    # copies a model docstring into the JSON schema "description", which
    # presumably would change the format instructions built from this model.
    key: conlist(str, min_length=3, max_length=5) = Field(
        description="The key with the story parameters. Must contain between 3 and 5 parameters",
    )
    story: str = Field(
        description="The generated story for the given key",
    )


In [None]:
from pydantic import BaseModel, Field, conlist
from typing import List, Optional, Tuple
class InstructionResult(BaseModel):
    # Structured form of a generated "recipe" for a genre: the parameter names
    # (key) plus the instruction explaining how to turn them into a story.
    # Documented with comments rather than a docstring on purpose: pydantic
    # copies a model docstring into the JSON schema "description", which
    # presumably would change the format instructions built from this model.
    key: conlist(str, min_length=3, max_length=5) = Field(
        description="The key with the story parameters. Must contain between 3 and 5 parameters",
    )
    instruction: str = Field(
        description="Instruction on how to generate the story given the key",
    )

In [None]:
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field, validator


def createDataset(iterations, key, story_type, instructions, style="") -> pd.DataFrame:
    """Generate a synthetic (key, story) dataset with a few-shot gpt-3.5-turbo prompt.

    The same prompt is sent ``iterations`` times; every response that parses
    into an ``OutputResult`` becomes one row. Responses that fail to parse are
    reported and skipped, so the result may have fewer than ``iterations`` rows.

    Args:
        iterations: number of model calls to make.
        key: comma-separated description of the key parameters, inserted into
            the prompt as ``[key]``.
        story_type: kind of text to request (e.g. ``'news item'``).
        instructions: extra instructions appended to the prompt.
        style: optional trailing style guidance appended to the prompt.
            Defaults to an empty string so callers may omit it.

    Returns:
        DataFrame with columns ``keywords``, ``story_type``, ``instructions``,
        ``generated_key`` and ``generated_value``. The columns are present even
        when no response could be parsed.

    Side effects:
        Sets ``openai.api_key`` from the ``OPENAI_API_KEY`` environment variable
        and prints progress output.
    """
    import os
    import openai
    from langchain.chat_models import ChatOpenAI

    # initialize the model; kept under its own name so the `openai` module is
    # not shadowed by the chat model object
    openai.api_key = os.getenv("OPENAI_API_KEY")
    chat_model = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=openai.api_key,
        temperature=1.5
    )

    def _escape_braces(text):
        # The example text is embedded in a format-style template, so literal
        # JSON braces must be doubled to survive formatting.
        return text.replace("{", "{{").replace("}", "}}")

    # (user request, expected key, expected story) for each few-shot example
    example_specs = [
        (
            "Generate a tuple with first part a key built like this [age,gender, superpower], and the value in the tuple will be an entire story of maximum 100 words with detailed description for a super-hero with the given age, of the given gender and with the given superpower ",
            ["18", "man", "invisibility"],
            "A 18 year old man, tall with a strong yet athletic build. Noir eyes and light brown hair that seems to be a reflection of the warmth of his personality. His superpower of invisibility make him silent, introspective and observant. He knows when to be seen and when to remain invisible in the background; like a silent guardian protecting those around him. With a strong sense of justice and power, he is an invaluable asset to those he holds near and dear. His kind and compassionate spirit give him an aura of protectiveness, making him a person of strength and courage in difficult moments.",
        ),
        (
            "Generate a tuple with first part a key built like this [product ,theme, details], and the value in the tuple will be a gingle of maximum 100 words with commercial for the given product, in the given theme incorporating the provided details.",
            ["Whiskers", "happy", "cat food-holiday season price reductions-great for your cat"],
            "We are so happy to announce holiday discounts for the best cat food outhere! For happy and healthy cat choose Whiskers! Meow!",
        ),
        (
            "Generate a tuple with first part a key built like this [fictional character ,location, adventure], and the value in the tuple will be a story of maximum 100 words describing an adventure of the given fictional character in the provided location.",
            ["Baba Yaga", "Asia", "getting no respect"],
            "Once upon a time Baba Yaga wondered far far away from her home and ended up in remote Hokkaido island. She was used to locals showing her great respect out of fear and also because she was always one of the pillars of Slavic culture. But in Hokkaido the locals knew nothing about her, and she was very disappointed because they have shown her no respect. Eventually she decided there is no place like home and went back",
        ),
    ]
    examples = [
        {
            "input": example_input,
            "output": _escape_braces(
                OutputResult.model_validate(
                    {"key": example_key, "story": example_story}
                ).model_dump_json()
            ),
        }
        for example_input, example_key, example_story in example_specs
    ]

    # create a example template
    example_template = """
        User: {input}
        AI: {output}
    """
    # create a prompt example from above template
    example_prompt = PromptTemplate(
        input_variables=["input", "output"],
        template=example_template
    )

    parser = PydanticOutputParser(pydantic_object=OutputResult)

    # now break our previous prompt into a prefix and suffix
    # the prefix is our instructions
    prefix = """You are a helpful assistant great in story telling. You are very diverse and creative. You need to generate a dataset where the key would be generated values string representing the story parameters according to the user given instructions, and the value will be a story written given this key. Transform the output into structured object given those instructions: {format_instructions} Here are a few examples on how to generate the content of the dataset:
    """

    # and the suffix our user input and output indicator
    suffix = """
    User: {input}
    AI:"""

    # now create the few shot prompt template
    few_shot_prompt_template = FewShotPromptTemplate(
        examples=examples,
        example_prompt=example_prompt,
        prefix=prefix,
        suffix=suffix,
        input_variables=["input"],
        example_separator="\n\n",
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )

    f_prompt = "Generate a tuple with first part a key built like this [{key}], and the value in the tuple will be an entire {type} of maximum 100 words .{instructions}. {style}"
    user_input = f_prompt.format(key=key, type=story_type, instructions=instructions, style=style)

    # The chain does not depend on the iteration, so build it once.
    hub_chain = LLMChain(prompt=few_shot_prompt_template, llm=chat_model, verbose=True)

    # Collect plain dict rows and build the frame once at the end instead of
    # re-copying a growing DataFrame on every iteration.
    rows = []
    for i in range(iterations):
        output = run_llm_chain(hub_chain, user_input)
        try:
            parsed_result = parser.parse(output)
        except Exception as e:
            # Best effort: a malformed response costs one row, not the whole run.
            print(f"Iteration {i}: could not parse model output, skipping: {e}")
            continue

        first_string = ", ".join(parsed_result.key) if parsed_result.key else 'Not specified'
        print('first string:', first_string)

        second_string = parsed_result.story if parsed_result.story else 'Not specified'
        print('second string:', second_string)

        rows.append({
            'keywords': key,
            'story_type': story_type,
            'instructions': instructions,
            'generated_key': first_string,
            'generated_value': second_string,
        })

    return pd.DataFrame(
        rows,
        columns=['keywords', 'story_type', 'instructions', 'generated_key', 'generated_value'],
    )

In [None]:
def createInstructions(genre):
    """Ask gpt-3.5-turbo for story parameters and a generation instruction for ``genre``.

    A few-shot prompt requests parameter names and an instruction explaining
    how to build a text of the given genre from them; the response is parsed
    into an ``InstructionResult``.

    Args:
        genre: the kind of text to design parameters for (e.g. ``'news item'``).

    Returns:
        Tuple ``(key, instruction)`` where ``key`` is the parameter names joined
        with ``", "`` and ``instruction`` is the generated instruction text.

    Raises:
        ValueError: if the model output cannot be parsed into an
            ``InstructionResult`` (chained to the parser's own exception).

    Side effects:
        Sets ``openai.api_key`` from the ``OPENAI_API_KEY`` environment variable
        and prints the parsed result.
    """
    import os
    import openai
    from langchain.chat_models import ChatOpenAI

    # initialize the model; kept under its own name so the `openai` module is
    # not shadowed by the chat model object
    openai.api_key = os.getenv("OPENAI_API_KEY")
    chat_model = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=openai.api_key,
        temperature=1.5
    )

    def _escape_braces(text):
        # The example text is embedded in a format-style template, so literal
        # JSON braces must be doubled to survive formatting.
        return text.replace("{", "{{").replace("}", "}}")

    # (user request, expected key, expected instruction) for each few-shot example
    example_specs = [
        (
            "Please specify three parameters for sci-fi story, and provide instruction how to generate a story based on those parameters",
            ["hero name", "location the story takes place in", "general outline of the hero's adventure"],
            "please generate a sci-fi story, given the hero name, which takes place at the specified location and adheres to the general outline of the hero's adventure .",
        ),
        (
            "Please specify three parameters for food recipe, and provide instruction how to generate a recipe based on those parameters.",
            ["name of raw material", "cooking type (fried, baked etc)", "kitchen style (Asian, European etc)"],
            "please generate a recipe, given the raw material and the type of cooking, the recipe should belong to the specified kitchen style.",
        ),
        (
            "Please specify three parameters for a strength workout, and provide instruction how to generate a workout based on those parameters.",
            ["name of the muscle group (upper, lower, core)", "name of type of exercise (super sets, tri sets, isometric, eccentric)", "weight"],
            "Given the specified muscle group build a workout for this muscle group of the specified exercise type and with the given weights.",
        ),
    ]
    examples = [
        {
            "input": example_input,
            "output": _escape_braces(
                InstructionResult.model_validate(
                    {"key": example_key, "instruction": example_instruction}
                ).model_dump_json()
            ),
        }
        for example_input, example_key, example_instruction in example_specs
    ]

    # create a example template
    example_template = """
        User: {input}
        AI: {output}
    """
    # create a prompt example from above template
    example_prompt = PromptTemplate(
        input_variables=["input", "output"],
        template=example_template
    )

    parser = PydanticOutputParser(pydantic_object=InstructionResult)

    # now break our previous prompt into a prefix and suffix
    # the prefix is our instructions
    prefix = """You are a helpful story generation assistant. Given a story domain, please generate three parameters which could be used to generate distinct stories for this domain, along with instruction on how to use the parameters to generate the story. Transform the output into structured object given those instructions: {format_instructions} Here are a few examples on how to generate the content of the dataset:
    """

    # and the suffix our user input and output indicator
    suffix = """
    User: {input}
    AI:"""

    # now create the few shot prompt template
    few_shot_prompt_template = FewShotPromptTemplate(
        examples=examples,
        example_prompt=example_prompt,
        prefix=prefix,
        suffix=suffix,
        input_variables=["input"],
        example_separator="\n\n",
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )

    f_prompt = "Please specify three parameters for a {genre} , and provide instruction how to generate a {genre} based on those parameters "
    user_input = f_prompt.format(genre=genre)

    hub_chain = LLMChain(prompt=few_shot_prompt_template, llm=chat_model, verbose=True)
    output = run_llm_chain(hub_chain, user_input)

    try:
        parsed_result = parser.parse(output)
    except Exception as e:
        # Previously the error was printed and execution continued into code
        # that reads `parsed_result`, which is unbound on this path. Fail with
        # the real cause instead.
        raise ValueError(
            f"Could not parse the model output for genre {genre!r} into an InstructionResult"
        ) from e

    key = ", ".join(parsed_result.key) if parsed_result.key else 'Not specified'
    print('key:', key)

    instruction = parsed_result.instruction if parsed_result.instruction else 'Not specified'
    print('instruction:', instruction)

    return key, instruction

In [None]:
# Try createInstructions on a few genres and print what it returns for each.
# `key` and `instruction` keep the values of the last genre afterwards.
for genre_name in ('fairytale story', 'commercial jingle', 'news item'):
    key, instruction = createInstructions(genre_name)
    print('key: ', key, ' ,instructions: ', instruction)


In [None]:
# Generate 10 news items from a hand-written key and instruction.
df = createDataset(
    iterations=10,
    key='news agency, location, news item',
    story_type='news item',
    instructions='The news item will be a report of the given news agency regarding the provided location and revolving around the news item which takes place in that location',
    style='Be very precise and stick to the facts with the key parameters values.',
)

In [None]:
def generateDataset(genre,size, style) ->pd.DataFrame:
    """Build a dataset for ``genre`` end to end.

    First asks ``createInstructions`` for a key and instruction matching the
    genre, then passes both to ``createDataset``.

    Args:
        genre: kind of text to generate; also used as the dataset's story type.
        size: number of generation iterations passed to ``createDataset``.
        style: style guidance passed through to ``createDataset``.

    Returns:
        The DataFrame returned by ``createDataset``.
    """
    generated_key, generated_instruction = createInstructions(genre)
    return createDataset(size, generated_key, genre, generated_instruction, style)
    

In [None]:
# Same news-item dataset as above, but with the key and instruction generated
# by the model instead of written by hand.
newsItems_df = generateDataset(
    genre='news item',
    size=10,
    style='Be very precise and stick to the facts with the key parameters values.',
)

In [None]:
# Display the dataset produced by generateDataset.
newsItems_df

In [None]:
# Display `df`, the news-item dataset built earlier with the hand-written key.
df

In [None]:
# Workout-plan dataset.
# NOTE(review): the following cell rebinds `another_df`, so this result is
# lost on a top-to-bottom run unless it is saved first.
another_df = createDataset(
    iterations=10,
    key='muscle group, training type, training weight',
    story_type='a workout plan',
    instructions='The workout plan will be for the specified muscle group with the given training type (such as supersets, isometric workout, unilateral workout, eccentric workout etc.) and the given weight',
    style='Be very precise and stick to the facts with the key parameters values.',
)

In [None]:
# Sci-fi story dataset (reuses the name `another_df`).
another_df = createDataset(
    iterations=10,
    key='fictional protagonist, fictional realm, adventure outline',
    story_type='a sci-fi story',
    instructions='a sci-fi story about the fictional protagonist , taking place in the described realm and based on the given adventure outline',
    style='Be very creative and diverse with the key parameters values, they should not be straightforward but imaginative and diverse',
)

In [None]:
# Commercial-jingle dataset.
gingle_df = createDataset(
    iterations=10,
    key='product name, catchy slogan, style (lyrical, satirical, funny)',
    story_type='commercial jingle',
    instructions='commercial jingle using the specified product name and a catchy slogan. The style of the jingle should align with the given preference to create a catchy jingle to promote the product.',
    style='Be very creative and diverse with the key parameters values, they should not be straightforward but imaginative and diverse',
)

In [None]:
# Display the generated jingle dataset.
gingle_df

In [None]:
# Persist the jingle dataset. The path is relative to the notebook's working
# directory; the DataFrame index is written too because `index` is not disabled.
gingle_df.to_csv("../datasets/jingle_generated_dataset.csv")

In [None]:
# Large fairytale dataset (5000 model calls - expensive; reuses `another_df`).
# `style` is passed explicitly: the original call omitted it although
# createDataset takes it as a positional parameter. An empty style adds no
# extra guidance to the prompt.
another_df = createDataset(
    iterations=5000,
    key='magical character, location, adventure',
    story_type='fairytale story',
    instructions='detailed description of the specified adventure of our magical character taking place in the given location',
    style='',
)

In [None]:
# Display whatever dataset `another_df` currently holds.
another_df

In [None]:
# Persist `another_df` (index included, since `index` is not disabled).
# NOTE(review): the file name says "workout", but the cells that assign
# `another_df` most recently before this one request sci-fi and fairytale
# datasets - confirm which dataset is meant to be saved here.
another_df.to_csv("../datasets/workout_generated_dataset.csv")

### Invocation of the model only with prompt engineering, no fine-tuning

In [None]:
# Baseline: call a stock completion model with a descriptive prompt built from
# each generated key (no fine-tuning), and store the responses in a CSV.
#
# The active code uses the legacy (pre-1.0) openai client (`openai.Completion`).
# With v1 of the openai package (pypi.org/project/openai) it would start from:
#from openai import OpenAI
#client = OpenAI()
# NOTE(review): "text-davinci-003" may no longer be served by OpenAI - confirm
# the model is still available before re-running.
openai.api_key = os.getenv("OPENAI_API_KEY")

f_prompt = "Given the following parameters: news agency: {news_agency}, location: {location} and news item: {news_item}, generate an elaborate report of that news agency revolving around the provided news item in the provided location. The report should be maximum 100 words "
f_sub_prompt = "{news_agency},  {location}, {news_item}"

df = pd.read_csv("generated_dataset.csv")
prepared_data = df.loc[:, ['generated_key', 'generated_value']]

# Collect rows as dicts and build the frame once at the end instead of
# re-copying a growing DataFrame on every iteration.
prompt_eng_rows = []
for key in prepared_data['generated_key'].head(10):
    # Keys are expected to look like "agency, location, item". Skip anything
    # that cannot be split that way instead of failing mid-run.
    key_parts = key.split(", ") if isinstance(key, str) else []
    if len(key_parts) < 3:
        print(f"Skipping key without three ', '-separated parts: {key!r}")
        continue
    news_agency, location, news_item = key_parts[:3]

    prompt = f_prompt.format(news_agency=news_agency, location=location, news_item=news_item)
    sub_prompt = f_sub_prompt.format(news_agency=news_agency, location=location, news_item=news_item)
    print(sub_prompt)
    print(prompt)
    #response = client.completions.create(
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=1,
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stop=["END"]
    )

    first_choice = response['choices'][0]
    finish_reason = first_choice['finish_reason']
    response_txt = first_choice['text']

    prompt_eng_rows.append({
        'news_agency': news_agency,
        'location': location,
        'news_item': news_item,
        'prompt': prompt,
        'sub_prompt': sub_prompt,
        'response_txt': response_txt,
        'finish_reason': finish_reason,
    })

new_df = pd.DataFrame(
    prompt_eng_rows,
    columns=['news_agency', 'location', 'news_item', 'prompt', 'sub_prompt', 'response_txt', 'finish_reason'],
)
new_df.to_csv("de_vinci_prompt_eng.csv")

### Fine tuning the model with the dataset

In [None]:
import pandas as pd
import openai
import subprocess

# Turn the generated dataset into the prompt/completion format expected by the
# fine-tuning tools, then launch the fine-tune through the openai CLI.
df = pd.read_csv("generated_dataset.csv")

# Chained rename (rather than inplace=True) so the column selection and the
# renaming produce one new frame and the cell can be re-run safely.
prepared_data = (
    df.loc[:, ['generated_key', 'generated_value']]
    .rename(columns={'generated_key': 'prompt', 'generated_value': 'completion'})
)
prepared_data.to_csv('prepared_data.csv', index=False)


## prepared_data.csv --> prepared_data_prepared.jsonl
# Arguments are given as explicit lists: splitting a command string on
# whitespace keeps shell-style quotes as literal characters of the argument.
# check=True stops the cell if a CLI step fails instead of continuing silently.
subprocess.run(
    ['openai', 'tools', 'fine_tunes.prepare_data', '--file', 'prepared_data.csv', '--quiet'],
    check=True,
)

## Start fine-tuning
subprocess.run(
    [
        'openai', 'api', 'fine_tunes.create',
        '--training_file', 'prepared_data_prepared.jsonl',
        '--model', 'davinci',
        '--suffix', 'StoryGenerationThemed',
    ],
    check=True,
)

### Invocation of the fine-tuned model

In [None]:
# Query the fine-tuned model with a bare "agency, location, item" key - the
# same shape as the prompts it was trained on - and print its completion.
#
# The active code uses the legacy (pre-1.0) openai client (`openai.Completion`).
# With v1 of the openai package (pypi.org/project/openai) it would start from:
#from openai import OpenAI
#client = OpenAI()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Inputs for this single query.
# NOTE(review): "Melburn" looks like a misspelling of "Melbourne" - confirm
# whether it is intentional before changing the prompt.
news_agency_input = "BBC"
location_input = "Melburn"
news_item_input = "shark attack"

f_prompt = "{news_agency}, {location}, {news_item}"
new_df = pd.DataFrame()
prompt = f_prompt.format(
    news_agency=news_agency_input,
    location=location_input,
    news_item=news_item_input,
)
print(prompt)

completion_params = dict(
    model="davinci:ft-personal:storygenerationthemed-2023-12-04-14-10-42",
    prompt=prompt,
    temperature=1,
    max_tokens=256,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
    stop=["END"],
)
response = openai.Completion.create(**completion_params)

# Only the first returned choice is inspected.
first_choice = response['choices'][0]
finish_reason = first_choice['finish_reason']
response_txt = first_choice['text']
print(response_txt)
