In [5]:
from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from langchain.prompts import PromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from langchain.pydantic_v1 import BaseModel, Field, validator
# Decorator for automatic retry requests
@retry(
    # Retry both on OpenAI API failures and on errors raised while parsing the model output
    retry=retry_if_exception_type(
        (
            openai.APIError,
            openai.APIConnectionError,
            openai.Timeout,
            ValueError,
            SyntaxError,
            KeyError,
        )
    ),
    # Randomized exponential backoff between attempts, capped at 60
    wait=wait_random_exponential(multiplier=1, max=60),
    # Give up after the 10th attempt
    stop=stop_after_attempt(10),
)
def run_llm_chain(hub_chain, user_input, parser):
    """Run the chain once on ``user_input`` and return the parsed result.

    Args:
        hub_chain: Chain object exposing ``run(input=...)``.
        user_input: Value passed to the chain as its ``input`` variable.
        parser: Object exposing ``parse(text)``; it receives the raw chain output.

    Returns:
        Whatever ``parser.parse`` returns for the chain output.

    The surrounding ``@retry`` decorator re-invokes this function when one of
    the listed exception types is raised, for at most 10 attempts.
    """
    return parser.parse(hub_chain.run(input=user_input))

In [6]:
from pydantic import BaseModel, Field, conlist
from typing import List, Optional, Tuple
# Structured result requested from the LLM: a list of story parameters plus the
# story written for them. Used as the `pydantic_object` of the
# PydanticOutputParser built in `createDataset`.
# NOTE(review): documented with `#` comments on purpose — a class docstring may
# be surfaced by pydantic as the schema `description` and would then change the
# format instructions injected into the prompt; confirm before converting.
class OutputResult(BaseModel):
    # Story parameters: a list of strings constrained to 3..5 items by `conlist`.
    key: conlist(str, min_length=3, max_length=5) = Field(description="The key with the story parameters. Must contain between 3 and 5 parameters")
    # The story text generated for the parameters in `key`.
    story:str = Field(description="The generated story for the given key")


In [7]:
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from langchain.prompts import PromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator


def createDataset(iterations, key, story_type, instructions,style) -> pd.DataFrame:
    """Generate a small dataset of (story parameters, story) pairs with an LLM.

    For each iteration the same request is sent to ``gpt-3.5-turbo`` through
    ``run_llm_chain`` and the reply is parsed into an ``OutputResult``.

    Args:
        iterations: Number of generation attempts to make.
        key: Text inserted between ``[...]`` in the request, describing how the
            generated key should be built.
        story_type: Kind of text to generate (inserted into the request).
        instructions: Additional instructions appended to the request.
        style: Extra style guidance appended to the request; it is not stored
            in the returned frame.

    Returns:
        A DataFrame with the columns ``keywords``, ``story_type``,
        ``instructions``, ``generated_key`` and ``generated_value``; one row
        per iteration that produced a usable result. Iterations that fail are
        reported on stdout and skipped, so the frame may have fewer than
        ``iterations`` rows.

    Side effects:
        Reads ``OPENAI_API_KEY`` from the environment, assigns it to
        ``openai.api_key`` and prints progress information.
    """
    from langchain.chat_models import ChatOpenAI
    from tenacity import RetryError

    row_columns = ['keywords', 'story_type', 'instructions', 'generated_key', 'generated_value']

    # initialize the model (bound to `llm` so the `openai` module is not shadowed)
    api_key = os.getenv("OPENAI_API_KEY")
    openai.api_key = api_key
    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=api_key,
        temperature=1.5
    )

    parser = PydanticOutputParser(pydantic_object=OutputResult)

    template = """You are a helpful assistant great in story telling. You are very diverse and creative. You need to generate a dataset where the key would be generated values string representing the story parameters according to the user given instructions, and the value will be a story written given this key. Transform the output into structured object given those instructions: {format_instructions} 
    User: {input}\n
    AI: """

    # Prompt with the parser's format instructions pre-filled; only `input` varies
    prompt_template = PromptTemplate(
        template=template,
        input_variables=["input"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )

    f_prompt = "Generate a tuple with first part a key built like this [{key}], and the value in the tuple will be an entire {type} of maximum 100 words .{instructions}. {style}"
    user_input = f_prompt.format(key=key, type=story_type,instructions = instructions,style=style)

    # The chain does not depend on the loop variable, so it is built once
    hub_chain = LLMChain(prompt=prompt_template,llm=llm,verbose=True)

    # Rows are collected in a list and turned into a DataFrame once at the end
    rows = []
    for attempt in range(iterations):
        try:
            parsed_result = run_llm_chain(hub_chain,user_input,parser)
        except RetryError as e:
            # All retries were exhausted for this sample: skip it and keep the
            # rows generated so far instead of aborting the whole run.
            print(f"Skipping iteration {attempt}: {e.last_attempt.exception()!r}")
            continue

        # Extract the key and the story as strings
        try:
            print(parsed_result)
            first_string = ", ".join(parsed_result.key) if parsed_result.key else 'Not specified'
            second_string = parsed_result.story if parsed_result.story else 'Not specified'
            print(f"""
                    key: {first_string}
                    story: {second_string}
                """)
        except Exception as e:
            print(e)
            continue

        print('first string:',first_string)
        print('second string:',second_string)

        rows.append({
            'keywords':key,
            'story_type':story_type,
            'instructions':instructions,
            'generated_key':first_string,
            'generated_value': second_string
        })

    # Passing `columns` keeps the schema stable even when no row was produced
    return pd.DataFrame(rows, columns=row_columns)

In [8]:
# Request 50 generation attempts; arguments are passed by name for readability
another_df = createDataset(
    iterations=50,
    key='magical character, location, adventure',
    story_type='fairytale story',
    instructions='detailed description of the specified adventure of our magical character taking place in the given location',
    style='be very diverse and creative in the charachters and locations which you generate',
)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful assistant great in story telling. You are very diverse and creative. You need to generate a dataset where the key would be generated values string representing the story parameters according to the user given instructions, and the value will be a story written given this key. Transform the output into structured object given those instructions: The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"key": {"description": "The key with the story parameters. Must contain between 3 and 5

In [9]:
# Display the generated dataset (bare last expression -> rich DataFrame rendering)
another_df

Unnamed: 0,keywords,story_type,instructions,generated_key,generated_value
0,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"unicorn, enchanted forest, rescue mission, spe...","Once upon a time, in the mystical enchanted fo..."
1,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"Fairy princess, Enchanted forest, Seeking the ...","In the heart of the Enchanted forest, the Fair..."
2,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"Pixie, Enchanted Forest, Mystical Quest","In the heart of the Enchanted Forest, a mischi..."
3,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"Humble elf, Enchanted forest, Epic journey",Once upon a time in the mysterious realm of th...
4,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"dragons, enchanted forest, treasure hunt",In a magical land where dragons soared through...
5,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"mermaid, enchanted forest, hide and seek with ...","Once upon a time in an enchanted forest, a pla..."
6,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"Enchanting Wizard, Crystal Cavern, Epic Expedi...","Once upon a time, the Enchanting Wizard embark..."
7,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"elf, enchanted forest, ancient curse, jewelry","Once upon a time in the enchanted forest, the ..."
8,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"enchanting unicorn, enchanted forest, quest","In the depths of the enchanted forest, the enc..."
9,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"magical character, location, adventure","Once upon a time, in a mystical land called [l..."


In [10]:
# Destination of the exported CSV file
csv_file_path = '../../datasets/evaluation/elaborate_fairytale.csv'

# Keep only the generated pair and expose it under the names 'parameters' and 'text'
selected_columns_df = another_df[['generated_key', 'generated_value']].rename(
    columns={'generated_key': 'parameters', 'generated_value': 'text'}
)

# Write the two renamed columns to disk, without the index column
selected_columns_df.to_csv(csv_file_path, index=False)

print(f"Selected columns saved to {csv_file_path}")

Selected columns saved to ../../datasets/evaluation/elaborate_fairytale.csv
