In [16]:
from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain.pydantic_v1 import BaseModel, Field, validator
# Retry policy for a single LLM request followed by parsing of its output.
@retry(
    # Exception classes that cause another attempt to be made.
    retry=retry_if_exception_type(
        (
            openai.APIError,
            openai.APIConnectionError,
            openai.Timeout,
            ValueError,
            SyntaxError,
            KeyError,
        )
    ),
    # Randomised exponential backoff between attempts (multiplier=1, max=60).
    wait=wait_random_exponential(multiplier=1, max=60),
    # Give up once 10 attempts have been made.
    stop=stop_after_attempt(10),
)
def run_llm_chain(hub_chain, user_input, parser):
    """Run the chain on ``user_input`` and return the parsed output.

    Args:
        hub_chain: Object exposing ``run(query=...)``; called once per attempt.
        user_input: Value forwarded to the chain as the ``query`` argument.
        parser: Object exposing ``parse(text)``; applied to the chain's output.

    Returns:
        Whatever ``parser.parse`` returns for the chain's raw output.
    """
    raw_output = hub_chain.run(query=user_input)
    return parser.parse(raw_output)

In [17]:
from pydantic import BaseModel, Field, conlist
from typing import List, Optional, Tuple
# Schema of one generated sample. It is handed to PydanticOutputParser in
# createDataset, which uses it both to build the format instructions placed in
# the prompt and to parse the model's reply.
# NOTE(review): documented with `#` comments on purpose - a class docstring
# would presumably be emitted as a "description" in the generated JSON schema
# and therefore change the prompt; verify before adding one.
class OutputResult(BaseModel):
    # List of strings describing the story parameters; constrained to 3-5 items.
    # The Field description is part of the schema shown to the model.
    key: conlist(str, min_length=3, max_length=5) = Field(description="The key with the story parameters. Must contain between 3 and 5 parameters")
    # The story text generated for `key`.
    story:str = Field(description="The generated story for the given key")


In [18]:
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field, validator


def _result_to_row(parsed_result, key, story_type, instructions) -> dict:
    """Flatten one parsed LLM result plus the request parameters into a row dict."""
    generated_key = ", ".join(parsed_result.key) if parsed_result.key else 'Not specified'
    generated_value = parsed_result.story if parsed_result.story else 'Not specified'
    return {
        'keywords': key,
        'story_type': story_type,
        'instructions': instructions,
        'generated_key': generated_key,
        'generated_value': generated_value,
    }


def createDataset(iterations, key, story_type, instructions,
                  model_name="gpt-3.5-turbo", temperature=1.5, verbose=True) -> pd.DataFrame:
    """Generate ``iterations`` (key, story) samples with an OpenAI chat model.

    The same prompt is sent once per iteration; each reply is parsed into an
    ``OutputResult`` and stored as one row of the returned frame.

    Args:
        iterations: Number of requests to make. Zero or a negative number
            returns an empty frame without building the model.
        key: Comma-separated description of the parameters the generated key
            should contain; inserted into the prompt and stored in ``keywords``.
        story_type: Kind of text to generate; inserted into the prompt and
            stored in ``story_type``.
        instructions: Extra instructions appended to the prompt and stored in
            ``instructions``.
        model_name: Chat model name passed to ``ChatOpenAI``.
        temperature: Sampling temperature passed to ``ChatOpenAI``.
        verbose: When true, the chain runs in verbose mode and each parsed
            result is printed.

    Returns:
        DataFrame with columns ``keywords``, ``story_type``, ``instructions``,
        ``generated_key`` and ``generated_value``; one row per parsed result.

    Raises:
        Any exception raised by ``run_llm_chain`` propagates to the caller;
        rows collected so far are not returned in that case.
    """
    columns = ['keywords', 'story_type', 'instructions', 'generated_key', 'generated_value']

    # Nothing to generate: skip model construction and still return the full schema
    # so downstream column selection keeps working.
    if iterations <= 0:
        return pd.DataFrame(columns=columns)

    # ChatOpenAI is not part of the cell-level imports, so it is imported here.
    from langchain.chat_models import ChatOpenAI

    # initialize the model
    api_key = os.getenv("OPENAI_API_KEY")
    # Kept from the original implementation: also sets the key on the openai module.
    openai.api_key = api_key
    llm = ChatOpenAI(
        model_name=model_name,
        openai_api_key=api_key,
        temperature=temperature,
    )

    parser = PydanticOutputParser(pydantic_object=OutputResult)

    prompt = PromptTemplate(
        template="Generate a tuple where the key is a string representing the story parameters according to the user given instructions, and the value will be a story written given this key. Transform the output into structured object given those instructions: {format_instructions}.\n Here are the user instructions: {query}\n",
        input_variables=["query"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )

    f_prompt = "Generate a tuple with a key built like this [{key}], and the value in the tuple will be an entire {type} of maximum 100 words .{instructions}."
    user_input = f_prompt.format(key=key, type=story_type, instructions=instructions)

    # The chain does not depend on the loop variable, so build it once.
    hub_chain = LLMChain(prompt=prompt, llm=llm, verbose=verbose)

    # Collect plain dicts and build the frame once instead of concatenating
    # a one-row DataFrame per iteration.
    records = []
    for _ in range(iterations):
        parsed_result = run_llm_chain(hub_chain, user_input, parser)

        try:
            row = _result_to_row(parsed_result, key, story_type, instructions)
        except (AttributeError, TypeError) as e:
            # Best effort: a result without usable key/story fields is reported and skipped.
            print(e)
            continue

        if verbose:
            print(parsed_result)
            print('first string:', row['generated_key'])
            print('second string:', row['generated_value'])

        records.append(row)

    return pd.DataFrame(records, columns=columns)

In [19]:
# Generate 50 fairytale samples; arguments are named to make the call self-describing.
another_df = createDataset(
    iterations=50,
    key='magical character, location, adventure',
    story_type='fairytale story',
    instructions='detailed description of the specified adventure of the magical character taking place in the given location',
)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGenerate a tuple where the key is a string representing the story parameters according to the user given instructions, and the value will be a story written given this key. Transform the output into structured object given those instructions: The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"key": {"description": "The key with the story parameters. Must contain between 3 and 5 parameters", "items": {"type": "string"}, "maxItems": 5, "minItems": 3, "title": "Key", "type": "array"}, "story": {"desc

In [20]:
another_df

Unnamed: 0,keywords,story_type,instructions,generated_key,generated_value
0,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"fairy, enchanted forest, rescue mission","Once upon a time in the enchanted forest, twin..."
1,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"magical character, location, adventure","Once upon a time, a magical character named {{..."
2,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"Brave Unicorn, Enchanted Forest, Quest","In the heart of the Enchanted Forest, the Brav..."
3,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"fairytale, enchanted forest, rescue mission","Once upon a time, in an enchanted forest, ther..."
4,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"elf, enchanted forest, saves the kingdom","In the heart of the enchanted forest, an elf n..."
5,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"fairy, enchanted forest, rescuing a lost unicorn","Once upon a time, in the heart of the enchante..."
6,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"magical character, location, adventure","Once upon a time, the magical character embark..."
7,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"Poppy the Unicorn, Enchanted Forest, Saving th...",In the depths of the Enchanted Forest lived Po...
8,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"magical character, location, adventure","Once upon a time, the mystical [magical charac..."
9,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"fairy, enchanted forest, ancient tree, dragon ...","In the depths of the enchanted forest, the fai..."


In [22]:
# Keep only the generated columns and rename them to 'parameters' and 'text'.
# rename() returns a new frame, so another_df itself is left untouched.
selected_columns_df = another_df[['generated_key', 'generated_value']].rename(
    columns={'generated_key': 'parameters', 'generated_value': 'text'}
)

# Destination of the exported CSV (relative path).
csv_file_path = '../../datasets/evaluation/simple_fairytale.csv'

# to_csv does not create missing folders, so make sure the target directory exists.
os.makedirs(os.path.dirname(csv_file_path), exist_ok=True)

# Write without the index so the file contains exactly the two renamed columns.
selected_columns_df.to_csv(csv_file_path, index=False)

print(f"Selected columns saved to {csv_file_path}")

Selected columns saved to ../../datasets/evaluation/simple_fairytale.csv
