In [1]:
from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain.pydantic_v1 import BaseModel, Field, validator
# Retry policy: on any of the listed OpenAI exception types, call the function
# again after a randomized exponential wait (capped at 60), for at most 10 attempts.
@retry(
    stop=stop_after_attempt(10),
    wait=wait_random_exponential(multiplier=1, max=60),
    retry=retry_if_exception_type((openai.OpenAIError, openai.APIConnectionError, openai.Timeout)),
)
def run_llm_chain(hub_chain, user_input):
    """Send ``user_input`` through ``hub_chain`` and return whatever the chain produces.

    Args:
        hub_chain: Object exposing ``run(input=...)`` (the callers here pass an ``LLMChain``).
        user_input: Fully rendered prompt text, forwarded as the chain's ``input`` variable.

    Returns:
        The value returned by ``hub_chain.run``.
    """
    return hub_chain.run(input=user_input)

In [2]:
from pydantic import BaseModel, Field, conlist
from typing import List, Optional, Tuple
class OutputResult(BaseModel):
    # Structured result handed to PydanticOutputParser elsewhere in this notebook:
    # a list of key parameters plus the text generated for that key.
    # NOTE(review): documented with `#` comments rather than a class docstring on purpose;
    # pydantic is understood to copy a model docstring into the JSON schema, which would
    # change the format instructions embedded in the prompts. Confirm before converting.

    # List of strings, constrained by conlist to between 3 and 5 entries.
    key: conlist(str, min_length=3, max_length=5) = Field(description="The key with the story parameters. Must contain between 3 and 5 parameters")
    # Free-text value generated for the key above.
    story:str = Field(description="The generated story for the given key")


In [9]:
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field, validator


def createDatasetSimple(key, story_type, instructions) -> str:
    """Ask the chat model for a key/value pair and return its raw text reply.

    Args:
        key: Description of the key parts; it is placed inside ``[...]`` in the prompt
            (e.g. ``'news agency, location, news item'``).
        story_type: What the value of the pair should be (e.g. ``'news item'``).
        instructions: Extra free-text guidance appended to the prompt.

    Returns:
        The unparsed string produced by the LLM chain. No DataFrame is built here,
        so the return annotation is ``str``.

    Raises:
        Whatever ``run_llm_chain`` raises is propagated unchanged.
    """
    import os
    import openai
    from langchain.chat_models import ChatOpenAI

    # The module-level key is still set, as before, but the model gets its own
    # name so the local ``openai`` module reference is no longer overwritten.
    api_key = os.getenv("OPENAI_API_KEY")
    openai.api_key = api_key
    chat_model = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=api_key,
        temperature=1.5,
    )

    f_prompt = "Generate a pair with a key built like this [{key}], and the value in the pair will be an entire {type}.{instructions}."
    user_input = f_prompt.format(key=key, type=story_type, instructions=instructions)

    # The prompt text is fully rendered above; the template only passes it through.
    prompt_template = PromptTemplate(
        template="{input}",
        input_variables=["input"],
    )

    hub_chain = LLMChain(prompt=prompt_template, llm=chat_model, verbose=True)
    return run_llm_chain(hub_chain, user_input)
            

In [10]:
# Run the plain prompt variant for the news-report scenario and display the raw reply.
output = createDatasetSimple(
    key='news agency, location, news item',
    story_type='news item',
    instructions='The news item will be a report of the given news agency regarding the provided location and revolving around the news item which takes place in that location',
)
output



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGenerate a pair with a key built like this [news agency, location, news item], and the value in the pair will be an entire news item.The news item will be a report of the given news agency regarding the provided location and revolving around the news item which takes place in that location.[0m

[1m> Finished chain.[0m


'Pair 1:\n\nKey: [Reuters, New York, Earthquake]\nValue: \n"EARTHQUAKE HITS NEW YORK CITY\n\nAccording to witnesses and local officials, a powerful earthquake rocked New York City earlier today. The earthquake, estimated to have a magnitude of 6.5, caused widespread panic as buildings shook and debris fell in various parts of the city. Emergency services are currently assessing the damage and attending to any injured individuals. Residents are urged to stay clear of damaged structures and follow any advisory issued by authorities. More updates on this developing situation will be provided as they become available."\n\nPair 2:\n\nKey: [Associated Press, Paris, Protest]\nValue:\n"PROTESTERS TAKE TO THE STREETS IN PARIS\n\nReports from Paris indicate that thousands of protesters have taken to the streets to voice their grievances against recent political developments. The demonstrators, comprising various groups and organizations, are expressing dissent over policies implemented by the go

In [11]:
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field, validator


def createDatasetSimpleLength(key, story_type, instructions) -> str:
    """Ask the chat model for a key/value pair whose value is capped at 100 words.

    The cap is only stated in the prompt text ("of maximum 100 words"); nothing in
    this function checks the length of the reply.

    Args:
        key: Description of the key parts; it is placed inside ``[...]`` in the prompt.
        story_type: What the value of the pair should be (e.g. ``'news item'``).
        instructions: Extra free-text guidance appended to the prompt.

    Returns:
        The unparsed string produced by the LLM chain. No DataFrame is built here,
        so the return annotation is ``str``.

    Raises:
        Whatever ``run_llm_chain`` raises is propagated unchanged.
    """
    import os
    import openai
    from langchain.chat_models import ChatOpenAI

    # The module-level key is still set, as before, but the model gets its own
    # name so the local ``openai`` module reference is no longer overwritten.
    api_key = os.getenv("OPENAI_API_KEY")
    openai.api_key = api_key
    chat_model = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=api_key,
        temperature=1.5,
    )

    f_prompt = "Generate a pair with a key built like this [{key}], and the value in the pair will be an entire {type} of maximum 100 words.{instructions}."
    user_input = f_prompt.format(key=key, type=story_type, instructions=instructions)

    # The prompt text is fully rendered above; the template only passes it through.
    prompt_template = PromptTemplate(
        template="{input}",
        input_variables=["input"],
    )

    hub_chain = LLMChain(prompt=prompt_template, llm=chat_model, verbose=True)
    return run_llm_chain(hub_chain, user_input)

In [12]:
# Run the length-capped prompt variant for the news-report scenario and display the raw reply.
output = createDatasetSimpleLength(
    key='news agency, location, news item',
    story_type='news item',
    instructions='The news item will be a report of the given news agency regarding the provided location and revolving around the news item which takes place in that location',
)
output



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGenerate a pair with a key built like this [news agency, location, news item], and the value in the pair will be an entire news item of maximum 100 words.The news item will be a report of the given news agency regarding the provided location and revolving around the news item which takes place in that location.[0m

[1m> Finished chain.[0m


'Key: [Reuters, London, COVID-19]\nValue: \nBritish Prime Minister Boris Johnson announced new measures today to combat the spread of the COVID-19 virus in London. The city has seen a significant surge in cases in recent weeks, prompting government officials to impose stricter restrictions on social gatherings and non-essential businesses. Johnson urged Londoners to adhere to the new guidelines in order to protect themselves and others from the potentially deadly virus. Health authorities are closely monitoring the situation and are prepared to take further action if necessary to prevent a worsening outbreak in the capital city.'

In [13]:
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field, validator


def createDatasetSimpleRole(key, story_type, instructions) -> str:
    """Ask the chat model for a key/value pair, prefixing the prompt with a role description.

    Args:
        key: Description of the key parts; it is placed inside ``[...]`` in the prompt.
        story_type: What the value of the pair should be (e.g. ``'news item'``).
        instructions: Extra free-text guidance appended to the prompt.

    Returns:
        The unparsed string produced by the LLM chain. No DataFrame is built here,
        so the return annotation is ``str``.

    Raises:
        Whatever ``run_llm_chain`` raises is propagated unchanged.
    """
    import os
    import openai
    from langchain.chat_models import ChatOpenAI

    # The module-level key is still set, as before, but the model gets its own
    # name so the local ``openai`` module reference is no longer overwritten.
    api_key = os.getenv("OPENAI_API_KEY")
    openai.api_key = api_key
    chat_model = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=api_key,
        temperature=1.5,
    )

    prefix = """You are a helpful assistant great in story telling. You follow the given instructions in a precise manner.
    """
    f_prompt = """{prefix}. Generate a pair with first part a key built like this [{key}], and the value in the pair will be an entire {type}.{instructions}.
      Return result maximum 100 words"""
    user_input = f_prompt.format(prefix=prefix, key=key, type=story_type, instructions=instructions)

    # The role prefix is already part of ``user_input``. The ``prefix=`` keyword
    # previously passed here has been dropped because the "{input}" template
    # never references it.
    prompt_template = PromptTemplate(
        template="{input}",
        input_variables=["input"],
    )

    hub_chain = LLMChain(prompt=prompt_template, llm=chat_model, verbose=True)
    return run_llm_chain(hub_chain, user_input)

In [14]:
# Run the role-prefixed prompt variant for the news-report scenario and display the raw reply.
output = createDatasetSimpleRole(
    key='news agency, location, news item',
    story_type='news item',
    instructions='The news item will be a report of the given news agency regarding the provided location and revolving around the news item which takes place in that location',
)
output



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful assistant great in story telling. You follow the given instructions in a precise manner.
    . Generate a pair with first part a key built like this [news agency, location, news item], and the value in the pair will be an entire news item.The news item will be a report of the given news agency regarding the provided location and revolving around the news item which takes place in that location.
      Return result maximum 100 words[0m

[1m> Finished chain.[0m




In [15]:
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field, validator


def createDatasetSimpleFormat(key, story_type, instructions) -> "OutputResult":
    """Ask the chat model for a key/value pair and parse the reply into an ``OutputResult``.

    The parser's format instructions are embedded in the prompt so the model is
    told which JSON shape to return; the raw reply is printed before parsing.

    Args:
        key: Description of the key parts; it is placed inside ``[...]`` in the prompt.
        story_type: What the value of the pair should be (e.g. ``'news item'``).
        instructions: Extra free-text guidance appended to the prompt.

    Returns:
        The ``OutputResult`` built by ``PydanticOutputParser`` from the model's reply.
        No DataFrame is built here, so the annotation names ``OutputResult``.

    Raises:
        ValueError: If the reply cannot be parsed into ``OutputResult``. The parser's
            own error is attached as ``__cause__``.
    """
    import os
    import openai
    from langchain.chat_models import ChatOpenAI

    # Initialize the model; the module-level key is still set, as before.
    api_key = os.getenv("OPENAI_API_KEY")
    openai.api_key = api_key
    openai_model = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=api_key,
        temperature=1.5,
    )

    prefix = """You are a helpful assistant great in storytelling. You follow the given instructions in a precise manner. 
    Ensure the result is formatted correctly."""
    parser = PydanticOutputParser(pydantic_object=OutputResult)
    # Schema description the model is asked to follow.
    format_instructions = parser.get_format_instructions()

    # Construct the full prompt (prefix and format instructions are rendered here).
    f_prompt = """{prefix}. {format_instructions}. Generate a pair with the first part a key built like this [{key}], 
    and the value in the pair will be an entire {type}. {instructions}. Ensure the result is a maximum of 100 words."""
    user_input = f_prompt.format(
        prefix=prefix,
        key=key,
        type=story_type,
        instructions=instructions,
        format_instructions=format_instructions,
    )

    # Everything is already inside ``user_input``. The ``prefix=`` and
    # ``partial_variables=`` keywords previously passed here have been dropped
    # because the "{input}" template references neither of them.
    prompt_template = PromptTemplate(
        template="{input}",
        input_variables=["input"],
    )

    hub_chain = LLMChain(prompt=prompt_template, llm=openai_model, verbose=True)

    output_text = run_llm_chain(hub_chain, user_input)
    print(output_text)

    # Same exception type and message as before, now explicitly chained to the
    # underlying parser error so the real cause stays visible in the traceback.
    try:
        return parser.parse(output_text)
    except (SyntaxError, KeyError, ValueError) as exc:
        raise ValueError("The LLM response could not be parsed into the expected OutputResult format.") from exc


In [16]:
# Run the structured-output prompt variant for the news-report scenario and display the parsed result.
output = createDatasetSimpleFormat(
    key='news agency, location, news item',
    story_type='news item',
    instructions='The news item will be a report of the given news agency regarding the provided location and revolving around the news item which takes place in that location',
)
output



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful assistant great in storytelling. You follow the given instructions in a precise manner. 
    Ensure the result is formatted correctly.. The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"key": {"description": "The key with the story parameters. Must contain between 3 and 5 parameters", "items": {"type": "string"}, "maxItems": 5, "minItems": 3, "title": "Key", "type": "array"}, "story": {"description": "The generated story for the given key", "title": "Story", "type": "string"}}, 

OutputResult(key=['BBC News', 'London', 'Brexit Update'], story="Breaking news from BBC News in London, as Brexit negotiations unfold. Amidst escalating tensions, both sides struggle to reach a consensus on key trade agreements. Citizens express concern over the uncertainty looming over the future of their country's economy. Stay tuned for further updates on this critical issue.")

In [17]:
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field, validator


def createDatasetSimpleStyle(key, story_type, instructions, style) -> "OutputResult":
    """Ask the chat model for a key/value pair in a given style and parse it into an ``OutputResult``.

    The ``style`` text is inserted into the role prefix, and the parser's format
    instructions are embedded in the prompt. The raw reply is printed before parsing.

    Args:
        key: Description of the key parts; it is placed inside ``[...]`` in the prompt.
        story_type: What the value of the pair should be (e.g. ``'news item'``).
        instructions: Extra free-text guidance appended to the prompt.
        style: Free-text style guidance inserted into the role prefix.

    Returns:
        The ``OutputResult`` built by ``PydanticOutputParser`` from the model's reply.
        No DataFrame is built here, so the annotation names ``OutputResult``.

    Raises:
        ValueError: If the reply cannot be parsed into ``OutputResult``. The parser's
            own error is attached as ``__cause__``.
    """
    import os
    import openai
    from langchain.chat_models import ChatOpenAI

    # Initialize the model; the module-level key is still set, as before.
    api_key = os.getenv("OPENAI_API_KEY")
    openai.api_key = api_key
    openai_model = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=api_key,
        temperature=1.5,
    )

    prefix_string = """You are a helpful assistant great in storytelling. You follow the given instructions in a precise manner.{style}. 
    Ensure the result is formatted correctly."""
    prefix = prefix_string.format(style=style)
    parser = PydanticOutputParser(pydantic_object=OutputResult)
    # Schema description the model is asked to follow.
    format_instructions = parser.get_format_instructions()

    # Construct the full prompt (prefix and format instructions are rendered here).
    f_prompt = """{prefix}. {format_instructions}.Generate a pair with the first part a key built like this [{key}], 
    and the value in the pair will be an entire {type}. {instructions}. Ensure the result is a maximum of 100 words."""
    user_input = f_prompt.format(
        prefix=prefix,
        key=key,
        type=story_type,
        instructions=instructions,
        format_instructions=format_instructions,
    )

    # Everything is already inside ``user_input``. The ``prefix=`` and
    # ``partial_variables=`` keywords previously passed here have been dropped
    # because the "{input}" template references neither of them.
    prompt_template = PromptTemplate(
        template="{input}",
        input_variables=["input"],
    )

    hub_chain = LLMChain(prompt=prompt_template, llm=openai_model, verbose=True)

    output_text = run_llm_chain(hub_chain, user_input)
    print(output_text)

    # Same exception type and message as before, now explicitly chained to the
    # underlying parser error so the real cause stays visible in the traceback.
    try:
        return parser.parse(output_text)
    except (SyntaxError, KeyError, ValueError) as exc:
        raise ValueError("The LLM response could not be parsed into the expected OutputResult format.") from exc


In [19]:
# Run the styled, structured-output prompt variant for the news-report scenario and display the parsed result.
output = createDatasetSimpleStyle(
    key='news agency, location, news item',
    story_type='news item',
    instructions='The news item will be a report of the given news agency regarding the provided location and revolving around the news item which takes place in that location',
    style='You are very diverse and creative. Dont be dull, go wild, invent interesting locations and adventures and fun characters!',
)
output



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful assistant great in storytelling. You follow the given instructions in a precise manner.You are very diverse and creative. Dont be dull, go wild, invent interesting locations and adventures and fun characters!. 
    Ensure the result is formatted correctly.. The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"key": {"description": "The key with the story parameters. Must contain between 3 and 5 parameters", "items": {"type": "string"}, "maxItems": 5, "minItems": 3, "title": "Key", 

OutputResult(key=['Daily Globe News', 'Tropical Island', 'Mysterious Disappearance'], story='Breaking news from Daily Globe News! Mystery surrounds the idyllic Tropical Island as reports come in regarding a mysterious disappearance.Click here for updates!')

In [20]:
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field, validator


def createDataset(iterations, key, story_type, instructions,style) -> pd.DataFrame:
    """Generate up to ``iterations`` key/story rows with a few-shot prompt and collect them in a DataFrame.

    Each iteration sends the same few-shot prompt to the chat model and parses the
    reply into an ``OutputResult``. Replies that fail to parse are printed and
    skipped, so the result may hold fewer than ``iterations`` rows.

    Args:
        iterations: Number of LLM calls to attempt.
        key: Description of the key parts; it is placed inside ``[...]`` in the prompt.
        story_type: What the value of the pair should be (e.g. ``'news item'``).
        instructions: Extra free-text guidance appended to the prompt.
        style: Free-text style guidance appended after the instructions.

    Returns:
        A DataFrame with one row per successfully parsed reply and the columns
        ``keywords``, ``story_type``, ``instructions``, ``generated_key`` and
        ``generated_value``. It is empty, with no columns, when nothing was parsed.

    Raises:
        Whatever ``run_llm_chain`` raises is propagated unchanged.
    """
    import os
    import openai
    from langchain.chat_models import ChatOpenAI

    # Initialize the model. The module-level key is still set, as before, but the
    # model gets its own name so the local ``openai`` module is not overwritten.
    api_key = os.getenv("OPENAI_API_KEY")
    openai.api_key = api_key
    chat_model = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=api_key,
        temperature=1.5,
    )

    def _escaped_example_json(payload):
        # Validate the payload against OutputResult, dump it to JSON and double the
        # braces so they are not read as placeholders when the few-shot prompt is formatted.
        return OutputResult.model_validate(payload).model_dump_json().replace("{", "{{").replace("}", "}}")

    examples = [
        {
            "input": """Generate a tuple with first part a key built like this [age,gender, superpower], 
                and the value in the tuple will be an entire story of maximum 100 words with detailed description for a super-hero with the given age, of the given gender and with the given superpower """,
            "output": _escaped_example_json({
                "key": ["18", "man", "invisibility"],
                "story": """A 18 year old man, tall with a strong yet athletic build. Noir eyes and light brown hair that seems to be a reflection of the warmth of his personality. 
                    His superpower of invisibility make him silent, 
                    introspective and observant. He knows when to be seen and when to remain invisible in the background; like a silent guardian protecting those around him. With a strong sense of justice and power,
                    he is an invaluable asset to those he holds near and dear. His kind and compassionate spirit give him an aura of protectiveness, making him a person of strength and courage in difficult moments.""",
            }),
        },
        {
            "input": """Generate a tuple with first part a key built like this [product ,theme, details], 
                and the value in the tuple will be a gingle of maximum 100 words with commercial for the given product, in the given theme incorporating the provided details.""",
            "output": _escaped_example_json({
                "key": ["Whiskers", "happy", "cat food-holiday season price reductions-great for your cat"],
                "story": "We are so happy to announce holiday discounts for the best cat food outhere! For happy and healthy cat choose Whiskers! Meow!",
            }),
        },
        {
            "input": """Generate a tuple with first part a key built like this [fictional character ,location, adventure], 
                and the value in the tuple will be a story of maximum 100 words describing an adventure of the given fictional character in the provided location.""",
            "output": _escaped_example_json({
                "key": ["Baba Yaga", "Asia", "getting no respect"],
                "story": """Once upon a time Baba Yaga wondered far far away from her home and ended up in remote Hokkaido island. 
                    She was used to locals showing her great respect out of fear and also because she was always one of the pillars of Slavic culture. 
                    But in Hokkaido the locals knew nothing about her, and she was very disappointed because they have shown her no respect. Eventually she decided there is no place like home and went back""",
            }),
        },
    ]

    # Template used to render each example as a User/AI exchange.
    example_template = """
        User: {input}
        AI: {output}
    """
    example_prompt = PromptTemplate(
        input_variables=["input", "output"],
        template=example_template
    )

    parser = PydanticOutputParser(pydantic_object=OutputResult)

    # The prefix carries the task description and the parser's format instructions.
    prefix = """You are a helpful assistant great in story telling. You follow the given instructions in a precise manner. 
    You need to generate a dataset where the key would be generated values string representing the story parameters according to the user given instructions, and the value will be a story written given this key. 
    Transform the output into structured object given those instructions: {format_instructions} Here are a few examples on how to generate the content of the dataset:
    """

    # The suffix carries the user input and the output indicator.
    suffix = """
    User: {input}
    AI:"""

    few_shot_prompt_template = FewShotPromptTemplate(
        examples=examples,
        example_prompt=example_prompt,
        prefix=prefix,
        suffix=suffix,
        input_variables=["input"],
        example_separator="\n\n",
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )

    f_prompt = "Generate a tuple with first part a key built like this [{key}], and the value in the tuple will be an entire {type} of maximum 100 words .{instructions}. {style}"
    user_input = f_prompt.format(key=key, type=story_type, instructions=instructions, style=style)

    # The chain does not depend on the iteration, so it is built once, not per call.
    hub_chain = LLMChain(prompt=few_shot_prompt_template, llm=chat_model, verbose=True)

    # Rows are collected in a list and turned into a DataFrame once at the end,
    # instead of re-copying the whole frame with pd.concat on every iteration.
    records = []
    for _ in range(iterations):
        output = run_llm_chain(hub_chain, user_input)
        try:
            parsed_result = parser.parse(output)
        except Exception as e:
            # Best effort: report the unparseable reply and move on to the next attempt.
            print(e)
            continue

        first_string = ", ".join(parsed_result.key) if parsed_result.key else 'Not specified'
        print('first string:',first_string)

        second_string = parsed_result.story if parsed_result.story else 'Not specified'
        print('second string:',second_string)

        records.append({
            'keywords': key,
            'story_type': story_type,
            'instructions': instructions,
            'generated_key': first_string,
            'generated_value': second_string,
        })

    return pd.DataFrame(records)

In [21]:
# Generate a single few-shot row for the news-report scenario.
df = createDataset(
    iterations=1,
    key='news agency, location, news item',
    story_type='news item',
    instructions='The news item will be a report of the given news agency regarding the provided location and revolving around the news item which takes place in that location',
    style='Be very precise and stick to the facts with the key parameters values.',
)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful assistant great in story telling. You follow the given instructions in a precise manner. 
    You need to generate a dataset where the key would be generated values string representing the story parameters according to the user given instructions, and the value will be a story written given this key. 
    Transform the output into structured object given those instructions: The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"key": {"description": "The key with the story parameters

In [22]:
# Display the DataFrame returned by createDataset in the previous cell.
df

Unnamed: 0,keywords,story_type,instructions,generated_key,generated_value
0,"news agency, location, news item",news item,The news item will be a report of the given ne...,"Reuters, New York City, terrorist attack","In a recent report by Reuters, a terrorist att..."
