In [1]:
from tenacity import retry, stop_after_attempt, wait_random_exponential, retry_if_exception_type
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain.pydantic_v1 import BaseModel, Field, validator
# Decorator for automatic retry requests

from pydantic import BaseModel, Field, conlist
from typing import List, Optional, Tuple
# Structured result the LLM is asked to produce: a list of key parameters plus
# the text generated for them. Used below both to build the few-shot example
# outputs and as the target model of the PydanticOutputParser.
# NOTE: documented with comments rather than a class docstring on purpose --
# pydantic copies a model docstring into the generated JSON schema, which
# would change the format instructions sent to the model.
class OutputResult(BaseModel):
    # Parameter values of the generated sample; conlist enforces 3 to 5 string items.
    key: conlist(str, min_length=3, max_length=5) = Field(description="The key with the story parameters. Must contain between 3 and 5 parameters")
    # Text generated for the parameters listed in `key`.
    story:str = Field(description="The generated story for the given key")
    
@retry(
    stop=stop_after_attempt(10),
    # Random exponential backoff between attempts (multiplier=1, max=60).
    wait=wait_random_exponential(multiplier=1, max=60),
    # Only these exception types trigger another attempt; anything else propagates at once.
    retry=retry_if_exception_type(
        (
            openai.APIError,
            openai.APIConnectionError,
            openai.Timeout,
            ValueError,
            SyntaxError,
            KeyError,
        )
    ),
)
def run_llm_chain(hub_chain,user_input,parser):
    """Run the chain once on ``user_input`` and return the parsed completion.

    The call is wrapped in a tenacity ``retry`` policy: up to 10 attempts,
    with a random exponential wait between them, retrying only on the
    exception types listed in the decorator.

    Args:
        hub_chain: Object exposing ``run(input=...)`` that returns the raw
            model output.
        user_input: Value passed to the chain as its ``input`` variable.
        parser: Object exposing ``parse(text)`` that converts the raw output
            into the structured result.

    Returns:
        Whatever ``parser.parse`` returns for the chain output.
    """
    raw_completion = hub_chain.run(input=user_input)
    return parser.parse(raw_completion)

In [2]:
from langchain.llms import OpenAI
from langchain import HuggingFaceHub,LLMChain
from langchain.prompts import PromptTemplate
from langchain import FewShotPromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
import pandas as pd
from collections import Counter
from io import StringIO
import streamlit as st
import re
import os
import openai
import ast
import json
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field, validator


def createDataset(iterations: int, key: str, story_type: str, instructions: str, style: str) -> pd.DataFrame:
    """Generate ``iterations`` (key, text) samples with a few-shot prompted chat model.

    A few-shot prompt is built from three hard-coded examples plus the format
    instructions of a ``PydanticOutputParser`` for ``OutputResult``. The user
    request is assembled from ``key``, ``story_type``, ``instructions`` and
    ``style`` and sent to the model once per iteration through
    ``run_llm_chain``.

    Args:
        iterations: Number of model calls to make; one output row per call.
        key: Comma-separated names of the key parameters, inserted into the
            request as ``[{key}]``.
        story_type: Kind of text to generate (inserted after "an entire").
        instructions: Extra instructions appended to the request.
        style: Style guidance appended at the end of the request. It is not
            stored in the returned frame.

    Returns:
        DataFrame with columns ``keywords``, ``story_type``, ``instructions``,
        ``generated_key`` and ``generated_value``. The columns are present
        even when ``iterations`` is 0.

    Raises:
        Any exception raised by ``run_llm_chain`` propagates to the caller.

    Side effects:
        Sets ``openai.api_key`` from the ``OPENAI_API_KEY`` environment
        variable and prints each generated key/value to stdout.
    """
    from langchain.chat_models import ChatOpenAI

    # initialize the model; it gets its own name (`llm`) so the `openai`
    # module is no longer shadowed by the ChatOpenAI instance
    openai.api_key = os.getenv("OPENAI_API_KEY")
    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=openai.api_key,
        temperature=1.5
    )

    # Few-shot examples. Each output is serialized through OutputResult so it
    # matches the schema requested from the model; braces are doubled because
    # the assembled few-shot prompt is formatted as a template once more.
    examples = [
            {
                "input": """Generate a tuple with first part a key built like this [age,gender, superpower], 
                and the value in the tuple will be an entire story of maximum 100 words with detailed description for a super-hero with the given age, of the given gender and with the given superpower """,
                "output": OutputResult.model_validate({
                    "key": ["18", "man", "invisibility"],
                    "story": """A 18 year old man, tall with a strong yet athletic build. Noir eyes and light brown hair that seems to be a reflection of the warmth of his personality. 
                    His superpower of invisibility make him silent, 
                    introspective and observant. He knows when to be seen and when to remain invisible in the background; like a silent guardian protecting those around him. With a strong sense of justice and power,
                    he is an invaluable asset to those he holds near and dear. His kind and compassionate spirit give him an aura of protectiveness, making him a person of strength and courage in difficult moments."""
                     }).model_dump_json().replace("{", "{{").replace("}", "}}"),
            },
            {
                "input": """Generate a tuple with first part a key built like this [product ,theme, details], 
                and the value in the tuple will be a gingle of maximum 100 words with commercial for the given product, in the given theme incorporating the provided details.""",
                "output": OutputResult.model_validate({
                    "key": ["Whiskers", "happy", "cat food-holiday season price reductions-great for your cat"],
                    "story": "We are so happy to announce holiday discounts for the best cat food outhere! For happy and healthy cat choose Whiskers! Meow!"
                     }).model_dump_json().replace("{", "{{").replace("}", "}}"),
            },
             {
                "input": """Generate a tuple with first part a key built like this [fictional character ,location, adventure], 
                and the value in the tuple will be a story of maximum 100 words describing an adventure of the given fictional character in the provided location.""",
                "output": OutputResult.model_validate({
                    "key": ["Baba Yaga", "Asia", "getting no respect"],
                    "story": """Once upon a time Baba Yaga wondered far far away from her home and ended up in remote Hokkaido island. 
                    She was used to locals showing her great respect out of fear and also because she was always one of the pillars of Slavic culture. 
                    But in Hokkaido the locals knew nothing about her, and she was very disappointed because they have shown her no respect. Eventually she decided there is no place like home and went back"""
                     }).model_dump_json().replace("{", "{{").replace("}", "}}"),
            },
        ]

    # template used to render each few-shot example
    example_template = """
        User: {input}
        AI: {output}
    """
    example_prompt = PromptTemplate(
        input_variables=["input", "output"],
        template=example_template
    )

    parser = PydanticOutputParser(pydantic_object=OutputResult)

    # the prefix carries the general instructions and the parser's format instructions
    prefix = """You are a helpful assistant great in story telling. You follow the given instructions in a precise manner. 
    You need to generate a dataset where the key would be generated values string representing the story parameters according to the user given instructions, and the value will be a story written given this key. 
    Transform the output into structured object given those instructions: {format_instructions} Here are a few examples on how to generate the content of the dataset:
    """

    # the suffix carries the user input and the output indicator
    suffix = """
    User: {input}
    AI:"""

    # assemble the few-shot prompt template
    few_shot_prompt_template = FewShotPromptTemplate(
        examples=examples,
        example_prompt=example_prompt,
        prefix=prefix,
        suffix=suffix,
        input_variables=["input"],
        example_separator="\n\n",
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )

    f_prompt = "Generate a tuple with first part a key built like this [{key}], and the value in the tuple will be an entire {type} of maximum 100 words .{instructions}. {style}"
    user_input = f_prompt.format(key=key, type=story_type,instructions = instructions, style=style)

    # The chain does not depend on the loop variable, so build it once.
    hub_chain = LLMChain(prompt=few_shot_prompt_template,llm=llm,verbose=True)

    # Collect plain records and build the frame once at the end instead of
    # concatenating a one-row frame per iteration.
    columns = ['keywords', 'story_type', 'instructions', 'generated_key', 'generated_value']
    rows = []
    for _ in range(iterations):
        output = run_llm_chain(hub_chain,user_input,parser)

        generated_key = ", ".join(output.key) if output.key else 'Not specified'
        print('generated_key:',generated_key)

        generated_value = output.story if output.story else 'Not specified'
        print('generated_value:',generated_value)

        rows.append({
            'keywords':key,
            'story_type':story_type,
            'instructions':instructions,
            'generated_key':generated_key,
            'generated_value': generated_value
        })

    # Explicit columns keep the schema stable even when no rows were generated.
    return pd.DataFrame(rows, columns=columns)

In [3]:
# Factual news items: the style asks the model to stick closely to the key values.
df = createDataset(
    iterations=5,
    key='news agency, location, news item',
    story_type='news item',
    instructions='The news item will be a report of the given news agency regarding the provided location and revolving around the news item which takes place in that location',
    style='Be very precise and stick to the facts with the key parameters values.',
)
df

  warn_deprecated(
  warn_deprecated(
  warn_deprecated(




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful assistant great in story telling. You follow the given instructions in a precise manner. 
    You need to generate a dataset where the key would be generated values string representing the story parameters according to the user given instructions, and the value will be a story written given this key. 
    Transform the output into structured object given those instructions: The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"key": {"description": "The key with the story parameters

Unnamed: 0,keywords,story_type,instructions,generated_key,generated_value
0,"news agency, location, news item",news item,The news item will be a report of the given ne...,"Reuters, New York, Climate change protest","In a large protest in New York City, thousands..."
1,"news agency, location, news item",news item,The news item will be a report of the given ne...,"World Today, Paris, city protests","In breaking news from Paris, massive protests ..."
2,"news agency, location, news item",news item,The news item will be a report of the given ne...,"BBC, London, terrorist attack at Westminster",Breaking news from London: A terrorist attack ...
3,"news agency, location, news item",news item,The news item will be a report of the given ne...,"CNN, New York City, 2021 NYC Marathon",CNN is reporting from New York City where thou...
4,"news agency, location, news item",news item,The news item will be a report of the given ne...,"Fox News, Washington D.C., White House lockdown","Tonight in Washington D.C., the White House is..."


In [4]:
# Same news-item request as before, but with a style that asks for imaginative key values.
funny_news = createDataset(
    iterations=5,
    key='news agency, location, news item',
    story_type='news item',
    instructions='The news item will be a report of the given news agency regarding the provided location and revolving around the news item which takes place in that location',
    style='Be very creative and diverse with the key parameters values, they should not be straightforward but imaginative and diverse',
)
funny_news



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful assistant great in story telling. You follow the given instructions in a precise manner. 
    You need to generate a dataset where the key would be generated values string representing the story parameters according to the user given instructions, and the value will be a story written given this key. 
    Transform the output into structured object given those instructions: The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"key": {"description": "The key with the story parameters

Unnamed: 0,keywords,story_type,instructions,generated_key,generated_value
0,"news agency, location, news item",news item,The news item will be a report of the given ne...,"Cosmic News Chronicle, Exoplanet Quantakunas, ...",In a bizarre turn of events on the newly disco...
1,"news agency, location, news item",news item,The news item will be a report of the given ne...,"Starhawk Global Broadcast, Frostbite Glacier, ...","In a groundbreaking discovery, researchers hav..."
2,"news agency, location, news item",news item,The news item will be a report of the given ne...,"Illuminate Daily Tribune, Atlantis, sudden dis...",The Illuminte Daily Tribune reports from the m...
3,"news agency, location, news item",news item,The news item will be a report of the given ne...,"Star Chronicles, Mercury, unexpected signal fr...","In an unexpected turn of events, the scientist..."
4,"news agency, location, news item",news item,The news item will be a report of the given ne...,"Sunshine Press, The Enchanted Ghost Creek, str...","Today, officials in The Enchanted Ghost Creek ..."


In [5]:
# Workout plans keyed by muscle group, training type and training weight.
workouts = createDataset(
    iterations=5,
    key='muscle group, training type, training weight',
    story_type='a workout plan',
    instructions='The workout plan will be for the specified muscle group with the given training type (such as supersets, isometric workout, unilateral workout, eccentric workout etc.) and the given weight',
    style='Be very precise and stick to the facts with the key parameters values.',
)
workouts



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful assistant great in story telling. You follow the given instructions in a precise manner. 
    You need to generate a dataset where the key would be generated values string representing the story parameters according to the user given instructions, and the value will be a story written given this key. 
    Transform the output into structured object given those instructions: The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"key": {"description": "The key with the story parameters

Unnamed: 0,keywords,story_type,instructions,generated_key,generated_value
0,"muscle group, training type, training weight",a workout plan,The workout plan will be for the specified mus...,"Legs, Isometric, 50 kg","For your leg day workout, focus on isometric e..."
1,"muscle group, training type, training weight",a workout plan,The workout plan will be for the specified mus...,"shoulders, isometric workout, medium weight","For shoulders, incorporate isometric workouts ..."
2,"muscle group, training type, training weight",a workout plan,The workout plan will be for the specified mus...,"quadriceps, supersets, lifting heavy",Quadriceps workout plan: Perform supersets of ...
3,"muscle group, training type, training weight",a workout plan,The workout plan will be for the specified mus...,"quadriceps, supersets, 200 lbs",Here is a workout plan for quadriceps focusing...
4,"muscle group, training type, training weight",a workout plan,The workout plan will be for the specified mus...,"legs, eccentric, heavy",The workout plan for legs will focus on eccent...


In [6]:
# Sci-fi stories keyed by protagonist, realm and adventure outline.
scifi = createDataset(
    iterations=5,
    key='fictional protagonist, fictional realm, adventure outline',
    story_type='a sci-fi story',
    instructions='a sci-fi story about the fictional protagonist , taking place in the described realm and based on the given adventure outline',
    style='Be very creative and diverse with the key parameters values, they should not be straightforward but imaginative and diverse',
)
scifi



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful assistant great in story telling. You follow the given instructions in a precise manner. 
    You need to generate a dataset where the key would be generated values string representing the story parameters according to the user given instructions, and the value will be a story written given this key. 
    Transform the output into structured object given those instructions: The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"key": {"description": "The key with the story parameters

Unnamed: 0,keywords,story_type,instructions,generated_key,generated_value
0,"fictional protagonist, fictional realm, advent...",a sci-fi story,a sci-fi story about the fictional protagonist...,"Lanaya Stormrage, Xerathia, Finding the key to...","In the ancient realm of Xerathia, Lanaya Storm..."
1,"fictional protagonist, fictional realm, advent...",a sci-fi story,a sci-fi story about the fictional protagonist...,"Serenity Floramancer, Xylaturn-Nebula, discove...","On the world of Xylaturn-Nebula, Serenity Flor..."
2,"fictional protagonist, fictional realm, advent...",a sci-fi story,a sci-fi story about the fictional protagonist...,"Qilaana, Nebula Esmeralda, facing the calamity...","In the mystical realm of Nebula Esmeralda, the..."
3,"fictional protagonist, fictional realm, advent...",a sci-fi story,a sci-fi story about the fictional protagonist...,"Zarael, Fenasia, Unraveling the fabric of spac...",In the fantastical realm of the ethereal Fenas...
4,"fictional protagonist, fictional realm, advent...",a sci-fi story,a sci-fi story about the fictional protagonist...,"Aurora Starfall, Xelion 7, finding the lost ar...","Aurora Starfall, intergalactic explorer hailin..."


In [7]:
# Commercial jingles keyed by product name, slogan and style.
gingle_df = createDataset(
    iterations=5,
    key='product name, catchy slogan, style (lyrical, satirical, funny)',
    story_type='commercial jingle',
    instructions='commercial jingle using the specified product name and a catchy slogan. The style of the jingle should align with the given preference to create a catchy jingle to promote the product.',
    style='Be very creative and diverse with the key parameters values, they should not be straightforward but imaginative and diverse',
)
gingle_df



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful assistant great in story telling. You follow the given instructions in a precise manner. 
    You need to generate a dataset where the key would be generated values string representing the story parameters according to the user given instructions, and the value will be a story written given this key. 
    Transform the output into structured object given those instructions: The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"key": {"description": "The key with the story parameters

Unnamed: 0,keywords,story_type,instructions,generated_key,generated_value
0,"product name, catchy slogan, style (lyrical, s...",commercial jingle,commercial jingle using the specified product ...,"Galactic Treats, Cosmic delights, out of this ...",Intergalctic travelers rejoice! Galactic Treat...
1,"product name, catchy slogan, style (lyrical, s...",commercial jingle,commercial jingle using the specified product ...,"SugarPlum Confetti, Sprinkle Sweetness Around,...","Come one, come all, to a land where sweetness ..."
2,"product name, catchy slogan, style (lyrical, s...",commercial jingle,commercial jingle using the specified product ...,"Eternal Youth Elixer, Fountain of bliss in eve...","Come one come all to the Eternal Youth Elixer,..."
3,"product name, catchy slogan, style (lyrical, s...",commercial jingle,commercial jingle using the specified product ...,"Magic Bean Coffee, Awake Before Dawn!, lyrical","In the early morn, with darkness all around,\n..."
4,"product name, catchy slogan, style (lyrical, s...",commercial jingle,commercial jingle using the specified product ...,"Silverwind, catch the breezeling, lyrical","Silverwind, the light in your sky Gracefully f..."


In [8]:
# Fairytales keyed by magical character, location and adventure.
fairytale = createDataset(
    iterations=5,
    key='magical character, location, adventure',
    story_type='fairytale story',
    instructions='detailed description of the specified adventure of our magical character taking place in the given location',
    style='Be very creative and diverse with the key parameters values, they should not be straightforward but imaginative and diverse',
)
fairytale



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a helpful assistant great in story telling. You follow the given instructions in a precise manner. 
    You need to generate a dataset where the key would be generated values string representing the story parameters according to the user given instructions, and the value will be a story written given this key. 
    Transform the output into structured object given those instructions: The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"key": {"description": "The key with the story parameters

Unnamed: 0,keywords,story_type,instructions,generated_key,generated_value
0,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"Sirenadora, Crystal Caves, Rescuing the Moon","In the mystical Crystal Caves of Zelandria, de..."
1,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"Moonbeam Sprite, Enchanted Forest, Celebration...","In an Enchanted Forest, deep and mystical, whe..."
2,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"Phoenix Firefly, Illum-Ternia, quest for etern...",In the land of Illum-Ternia where magic ruled ...
3,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"The Velvet Queen, Celestial Farmsteads, Rift i...","In the realm of Celestial Farmsteads, where st..."
4,"magical character, location, adventure",fairytale story,detailed description of the specified adventur...,"Celesteorella the Star Gazer, Realm of the Ete...","In the Realm of the Eternal Twilight, Celesteo..."


In [None]:
# Persist the generated workout dataset. `another_df` was never defined in this
# notebook (NameError on a fresh Restart & Run All); the frame matching this
# file name is `workouts`, created above.
workouts.to_csv("../datasets/workout_generated_dataset.csv")