In [2]:
from datasets import load_dataset
import random
import time
import pickle

random.seed(0)

def sample_from_dataset(dataset, num):
  return dataset.shuffle()[:num]


In [3]:
import os
import pandas as pd
import tqdm.notebook as tqdm

from datasets import load_dataset
import random

from transformers import AutoTokenizer, GemmaForCausalLM, BitsAndBytesConfig, AutoModelForCausalLM
import torch

os.chdir('/workspace/ligo_general/prompt')

In [4]:
import pathlib
import textwrap
import google.generativeai as genai

In [5]:
genai.configure(api_key=API_KEY)
model = genai.GenerativeModel('gemini-1.0-pro')

In [6]:
random.seed(0)

def sample_from_dataset(dataset, num):
  return dataset.shuffle()[:num]

cnn_dm = load_dataset('cnn_dailymail', '3.0.0')
samsum = load_dataset('samsum')
arxiv = load_dataset('scientific_papers', 'arxiv')
pubmed = load_dataset('scientific_papers', 'pubmed')
billsum = load_dataset('billsum')

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [7]:
n_train = 10000
n_valid = 800
n_test = 800

cnn_train = sample_from_dataset(cnn_dm['train'], n_train)
sam_train = sample_from_dataset(samsum['train'], n_train)
arx_train = sample_from_dataset(arxiv['train'], n_train // 2)
pub_train = sample_from_dataset(pubmed['train'], n_train // 2)
bill_sample = sample_from_dataset(billsum['train'], n_train + n_valid)
balanced_train_txt = cnn_train['article'] + sam_train['dialogue'] + arx_train['article'] + pub_train['article'] + bill_sample['text'][:n_train]
balanced_train_sum = cnn_train['highlights'] + sam_train['dialogue'] + arx_train['abstract'] + pub_train['abstract'] + bill_sample['summary'][:n_train]
text = balanced_train_txt + balanced_train_sum
# sample 2000 from text
text = random.sample(text, 2000)

In [8]:
aspects_areas = [
    {
        "name": "Tone Adjustment",
        "description": "Modify the tone of the given text to convey a specific emotion or attitude, such as formal, friendly, assertive, or empathetic, and etc. Ensure the adjusted tone is consistent throughout the entire text and aligns with the intended message."
    },
    {
        "name": "Style Transformation",
        "description": "Transform the writing style of the given text to match a target style, such as academic, journalistic, or creative, and etc. Maintain the core content and meaning of the text while adapting its language, sentence structure, and overall presentation to the desired style."
    },
    {
        "name": "Satirical or Parodic Twist",
        "description": "Inject satire or parody into the text, rewriting it in a way that humorously critiques or lampoons the original content, characters, or themes. This approach requires a delicate balance of humor, critique, and creativity to entertain while possibly offering commentary."
    },
    {
        "name": "Age-Specific Adaptation",
        "description": "Tailor the text to a specific age group, adjusting the language, content, and presentation to be age-appropriate and engaging for that demographic. This could involve simplifying concepts for children, incorporating youthful slang for teenagers, or addressing topics relevant to seniors."
    },
    {
        "name": "Paraphrasing",
        "description": "Paraphrase the given text, maintaining its original meaning but using different words and sentence structures. Ensure the paraphrased text is clear, concise, and easier to understand for a wide audience."
    },
    {
        "name": "Summarization",
        "description": "Summarize the given text, capturing the main points and key information while omitting unnecessary details. The summary should be concise yet comprehensive, allowing readers to quickly grasp the essence of the original text."
    },
    {
        "name": "Text Simplification",
        "description": "Simplify the given text by reducing its complexity, using simpler vocabulary, shorter sentences, and clearer explanations, and etc. The simplified text should be easily understandable by a broader audience, including non-native speakers and readers with varying levels of literacy."
    },
    {
        "name": "Content Expansion",
        "description": "Expand the given text by adding relevant details, examples, explanations, or background information, and etc. The expanded content should provide a more comprehensive and in-depth understanding of the topic while maintaining coherence and readability."
    },
    {
        "name": "Perspective Shift",
        "description": "Rewrite the given text from a different perspective or point of view, such as from a specific character, stakeholder, or demographic, and etc. The rewritten text should accurately reflect the chosen perspective while preserving the overall message and context."
    },
    {
        "name": "Dialogue Generation",
        "description": "Generate realistic and engaging dialogue based on a given context, scenario, or set of characters, and etc. The generated dialogue should be natural, coherent, and consistent with the personalities and relationships of the involved parties."
    },
    {
        "name": "Text Normalization",
        "description": "Normalize the given text by converting abbreviations, slang, or informal language into standard, formal language, and etc. The normalized text should maintain the original meaning while adhering to proper grammar, spelling, and punctuation conventions."
    },
    {
        "name": "Localization and Personalization",
        "description": "Adapt the given text to specific locales, cultures, or audience demographics. This may involve modifying vocabulary, examples, references, or tone, and etc to better resonate with the target audience and ensure cultural relevance and sensitivity."
    },
    {
        "name": "Genre Conversion",
        "description": "Transform the given text to fit a specific literary or content genre, such as converting a news article into a fairy tale, a scientific report into a poem, or a business memo into a short story, and etc. This requires not only a change in style and tone but also a creative reimagining of the content to align with genre conventions."
    },
    {
        "name": "Historical Period Adaptation",
        "description": "Rewrite the text as if it were written in a different historical period, using the language, idioms, and cultural references appropriate to that time, and etc. This could involve converting modern prose into Shakespearean English, translating contemporary dialogue into the vernacular of the 1920s, or presenting a current event in the style of a medieval chronicle."
    },
    {
        "name": "Technical Jargon Incorporation or Removal",
        "description": "Either simplify technical text by removing jargon and complex language to make it accessible to a general audience or, conversely, increase the technical level of a general text by incorporating specific jargon and concepts from fields like medicine, law, or engineering, and etc."
    },
    {
        "name": "Narrative Voice Alteration",
        "description": "Change the narrative voice or point of view of the text. This could involve shifting from first-person to third-person narration, changing from passive to active voice, or rewriting a narrative to be told from the perspective of a different character or object within the story."
    },
    {
        "name": "Emotional Tone Infusion",
        "description": "Infuse the text with a specific emotional tone or mood, such as joy, sadness, suspense, or nostalgia, without altering the fundamental message. This requires subtle changes in word choice, sentence rhythm, and imagery to evoke the desired emotional response in the reader."
    },
    {
        "name": "Cross-Cultural Adaptation",
        "description": "Adapt the text to resonate with readers from a different cultural background, which involves more than just translation. It includes adjusting cultural references, metaphors, and societal norms to be more inclusive or relevant to the new audience while maintaining the essence and purpose of the original text."
    },
    {
        "name": "Interactive Content Creation",
        "description": "Transform the text into an interactive format, such as a choose-your-own-adventure story, an interactive tutorial, or a role-playing game script. This involves not just rewriting but reimagining the content as a branching narrative or dialogue with multiple paths and outcomes based on reader or user choices."
    },
    {
        "name": "Multimedia Adaptation",
        "description": "Convert the text into a script or storyboard for multimedia use, such as a video, podcast, or comic strip. This involves visualizing how the text can be represented using different media and creating detailed instructions or scripts that align with the original message but leverage the strengths of the chosen format."
    },
    {
        "name": "Sensory Detail Enhancement",
        "description": "Enhance the text by adding rich sensory details to make descriptions more vivid and engaging. This could involve describing scenes, emotions, or actions in a way that appeals to the senses (sight, sound, touch, taste, and smell), enriching the reader's experience and immersion."
    },
    {
        "name": "Ethical or Philosophical Examination",
        "description": "Rewrite the text to highlight or explore ethical dilemmas, philosophical questions, or moral lessons. This could involve adding scenarios, dialogues, or reflections that prompt readers to consider deeper meanings or the moral implications of the content."
    },
    {
        "name": "Mythological Retelling",
        "description": "Transform the text by weaving in mythological elements or themes, retelling the story or information through the lens of gods, mythical creatures, or ancient folklore. This could involve integrating characters from mythology or reimagining events as epic tales."
    },
    {
        "name": "Science Fiction or Fantasy Conversion",
        "description": "Adapt the text into a science fiction or fantasy setting, incorporating elements like advanced technology, alien societies, magical realms, or dystopian futures. This encourages creative reimagining of the original content in a way that explores alternate realities or universes."
    },
    {
        "name": "Educational Curriculum Development",
        "description": "Transform the text into an educational module or curriculum, complete with objectives, lessons, activities, and assessments. This adaptation should make the content suitable for learning environments, tailoring the information to fit educational goals and student engagement strategies."
    },
    {
        "name": "Health and Wellness Emphasis",
        "description": "Edit the text to focus on health and wellness, whether by incorporating advice on physical health, mental well-being, or lifestyle improvements. The rewritten content should offer valuable insights or tips that promote a healthier, more balanced lifestyle."
    },
    {
        "name": "Cultural Festival or Holiday Incorporation",
        "description": "Integrate elements of a specific cultural festival, holiday, or celebration into the text, adapting the content to reflect the traditions, rituals, and significance of the event. This could involve describing celebrations, historical backgrounds, or cultural practices related to the occasion."
    }
]

In [17]:
def generate_rewrite_prompts(aspect_area, original_texts, count):
    prompts = []
    aspect_area = random.choice(aspect_area)
    for original_text in random.sample(original_texts, count):
        aspect_name = aspect_area["name"]
        aspect_description = aspect_area["description"]

        # Generate a prompt that includes a brief of the original text
        prompt = f"Please craft a unique sentence that instruct a test taker to rewrite this text '{original_text}'(do not include this in the instruction) based on this rewriting task '{aspect_name}', create a unique, clear, and specific instruction. This instruction should be tailored to enhance or transform the provided text snippet in a way that aligns with the task's objective. It should not only fit within a 30-word limit. Come up with the tasks, genre, style, aspect, or example and etc directly when you ask the test taker, and ensure the sentence is clear and engaging without requiring further instruction or the inclusion of brackets or placeholders. Create a standalone sentence that embodies all these criteria, making sure it's free from any special characters or symbols. Thanks!"

        response = model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(
                temperature=0.9)
        )

        # Simulate calling an API with the prompt (for illustration purposes)
        # Replace this part with your actual API call
        # response = "Imagine the API response generates a creative and specific rewrite prompt based on the input."

        generated_prompt = response.candidates[0].content.parts[0].text
        print(f"{generated_prompt}")
        # Check for special characters
        special_chars = ['[', ']', '<', '>', '{', '}']
        if any(char in generated_prompt for char in special_chars):
            print("Special characters found in the generated prompt. Skipping...")
            continue 

        prompts.append(generated_prompt)
        
        time.sleep(1)  # Delay to avoid rate limits or for simulation

    return prompts

# Prepare DataFrame for storing original texts and rewrite prompts
df = pd.DataFrame(columns=['Original Text', 'Rewrite Prompt'])

# Example usage with a single text and aspect area
original_texts = random.sample(text, 2000)  # Assuming 'text' is your list of original texts
#aspect_area = random.choice(aspects_areas)  # Assuming 'guiding_dictionary' is accessible and contains your rewrite aspects

for original_text in original_texts:
    try:
        prompts = generate_rewrite_prompts(aspects_areas, original_text, 1)
        for prompt in prompts:
            df = df.append({'Original Text': original_text, 'Rewrite Prompt': prompt}, ignore_index=True)
        time.sleep(1)
    except Exception as e:
        print(f"Reached the API limit, pausing for 5 minutes. Error: {e}")
        time.sleep(300)
        continue

Compose a captivating mythological retelling by infusing a fresh perspective, imaginative elements, and a distinctive writing style into your rewrite of the provided text.


  df = df.append({'Original Text': original_text, 'Rewrite Prompt': prompt}, ignore_index=True)


Enhance the sensory experience of this text by infusing vivid descriptions that ignite the reader's senses, evoking specific sights, sounds, smells, tastes, and textures.


  df = df.append({'Original Text': original_text, 'Rewrite Prompt': prompt}, ignore_index=True)


Expand the substance of the text by adding at least 25% more words, incorporating new examples, varying sentence structure, and diversifying vocabulary to enhance clarity and engagement.


  df = df.append({'Original Text': original_text, 'Rewrite Prompt': prompt}, ignore_index=True)


KeyboardInterrupt: 

In [9]:
original_texts = random.sample(text, 2000)

In [13]:
original_texts[10][:500]

"SECTION 1. SHORT TITLE.\n\n    This Act may be cited as the ``Securing Our Nation's Application \nPrivacy Act of 2014'' or the ``SNAP Act of 2014''.\n\nSEC. 2. ACCESS BY MOBILE APPLICATIONS TO CERTAIN DEVICE CONTENT AND \n              FUNCTIONS.\n\n    (a) In General.--Not later than 1 year after the date of the \nenactment of this Act, the Federal Trade Commission shall promulgate \nregulations in accordance with section 553 of title 5, United States \nCode, that require--\n            (1) any person who "

In [23]:
rewrite_prompts

[response:
 GenerateContentResponse(
     done=True,
     iterator=None,
     result=glm.GenerateContentResponse({'candidates': [{'content': {'parts': [{'text': 'Adapt the excerpt to make it more inclusive and appropriate for a diverse audience. Consider cultural nuances, avoiding stereotypes, and ensuring that the text respects the identities and experiences of different groups.'}], 'role': 'model'}, 'finish_reason': 1, 'index': 0, 'safety_ratings': [{'category': 9, 'probability': 1, 'blocked': False}, {'category': 8, 'probability': 1, 'blocked': False}, {'category': 7, 'probability': 1, 'blocked': False}, {'category': 10, 'probability': 1, 'blocked': False}], 'token_count': 0, 'grounding_attributions': []}], 'prompt_feedback': {'safety_ratings': [{'category': 9, 'probability': 1, 'blocked': False}, {'category': 8, 'probability': 1, 'blocked': False}, {'category': 7, 'probability': 1, 'blocked': False}, {'category': 10, 'probability': 1, 'blocked': False}], 'block_reason': 0}}),
 ),
 

In [20]:
# Function to generate rewrite prompts using the Gemini API
def generate_rewrite_prompts(aspect_area, count):
    prompts = []
    for i in range(count):
        aspect_name = aspect_area["name"]
        aspect_description = aspect_area["description"]

        #prompt = f"Suppose I have a input text, and I want my genAI model to rewrite this text in different way. Now please only generate a high quality rewrite prompt for my model for this task: {aspect_name}. As described here {aspect_description} The prompt should be creative, clear, specific, and consistent in format. For example, if your prompt involve in a certain Genre, be specific on genre, an don't be vague in the prompt. It should cover a diverse range of topics and styles. Avoid bias and ensure the prompt is generalizable. You will not be given the original text, so your prompt should be applicable to any input text. Dont leave any blank spaces or leave a [ ] that I will need to fill in later in the prompt. The prompt should be a full sentence. Limit the prompt to 30 words. Please do not include any special characters or symbols in the prompt."
        prompt = f"Take a deep breath before you get started. Please craft a unique sentence that instruct a student to rewrite any input text according to this task {aspect_description}. Your sentence must be imaginative, precise, and detailed, avoiding ambiguity and generalities. It should not only fit within a 30-word limit but also be versatile enough to apply to a wide variety of texts. Specify the genre or style directly when relevant, and ensure the sentence is clear and engaging without requiring further customization or the inclusion of brackets or placeholders. Create a standalone sentence that embodies all these criteria, making sure it's free from any special characters or symbols. Thanks!"

        response = model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(
                temperature=0.9)
        )

        generated_prompt = response.candidates[0].content.parts[0].text

        # Check if the generated prompt contains '[' or ']'
        special_chars = ['[', ']', '<', '>', '{', '}']
        if any(char in generated_prompt for char in special_chars):
            print("Special characters found in the generated prompt. Skipping...")
            continue 

        prompts.append(generated_prompt)
        print(generated_prompt)
        
        time.sleep(1)  # Add a delay to avoid hitting rate limits

    return prompts

# Generate rewrite prompts for each aspect/area
rewrite_prompts = []
# randomly select an aspect area and generate rewrite prompts, create a total of 500 prompts
for i in range(500):
    aspect_area = random.choice(aspects_areas)
    try:
        prompts = generate_rewrite_prompts(aspect_area, 2)
        time.sleep(1)
    except Exception as e:
        print(f"reach the limit of the API, stop here for 5 minutes")
        time.sleep(300)
        pass
    rewrite_prompts.extend(prompts)

# Print the total number of generated rewrite prompts
print(f"Total rewrite prompts generated: {len(rewrite_prompts)}")

Reimagine the text as an ancient tapestry woven by divine hands, interlacing mortal tales with the threads of mythical beings, epic quests, and legendary wonders.
Reimagine your text as an epic tapestry woven with threads of ancient myth, where gods, heroes, and mythical beasts dance upon the stage of destiny.
Take a deep breath and reshape the text to foster a healthier lifestyle by weaving in physical well-being guidance, mental resilience tips, or daily habit recommendations.
Reimagine your text as a tapestry of vibrant wellness, weaving threads of physical vitality, mental clarity, and holistic nourishment into every fiber.
Embark on a literary voyage, reweaving the tapestry of words to resonate with a diverse audience, where cultural echoes dance harmoniously with the original text's soul.
Reimagine the text as a cultural tapestry, weaving threads of inclusivity and relevance to embrace a diverse audience's hearts and minds, honoring the original intent while illuminating new aven

In [None]:
# Save the rewrite prompts to a csv file wiht first column as id and second column as prompt
with open('/workspace/ligo_general/rewrite_prompts.csv', 'w') as f:
    f.write("id,rewrite_prompt\n")
    for i, prompt in enumerate(rewrite_prompts):
        # write the prompt to the with auto increment id
        f.write(f"{i+1},\"{prompt}\"\n")