<a href="https://colab.research.google.com/github/deep-diver/auto-data-fountain/blob/main/notebooks/pilot_modular.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install google-generativeai
!pip install pyyaml

In [3]:
import os

# Prefer the GEMINI_API_KEY environment variable so the real secret never has to
# be saved in the notebook; "..." remains only as the placeholder fallback.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "...")

In [4]:
import json

def find_json_code_snippet(raw_code_snippet):
	"""Extract and parse the JSON object embedded in a string.

	The candidate is the span from the first ``{`` to the last ``}`` of the
	input, so any text surrounding the object is ignored.

	Args:
		raw_code_snippet: String that may contain a JSON object.

	Returns:
		The value produced by ``json.loads`` for the candidate span.

	Raises:
		ValueError: If the string has no ``{`` or no ``}``, or if the
			candidate span is not valid JSON. In the latter case the
			underlying parser error is attached as ``__cause__``.
	"""
	json_start_index = raw_code_snippet.find('{')
	json_end_index = raw_code_snippet.rfind('}')

	if json_start_index < 0 or json_end_index < 0:
		raise ValueError('No JSON code snippet found in string.')

	json_code_snippet = raw_code_snippet[json_start_index:json_end_index + 1]
	try:
		# strict=False lets raw control characters (e.g. newlines) appear inside strings.
		return json.loads(json_code_snippet, strict=False)
	except (ValueError, RecursionError) as err:
		# json.JSONDecodeError is a ValueError; RecursionError covers absurdly deep nesting.
		# Interrupts (KeyboardInterrupt/SystemExit) are deliberately not intercepted.
		raise ValueError('failed to parse string into JSON format') from err

def parse_first_json_code_snippet(code_snippet):
	"""Parse the first JSON object found in a string or in a list of strings.

	Args:
		code_snippet: Either a single string, or a list of candidate strings
			that are tried in order.

	Returns:
		The parsed JSON value. For a list input, the result of the first
		element that parses; ``None`` if no element parses.

	Raises:
		ValueError: For a non-list input that cannot be parsed. The message
			of the underlying error is printed and also carried by the
			raised exception.
	"""
	if isinstance(code_snippet, list):
		for code_snippet_piece in code_snippet:
			try:
				return find_json_code_snippet(code_snippet_piece)
			except Exception:
				# Best effort: an unparsable piece just means "try the next one".
				# Exception (not a bare except) keeps Ctrl-C / SystemExit working.
				continue
		return None

	try:
		return find_json_code_snippet(code_snippet)
	except Exception as e:
		# Printed so the reason stays visible even when a caller swallows the error.
		print(e)
		raise ValueError(str(e)) from e

In [5]:
def determine_model_name(given_image=None):
  """Return the Gemini model id: the vision model when an image is given, else the text model."""
  return "gemini-pro" if given_image is None else "gemini-pro-vision"

def construct_image_part(given_image, mime_type="image/jpeg"):
  """Wrap image data in the dict form used as a prompt part.

  Args:
    given_image: The image payload, stored unchanged under "data".
    mime_type: MIME type stored under "mime_type". Defaults to "image/jpeg";
      pass e.g. "image/png" for non-JPEG input.

  Returns:
    A dict with the keys "mime_type" and "data".
  """
  return {
    "mime_type": mime_type,
    "data": given_image
  }

def call_gemini(prompt="", API_KEY=None, given_text=None, given_image=None, generation_config=None, safety_settings=None):
  """Send a prompt, optionally with extra text and/or an image, to Gemini.

  Args:
    prompt: Instruction text sent to the model.
    API_KEY: Key handed to ``genai.configure``.
    given_text: Optional text appended below the prompt, separated by a
      dashed line.
    given_image: Optional image payload. When given, the vision model is
      selected and the image is attached as an additional prompt part.
    generation_config: Generation settings; when None, the defaults defined
      below are used.
    safety_settings: Safety settings; when None, every listed harm category
      uses the "BLOCK_ONLY_HIGH" threshold.

  Returns:
    ``response.text`` of the generated response.
    NOTE(review): reading ``response.text`` presumably raises when the
    response carries no text (e.g. it was blocked) - confirm against the SDK.
  """
  # Imported here so that defining this function does not require the SDK.
  import google.generativeai as genai
  genai.configure(api_key=API_KEY)

  if generation_config is None:
    generation_config = {
      "temperature": 0.9,
      "top_p": 1,
      "top_k": 32,
      "max_output_tokens": 8192,
    }

  if safety_settings is None:
    safety_settings = [
      {"category": category, "threshold": "BLOCK_ONLY_HIGH"}
      for category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
      )
    ]

  model_name = determine_model_name(given_image)
  model = genai.GenerativeModel(model_name=model_name,
                                generation_config=generation_config,
                                safety_settings=safety_settings)

  USER_PROMPT = prompt
  if given_text is not None:
    # The instruction has to stay in front of the appended text; sending only
    # given_text would silently discard the prompt.
    USER_PROMPT = f"""{prompt}
------------------------------------------------
{given_text}
"""
  prompt_parts = [USER_PROMPT]
  if given_image is not None:
    prompt_parts.append(construct_image_part(given_image))

  response = model.generate_content(prompt_parts)
  return response.text

In [6]:
!mkdir counsel

mkdir: cannot create directory ‘counsel’: File exists


In [7]:
%%writefile counsel/diagram.mermaid

erDiagram
    COUNSELOR ||--|{ COUNSELEE : "provides counseling to"

    %% Comments for relationship attributes
    %% Start date: 2024-02-14
    %% Frequency: Weekly
    %% Topic: marriage guidance

Overwriting counsel/diagram.mermaid


In [8]:
%%writefile counsel/setup.yaml

initial_prompt: |
  The below erDiagram describes the basic setup of a certain scene.

  Generate possible conversations between a user and an assistant.
  The conversations should sound natural and logical.
  The direction or the style of the conversations should be "%s".
  The conversations should occur without exposing the underlying information of the erDiagram.

  The user should play the role of "%s" appeared in the erDiagram. The user should focus on the given role.
  The assistant should play the role of "%s" appeared in the erDiagram. The assistant should focus on the given role.
  Based on what the user says, the assistant gives appropriate, detailed, and long answers.

derivational_prompt: |
  The below erDiagram describes the basic setup of a certain scene.

  Here are the first few conversations:
  %s

  Generate possible follow-up conversations between a user and an assistant.
  The conversations should sound natural and logical.
  The direction or the style of the conversations should be "%s".
  The conversations should occur without exposing the underlying information of the erDiagram.

  The user should play the role of "%s" appeared in the erDiagram. The user should focus on the given role.
  The assistant should play the role of "%s" appeared in the erDiagram. The assistant should focus on the given role.
  Based on what the user says, the assistant gives appropriate, detailed, and long answers.

output_format: |
  The generated conversations are recorded in a valid JSON as
  {"conversations":[{"user": text, "assistant": text},...]}.

delimiter: "------------------------------"

user_role: COUNSELEE
assistant_role: COUNSELOR

seed_evolving_directions:
  - in-depth
  - general

derivational_evolving_directions:
  - general

Overwriting counsel/setup.yaml


In [9]:
import os
import yaml

def get_setup_and_mermaid(folder_path):
    """Load a scenario folder's ``setup.yaml`` and ``diagram.mermaid``.

    Args:
        folder_path: Directory expected to contain both files.

    Returns:
        A ``(setup, mermaid)`` tuple: the value parsed from ``setup.yaml`` by
        ``yaml.safe_load`` and the diagram text with surrounding whitespace
        stripped.

    Raises:
        FileNotFoundError: If either file is missing from ``folder_path``.
    """
    setup_yaml_path = os.path.join(folder_path, 'setup.yaml')
    mermaid_path = os.path.join(folder_path, 'diagram.mermaid')

    if not os.path.isfile(setup_yaml_path):
        raise FileNotFoundError(f"setup.yaml not found in {folder_path}")

    if not os.path.isfile(mermaid_path):
        raise FileNotFoundError(f"diagram.mermaid not found in {folder_path}")

    # Read explicitly as UTF-8 so the result does not depend on the platform's
    # default text encoding.
    with open(setup_yaml_path, 'r', encoding='utf-8') as file:
        setup = yaml.safe_load(file)

    with open(mermaid_path, 'r', encoding='utf-8') as file:
        mermaid = file.read()

    return setup, mermaid.strip()

In [10]:
# Scenario folder that holds the setup.yaml and diagram.mermaid files.
folder = "counsel"

# Load the prompt configuration and the stripped diagram text from that folder.
setup, mermaid = get_setup_and_mermaid(folder)

In [14]:
def gen_seed(prompt, retry_num=4):
  """Ask Gemini for conversations and parse the JSON reply, retrying on failure.

  Both a failed API call and a reply that cannot be parsed count as a failed
  attempt and trigger a retry.

  Args:
    prompt: Full prompt text passed to ``call_gemini``.
    retry_num: Number of retries after the first attempt, so at most
      ``retry_num + 1`` attempts are made.

  Returns:
    The parsed JSON value of the first successful attempt, or ``None`` when
    every attempt failed.
  """
  for _ in range(retry_num + 1):
    try:
      raw_response = call_gemini(
        prompt=prompt,
        API_KEY=GEMINI_API_KEY
      )
      parsed = parse_first_json_code_snippet(raw_response)
    except Exception:
      # Best effort: any failure just consumes one attempt. Exception (not a
      # bare except) keeps Ctrl-C able to interrupt the loop.
      continue

    if parsed is not None:
      return parsed

  return None

def gen_seeds(setup, mermaid, retry_num=4):
  """Generate one seed result per entry of ``setup["seed_evolving_directions"]``.

  Args:
    setup: Mapping with the keys "initial_prompt" (a %-template with three
      ``%s`` slots: direction, user role, assistant role), "output_format",
      "delimiter", "user_role", "assistant_role" and
      "seed_evolving_directions".
    mermaid: Diagram text appended after the delimiter at the end of each prompt.
    retry_num: Forwarded to ``gen_seed`` for every direction.

  Returns:
    A list with one ``gen_seed`` result per direction, in order.

  Raises:
    KeyError: If ``setup`` lacks one of the required keys.
  """
  initial_prompt = setup['initial_prompt']
  output_format = setup['output_format']
  delimiter = setup['delimiter']

  user_role = setup['user_role']
  assistant_role = setup['assistant_role']

  seed_evolving_directions = setup["seed_evolving_directions"]

  outputs = []
  for evolving_direction in seed_evolving_directions:
    prompt = initial_prompt % (evolving_direction, user_role, assistant_role)
    prompt = f"{prompt}\n{output_format}\n{delimiter}\n{mermaid}"
    # Pass the caller's retry budget through instead of gen_seed's default.
    outputs.append(gen_seed(prompt, retry_num=retry_num))

  return outputs

In [15]:
# One entry per direction listed in setup["seed_evolving_directions"].
seeds = gen_seeds(setup, mermaid)

In [16]:
seeds

[{'conversations': [{'user': "I've been feeling really down lately and just don't know why. I tried talking to my friends, but they don't seem to understand. I don't know what to do.",
    'assistant': "It's understandable to feel overwhelmed and alone when you're struggling with emotional challenges. Seeking professional help can provide you with the support and guidance you need to navigate these difficulties. I can connect you with a counselor who specializes in helping individuals like you."},
   {'user': "I'm going through a divorce and it's been really tough. I don't know how to cope with the stress and anxiety.",
    'assistant': 'Divorce can be an incredibly stressful and emotional experience. Working with a counselor can help you process the complex emotions associated with divorce, develop coping mechanisms, and build resilience. I can help you find a counselor who specializes in divorce and relationship issues.'},
   {'user': "My relationship with my partner has been on the 