<a href="https://colab.research.google.com/github/deep-diver/janus/blob/main/pilot_modular_instruct.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install google-generativeai
!pip install pyyaml

In [2]:
# NOTE(review): "..." looks like a redacted placeholder - supply a real Gemini
# API key here before running the generation cells; avoid committing it.
GEMINI_API_KEY = "..."

In [3]:
import json

def find_json_code_snippet(raw_code_snippet):
	"""Extract and parse the outermost ``{...}`` span found in a string.

	The candidate span runs from the first ``{`` to the last ``}`` in
	``raw_code_snippet``; any text outside that span (for example code
	fences wrapped around a model reply) is ignored.

	Args:
		raw_code_snippet: Text expected to embed one JSON object.

	Returns:
		The value produced by ``json.loads`` for the extracted span.

	Raises:
		ValueError: If the text contains no ``{`` or no ``}``, or if the
			extracted span is not valid JSON. In the latter case the
			underlying decoder error is attached as ``__cause__``.
	"""
	json_start_index = raw_code_snippet.find('{')
	json_end_index = raw_code_snippet.rfind('}')

	if json_start_index < 0 or json_end_index < 0:
		raise ValueError('No JSON code snippet found in string.')

	json_code_snippet = raw_code_snippet[json_start_index:json_end_index+1]
	try:
		# strict=False lets raw control characters (e.g. newlines) appear
		# inside JSON strings instead of being rejected.
		return json.loads(json_code_snippet, strict=False)
	except (ValueError, RecursionError) as error:
		# json.JSONDecodeError is a ValueError subclass. Catching only
		# decoder failures keeps KeyboardInterrupt/SystemExit propagating.
		raise ValueError('failed to parse string into JSON format') from error

def parse_first_json_code_snippet(code_snippet):
	"""Parse the first JSON object that can be extracted from the input.

	Args:
		code_snippet: Either a single value or a list of values, each handed
			to ``find_json_code_snippet``.

	Returns:
		For a list: the parsed result of the first piece that parses, or
		``None`` when no piece parses (including an empty list).
		Otherwise: the parsed result for ``code_snippet``.

	Raises:
		ValueError: Only for non-list input, when extraction or parsing
			fails. The original error is printed, reused as the message and
			attached as ``__cause__``.
	"""
	if isinstance(code_snippet, list):
		for code_snippet_piece in code_snippet:
			try:
				return find_json_code_snippet(code_snippet_piece)
			except Exception:
				# Best effort: skip pieces that do not parse and try the next.
				# Exception (not a bare except) lets KeyboardInterrupt through.
				continue
		return None

	try:
		return find_json_code_snippet(code_snippet)
	except Exception as e:
		print(e)
		# Keep the ValueError type callers rely on, but carry the reason.
		raise ValueError(str(e)) from e

In [4]:
def determine_model_name(given_image=None):
  """Choose the Gemini model name based on whether an image is supplied.

  Args:
    given_image: Image payload, or ``None`` for a text-only request.

  Returns:
    ``"gemini-pro-vision"`` when ``given_image`` is not ``None``,
    otherwise ``"gemini-pro"``.
  """
  return "gemini-pro-vision" if given_image is not None else "gemini-pro"

def construct_image_part(given_image):
  """Wrap an image payload in the dict form appended to the prompt parts.

  Args:
    given_image: Stored unchanged under the ``"data"`` key.

  Returns:
    A dict with ``"mime_type"`` fixed to ``"image/jpeg"`` and ``"data"``
    set to ``given_image``.
  """
  return dict(mime_type="image/jpeg", data=given_image)

def call_gemini(prompt="", API_KEY=None, given_text=None, given_image=None, generation_config=None, safety_settings=None):
  """Send a prompt (plus an optional image) to Gemini and return the reply text.

  Args:
    prompt: Text placed first in the list of prompt parts.
    API_KEY: Passed to ``genai.configure(api_key=...)``.
    given_text: Accepted for interface compatibility; this function never
      reads it.
    given_image: Optional image payload. When not ``None`` it selects the
      vision model and is appended to the prompt parts as a JPEG part.
    generation_config: Generation settings; when ``None`` the defaults
      below are used.
    safety_settings: Safety settings; when ``None`` every category listed
      below uses the ``BLOCK_ONLY_HIGH`` threshold.

  Returns:
    ``response.text`` of the ``generate_content`` call.
  """
  import google.generativeai as genai
  genai.configure(api_key=API_KEY)

  if generation_config is None:
    generation_config = dict(
      temperature=0.9,
      top_p=1,
      top_k=32,
      max_output_tokens=8192,
    )

  if safety_settings is None:
    harm_categories = (
      "HARM_CATEGORY_HARASSMENT",
      "HARM_CATEGORY_HATE_SPEECH",
      "HARM_CATEGORY_SEXUALLY_EXPLICIT",
      "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
    safety_settings = [
      {"category": harm_category, "threshold": "BLOCK_ONLY_HIGH"}
      for harm_category in harm_categories
    ]

  model = genai.GenerativeModel(
    model_name=determine_model_name(given_image),
    generation_config=generation_config,
    safety_settings=safety_settings,
  )

  # The image part, when present, follows the text prompt.
  image_parts = [] if given_image is None else [construct_image_part(given_image)]
  response = model.generate_content([prompt] + image_parts)
  return response.text

In [5]:
!mkdir counsel-instruct

In [6]:
%%writefile counsel-instruct/diagram.mermaid

erDiagram
    COUNSELOR ||--|{ COUNSELEE : "provides counseling to"

    %% Comments for relationship attributes
    %% Topic: marriage guidance

Writing counsel-instruct/diagram.mermaid


In [52]:
%%writefile counsel-instruct/setup.yaml

initial_prompt: |
  %s # erDiagram
  %s # delimiter
  The above erDiagram describes the basic setup of a certain scene.

  Generate a pair of conversation between user(%s) and assistant(%s). # user role, assistant role
  The direction or the style of the conversation should be "%s". # direction
  The conversations should be occured without exposuring the underlying information of the erDiagram.

  The user describe his/her situation in a long paragraph as much as possible. It is ideal to provide detailed background information to the assistant.
  Then the assistant gives appropriate, detailed, and long answers. Trying to give every possible advices on this single conversation in step-by-step manner.

derivational_prompt: |
  %s # erDiagram
  %s # delimiter
  The above erDiagram describes the basic setup of a certain scene.

  %s # example conversation
  %s # delimiter
  Based on the above example conversation, generate another pair of conversation between user(%s) and assistant(%s). # user role, assistant role
  The direction or the style of the conversation should be "%s". # direction
  The conversations should be occured without exposuring the underlying information of the erDiagram.

  The user describe his/her situation in a long paragraph as much as possible. It is ideal to provide detailed background information to the assistant.
  Then the assistant gives appropriate, detailed, and long answers. Trying to give every possible advices on this single conversation in step-by-step manner.

output_format: |
  The generated conversations are recorded in a valid JSON as
  {"conversation":{"user": text, "assistant": text}}.

delimiter: "------------------------------"

user_role: COUNSELEE
assistant_role: COUNSELOR

seed_evolving_directions:
  - general
  - diverse

derivational_evolving_directions:
  - general
  - in-depth

Overwriting counsel-instruct/setup.yaml


In [53]:
import os
import yaml

def get_setup_and_mermaid(folder_path):
    """Load ``setup.yaml`` from a folder together with its ER diagram text.

    The diagram text is resolved in this order:
      1. the ``er_diagram`` entry of the setup, when present and not ``None``;
      2. the file named by the ``er_diagram_path`` entry (relative to
         ``folder_path``), consulted only when ``er_diagram`` is absent;
      3. ``diagram.mermaid`` inside ``folder_path``.

    Args:
        folder_path: Directory containing ``setup.yaml``.

    Returns:
        A ``(setup, mermaid)`` tuple: the parsed YAML mapping and the
        diagram text with surrounding whitespace stripped.

    Raises:
        FileNotFoundError: If ``setup.yaml`` or the resolved diagram file
            does not exist.
        ValueError: If ``setup.yaml`` does not contain a mapping (for
            example when the file is empty).
    """
    mermaid = None
    setup_yaml_path = os.path.join(folder_path, 'setup.yaml')
    mermaid_path = os.path.join(folder_path, 'diagram.mermaid')

    if not os.path.isfile(setup_yaml_path):
        raise FileNotFoundError(f"setup.yaml not found in {folder_path}")

    # Explicit encoding so the result does not depend on the platform locale.
    with open(setup_yaml_path, 'r', encoding='utf-8') as file:
        setup = yaml.safe_load(file)

    # safe_load returns None for an empty document; fail with a clear message
    # instead of an opaque TypeError on the membership tests below.
    if not isinstance(setup, dict):
        raise ValueError(f"{setup_yaml_path} must contain a YAML mapping")

    if 'er_diagram' in setup:
        mermaid = setup['er_diagram']
    elif 'er_diagram_path' in setup:
        mermaid_path = os.path.join(folder_path, setup['er_diagram_path'])

    if mermaid is None:
        if not os.path.isfile(mermaid_path):
            # Report the path actually looked up, which may come from
            # er_diagram_path rather than the default file name.
            raise FileNotFoundError(f"ER diagram file not found: {mermaid_path}")

        with open(mermaid_path, 'r', encoding='utf-8') as file:
            mermaid = file.read()

    return setup, mermaid.strip()

In [54]:
# Folder that holds the setup.yaml and diagram.mermaid written by the cells above.
folder = "counsel-instruct"

# Parsed YAML setup plus the stripped ER diagram text.
setup, mermaid = get_setup_and_mermaid(folder)

In [55]:
def gen_data(prompt, retry_num):
  """Ask Gemini for a reply and parse the JSON object it contains, with retries.

  Args:
    prompt: Prompt text forwarded to ``call_gemini``.
    retry_num: Number of retries after the first attempt, so at most
      ``retry_num + 1`` calls are made. A negative value makes no call.

  Returns:
    The parsed JSON value of the first attempt that succeeds, or ``None``
    when every attempt fails.
  """
  attempt = 0

  while attempt <= retry_num:
    # Count every attempt, not only the ones that raise, so the loop
    # always terminates.
    attempt += 1
    try:
      data_json = call_gemini(
        prompt=prompt,
        API_KEY=GEMINI_API_KEY
      )
      data = parse_first_json_code_snippet(data_json)
    except Exception:
      # API or parsing failure: retry. Exception (not a bare except) lets a
      # kernel interrupt stop the loop.
      continue

    if data is not None:
      return data

  return None

In [56]:
def gen_seeds(setup, mermaid, retry_num=4, s_factor=4):
  """Generate seed conversations for every seed evolving direction.

  Args:
    setup: Mapping providing ``initial_prompt``, ``output_format``,
      ``delimiter``, ``user_role``, ``assistant_role`` and
      ``seed_evolving_directions``.
    mermaid: ER diagram text substituted into the prompt template.
    retry_num: Retry budget forwarded to ``gen_data`` for each generation.
    s_factor: Number of generations requested per direction.

  Returns:
    List of the non-``None`` results returned by ``gen_data``, in
    generation order.
  """
  prompt_template = setup['initial_prompt']
  format_instructions = setup['output_format']
  separator = setup['delimiter']

  requester = setup['user_role']
  responder = setup['assistant_role']

  seeds = []

  for direction in setup["seed_evolving_directions"]:
    filled_template = prompt_template % (mermaid, separator, requester, responder, direction)
    prompt = "{}\n{}".format(filled_template, format_instructions)

    # Lazy generator: the gen_data calls still run one after another.
    attempts = (gen_data(prompt, retry_num) for _ in range(s_factor))
    seeds.extend(result for result in attempts if result is not None)

  return seeds

In [57]:
# Request up to s_factor (default 4) seed conversations for each entry of
# setup["seed_evolving_directions"]; generations that fail are left out.
seeds = gen_seeds(setup, mermaid)

failed to parse string into JSON format
failed to parse string into JSON format
failed to parse string into JSON format


In [58]:
# Number of seeds actually produced; at most len(seed_evolving_directions) * s_factor.
len(seeds)

8

In [59]:
import json

# Pretty-print every generated seed conversation for inspection.
print(json.dumps(seeds, indent=2))

[
  {
    "conversation": {
      "user": "Hi there. I'm in a bit of a difficult situation and I'm not sure what to do. I've been seeing a therapist for a few months now to help me deal with some personal issues. I've been making a lot of progress, but I'm still struggling with some things. One of the things I'm struggling with is my relationship with my partner. We've been together for 5 years and I love him very much, but we've been having some problems lately. I'm not sure if we can work through them and I'm really scared of losing him. I'm not sure what to do next. I'm thinking about ending the relationship, but I'm not sure if that's the right decision. I'm so confused and I don't know what to do.",
      "assistant": "I understand that you're going through a difficult time. It sounds like you're really struggling with your relationship and you're not sure what to do. I'm sorry to hear that you're going through this. It's important to remember that you're not alone and there are p

In [60]:
def gen_derivations(setup, mermaid, seed_conversations, retry_num=4, d_factor=4):
  """Generate new conversations derived from each seed conversation.

  Args:
    setup: Mapping providing ``derivational_prompt``, ``output_format``,
      ``delimiter``, ``user_role``, ``assistant_role`` and
      ``derivational_evolving_directions``.
    mermaid: ER diagram text substituted into the prompt template.
    seed_conversations: Iterable of dicts holding a ``conversation`` entry,
      which is embedded in the prompt as a JSON example. Items without that
      entry are skipped with a printed notice.
    retry_num: Retry budget forwarded to ``gen_data`` for each generation.
    d_factor: Number of generations requested per seed and direction.

  Returns:
    List of the non-``None`` results returned by ``gen_data``, in
    generation order.
  """
  derivational_prompt = setup['derivational_prompt']
  output_format = setup['output_format']
  delimiter = setup['delimiter']

  user_role = setup['user_role']
  assistant_role = setup['assistant_role']

  outputs = []
  derivational_evolving_directions = setup["derivational_evolving_directions"]

  for seed_conversation in seed_conversations:
    # Seeds come from model output, so their shape is not guaranteed. Skip a
    # malformed one rather than abort and lose everything generated so far.
    if not isinstance(seed_conversation, dict) or 'conversation' not in seed_conversation:
      print("skipping seed without a 'conversation' entry")
      continue

    # Serialize once per seed; the text is the same for every direction.
    example_conversation = json.dumps(seed_conversation['conversation'])

    for evolving_direction in derivational_evolving_directions:
      prompt = derivational_prompt % \
        (mermaid, delimiter, example_conversation, delimiter, user_role, assistant_role, evolving_direction)
      prompt = f"{prompt}\n{output_format}"

      for _ in range(d_factor):
        output = gen_data(prompt, retry_num)

        if output is not None:
          outputs.append(output)

  return outputs

In [61]:
# For every seed, request up to d_factor (default 4) derived conversations per
# entry of setup["derivational_evolving_directions"]; failures are left out.
outputs = gen_derivations(setup, mermaid, seeds)

In [62]:
# Number of derived conversations produced; at most
# len(seeds) * len(derivational_evolving_directions) * d_factor.
len(outputs)

64

In [63]:
import json

# Pretty-print every derived conversation for inspection.
print(json.dumps(outputs, indent=2))

[
  {
    "conversation": {
      "user": "I've been married for 10 years and I'm starting to feel like I'm losing my connection with my spouse. We used to be so close, but now we seem to be drifting apart. We don't talk as much as we used to, and when we do, it's usually about the kids or work. I feel like we're more like roommates than husband and wife. I'm not sure what to do. I love my spouse and I don't want to lose him, but I'm not sure if our marriage is worth saving. I'm also worried about the impact that a divorce would have on our children.",
      "assistant": "It's understandable that you're feeling lost and confused. It's important to remember that you're not alone and that there are many resources available to help you. Here are some things that you can do to help you cope: - Talk to your spouse about what you're going through. It's important to be honest with each other about your feelings. - Try to spend more time together doing things that you both enjoy. - Go on a dat