# JSONL File Maker

Example JSONL tuning datasets: https://console.cloud.google.com/storage/browser/cloud-samples-data/ai-platform/generative_ai;tab=objects?prefix=&forceOnObjectsSortingFiltering=false

# Authenticate

In [1]:
# Authenticate the notebook user via Colab's auth helper.
# NOTE(review): presumably this is what lets the Vertex AI calls below run
# with the user's Google Cloud credentials — confirm against Colab docs.
from google.colab import auth
auth.authenticate_user()


In [2]:
# Google Cloud project ID; passed to vertexai.init(project=...) in the Libraries cell.
PROJECT_ID = 'css-genai-capstone'

# Install Dependencies

In [3]:
!pip install --upgrade google-cloud-aiplatform # Vertex AI module



# Libraries

In [4]:
import vertexai
import json
from vertexai.preview.generative_models import GenerativeModel
from vertexai.preview.language_models import TextGenerationModel
# Initialise the Vertex AI SDK for the project configured in PROJECT_ID.
vertexai.init(project=PROJECT_ID)
# Module-level model instance; call_model_generate_content() reads this global
# for every request, so rebinding `model` changes which model all helpers use.
model = GenerativeModel("gemini-1.5-flash-001")

# Core Functions

In [13]:

# Generate JSONL file
def generate_jsonl_data(data, output_file):
    """
    Serialize a sequence of dictionaries to a JSON Lines file.

    Each element of `data` is dumped with `json.dumps` and written on its own
    line, terminated by a newline. The file is opened in write mode, so any
    existing content at `output_file` is replaced; an empty `data` yields an
    empty file.

    Parameters:
    - data: Iterable of JSON-serializable dictionaries, one per output line.
    - output_file: Path of the JSONL file to write.
    """
    with open(output_file, 'w') as jsonl_file:
        # The generator keeps serialization lazy: one record is encoded and
        # written at a time, in input order.
        jsonl_file.writelines(f"{json.dumps(record)}\n" for record in data)

# Example function to call the model's generate_content method
def call_model_generate_content(prompt):
    """
    Send a prompt to the module-level `model` and return the generated text.

    The model is not passed in; this function uses the global `model` object
    created when the notebook's Libraries cell runs.

    Parameters:
    - prompt: The prompt to send to the model.

    Returns:
    - The `text` attribute of the response returned by `model.generate_content`.
    """
    return model.generate_content(prompt).text

# Function to create prompts by calling the model
def create_prompts(number_of_examples, examples):
    """
    Ask the model for new prompts modelled on a set of seed prompts.

    The same instruction (with `examples` interpolated into it) is sent to the
    model once per requested prompt, and each reply is collected as a new
    prompt. The number of prompts produced is printed before returning.

    Parameters:
    - number_of_examples: How many prompts to generate (one model call each).
    - examples: Seed prompts; embedded in the instruction via their string form.

    Returns:
    - List of generated prompt strings, in the order they were produced.
    """
    # Prompt for generating prompts
    meta_prompt = f"Generate a random individual prompt similar to one of these {examples}. Vary the persona in each call as much as possible. No titles needed"
    generated = [
        call_model_generate_content(meta_prompt)
        for _ in range(number_of_examples)
    ]
    print(f"Generated {len(generated)} prompts.")
    return generated

# Function to generate examples based on model version and prompts
def _gemini_example(prompt, response):
    """Build one record in the chat-style "messages" layout (system/user/model)."""
    return {
        "messages": [
            {"role": "system", "content": ""},  # System Prompt
            {"role": "user", "content": prompt},
            {"role": "model", "content": response},
        ]
    }


def _palm_example(prompt, response):
    """Build one record in the flat input_text/output_text layout."""
    return {
        "input_text": prompt,
        "output_text": response,
    }


# Maps each supported model_version value to the function that shapes its records.
_EXAMPLE_BUILDERS = {
    "Gemini": _gemini_example,
    "PaLM": _palm_example,
}


def generate_responses(model_version, prompts):
    """
    Generate a model response for each prompt and package the pairs as records.

    Every prompt is sent to the model through `call_model_generate_content`;
    the prompt and its response are then wrapped in the record layout that
    corresponds to `model_version`. The number of records is printed before
    returning.

    Parameters:
    - model_version: Record layout to produce; either "Gemini" or "PaLM"
      (exact, case-sensitive match).
    - prompts: Iterable of prompt strings to send to the model.

    Returns:
    - List of dictionaries, one per prompt, in the order of `prompts`.

    Raises:
    - ValueError: If `model_version` is not one of the supported values.
    """
    build_example = _EXAMPLE_BUILDERS.get(model_version)
    if build_example is None:
        # Fail before any model call is made: previously an unknown version
        # surfaced as an UnboundLocalError only after the first response had
        # been generated (or went unnoticed when `prompts` was empty).
        supported = ", ".join(repr(name) for name in _EXAMPLE_BUILDERS)
        raise ValueError(
            f"Unsupported model_version {model_version!r}; expected one of: {supported}"
        )

    examples = [
        build_example(prompt, call_model_generate_content(prompt))
        for prompt in prompts
    ]

    print(f"Generated {len(examples)} examples.")
    return examples




# Inputs

In [15]:
# Parameters
number_of_examples = 4  # how many prompts (and therefore records) to generate
model_version = "Gemini"  # record layout: "Gemini" or "PaLM"

# Example prompts used as seeds for the model-generated prompts
example_prompts = [
    "You are a career coach. Advise professionals on how they can expand their network by attending industry conferences.",
    "You are a professional development mentor. Suggest ways to enhance skill sets with online courses.",
    "You are a market analyst. Explain the importance of staying updated with the latest market trends and insights.",
    "You are a business consultant. Provide strategies on how leveraging technology can boost business efficiency.",
    "You are a motivational speaker. Describe how sharing success stories can inspire and motivate others."
]



prompts = create_prompts(number_of_examples, example_prompts)

# Output file path
output_file_path = 'tuning_data.jsonl'

# Generate data for the JSONL file
# (fixed: the call was misspelled `generate_respones`, which raises NameError
# on a fresh kernel because only `generate_responses` is defined)
data_for_jsonl = generate_responses(model_version, prompts)

# Generate the JSONL file
generate_jsonl_data(data_for_jsonl, output_file_path)

print(f"JSONL file '{output_file_path}' generated successfully.")

Generated 4 prompts.
Generated 4 examples.
JSONL file 'tuning_data.jsonl' generated successfully.
