# In [1]:
import csv
import json

def convert_csv_to_openai_jsonl(csv_file: str, jsonl_file: str, model_name: str = "gpt-4-0125-preview") -> None:
    """
    Convert prompts in a CSV file to chat-style request objects, one JSON object per line.

    Each CSV row becomes ``{"model": model_name, "messages": [{"role": "user",
    "content": <evaluation instructions> + <row's 'prompt_plus_story' value>}]}``.
    A row whose 'prompt_plus_story' value is missing (column absent, or the row
    is shorter than the header) contributes the instructions alone.

    The whole CSV is read before the output file is opened, so a failure while
    reading leaves any existing output file untouched.

    Args:
        csv_file (str): Path to the input CSV file containing prompts. Must have a header row.
        jsonl_file (str): Path to the output JSONL file (overwritten if it exists).
        model_name (str): The model name to include in each request. Defaults to "gpt-4-0125-preview".
    """
    # The instruction text is identical for every row, so build it once.
    # NOTE: the 12-space indentation in front of every line (and the trailing
    # indented blank line) is part of the prompt text that has always been
    # emitted; it is reproduced exactly so generated prompts do not change.
    indent = " " * 12
    instruction_lines = [
        "You are an expert in Arabic language, its dialects, and storytelling. I would like your help in evaluating a story written by a student based on a set of instructions. You are expected to give a score out of five based on the following features:",
        "**Fluency:** How smooth and natural the text is, including appropriate grammar, vocabulary, and sentence structure.",
        "**Coherence:** The logical connection and flow of sentences and ideas, making the text easy to understand.",
        "**Following Instructions:** How well the text adheres to the provided instructions or task requirements.",
        "**Consistency:** How consistently accurate and uniform the information and style are throughout the text.",
        "**Variety:** How well does the model generate story in the required Arabic variety.",
        "Give the scores directly without explanations or additions. I will first give you the instructions on which the story was based, followed by the story written by the student. Remember, I want the evaluation directly without explanation.",
        "",
    ]
    instructions = "\n" + "".join(f"{indent}{line}\n" for line in instruction_lines) + indent

    # Read the CSV file and format the data as chat requests.
    # newline='' is required by the csv module so that line breaks embedded in
    # quoted fields (multi-line stories) reach the parser unmodified.
    with open(csv_file, mode='r', encoding='utf-8', newline='') as file:
        reader = csv.DictReader(file)
        formatted_prompts = []
        for row in reader:
            # DictReader fills missing trailing fields with None; treat that
            # the same as an empty prompt instead of failing on str + None.
            story_prompt = row.get('prompt_plus_story') or ''
            formatted_prompts.append({
                "model": model_name,
                "messages": [
                    {
                        "role": "user",
                        "content": instructions + story_prompt
                    }
                ]
            })

    # Save the formatted prompts to a JSONL file. ensure_ascii=False keeps the
    # Arabic text readable in the output instead of \uXXXX escapes.
    with open(jsonl_file, mode='w', encoding='utf-8') as file:
        for prompt in formatted_prompts:
            file.write(json.dumps(prompt, ensure_ascii=False) + '\n')

    print(f"Formatted prompts have been saved to {jsonl_file}")

# Example Usage
#convert_csv_to_openai_jsonl('./Formatted-Moroccan-prompts-for-eval-acegpt-7b-chat-stories-results.csv', './Formatted-Moroccan-prompts-for-eval-acegpt-7b-chat-stories-results.jsonl')
