In [None]:
import csv
import json

def convert_csv_to_jsonl(csv_file_path, jsonl_file_path):
    """
    Convert a question/answer CSV file into a JSON Lines file using the Llama 2 chat template.

    Every CSV row becomes one line of the output file: a JSON object with a
    single "text" key holding the system prompt, the question and the answer
    wrapped in the ``<s>[INST] <<SYS>> ... <</SYS>> ... [/INST] ... </s>`` template.

    The whole CSV is read and formatted before the output file is opened, so a
    failure while reading the input does not touch an existing output file.

    Args:
        csv_file_path (str): Path to the input CSV file. The header row must
            contain 'question' and 'answer' columns. The file is decoded as
            UTF-8; a leading byte-order mark is ignored.
        jsonl_file_path (str): Path to the output JSON Lines file. It is
            created, or overwritten if it already exists.

    Returns:
        None. A confirmation message is printed on success.

    Raises:
        OSError: If either file cannot be opened.
        KeyError: If a row lacks the 'question' or 'answer' column.
    """
    # Loop-invariant system prompt, kept separate from the template for readability.
    system_prompt = (
        "You are a helpful assistant knowledgeable about Kalinga Institute of "
        "Industrial Technology (KIIT). Provide concise and accurate information "
        "based on the user's question about KIIT."
    )

    # 'utf-8-sig' strips a UTF-8 BOM if one is present, so the first header is
    # read as 'question' rather than '\ufeffquestion'.
    # newline='' is what the csv module requires: it lets the reader handle
    # line endings itself, so newlines inside quoted fields are kept verbatim.
    with open(csv_file_path, 'r', encoding='utf-8-sig', newline='') as csvfile:
        kiit_finetuning_data = [
            {
                "text": (
                    f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
                    f"{row['question']} [/INST] {row['answer']} </s>"
                )
            }
            for row in csv.DictReader(csvfile)
        ]

    # Save to JSON Lines format: one JSON object per line.
    with open(jsonl_file_path, 'w', encoding='utf-8') as jsonlfile:
        for entry in kiit_finetuning_data:
            # ensure_ascii=False writes non-ASCII characters as-is instead of \uXXXX escapes.
            json.dump(entry, jsonlfile, ensure_ascii=False)
            jsonlfile.write('\n')

    print(f"Successfully converted '{csv_file_path}' to '{jsonl_file_path}'")

# Demo run: point these two paths at your own files before executing.
csv_file_path = 'kiit_qna.csv'  # Input CSV with 'question' and 'answer' columns
jsonl_file_path = 'kiit_data.jsonl'  # Destination JSON Lines file

convert_csv_to_jsonl(
    csv_file_path=csv_file_path,
    jsonl_file_path=jsonl_file_path,
)


Successfully converted 'kiit_qna.csv' to 'kiit_data.jsonl'
