# In [None]:
'''
Use a fine_tune_data.jsonl file as the training dataset to fine-tune a generative AI model. The fine-tuned model can be used for data transformation 
and creating new features.

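Each line in the JSONL file is a JSON object with "prompt" and "completion" keys (the legacy completions format; chat models such as gpt-3.5-turbo expect a "messages" list per line instead). Here is how the prompt/completion form looks: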

{"prompt": "Log entry: Suspicious login attempt\nClassification: suspicious\nAnomaly score: 0.9\nIs this a threat?", "completion": " Threat"}
{"prompt": "Log entry: Normal login\nClassification: normal\nAnomaly score: 0.1\nIs this a threat?", "completion": " Normal"}
{"prompt": "Log entry: Anomaly detected\nClassification: anomaly\nAnomaly score: 0.8\nIs this a threat?", "completion": " Threat"}
{"prompt": "Log entry: Unexpected file access\nClassification: suspicious\nAnomaly score: 0.85\nIs this a threat?", "completion": " Threat"}
{"prompt": "Log entry: Routine system check\nClassification: normal\nAnomaly score: 0.05\nIs this a threat?", "completion": " Normal"}
'''

import openai
import json
import time
import os
import sys 
sys.path.append('/Users/henrychang/sys_security_ai')
from utility import get_logger, config_file_loc, load_config, set_working_directory

# Module-level logger obtained from the project's `utility.get_logger` helper.
# Every FineTuner method and the script entry point report through it.
logger = get_logger()

class FineTuner:
    """Upload a JSONL dataset to OpenAI, start a fine-tuning job and track it.

    The class drives an ``openai.OpenAI`` client (the v1+ SDK interface):
    files are uploaded through ``client.files``, jobs are created and polled
    through ``client.fine_tuning.jobs``, and response fields are read as
    attributes (``response.id``) because v1 responses are typed objects, not
    dicts.

    Errors are logged through the module-level ``logger`` rather than raised,
    so callers should check return values (``None`` signals failure).
    """

    # Job states after which polling stops. Any other state (for example
    # 'validating_files', 'queued' or 'running') keeps the poll loop going.
    TERMINAL_STATUSES = ('succeeded', 'failed', 'cancelled')

    def __init__(self, api_key, fine_tune_data_path, fine_tune_id_path):
        """
        Store the input/output paths and create the OpenAI client.

        Args:
            api_key: OpenAI API key passed to ``openai.OpenAI``.
            fine_tune_data_path: Path of the JSONL training file to upload.
            fine_tune_id_path: Path of the text file that receives the
                fine-tuning job ID once the job succeeds.
        """
        # Assign the plain attributes first so they always exist, even when
        # client construction fails below.
        self.fine_tune_data_path = fine_tune_data_path
        self.fine_tune_id_path = fine_tune_id_path
        self.client = None
        try:
            self.client = openai.OpenAI(api_key=api_key)
        except Exception as e:
            logger.error(f"Unexpected error during initialization: {e}")

    def upload_dataset(self, file_path):
        """
        Upload a dataset file for fine-tuning with OpenAI.

        Args:
            file_path: Path of the JSONL file to upload.

        Returns:
            The uploaded file's ID, or ``None`` if the upload failed
            (the failure is logged).
        """
        if self.client is None:
            logger.error("OpenAI client is not initialized; cannot upload dataset.")
            return None
        try:
            # Log the path that is actually opened, not the instance default.
            logger.info(f"Path of data for fine tuning: {file_path}")
            with open(file_path, 'rb') as f:
                response = self.client.files.create(
                    file=f,
                    purpose='fine-tune'
                )
            file_id = response.id
            logger.info(f"Dataset uploaded successfully with file ID: {file_id}")
            return file_id
        except FileNotFoundError:
            logger.error(f"File not found: {file_path}")
        except Exception as e:
            logger.error(f"Unexpected error uploading dataset: {e}")
        return None

    def create_fine_tuning_job(self, file_id, model):
        """
        Create a fine-tuning job for an uploaded training file.

        Args:
            file_id: ID of a file previously uploaded with purpose 'fine-tune'.
            model: Name of the base model to fine-tune.

        Returns:
            A ``(job_id, status)`` tuple, or ``None`` if the job could not be
            created (the failure is logged).
        """
        try:
            job = self.client.fine_tuning.jobs.create(
                training_file=file_id,
                model=model
            )
            fine_tune_id = job.id
            status = job.status
            logger.info(f"Fine-tuning job created with ID: {fine_tune_id}, Status: {status}")
            return fine_tune_id, status
        except Exception as e:
            logger.error(f"Unexpected error creating fine-tuning job: {e}")
            return None

    def check_fine_tuning_status(self, status, fine_tune_id, poll_interval=60):
        """
        Poll a fine-tuning job until it reaches a terminal state.

        Polling stops once the status is one of ``TERMINAL_STATUSES``
        (succeeded, failed, cancelled). On success the job ID is written to
        ``self.fine_tune_id_path``.

        Args:
            status: Last known status of the job.
            fine_tune_id: ID of the fine-tuning job to poll.
            poll_interval: Seconds to sleep between status checks
                (default 60).

        Returns:
            The final status string, or ``None`` if polling or saving the ID
            raised an error (the failure is logged).
        """
        try:
            while status not in self.TERMINAL_STATUSES:
                time.sleep(poll_interval)  # Wait before checking again
                job = self.client.fine_tuning.jobs.retrieve(fine_tune_id)
                status = job.status
                logger.info(f"Fine-tuning status: {status}")

            if status == 'succeeded':
                # Make sure the output directory exists so the ID is not lost
                # after a long-running job merely because of a missing folder.
                output_dir = os.path.dirname(self.fine_tune_id_path)
                if output_dir:
                    os.makedirs(output_dir, exist_ok=True)
                with open(self.fine_tune_id_path, 'w') as f:
                    f.write(fine_tune_id)
                logger.info("Fine-tuning completed successfully and ID saved.")
            else:
                logger.error(f"Fine-tuning did not succeed. Final status: {status}")
            return status
        except Exception as e:
            logger.error(f"Unexpected error checking fine-tuning status: {e}")
            return None

    def fine_tune(self, model='gpt-3.5-turbo'):
        """
        Run the whole workflow: upload, create the job, wait for completion.

        Args:
            model: Base model to fine-tune. It must be a model the OpenAI
                fine-tuning API accepts, and the training file format must
                match it (chat models expect the "messages" format).
        """
        try:
            file_id = self.upload_dataset(self.fine_tune_data_path)
            if not file_id:
                return  # upload failure was already logged

            job = self.create_fine_tuning_job(file_id, model)
            if job is None:
                return  # creation failure was already logged

            fine_tune_id, status = job
            self.check_fine_tuning_status(status, fine_tune_id)
        except Exception as e:
            logger.error(f"Unexpected error during fine-tuning: {e}")

if __name__ == "__main__":
    # Script entry point: resolve paths from the configuration file and run
    # the fine-tuning workflow. Any failure is logged instead of propagating.
    try:
        config = load_config(config_file_loc)

        # The working directory and API key are only resolved when a
        # configuration was actually loaded.
        working_directory = None
        if config:
            working_directory = set_working_directory(config.get('desired_directory'))
            api_key = config.get('api_key')

        if working_directory:
            # Input side: <working_directory>/<input_dir>/<fine_tune_file>
            input_data_path = os.path.join(working_directory, config.get('input_dir'))
            fine_tune_data_path = os.path.join(input_data_path, config.get('fine_tune_file'))

            # Output side: <working_directory>/<output_dir>/<fine_tune_id_file>
            output_data_path = os.path.join(working_directory, config.get('output_dir'))
            fine_tune_id_path = os.path.join(output_data_path, config.get('fine_tune_id_file'))

            FineTuner(api_key, fine_tune_data_path, fine_tune_id_path).fine_tune()
    except Exception as e:
        logger.error(f"Unexpected error in main execution: {e}")

