### Train and Save Models for Uniformity in Environment

In [None]:
# train_and_save_models.py
#
# This script creates, trains, and saves the initial versions of all four
# reinforcement learning models (PPO, A2C, DDPG, SAC). Run this script once
# to generate the healthy .zip files needed by the optimizer script.

# --- Dependencies ---
# pip install numpy
# pip install groq
# pip install google-api-python-client
# pip install wolframalpha
# pip install wikipedia
# pip install scikit-learn
# pip install vaderSentiment
# pip install stable-baselines3[extra]
# pip install tensorflow
# pip install gymnasium
# pip install python-dotenv

import os
import time
import numpy as np
import groq
import wolframalpha
import wikipedia
from googleapiclient.discovery import build
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO, DDPG, A2C, SAC
from stable_baselines3.common.vec_env import DummyVecEnv
from dotenv import load_dotenv

# --- Load environment variables from a .env file for security ---
load_dotenv()

# --- API Configuration ---
# Each value is None when the corresponding environment variable is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
WOLFRAM_APP_ID = os.environ.get("WOLFRAM_APP_ID")
CUSTOM_SEARCH_ENGINE_ID = os.environ.get("CUSTOM_SEARCH_ENGINE_ID")

# Initialize API clients
# Bind every client name first: if one constructor below raises, the names
# after it would otherwise never be assigned and any later use would fail
# with a NameError far away from the real cause.
groq_client = None
wolfram_client = None
google_search_service = None
sentiment_analyzer = None
try:
    groq_client = groq.Groq(api_key=GROQ_API_KEY)
    wolfram_client = wolframalpha.Client(WOLFRAM_APP_ID)
    google_search_service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
    sentiment_analyzer = SentimentIntensityAnalyzer()
except Exception as e:
    # Best-effort start-up: report the problem and continue; clients that
    # were not created keep their None placeholder.
    print("--- ERROR: Failed to initialize API clients. Please check your API keys in the .env file. ---")
    print(f"Error details: {e}")


# --- Environment Definitions ---

class PromptEnv(gym.Env):
    """Common base for the prompt optimization environments.

    Observations are TF-IDF vectors of the current prompt, zero-padded to a
    fixed length of ``max_features``. This base class defines neither an
    ``action_space`` nor a ``step`` method.
    """

    def __init__(self, initial_prompt=""):
        super().__init__()
        self.initial_prompt = initial_prompt
        self.current_prompt = initial_prompt
        self.max_features = 100

        # Fit on a small fixed corpus so the vocabulary, and therefore the
        # meaning of each observation slot, is the same for every instance.
        self.vectorizer = TfidfVectorizer(max_features=self.max_features)
        self.vectorizer.fit([
            "explain the theory of relativity",
            "what are the main causes of climate change",
            "summarize the plot of hamlet",
            "how does photosynthesis work",
            "describe the process of machine learning",
            "what is quantum computing",
        ])

        self.observation_space = spaces.Box(
            low=0, high=1, shape=(self.max_features,), dtype=np.float32
        )

    def _get_obs(self):
        """Return the current prompt as a float32 vector of length ``max_features``."""
        tfidf_row = self.vectorizer.transform([self.current_prompt]).toarray().ravel()
        padded = np.zeros(self.max_features, dtype=np.float32)
        # The fitted vocabulary may be shorter than max_features; the tail stays zero.
        padded[:tfidf_row.size] = tfidf_row
        return padded

    def reset(self, seed=None, options=None):
        """Restore the initial prompt and return ``(observation, info)``."""
        super().reset(seed=seed)
        self.current_prompt = self.initial_prompt
        observation = self._get_obs()
        return observation, {}

    def _execute_api_calls(self, discrete_action):
        """Return a canned ``(modified_prompt, response)`` pair for ``discrete_action``.

        No external service is contacted here.
        """
        # This is a simplified version for training; a full implementation would use the RewardSystem
        modified_prompt = f"Modified prompt based on action {discrete_action}"
        response = "API response."
        return modified_prompt, response

class PromptEnvDiscrete(PromptEnv):
    """Prompt environment with four discrete actions, for PPO and A2C."""

    def __init__(self, initial_prompt=""):
        super().__init__(initial_prompt)
        self.action_space = spaces.Discrete(4)

    def step(self, action):
        """Run one step and end the episode.

        Returns the tuple ``(obs, reward, terminated, truncated, info)``.
        The reward is always 1.0 and ``terminated`` is always True.
        """
        # The helper's two results are not used by this step.
        _modified_prompt, _response = self._execute_api_calls(action)
        reward, terminated, truncated = 1.0, True, False
        return self._get_obs(), reward, terminated, truncated, {}

class PromptEnvContinuous(PromptEnv):
    """Prompt environment with a 4-dimensional Box action space, for DDPG and SAC."""

    def __init__(self, initial_prompt=""):
        super().__init__(initial_prompt)
        self.action_space = spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float32)

    def step(self, action):
        """Run one step and end the episode.

        The index of the largest component of ``action`` is used as the
        discrete action. Returns ``(obs, reward, terminated, truncated, info)``
        with a constant reward of 1.0 and ``terminated`` always True.
        """
        chosen_index = np.argmax(action)
        # The helper's two results are not used by this step.
        _modified_prompt, _response = self._execute_api_calls(chosen_index)
        reward, terminated, truncated = 1.0, True, False
        return self._get_obs(), reward, terminated, truncated, {}

# --- Model Training and Saving ---
def train_and_save_model(model_name, file_path, training_steps):
    """Create, train, and save a single RL model.

    Args:
        model_name: One of 'PPO', 'A2C', 'DDPG' or 'SAC'. Any other value
            prints an error and returns without training anything.
        file_path: Destination handed to the model's ``save`` method.
        training_steps: Number of timesteps handed to ``learn``.

    Returns:
        None.
    """
    model_classes = {'PPO': PPO, 'A2C': A2C, 'DDPG': DDPG, 'SAC': SAC}

    if model_name not in model_classes:
        print(f"Error: Model type '{model_name}' not recognized.")
        return

    # PPO and A2C use the discrete-action environment; DDPG and SAC use the
    # continuous one.
    env_class = PromptEnvDiscrete if model_name in ('PPO', 'A2C') else PromptEnvContinuous

    # DummyVecEnv expects zero-argument callables; the class itself is one,
    # so no wrapper lambda is needed.
    env = DummyVecEnv([env_class])
    try:
        print(f"\n--- Creating and Training {model_name} ---")
        model = model_classes[model_name]("MlpPolicy", env, verbose=1)

        model.learn(total_timesteps=training_steps)

        print(f"Saving new model to {file_path}...")
        model.save(file_path)
        print(f"--- {model_name} saved successfully. ---")
    finally:
        # Release the environment even when training or saving raises.
        env.close()


if __name__ == "__main__":
    # Kept small: this is only the initial training pass.
    initial_training_steps = 1000

    # Algorithm name -> file the trained model is written to.
    model_files_to_create = dict(
        A2C="a2c_actor.zip",
        DDPG="ddpg_agent.zip",
        PPO="ppo_model.zip",
        SAC="sac_sb3_model.zip",
    )

    # Train and save each configured model in turn.
    for model_name, file_path in model_files_to_create.items():
        train_and_save_model(model_name, file_path, initial_training_steps)

    print("\nAll models have been created and saved.")




--- Creating and Training A2C ---
Using cpu device
------------------------------------
| time/                 |          |
|    fps                | 1335     |
|    iterations         | 100      |
|    time_elapsed       | 0        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | nan      |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0       |
|    value_loss         | 0        |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 1472     |
|    iterations         | 200      |
|    time_elapsed       | 0        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | nan      |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss       

### Secured API; Optimizing Actual Labels

In [None]:
# optimize_models.py
#
# This script is designed to load pre-existing reinforcement learning models
# (PPO, A2C, DDPG, SAC), continue their training for a specified number of
# additional steps, and then save the newly optimized versions back to their files.
# It is independent of the initial training or inference scripts.

# --- Dependencies ---
# pip install numpy
# pip install groq
# pip install google-api-python-client
# pip install wolframalpha
# pip install wikipedia
# pip install scikit-learn
# pip install vaderSentiment
# pip install stable-baselines3[extra]
# pip install tensorflow
# pip install gymnasium
# pip install python-dotenv

import os
import time
import numpy as np
import groq
import wolframalpha
import wikipedia
from googleapiclient.discovery import build
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO, DDPG, A2C, SAC
from stable_baselines3.common.vec_env import DummyVecEnv
from dotenv import load_dotenv

# --- Load environment variables from a .env file for security ---
load_dotenv()

# --- API Configuration ---
# This script reads API keys from a .env file in the same directory.
# A recognizable placeholder stands in for any variable that is missing so
# the checks below can report it.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "YOUR_GROQ_API_KEY")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "YOUR_GOOGLE_API_KEY")
WOLFRAM_APP_ID = os.environ.get("WOLFRAM_APP_ID", "YOUR_WOLFRAM_APP_ID")
CUSTOM_SEARCH_ENGINE_ID = os.environ.get("CUSTOM_SEARCH_ENGINE_ID", "YOUR_CSE_ID")


# --- Add explicit checks for API keys ---
def _warn_if_unset(name, value, placeholder):
    """Print a warning when ``value`` is empty or still contains ``placeholder``."""
    if not value or placeholder in value:
        print(f"--- WARNING: {name} is not set. Please create a .env file and set it. ---")


_warn_if_unset("GROQ_API_KEY", GROQ_API_KEY, "YOUR_GROQ_API_KEY")
_warn_if_unset("GOOGLE_API_KEY", GOOGLE_API_KEY, "YOUR_GOOGLE_API_KEY")
_warn_if_unset("WOLFRAM_APP_ID", WOLFRAM_APP_ID, "YOUR_WOLFRAM_APP_ID")
# The search engine ID has a placeholder default too, so it is checked as well.
_warn_if_unset("CUSTOM_SEARCH_ENGINE_ID", CUSTOM_SEARCH_ENGINE_ID, "YOUR_CSE_ID")


# Initialize API clients
# Bind every client name first: if one constructor below raises, the names
# after it would otherwise never be assigned and any later use would fail
# with a NameError far away from the real cause.
groq_client = None
wolfram_client = None
google_search_service = None
sentiment_analyzer = None
try:
    groq_client = groq.Groq(api_key=GROQ_API_KEY)
    wolfram_client = wolframalpha.Client(WOLFRAM_APP_ID)
    google_search_service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
    sentiment_analyzer = SentimentIntensityAnalyzer()
except Exception as e:
    # Best-effort start-up: report the problem and continue; clients that
    # were not created keep their None placeholder.
    print("--- ERROR: Failed to initialize API clients. Please check your API keys. ---")
    print(f"Error details: {e}")


# --- Environment and Reward System ---
# The environment class MUST be defined here exactly as it was during the
# initial training, otherwise the models will fail to load.

class RewardSystem:
    """Groups the reward computations in one place."""

    def __init__(self):
        self.vectorizer = TfidfVectorizer()

    def _calculate_cosine_similarity(self, t1, t2):
        """Return the TF-IDF cosine similarity between ``t1`` and ``t2``.

        Returns 0.0 when either text is empty or None, or when the
        computation raises ``ValueError``.
        """
        if not (t1 and t2):
            return 0.0
        try:
            # The vectorizer is re-fitted on just this pair for every call.
            matrix = self.vectorizer.fit_transform([t1, t2])
            similarity = cosine_similarity(matrix[0:1], matrix[1:2])
            return similarity[0][0]
        except ValueError:
            return 0.0

    def get_clarity_reward(self, orig_prompt, mod_prompt, response):
        """Placeholder: always returns 0.5."""
        return 0.5

    def get_relevance_reward(self, user_feedback, followup):
        """Placeholder: always returns 0.5."""
        return 0.5

    def get_factual_accuracy_reward(self, response):
        """Placeholder: always returns 0.5."""
        return 0.5

class PromptEnv(gym.Env):
    """Base class for the prompt optimization environment.

    Observations are TF-IDF vectors of the current prompt, zero-padded to a
    fixed length of ``max_features``. This base class defines neither an
    ``action_space`` nor a ``step`` method.
    """

    # Instruction prefix sent to the prompt-rewriting model for each
    # discrete action index.
    _MODIFICATION_INSTRUCTIONS = {
        0: "Make the following prompt more concise: ",
        1: "Expand the following prompt with more detail: ",
        2: "Rephrase the following prompt to have a more professional tone: ",
        3: "Modify the following prompt to ask for a step-by-step explanation: ",
    }

    def __init__(self, initial_prompt=""):
        super().__init__()
        self.initial_prompt = initial_prompt
        self.reward_system = RewardSystem()
        self.max_features = 100
        self.vectorizer = TfidfVectorizer(max_features=self.max_features)

        # Fit on a small fixed corpus so the vocabulary, and therefore the
        # meaning of each observation slot, is the same for every instance.
        dummy_corpus = [
            "explain the theory of relativity", "what are the main causes of climate change",
            "summarize the plot of hamlet", "how does photosynthesis work",
            "describe the process of machine learning", "what is quantum computing"
        ]
        self.vectorizer.fit(dummy_corpus)

        self.observation_space = spaces.Box(low=0, high=1, shape=(self.max_features,), dtype=np.float32)
        self.current_prompt = initial_prompt

    def _get_obs(self):
        """Return the current prompt as a float32 vector of length ``max_features``."""
        vector = self.vectorizer.transform([self.current_prompt]).toarray()
        flat_vector = vector.flatten()
        fixed_size_obs = np.zeros(self.max_features, dtype=np.float32)
        # The fitted vocabulary may be shorter than max_features; the tail stays zero.
        fixed_size_obs[:flat_vector.shape[0]] = flat_vector
        return fixed_size_obs

    def reset(self, seed=None, options=None):
        """Restore the initial prompt and return ``(observation, info)``."""
        super().reset(seed=seed)
        self.current_prompt = self.initial_prompt
        return self._get_obs(), {}

    def _execute_api_calls(self, discrete_action):
        """Rewrite the current prompt via the Groq API and fetch a response to it.

        The first request asks a model to modify ``self.current_prompt``
        according to ``discrete_action``; the second sends the modified
        prompt to another model. Both calls are retried together, up to
        three attempts, sleeping 1 s and then 2 s between attempts.

        Args:
            discrete_action: Action index in the range 0-3.

        Returns:
            ``(modified_prompt, response)``. If the action is invalid or
            every attempt fails, ``response`` is a fixed error string and
            ``modified_prompt`` is the last value obtained (the unmodified
            current prompt if the first call never succeeded).
        """
        modified_prompt = self.current_prompt
        response = "Error: Could not get a response from the model."
        max_retries = 3
        base_delay = 1

        # Resolve the instruction before the retry loop: an unknown action is
        # a caller error, not a transient API failure, so retrying (and
        # sleeping) for it would only waste time. int() also turns NumPy
        # scalars and 0-d arrays into a hashable key.
        try:
            modification_instruction = self._MODIFICATION_INSTRUCTIONS[int(discrete_action)]
        except (KeyError, TypeError, ValueError):
            print(f"--- Invalid action {discrete_action!r}; skipping API calls. ---")
            return modified_prompt, response

        modification_request = f"{modification_instruction} '{self.current_prompt}'"

        for attempt in range(max_retries):
            try:
                chat_completion = groq_client.chat.completions.create(messages=[{"role": "user", "content": modification_request}], model="llama3-8b-8192")
                modified_prompt = chat_completion.choices[0].message.content
                main_completion = groq_client.chat.completions.create(messages=[{"role": "user", "content": modified_prompt}], model="llama3-70b-8192")
                response = main_completion.choices[0].message.content
                break
            except Exception as e:
                print(f"--- API call failed on attempt {attempt + 1}/{max_retries}. Retrying... ---")
                print(f"Error details: {e}")
                if attempt < max_retries - 1:
                    # Exponential backoff between attempts.
                    time.sleep(base_delay * (2 ** attempt))
                else:
                    print("--- All API retries failed. ---")
        return modified_prompt, response

# --- (FIX) Create two environment versions for different action spaces ---
class PromptEnvDiscrete(PromptEnv):
    """Prompt environment with four discrete actions, for PPO and A2C."""

    def __init__(self, initial_prompt=""):
        super().__init__(initial_prompt)
        self.action_space = spaces.Discrete(4)

    def step(self, action):
        """Run one step and end the episode.

        Returns the tuple ``(obs, reward, terminated, truncated, info)``.
        The reward is always 1.0 and ``terminated`` is always True.
        """
        # The helper's two results are not used by this step.
        _modified_prompt, _response = self._execute_api_calls(action)
        reward, terminated, truncated = 1.0, True, False
        return self._get_obs(), reward, terminated, truncated, {}

class PromptEnvContinuous(PromptEnv):
    """Prompt environment with a 4-dimensional Box action space, for DDPG and SAC."""

    def __init__(self, initial_prompt=""):
        super().__init__(initial_prompt)
        self.action_space = spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float32)

    def step(self, action):
        """Run one step and end the episode.

        The index of the largest component of ``action`` is used as the
        discrete action. Returns ``(obs, reward, terminated, truncated, info)``
        with a constant reward of 1.0 and ``terminated`` always True.
        """
        chosen_index = np.argmax(action)
        # The helper's two results are not used by this step.
        _modified_prompt, _response = self._execute_api_calls(chosen_index)
        reward, terminated, truncated = 1.0, True, False
        return self._get_obs(), reward, terminated, truncated, {}

# --- Model Optimizer ---
class ModelOptimizer:
    """Loads, continues training (optimizes), and re-saves RL models."""

    def __init__(self):
        # Maps the supported algorithm names to their model classes.
        self.model_classes = {'PPO': PPO, 'A2C': A2C, 'DDPG': DDPG, 'SAC': SAC}
        print("Model Optimizer initialized.")

    def optimize_model(self, model_name, model_path, additional_timesteps):
        """Load a model from ``model_path``, train it further, and save it back.

        Args:
            model_name: Key into ``self.model_classes``.
            model_path: Path of the saved model; it is overwritten on success.
            additional_timesteps: Number of timesteps handed to ``learn``.

        Returns:
            None. Problems (unknown model type, missing file, load failure)
            are reported with ``print`` and the method returns early.
        """
        if model_name not in self.model_classes:
            print(f"Error: Model type '{model_name}' not recognized.")
            return

        if not os.path.exists(model_path):
            print(f"Error: Cannot optimize. Model file not found at '{model_path}'.")
            return

        # --- (FIX) Select the correct environment based on the model type ---
        if model_name in ('PPO', 'A2C'):
            env_class = PromptEnvDiscrete
        elif model_name in ('DDPG', 'SAC'):
            env_class = PromptEnvContinuous
        else:
            print(f"Error: No environment defined for model type {model_name}.")
            return

        # DummyVecEnv expects zero-argument callables; the class itself is one.
        env = DummyVecEnv([env_class])
        try:
            print(f"\n--- Optimizing {model_name} ---")
            print(f"Loading model from {model_path}...")

            try:
                model_class = self.model_classes[model_name]
                model = model_class.load(model_path, env=env)
                print(f"Successfully loaded {model_name}.")
            except Exception as e:
                print(f"--- FATAL ERROR: Failed to load model {model_name}. ---")
                print("This can happen if the model was trained with a different version of a library or a different environment definition.")
                print(f"Error details: {e}")
                return

            print(f"Continuing training for {additional_timesteps} additional timesteps...")
            # reset_num_timesteps=False continues the existing timestep count
            # instead of starting again from zero.
            model.learn(total_timesteps=additional_timesteps, reset_num_timesteps=False)

            print(f"Saving newly optimized model back to {model_path}...")
            model.save(model_path)
            print(f"--- {model_name} optimization complete. ---")
        finally:
            # Release the environment on every path: load failure, a training
            # or saving error, and normal completion.
            env.close()


# --- Example Usage ---
if __name__ == "__main__":
    optimizer = ModelOptimizer()
    # Extra timesteps each existing model is trained for.
    additional_training_steps = 500

    # Algorithm name -> saved model file that is loaded, trained and overwritten.
    model_files = dict(
        A2C="a2c_actor.zip",
        DDPG="ddpg_agent.zip",
        PPO="ppo_model.zip",
        SAC="sac_sb3_model.zip",
    )

    # Optimize each configured model in turn.
    for model_name, file_path in model_files.items():
        optimizer.optimize_model(model_name, file_path, additional_training_steps)

    print("\nAll available models have been further optimized.")


Model Optimizer initialized.

--- Optimizing A2C ---
Loading model from a2c_actor.zip...
Successfully loaded A2C.
Continuing training for 500 additional timesteps...
------------------------------------
| time/                 |          |
|    fps                | 0        |
|    iterations         | 100      |
|    time_elapsed       | 1265     |
|    total_timesteps    | 1500     |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | nan      |
|    learning_rate      | 0.0007   |
|    n_updates          | 299      |
|    policy_loss        | -0       |
|    value_loss         | 0        |
------------------------------------
Saving newly optimized model back to a2c_actor.zip...
--- A2C optimization complete. ---

--- Optimizing DDPG ---
Loading model from ddpg_agent.zip...
Successfully loaded DDPG.
Continuing training for 500 additional timesteps...
---------------------------------
| time/              |          |
|    episodes      

### Train and Save Models for Uniformity in Environment

In [10]:
# train_and_save_models.py
#
# This script creates, trains, and saves the initial versions of all four
# reinforcement learning models (PPO, A2C, DDPG, SAC). Run this script once
# to generate the healthy .zip files needed by the optimizer script.

# --- Dependencies ---
# pip install numpy
# pip install groq
# pip install google-api-python-client
# pip install wolframalpha
# pip install wikipedia
# pip install scikit-learn
# pip install vaderSentiment
# pip install stable-baselines3[extra]
# pip install tensorflow
# pip install gymnasium
# pip install python-dotenv

import os
import time
import numpy as np
import groq
import wolframalpha
import wikipedia
from googleapiclient.discovery import build
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO, DDPG, A2C, SAC
from stable_baselines3.common.vec_env import DummyVecEnv
from dotenv import load_dotenv

# --- Load environment variables from a .env file for security ---
load_dotenv()

# --- API Configuration ---
# Each value is None when the corresponding environment variable is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
WOLFRAM_APP_ID = os.environ.get("WOLFRAM_APP_ID")
CUSTOM_SEARCH_ENGINE_ID = os.environ.get("CUSTOM_SEARCH_ENGINE_ID")

# Initialize API clients
# Bind every client name first: if one constructor below raises, the names
# after it would otherwise never be assigned and any later use would fail
# with a NameError far away from the real cause.
groq_client = None
wolfram_client = None
google_search_service = None
sentiment_analyzer = None
try:
    groq_client = groq.Groq(api_key=GROQ_API_KEY)
    wolfram_client = wolframalpha.Client(WOLFRAM_APP_ID)
    google_search_service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
    sentiment_analyzer = SentimentIntensityAnalyzer()
except Exception as e:
    # Best-effort start-up: report the problem and continue; clients that
    # were not created keep their None placeholder.
    print("--- ERROR: Failed to initialize API clients. Please check your API keys in the .env file. ---")
    print(f"Error details: {e}")


# --- Environment Definitions ---

class PromptEnv(gym.Env):
    """Common base for the prompt optimization environments.

    Observations are TF-IDF vectors of the current prompt, zero-padded to a
    fixed length of ``max_features``. This base class defines neither an
    ``action_space`` nor a ``step`` method.
    """

    def __init__(self, initial_prompt=""):
        super().__init__()
        self.initial_prompt = initial_prompt
        self.current_prompt = initial_prompt
        self.max_features = 100

        # Fit on a small fixed corpus so the vocabulary, and therefore the
        # meaning of each observation slot, is the same for every instance.
        self.vectorizer = TfidfVectorizer(max_features=self.max_features)
        self.vectorizer.fit([
            "explain the theory of relativity",
            "what are the main causes of climate change",
            "summarize the plot of hamlet",
            "how does photosynthesis work",
            "describe the process of machine learning",
            "what is quantum computing",
        ])

        self.observation_space = spaces.Box(
            low=0, high=1, shape=(self.max_features,), dtype=np.float32
        )

    def _get_obs(self):
        """Return the current prompt as a float32 vector of length ``max_features``."""
        tfidf_row = self.vectorizer.transform([self.current_prompt]).toarray().ravel()
        padded = np.zeros(self.max_features, dtype=np.float32)
        # The fitted vocabulary may be shorter than max_features; the tail stays zero.
        padded[:tfidf_row.size] = tfidf_row
        return padded

    def reset(self, seed=None, options=None):
        """Restore the initial prompt and return ``(observation, info)``."""
        super().reset(seed=seed)
        self.current_prompt = self.initial_prompt
        observation = self._get_obs()
        return observation, {}

    def _execute_api_calls(self, discrete_action):
        """Return a canned ``(modified_prompt, response)`` pair for ``discrete_action``.

        No external service is contacted here.
        """
        # This is a simplified version for training; a full implementation would use the RewardSystem
        modified_prompt = f"Modified prompt based on action {discrete_action}"
        response = "API response."
        return modified_prompt, response

class PromptEnvDiscrete(PromptEnv):
    """Prompt environment with four discrete actions, for PPO and A2C."""

    def __init__(self, initial_prompt=""):
        super().__init__(initial_prompt)
        self.action_space = spaces.Discrete(4)

    def step(self, action):
        """Run one step and end the episode.

        Returns the tuple ``(obs, reward, terminated, truncated, info)``.
        The reward is always 1.0 and ``terminated`` is always True.
        """
        # The helper's two results are not used by this step.
        _modified_prompt, _response = self._execute_api_calls(action)
        reward, terminated, truncated = 1.0, True, False
        return self._get_obs(), reward, terminated, truncated, {}

class PromptEnvContinuous(PromptEnv):
    """Prompt environment with a 4-dimensional Box action space, for DDPG and SAC."""

    def __init__(self, initial_prompt=""):
        super().__init__(initial_prompt)
        self.action_space = spaces.Box(low=-1, high=1, shape=(4,), dtype=np.float32)

    def step(self, action):
        """Run one step and end the episode.

        The index of the largest component of ``action`` is used as the
        discrete action. Returns ``(obs, reward, terminated, truncated, info)``
        with a constant reward of 1.0 and ``terminated`` always True.
        """
        chosen_index = np.argmax(action)
        # The helper's two results are not used by this step.
        _modified_prompt, _response = self._execute_api_calls(chosen_index)
        reward, terminated, truncated = 1.0, True, False
        return self._get_obs(), reward, terminated, truncated, {}

# --- Model Training and Saving ---
def train_and_save_model(model_name, file_path, training_steps):
    """Create, train, and save a single RL model.

    Args:
        model_name: One of 'PPO', 'A2C', 'DDPG' or 'SAC'. Any other value
            prints an error and returns without training anything.
        file_path: Destination handed to the model's ``save`` method.
        training_steps: Number of timesteps handed to ``learn``.

    Returns:
        None.
    """
    model_classes = {'PPO': PPO, 'A2C': A2C, 'DDPG': DDPG, 'SAC': SAC}

    if model_name not in model_classes:
        print(f"Error: Model type '{model_name}' not recognized.")
        return

    # PPO and A2C use the discrete-action environment; DDPG and SAC use the
    # continuous one.
    env_class = PromptEnvDiscrete if model_name in ('PPO', 'A2C') else PromptEnvContinuous

    # DummyVecEnv expects zero-argument callables; the class itself is one,
    # so no wrapper lambda is needed.
    env = DummyVecEnv([env_class])
    try:
        print(f"\n--- Creating and Training {model_name} ---")
        model = model_classes[model_name]("MlpPolicy", env, verbose=1)

        model.learn(total_timesteps=training_steps)

        print(f"Saving new model to {file_path}...")
        model.save(file_path)
        print(f"--- {model_name} saved successfully. ---")
    finally:
        # Release the environment even when training or saving raises.
        env.close()


if __name__ == "__main__":
    # Kept small: this is only the initial training pass.
    initial_training_steps = 1000

    # Algorithm name -> file the trained model is written to.
    model_files_to_create = dict(
        A2C="a2c_actor.zip",
        DDPG="ddpg_agent.zip",
        PPO="ppo_model.zip",
        SAC="sac_sb3_model.zip",
    )

    # Train and save each configured model in turn.
    for model_name, file_path in model_files_to_create.items():
        train_and_save_model(model_name, file_path, initial_training_steps)

    print("\nAll models have been created and saved.")




--- Creating and Training A2C ---
Using cpu device
------------------------------------
| time/                 |          |
|    fps                | 1335     |
|    iterations         | 100      |
|    time_elapsed       | 0        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | nan      |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0       |
|    value_loss         | 0        |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 1472     |
|    iterations         | 200      |
|    time_elapsed       | 0        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | nan      |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss       