#Query Refinement

In [4]:
import openai

def refine_query(query):
    client = openai.OpenAI()  # Create an OpenAI client instance
    
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "Improve retrieval-focused queries."},
            {"role": "user", "content": f"Refine the following query to focus on causal reasoning:\nQuery: {query}"}
        ]
    )

    return response.choices[0].message.content  # Corrected way to access the response

# Example Usage
raw_query = "What are the effects of chronic stress?"
refined_query = refine_query(raw_query)
print("Refined Query:", refined_query)


Refined Query: Query: What are the potential physical and psychological impacts caused by chronic stress?


In [10]:
import gymnasium as gym
import numpy as np
from stable_baselines3 import PPO

class QueryRefinementEnv(gym.Env):
    def __init__(self, causal_graph):
        super(QueryRefinementEnv, self).__init__()
        self.causal_graph = causal_graph
        self.action_space = gym.spaces.Discrete(3)  # Actions: Expand, Simplify, Decompose
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32)  # Sample observation space

    def reset(self, seed=None, options=None):
        """Reset the environment at the start of an episode."""
        super().reset(seed=seed)  # Ensure compatibility with Gymnasium
        initial_state = np.array([0.0], dtype=np.float32)  # Example initial state
        info = {}  # No additional info needed
        return initial_state, info  # ✅ Must return (observation, info)

    def step(self, action):
        """Take an action and return the new state, reward, done flag, and info."""
        reward = np.random.randint(1, 10)  # Placeholder reward function
        next_state = np.array([np.random.random()], dtype=np.float32)  # Random next state
        done = False  # Keep episode running
        info = {}  # Additional info
        return next_state, reward, done, False, info  # ✅ Must return (state, reward, done, truncated, info)

# ✅ Initialize environment & PPO Model
env = QueryRefinementEnv(causal_graph={})
model = PPO("MlpPolicy", env, verbose=1, device="cpu")  # Use CPU for stability

# ✅ Train the model
model.learn(total_timesteps=1000)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------------------------
| time/              |      |
|    fps             | 2612 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------


<stable_baselines3.ppo.ppo.PPO at 0x7e11dc5019a0>

In [11]:
import gymnasium as gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Define the Query Refinement Environment
class QueryRefinementEnv(gym.Env):
    def __init__(self, causal_graph):
        super(QueryRefinementEnv, self).__init__()
        self.causal_graph = causal_graph
        self.action_space = gym.spaces.Discrete(3)  # [0: Expand, 1: Simplify, 2: Decompose]
        self.observation_space = gym.spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32)
        self.current_state = np.array([0.0], dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_state = np.array([0.0], dtype=np.float32)
        return self.current_state, {}

    def step(self, action):
        # Simulated reward function based on mock retrieval success
        reward = np.random.randint(1, 10)  # Replace with actual reward from retrieval engine
        self.current_state = np.array([np.random.rand()], dtype=np.float32)
        terminated = False
        truncated = False
        info = {}
        return self.current_state, reward, terminated, truncated, info

# Initialize environment
env = QueryRefinementEnv(causal_graph={})
check_env(env)  # Optional: check environment compliance

# Train PPO model
model = PPO("MlpPolicy", env, verbose=1, device="cpu")  # Use CPU (recommended for MLP-based PPO)
model.learn(total_timesteps=10000)

# Save model
model.save("ppo_query_refiner")


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------------------------
| time/              |      |
|    fps             | 2620 |
|    iterations      | 1    |
|    time_elapsed    | 0    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1887         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0044086277 |
|    clip_fraction        | 4.88e-05     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.1         |
|    explained_variance   | -8.26e-05    |
|    learning_rate        | 0.0003       |
|    loss                 | 2.66e+03     |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00251     |
|    val

In [13]:
import openai
import os
from dotenv import load_dotenv

# Load .env file
load_dotenv()

# Retrieve API key from environment variables
api_key = os.getenv("OPENAI_API_KEY")

# Check if the key is loaded correctly
if api_key is None:
    raise ValueError("❌ API key not found! Make sure it's set in the .env file.")

# Initialize OpenAI client
client = openai.OpenAI(api_key=api_key)

def refine_query_with_llm(query: str) -> str:
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "Refine user queries to focus on causality and improve information retrieval."},
            {"role": "user", "content": f"Refine the following query for better causal retrieval:\n\nQuery: {query}"}
        ]
    )
    return response.choices[0].message.content.strip()

# Example usage
query = "What are the effects of chronic stress?"
refined_query = refine_query_with_llm(query)
print("Refined Query:", refined_query)


Refined Query: Refined Query: What causes and results emerge from chronic stress conditions?


In [17]:
import openai
import os
from dotenv import load_dotenv

# Load .env file
load_dotenv()

# Retrieve API key from environment variables
api_key = os.getenv("OPENAI_API_KEY")

# Check if the key is loaded correctly
if api_key is None:
    raise ValueError("❌ API key not found! Make sure it's set in the .env file.")

# Initialize OpenAI client
client = openai.OpenAI(api_key=api_key)

def refine_query_with_llm(query: str) -> str:
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "Refine user queries to focus on causality and improve information retrieval."},
            {"role": "user", "content": f"Refine the following query for better causal retrieval:\n\nQuery: {query}"}
        ]
    )
    return response.choices[0].message.content.strip()

# Example usage
query = "Stress and health"
refined_query = refine_query_with_llm(query)
print("Refined Query:", refined_query)


Refined Query: Refined Query: How does stress cause health problems?


In [14]:
from stable_baselines3 import PPO

# Load trained model
model = PPO.load("ppo_query_refiner")

# Simulate a query refinement action
env = QueryRefinementEnv(causal_graph={})
obs, _ = env.reset()
action, _ = model.predict(obs, deterministic=True)
print("RL Agent Suggested Action:", action)


RL Agent Suggested Action: 1


In [16]:
from stable_baselines3 import PPO

# Load trained model
model = PPO.load("ppo_query_refiner")

# Initialize environment
env = QueryRefinementEnv(causal_graph={})

# Reset environment and get initial observation
obs, _ = env.reset()

# Predict refinement action
action, _ = model.predict(obs, deterministic=True)

# Action mapping
action_map = {0: "Expand Query", 1: "Simplify Query", 2: "Decompose Query"}
print("Suggested refinement:", action_map[int(action)])


Suggested refinement: Simplify Query


In [2]:
from neo4j import GraphDatabase
print("Neo4j Driver is working!")

Neo4j Driver is working!


In [1]:
import neo4j
print(neo4j.__file__)  # This should print the correct install path


/home/hao/anaconda3/envs/colm/lib/python3.9/site-packages/neo4j/__init__.py


In [None]:
import pkg_resources
print(pkg_resources.get_distribution("neo4j").version)


5.28.1


  import pkg_resources
