In [None]:
import requests
import json
import re
import psycopg2
import pandas as pd
import os 

def execute_sql_script_from_file(file_path, user="admin", password="adminpassword", host="localhost", port="5432"):
    """Run the SQL script stored in *file_path* against a PostgreSQL database.

    The database name is the script's file name without its extension
    (``SQL/location.sql`` -> ``location``).

    Args:
        file_path: Path of the SQL script to execute.
        user: Database user name.
        password: Database password.
        host: Database host.
        port: Database port.

    Returns:
        A two-item list ``[ok, payload]``:
        ``[False, message]`` when the file is missing or execution fails,
        ``[True, DataFrame]`` when the executed script produced a result set,
        ``[True, None]`` otherwise.
    """
    if not os.path.isfile(file_path):
        return [False, f"SQL file not found: {file_path}"]

    base_name = os.path.basename(file_path)
    dbname, _extension = os.path.splitext(base_name)

    # Sentinels so the cleanup in ``finally`` knows what was actually opened.
    connection = None
    cur = None

    try:
        with open(file_path, "r", encoding="utf-8") as script_file:
            sql_script = script_file.read()

        connection = psycopg2.connect(
            dbname=dbname,
            user=user,
            password=password,
            host=host,
            port=port
        )
        connection.autocommit = True
        cur = connection.cursor()

        print(f"Executing SQL script for database '{dbname}' from file: {file_path}")
        cur.execute(sql_script)

        # ``description`` is only populated when a result set is available.
        if not cur.description:
            return [True, None]

        colnames = [column[0] for column in cur.description]
        rows = cur.fetchall()
        return [True, pd.DataFrame(rows, columns=colnames)]

    except psycopg2.Error as e:
        error_msg = f"[{dbname}] SQL Execution Error: {e}"
        print(error_msg)
        return [False, error_msg]

    except Exception as e:
        error_msg = f"[{dbname}] Unexpected Error: {e}"
        print(error_msg)
        return [False, error_msg]

    finally:
        if cur is not None:
            cur.close()
        if connection is not None:
            connection.close()

def groq_chat_completion_stream_clean(prompt, model="llama3-8b-8192", timeout=60):
    """Send *prompt* to the Groq chat-completions endpoint and return the reply text.

    The request is made in streaming mode; the ``delta.content`` fragments of
    every ``data:`` line are concatenated into a single string.

    The API key is read from the ``GROQ_API_KEY`` environment variable so that
    no secret is stored in the source. The variable must be present in the
    process environment before this function is called.

    Args:
        prompt: Text sent as the single user message.
        model: Model identifier passed to the API.
        timeout: Seconds passed to ``requests.post`` as its timeout, so a
            stalled connection cannot block forever.

    Returns:
        The concatenated response text with surrounding whitespace stripped.

    Raises:
        ValueError: If ``GROQ_API_KEY`` is not set or empty.
        Exception: If the API answers with a status code other than 200.
    """
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise ValueError("La clé API Groq n'est pas configurée dans le fichier .env")

    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    data = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": True
    }

    fragments = []
    with requests.post(url, headers=headers, json=data, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            raise Exception(f"Erreur API: {response.status_code} - {response.text}")

        for chunk in response.iter_lines():
            if not chunk:
                continue
            decoded_chunk = chunk.decode('utf-8')
            if not decoded_chunk.startswith("data:"):
                continue
            try:
                parsed = json.loads(decoded_chunk[5:].strip())
            except json.JSONDecodeError:
                # Non-JSON payloads (e.g. an end-of-stream sentinel) carry no text.
                continue

            # Guard against a missing or empty "choices" list.
            choices = parsed.get("choices") or [{}]
            content = choices[0].get("delta", {}).get("content")
            if content:
                fragments.append(content)

    return "".join(fragments).strip()

In [None]:
# Transcript of every message exchanged with the model; starts empty.
conversation_history = []

# Directory scanned for the input JSON files.
# NOTE(review): this is an absolute path, whereas the other paths in this
# notebook ("prompt/...", "SQL/...", "json/location/...") are relative --
# confirm the leading slash is intended.
folder = "/json/location/"

# Base instructions for the model, loaded once from disk.
with open("prompt/prompt1.txt", encoding="utf-8") as f:
    prompt = f.read()

def extract_and_append_sql(text, json_data, sql_file_path):
    """Append every ```sql fenced block found in *text* to *sql_file_path*.

    Each block is wrapped in ``-- BEGIN STEP: <id>`` / ``-- END STEP: <id>``
    marker lines, where ``<id>`` is ``json_data["id"]`` (``"unknown"`` when the
    key is absent). Nothing is written, and the file is not opened, when
    *text* contains no such block.
    """
    fence_pattern = re.compile(r"```sql\s+(.*?)```", re.DOTALL | re.IGNORECASE)
    snippets = fence_pattern.findall(text)
    if len(snippets) == 0:
        return

    step_id = json_data.get("id", "unknown")
    header = f"-- BEGIN STEP: {step_id}\n"
    footer = f"-- END STEP: {step_id}\n\n"

    with open(sql_file_path, "a", encoding="utf-8") as out:
        out.writelines(
            header + snippet.strip() + "\n" + footer
            for snippet in snippets
        )

    print(f"Appended {len(snippets)} SQL block(s) for STEP {step_id} to {sql_file_path}")

# First message of the conversation: the prompt text with the "system" role.
system_message = dict(role="system", content=prompt)

def edit_sql_file(id: int, sql_script: str, file_named: str):
    """Replace the SQL of one step inside a marker-delimited SQL file.

    The step is the region starting at the first line equal to
    ``-- BEGIN STEP: <id>`` and ending at the next line equal to
    ``-- END STEP: <id>`` (comparison ignores surrounding whitespace). The
    whole region, markers included, is rewritten with *sql_script* between
    fresh markers.

    Args:
        id: Step identifier used in the marker lines.
        sql_script: New SQL for the step; surrounding whitespace is stripped.
        file_named: Path of the SQL file, read and written as UTF-8.

    Raises:
        ValueError: If no BEGIN marker followed by an END marker exists for *id*.
    """
    begin_marker = f"-- BEGIN STEP: {id}"
    end_marker = f"-- END STEP: {id}"

    # UTF-8 matches the encoding used when the step blocks are appended.
    with open(file_named, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    start_index = None
    end_index = None

    for i, line in enumerate(lines):
        stripped = line.strip()
        if start_index is None:
            # Ignore everything (including stray END markers) before BEGIN.
            if stripped == begin_marker:
                start_index = i
        elif stripped == end_marker:
            end_index = i
            break

    if start_index is None or end_index is None:
        raise ValueError(f"STEP with id {id} not found in the file.")

    # Replace the lines between the markers with the new SQL script
    new_script_lines = (
        [begin_marker + '\n']
        + [line + '\n' for line in sql_script.strip().split('\n')]
        + [end_marker + '\n']
    )
    lines[start_index:end_index + 1] = new_script_lines

    with open(file_named, 'w', encoding='utf-8') as file:
        file.writelines(lines)

def contains_task_completed(text):
    """Search *text* for "task completed", ignoring case.

    Any run of whitespace may separate the two words. Returns the
    ``re.Match`` object of the first occurrence, or ``None`` when absent.
    """
    completion_pattern = re.compile(r"task\s+completed", flags=re.IGNORECASE)
    return completion_pattern.search(text)

# Seed the transcript with the system prompt before any user content.
conversation_history.append(system_message)

# Phase 1: send every JSON file in `folder` to the model (with the whole
# transcript so far as context) and collect the SQL blocks it returns.
for file_name in sorted(os.listdir(folder)):
    path = os.path.join(folder, file_name)
    if not os.path.isfile(path):
        continue

    with open(path, "r", encoding="utf-8") as f:
        json_data = json.load(f)
    json_str = json.dumps(json_data, indent=2)

    # Add the JSON once; sending it twice only doubles the context size.
    conversation_history.append({
        "role": "user",
        "content": f"Here is the JSON:\n\n{json_str}"
    })

    full_context = "\n\n".join([m["content"] for m in conversation_history])
    llm_response = groq_chat_completion_stream_clean(full_context)

    extract_and_append_sql(llm_response, json_data, "SQL/location.sql")

    conversation_history.append({
        "role": "assistant",
        "content": llm_response
    })

with open("conversation_history.json", "w", encoding="utf-8") as f:
    json.dump(conversation_history, f, indent=2)


# Phase 2: execute the generated script, report the outcome to the model and
# let it rewrite the script until it answers "task completed".
# The loop is bounded so a model that never converges cannot run forever.
MAX_REPAIR_ROUNDS = 10

for _ in range(MAX_REPAIR_ROUNDS):
    result = execute_sql_script_from_file("SQL/location.sql")
    succeeded, payload = result

    if succeeded:
        feedback = "# ✅ SQL executed successfully."
        # payload is None or a DataFrame; render it as text so the message
        # stays a plain string (joinable and JSON-serialisable).
        if payload is not None:
            feedback += f"\n{payload}"
    else:
        feedback = f"# ❌ SQL Execution Error:\n{payload}"

    conversation_history.append({
        "role": "ipython",
        "content": feedback
    })

    full_context = "\n\n".join([m["content"] for m in conversation_history])
    llm_response = groq_chat_completion_stream_clean(full_context)

    conversation_history.append({
        "role": "assistant",
        "content": llm_response
    })

    # Only overwrite the script when the reply actually contains SQL;
    # otherwise a plain "task completed" answer would wipe the final script.
    sql_blocks = re.findall(r"```sql\s+(.*?)```", llm_response, re.DOTALL | re.IGNORECASE)
    if sql_blocks:
        with open("SQL/location.sql", "w", encoding="utf-8") as f:
            for block in sql_blocks:
                f.write(block.strip() + "\n\n")

    if contains_task_completed(llm_response):
        print("✅ Task completed.")
        break
else:
    print(f"Stopped after {MAX_REPAIR_ROUNDS} rounds without a 'task completed' reply.")

with open("conversation_history.json", "w", encoding="utf-8") as f:
    json.dump(conversation_history, f, indent=2)

print(f"\nProcessed {len(conversation_history)} messages.")

In [None]:
import re

def update_sql_file(filename: str, step_id: int, sql_block: str):
    """Replace, or append, the SQL block of one step in a marker-delimited file.

    A step is the text from ``-- BEGIN STEP: <step_id>`` to the matching
    ``-- END STEP: <step_id>``. Every such region in the file is replaced by
    *sql_block* wrapped in fresh markers; when no region exists, the wrapped
    block is appended after the existing (stripped) content.

    Args:
        filename: Path of the SQL file, read and written as UTF-8.
        step_id: Step identifier used in the marker lines.
        sql_block: New SQL for the step; surrounding whitespace is stripped.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        content = f.read()

    # Escape the id so it is matched literally, and require the END marker to
    # be followed by whitespace or end of text so step 1 never matches step 10.
    step = re.escape(str(step_id))
    pattern = re.compile(
        rf'-- BEGIN STEP: {step}\n.*?-- END STEP: {step}(?!\S)',
        re.DOTALL
    )

    new_block = f'-- BEGIN STEP: {step_id}\n{sql_block.strip()}\n-- END STEP: {step_id}'

    # A callable replacement is inserted verbatim; a string replacement would
    # have its backslashes (common in SQL regex/escape literals) interpreted.
    updated, replaced = pattern.subn(lambda _match: new_block, content)
    if not replaced:
        # Append if the step doesn't exist
        updated = f'{content.strip()}\n\n{new_block}'

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(updated)


In [11]:
import pandas as pd
import json

# Load the base prompt text.
with open("prompt/prompt1.txt", encoding="utf-8") as f:
    prompt = f.read()

# Load one sample JSON file and pretty-print it; fall back to an empty JSON
# object when the file is missing or malformed so the cell still runs.
try:
    with open("json/location/location_test_1.json", encoding="utf-8") as f:
        json_data = json.load(f)
except FileNotFoundError:
    print("JSON file not found. Please check the file path.")
    json_str = "{}"
except json.JSONDecodeError as e:
    print(f"Invalid JSON format: {e}")
    json_str = "{}"
else:
    json_str = json.dumps(json_data, indent=2)

# Send prompt and JSON, separated by a blank line, to the model and show the reply.
final_prompt = f"{prompt}\n\n{json_str}"
result = groq_chat_completion_stream_clean(final_prompt)
print(result)


Here is the generated SQL code:

```
INSERT INTO omop.location (
    city,
    state,
    zip,
    location_source_value,
    address_1,
    address_2,
    county
)
SELECT 
    l.city,
    l.state_abbreviation AS state,
    l.zip,
    l.zip AS location_source_value,
    NULL AS address_1, -- since column-level transformation rule is to null
    NULL AS address_2, -- since column-level transformation rule is to null
    NULL AS county   -- since column-level transformation rule is to null
FROM 
    location_enriched_view AS l;
```

Note: I've included inline comments to clarify the logic of column-level transformation rules that result in `NULL` values. I've also assumed that the `cast_type` values for `address_1`, `address_2`, and `county` are not necessary since they don't seem to have a specific transformation rule. If there's an error in the execution, I'd be happy to review and correct!


Here is the generated SQL code:

```
CREATE OR REPLACE VIEW omop.location_enriched_view AS 
SELECT 
  s.city, 
  s.state, 
  s.zip, 
  sm.state_abbreviation
FROM 
  synthea.patients s 
  LEFT JOIN omop.states_map ON s.state = sm.state;
```
```
INSERT INTO omop.location (
  location_id,
  city,
  state,
  zip,
  location_source_value,
  address_1,
  address_2,
  county
)
SELECT 
  MD5HASH(city || state_abbreviation || zip)::uuid AS location_id,
  city,
  state_abbreviation AS state,
  zip,
  zip AS location_source_value,
  NULL::VARCHAR AS address_1,
  NULL::VARCHAR AS address_2,
  NULL::VARCHAR AS county
FROM 
  location_enriched_view;
```
Let me know if this generates any execution errors!