In [9]:
import json
import os
import re

import pandas as pd
import psycopg2
import requests

def execute_sql_code_from_string(input_string):
    """Execute every ```sql fenced block found in ``input_string`` on PostgreSQL.

    All extracted blocks are executed in order on a single autocommit
    connection. Connection settings default to the values this notebook has
    always used and can be overridden with the standard libpq environment
    variables ``PGDATABASE``, ``PGUSER``, ``PGPASSWORD``, ``PGHOST`` and
    ``PGPORT``.

    Args:
        input_string: Text (typically an LLM response) that may contain one
            or more fenced code blocks tagged ``sql`` (case-insensitive).

    Returns:
        A two-element list ``[ok, payload]``:
        * ``[False, "No SQL block found."]`` when there is nothing to run;
        * ``[False, <error message>]`` when connecting or executing fails;
        * ``[True, DataFrame]`` holding the first result set produced by any
          block, or ``[True, None]`` when no block returned rows.
    """
    # Guard against None / non-text input, which would otherwise make
    # re.findall raise before any error handling is in place.
    if not isinstance(input_string, str):
        return [False, "No SQL block found."]

    sql_blocks = re.findall(r'```sql\s*(.*?)\s*```', input_string, re.DOTALL | re.IGNORECASE)

    if not sql_blocks:
        return [False, "No SQL block found."]

    conn = None
    cursor = None
    first_result = None
    try:
        conn = psycopg2.connect(
            dbname=os.environ.get("PGDATABASE", "synthea"),
            user=os.environ.get("PGUSER", "admin"),
            password=os.environ.get("PGPASSWORD", "adminpassword"),
            host=os.environ.get("PGHOST", "localhost"),
            port=os.environ.get("PGPORT", "5432"),
        )
        # Each statement is committed as soon as it succeeds.
        conn.autocommit = True
        cursor = conn.cursor()

        for sql_code in sql_blocks:
            print(f"Executing SQL:\n{sql_code}\n---")
            cursor.execute(sql_code)
            # cursor.description is only set for statements that return rows.
            # Keep the first result set but keep going, so that blocks after
            # a SELECT are no longer silently skipped.
            if cursor.description and first_result is None:
                colnames = [desc[0] for desc in cursor.description]
                rows = cursor.fetchall()
                first_result = pd.DataFrame(rows, columns=colnames)

        return [True, first_result]

    except psycopg2.Error as e:
        error_msg = f"SQL Execution Error: {e}"
        print(error_msg)
        return [False, error_msg]
    except Exception as e:
        error_msg = f"Unexpected Error: {e}"
        print(error_msg)
        return [False, error_msg]
    finally:
        # Release resources whether or not the connection/cursor was created.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
            
def groq_chat_completion_stream_clean(prompt, model="llama3-8b-8192", timeout=60):
    """Send ``prompt`` to the Groq chat-completions endpoint and return the reply.

    The request is made in streaming mode; the ``content`` fragments of every
    streamed chunk are concatenated and returned as one stripped string.

    The API key is read from the ``GROQ_API_KEY`` environment variable rather
    than being embedded in the notebook, so it must be set (for example by
    loading a ``.env`` file into the environment) before calling this.

    Args:
        prompt: Text sent as the single user message.
        model: Groq model identifier.
        timeout: Seconds passed to ``requests.post`` as its ``timeout`` so a
            stalled connection cannot block the notebook forever.

    Returns:
        The assistant's full reply with surrounding whitespace removed.

    Raises:
        ValueError: If ``GROQ_API_KEY`` is not set.
        RuntimeError: If the API answers with a non-200 status code.
    """
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise ValueError("La clé API Groq n'est pas configurée dans le fichier .env")

    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    data = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": True
    }

    pieces = []
    with requests.post(url, headers=headers, json=data, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            raise RuntimeError(f"Erreur API: {response.status_code} - {response.text}")

        for chunk in response.iter_lines():
            if not chunk:
                continue
            decoded_chunk = chunk.decode('utf-8')
            # Only "data:" lines carry a payload in the event stream.
            if not decoded_chunk.startswith("data:"):
                continue
            payload = decoded_chunk[5:].strip()
            if payload == "[DONE]":
                # End-of-stream sentinel; nothing more to read.
                break
            try:
                parsed = json.loads(payload)
            except json.JSONDecodeError:
                continue
            if not isinstance(parsed, dict):
                continue
            # A chunk may carry an empty "choices" list or a null "delta";
            # treat both as "no content" instead of raising.
            choices = parsed.get("choices") or [{}]
            delta = choices[0].get("delta") or {}
            content = delta.get("content")
            if content:
                pieces.append(content)

    return "".join(pieces).strip()

In [None]:
import os
import pandas as pd

# NOTE(review): this is an absolute path, while other code in this notebook
# reads from the relative "json/location/..." directory -- confirm which
# location is intended.
folder = "/json/location/"
conversation_history = []

# Load the prompt template in this cell so it also works on a fresh kernel;
# previously `prompt` only existed if a later cell had been run first.
with open("prompt/prompt1.txt", "r", encoding="utf-8") as f:
    prompt = f.read()

# List the regular files once, in sorted order: the conversation history is
# order-dependent and os.listdir() returns entries in arbitrary order.
file_names = sorted(
    name for name in os.listdir(folder) if os.path.isfile(os.path.join(folder, name))
)

for file_name in file_names:
    path = os.path.join(folder, file_name)
    json_format = pd.read_json(path)
    json_str = json_format.to_json(orient="records", indent=2)

    # Everything said so far is prepended to the new request. Note that the
    # history also stores each file's full JSON, so the prompt grows with
    # every processed file.
    context = "\n\n".join(conversation_history) if conversation_history else ""
    final_prompt = prompt + "\n\n" + json_str
    if context:
        final_prompt = context + "\n\n" + final_prompt

    llm_response = groq_chat_completion_stream_clean(final_prompt)

    conversation_history.append(f"User: Processing file {file_name}")
    conversation_history.append(f"Data: {json_str}")
    conversation_history.append(f"Assistant: {llm_response}")

    # sql_output is [ok, payload]: a DataFrame/None on success, or an error
    # message on failure.
    sql_output = execute_sql_code_from_string(llm_response)

    if sql_output[0]:
        tool_response = 'Code successfully executed' + "\n\n" + str(sql_output[1])

        # Feed the execution result back to the model for a final answer.
        context_with_success = "\n\n".join(conversation_history) + "\n\n" + tool_response
        final_result = groq_chat_completion_stream_clean(context_with_success)

        conversation_history.append(f"System: {tool_response}")
        conversation_history.append(f"Assistant: {final_result}")

        print(f"✅ Successfully processed {file_name}")
        print(f"Final result: {final_result}")

    else:
        error_response = f'Code execution failed: {sql_output[1]}'

        # Let the model comment on the failure; its reply is only logged and
        # stored, the SQL is not retried.
        context_with_error = "\n\n".join(conversation_history) + "\n\n" + error_response
        error_result = groq_chat_completion_stream_clean(context_with_error)

        conversation_history.append(f"System: {error_response}")
        conversation_history.append(f"Assistant: {error_result}")

        print(f"❌ Failed to process {file_name}")
        print(f"Error: {sql_output[1]}")
        print(f"LLM error response: {error_result}")

    print("-" * 50)

print(f"\nProcessed {len(file_names)} files")
print(f"Total conversation history entries: {len(conversation_history)}")

# Write with an explicit encoding: LLM output may contain non-ASCII text that
# the platform default encoding cannot represent.
with open("conversation_history.txt", "w", encoding="utf-8") as f:
    f.write("\n\n".join(conversation_history))

In [11]:
import pandas as pd
import json

# Read the prompt template that is placed in front of the JSON payload.
with open("prompt/prompt1.txt", "r", encoding="utf-8") as prompt_file:
    prompt = prompt_file.read()

# Load one sample JSON file and re-serialise it with indentation. If the file
# is missing or is not valid JSON, report it and fall back to an empty object
# so the request below is still sent.
try:
    with open("json/location/location_test_1.json", "r", encoding="utf-8") as json_file:
        json_data = json.load(json_file)
except FileNotFoundError:
    print("JSON file not found. Please check the file path.")
    json_str = "{}"
except json.JSONDecodeError as e:
    print(f"Invalid JSON format: {e}")
    json_str = "{}"
else:
    json_str = json.dumps(json_data, indent=2)

# Prompt and payload are separated by a blank line.
final_prompt = "\n\n".join((prompt, json_str))
result = groq_chat_completion_stream_clean(final_prompt)
print(result)


Here is the generated SQL code:

```
INSERT INTO omop.location (
    city,
    state,
    zip,
    location_source_value,
    address_1,
    address_2,
    county
)
SELECT 
    l.city,
    l.state_abbreviation AS state,
    l.zip,
    l.zip AS location_source_value,
    NULL AS address_1, -- since column-level transformation rule is to null
    NULL AS address_2, -- since column-level transformation rule is to null
    NULL AS county   -- since column-level transformation rule is to null
FROM 
    location_enriched_view AS l;
```

Note: I've included inline comments to clarify the logic of column-level transformation rules that result in `NULL` values. I've also assumed that the `cast_type` values for `address_1`, `address_2`, and `county` are not necessary since they don't seem to have a specific transformation rule. If there's an error in the execution, I'd be happy to review and correct!


Here is the generated SQL code:

```
CREATE OR REPLACE VIEW omop.location_enriched_view AS 
SELECT 
  s.city, 
  s.state, 
  s.zip, 
  sm.state_abbreviation
FROM 
  synthea.patients s 
  LEFT JOIN omop.states_map ON s.state = sm.state;
```
```
INSERT INTO omop.location (
  location_id,
  city,
  state,
  zip,
  location_source_value,
  address_1,
  address_2,
  county
)
SELECT 
  MD5HASH(city || state_abbreviation || zip)::uuid AS location_id,
  city,
  state_abbreviation AS state,
  zip,
  zip AS location_source_value,
  NULL::VARCHAR AS address_1,
  NULL::VARCHAR AS address_2,
  NULL::VARCHAR AS county
FROM 
  location_enriched_view;
```
Let me know if this generates any execution errors!