In [19]:
import requests
import json
import re
import psycopg2
import pandas as pd

def execute_sql_code_from_string(input_string):
    """Execute the ```sql fenced blocks found in ``input_string`` on PostgreSQL.

    Every fenced block is sent to the server as one ``cursor.execute`` call,
    in order of appearance, on an autocommit connection.  Execution stops at
    the first block that produces a result set: that result is returned and
    any later blocks are not run.

    Connection settings come from the standard ``PGDATABASE``, ``PGUSER``,
    ``PGPASSWORD``, ``PGHOST`` and ``PGPORT`` environment variables when they
    are set, and otherwise fall back to the values this notebook used before.

    Args:
        input_string: Text (typically an LLM answer) that may contain one or
            more ```sql ... ``` fenced blocks.  ``None`` or an empty string is
            treated as "no SQL found".

    Returns:
        A ``pandas.DataFrame`` holding the rows of the first block that
        returns a result set, or ``None`` when there is no non-blank SQL
        block, when no block returns rows, or when a database error occurs
        (the error is printed, not raised).
    """
    import os  # local import so the function does not depend on another cell's imports

    # `input_string or ""` keeps re.findall from raising TypeError on None.
    fenced_blocks = re.findall(
        r'```sql\s*(.*?)\s*```', input_string or "", re.DOTALL | re.IGNORECASE
    )
    # A fence that contains only whitespace captures "" and would make
    # cursor.execute fail, aborting every block that follows it.
    sql_blocks = [block for block in fenced_blocks if block.strip()]

    if not sql_blocks:
        print("No SQL block found.")
        return None

    # Pre-declare the handles so the cleanup in `finally` is explicit
    # instead of probing locals().
    conn = None
    cursor = None
    try:
        conn = psycopg2.connect(
            dbname=os.environ.get("PGDATABASE", "synthea"),
            user=os.environ.get("PGUSER", "admin"),
            password=os.environ.get("PGPASSWORD", "adminpassword"),
            host=os.environ.get("PGHOST", "localhost"),
            port=os.environ.get("PGPORT", "5432"),
        )
        conn.autocommit = True
        cursor = conn.cursor()

        for sql_code in sql_blocks:
            print(f"Executing SQL:\n{sql_code}\n---")
            cursor.execute(sql_code)

            # cursor.description is only set when the statement returned columns.
            if cursor.description:
                colnames = [desc[0] for desc in cursor.description]
                rows = cursor.fetchall()
                return pd.DataFrame(rows, columns=colnames)

        return None

    except psycopg2.Error as e:
        print(f"SQL Execution Error: {e}")
        return None

    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
            
def groq_chat_completion_stream_clean(prompt, model="llama3-70b-8192"):
    """Send ``prompt`` to the Groq chat-completions endpoint and return the answer text.

    The request is made in streaming mode; the ``content`` fragments of the
    streamed ``data:`` lines are concatenated and returned as one string.

    The API key is read from the ``GROQ_API_KEY`` environment variable.  It is
    never stored in the notebook, so it must be exported before the call
    (for example by loading a ``.env`` file into the environment).

    Args:
        prompt: User message sent as the single chat message.
        model: Model identifier placed in the request payload.

    Returns:
        The concatenated streamed content with surrounding whitespace stripped.

    Raises:
        ValueError: If ``GROQ_API_KEY`` is missing or empty.
        Exception: If the API answers with a status code other than 200.
    """
    import os  # local import so the function does not depend on another cell's imports

    GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
    if not GROQ_API_KEY:
        raise ValueError("La clé API Groq n'est pas configurée dans le fichier .env")

    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {GROQ_API_KEY}"
    }
    data = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": True
    }
    fragments = []
    with requests.post(url, headers=headers, json=data, stream=True) as response:
        if response.status_code != 200:
            raise Exception(f"Erreur API: {response.status_code} - {response.text}")

        for chunk in response.iter_lines():
            if not chunk:
                continue
            decoded_chunk = chunk.decode('utf-8')
            if not decoded_chunk.startswith("data:"):
                continue
            try:
                parsed = json.loads(decoded_chunk[5:].strip())
            except json.JSONDecodeError:
                # Non-JSON payloads on a data line are skipped.
                continue
            # `or [{}]` also covers an explicit empty "choices" list, which
            # would otherwise raise IndexError on [0].
            choices = parsed.get("choices") or [{}]
            content = (choices[0].get("delta") or {}).get("content")
            if content:
                fragments.append(content)

    return "".join(fragments).strip()

In [None]:
# Read the instruction prompt from disk.
with open("/prompt/prompt0.text.txt", "r", encoding="utf-8") as f:
    prompt = f.read()

# Re-serialise location.json as an indented list of records.
json_format = pd.read_json("location.json")
json_str = json_format.to_json(orient="records", indent=2)

# Instructions followed by the JSON payload, separated by a blank line.
final_prompt = prompt + "\n\n" + json_str

# Send the combined prompt: passing the bare `prompt` here would silently
# drop the JSON that was just appended.
result = groq_chat_completion_stream_clean(final_prompt)
print(result)

```sql
-- Create the target table with the required structure
CREATE TABLE clients (
    client_id INT,
    last_name VARCHAR(255),
    first_name VARCHAR(255),
    birth_date DATE,
    postal_code VARCHAR(10),
    email VARCHAR(255)
);

-- Create a view to handle complex transformations
CREATE VIEW clients_source_view AS
SELECT 
    ID_CLIENT,
    NOM_PRENOM,
    DATE_NAISSANCE,
    ADRESSE_COMPLETE,
    EMAIL
FROM CLIENTS_SOURCE;

-- Insert into target table with transformations
INSERT INTO clients (
    client_id,
    last_name,
    first_name,
    birth_date,
    postal_code,
    email
)
SELECT 
    CSV.ID_CLIENT AS client_id,
    -- Split NOM_PRENOM to extract last name
    SPLIT_PART(CSV.NOM_PRENOM, ' ', 1) AS last_name,
    -- Split NOM_PRENOM to extract first name (handle multiple delimiters)
    CASE 
        WHEN POSITION('_', CSV.NOM_PRENOM) > 0 THEN SPLIT_PART(REPLACE(CSV.NOM_PRENOM, '_', ' '), ' ', 2)
        WHEN POSITION('-', CSV.NOM_PRENOM) > 0 THEN SPLIT_PART(REPLACE(C

In [11]:
# Sanity check: wrap a row count on `patients` in a sql fence, the format
# execute_sql_code_from_string extracts statements from, and print the result.
check_query = "\n".join([
    "```sql",
    "SELECT COUNT(*) FROM patients;",
    "```",
])
print(execute_sql_code_from_string(check_query))

Exécution du SQL :
SELECT COUNT(*) FROM patients;
---
Exécution terminée avec succès.
None


In [None]:
import psycopg2
from psycopg2.extras import RealDictCursor

def location_table_test(dbname, user, password, host='localhost', port=5432, cdm_schema='cdm') -> bool:
    """Check that ``<cdm_schema>.location`` contains no duplicated addresses.

    Two rows count as duplicates when they share the same ``address_1``,
    ``address_2``, ``city``, ``state``, ``county``, ``zip`` and
    ``location_source_value``.

    Args:
        dbname: Name of the PostgreSQL database.
        user: Database user.
        password: Password for ``user``.
        host: Database host.
        port: Database port.
        cdm_schema: Schema holding the ``location`` table.  It is interpolated
            directly into the SQL text, so it must come from trusted
            configuration, never from user input.

    Returns:
        ``True`` when no duplicated group exists.  ``False`` when duplicates
        are found or when the check itself fails (the error is printed).
    """
    # Single query whose result column matches the key read below.  A second
    # assignment to `query` used to replace this one with a statement that
    # returned no `duplicate_count` column, so the check always failed.
    query = f"""
        WITH duplicates AS (
            SELECT
                address_1,
                address_2,
                city,
                state,
                county,
                zip,
                location_source_value,
                COUNT(*) as count
            FROM {cdm_schema}.location
            GROUP BY
                address_1, address_2, city, state, county, zip, location_source_value
            HAVING COUNT(*) > 1
        )
        SELECT COUNT(*) as duplicate_count FROM duplicates;
    """
    conn = None
    try:
        conn = psycopg2.connect(
            dbname=dbname,
            user=user,
            password=password,
            host=host,
            port=port
        )
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(query)
            result = cur.fetchone()
            return result['duplicate_count'] == 0
    except Exception as e:
        print(f"Erreur lors de la vérification : {e}")
        return False
    finally:
        # psycopg2's `with connection:` only ends the transaction; the
        # connection has to be closed explicitly.
        if conn is not None:
            conn.close()



llama3.1

Here is the optimized SQL code that transforms the data from the source database to the target database for the target table "clients":

```sql
CREATE OR REPLACE VIEW clients_view AS
SELECT 
    c.ID_CLIENT AS client_id,
    SPLIT(NOM_PRENOM, ' ', 1) AS first_name,
    SPLIT(NOM_PRENOM, ' ', 2) AS last_name,
    TO_DATE(SUBSTRING(DATE_NAISSANCE FROM '^\d{2}/\d{2}/\d{4}$'), 'DD/MM/YYYY') AS birth_date,
    REGEXP_EXTRACT(ADRESSE_COMPLETE, r'\b(\d{5})\b') AS postal_code,
    LOWER(EMAIL) AS email
FROM 
    CLIENTS_SOURCE c;
```

This SQL code creates a view called `clients_view` that performs the described transformations for the target table "clients".

In [None]:
import unittest
import pandas as pd
from sqlalchemy import create_engine

def check_duplicate_locations(database_uri: str) -> pd.DataFrame:
    """Return the duplicated address groups of ``omop.location``.

    Rows are grouped on city, state, zip, address_1, address_2, county and
    location_source_value; only groups that occur more than once are kept.

    Args:
        database_uri: SQLAlchemy connection URI of the database to inspect.

    Returns:
        A DataFrame with one row per duplicated group and an
        ``occurrence_count`` column; empty when there are no duplicates.
    """
    query = """
    SELECT
        city,
        state,
        zip,
        address_1,
        address_2,
        county,
        location_source_value,
        COUNT(*) AS occurrence_count
    FROM omop.location
    GROUP BY
        city,
        state,
        zip,
        address_1,
        address_2,
        county,
        location_source_value
    HAVING COUNT(*) > 1;
    """
    db_engine = create_engine(database_uri)
    with db_engine.connect() as connection:
        duplicate_groups = pd.read_sql(query, connection)
    return duplicate_groups

class TestOMOPLocationTable(unittest.TestCase):

    def setUp(self):
        self.database_uri = "postgresql://user:password@localhost:5432/your_database"

    def test_no_duplicate_locations(self):
        duplicates = check_duplicate_locations(self.database_uri)
        self.assertTrue(
            duplicates.empty,
            f"❌ Duplicate location entries found in omop.location:\n{duplicates}"
        )

Unnamed: 0,person_id,gender_concept_id,year_of_birth,month_of_birth,day_of_birth,birth_datetime,race_concept_id,ethnicity_concept_id,location_id,provider_id,care_site_id,person_source_value,gender_source_value,gender_source_concept_id,race_source_value,race_source_concept_id,ethnicity_source_value,ethnicity_source_concept_id
0,1,8532,2005,7,6,2005-07-06,0,38003563,314,,,0002513e-8009-d8c4-9bf8-bdbb316deae8,F,0,native,0,hispanic,0
1,2,8532,2000,10,18,2000-10-18,8527,38003563,96,,,00035f01-cb9a-d253-eb67-7007a4e19ded,F,0,white,0,hispanic,0
2,3,8532,2001,9,25,2001-09-25,8527,38003563,103,,,000b8952-a1f1-e576-834c-d55c9d7b0941,F,0,white,0,hispanic,0
3,4,8532,1989,10,1,1989-10-01,8527,38003563,632,,,001046c2-98bd-1b63-14c4-ab8f9a7ddfdc,F,0,white,0,hispanic,0
4,5,8532,1992,11,10,1992-11-10,8527,38003563,456,,,0011424c-182d-59ac-5942-056a7f68d983,F,0,white,0,hispanic,0
