In [1]:
import duckdb
import yaml
import datetime
import openai

In [2]:
# Read the notebook's settings (OpenAI API key, embedding model name) from config.yaml.
# A YAML syntax error is allowed to propagate: printing it and carrying on would
# leave PARAM undefined and fail on the next statement with an unrelated NameError.
with open("config.yaml", "r") as stream:
    PARAM = yaml.safe_load(stream)

# The key is set on the openai module and also passed to an explicit client;
# `client` is the object the embedding helper calls.
openai.api_key  = PARAM['openai_api']
client = openai.OpenAI(api_key = PARAM['openai_api'])

def get_embedding(text, model=PARAM['vector_embedding_model']):
    """Return the embedding of ``text`` produced by the OpenAI embeddings API.

    Newlines in ``text`` are replaced with spaces before the request is made.
    ``model`` defaults to the ``vector_embedding_model`` entry of ``PARAM``,
    which is read once, when this function is defined.
    """
    single_line = text.replace("\n", " ")
    response = client.embeddings.create(input = [single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding

In [3]:
# Open a DuckDB connection using the 'md:' (MotherDuck) connection string.
# In the recorded run this started the browser-based SSO login shown in the output below.
con = duckdb.connect('md:')

Attempting to automatically open the SSO authorization page in your default browser.
Please open this link to login into your account: https://auth.motherduck.com/activate?user_code=JDHV-JSXG


Token successfully retrieved ✅

You can display the token and store it as an environment variable to avoid having to log in again:
  PRAGMA PRINT_MD_TOKEN;


In [4]:
# Install and load each extension in one chained call per extension;
# `execute` returns the connection itself, so the calls can be chained
# and the cell still ends on the same connection object.

# duckpgq comes from the community extension repository.
con.execute("install duckpgq from community;").execute("load duckpgq;")

con.execute("install ducklake;").execute("load ducklake;")

con.execute("install postgres;").execute("load postgres;")

<_duckdb.DuckDBPyConnection at 0x113620bb0>

Create a DuckLake database in MotherDuck

In [5]:
# Create the `ducklake_omop` database with the DUCKLAKE type.
# NOTE(review): DATA_INLINING_ROW_LIMIT 100 presumably keeps small inserts
# (under 100 rows) inline in the catalog — confirm against the DuckLake docs.
create_ducklake_sql = """CREATE DATABASE ducklake_omop (
    TYPE DUCKLAKE,
    DATA_INLINING_ROW_LIMIT 100
);"""
con.sql(create_ducklake_sql)

In [6]:
# Make ducklake_omop the session's default database so the unqualified
# CREATE TABLE statements that follow are created inside it.
# (Plain string: the statement has no placeholders, so no f-prefix is needed.)
con.sql("""
USE ducklake_omop;
""")

In [7]:
# Build the Trial table from the tab-delimited trials file.
# The four semicolon-delimited UMLS columns are dropped from `*` with EXCLUDE
# and re-added under the same names as the result of string_split(..., ';').
create_trial_sql = """
    CREATE TABLE Trial AS
    SELECT
        *  EXCLUDE (CUI_umls, preferred_umls_name, condition_cui, condition_preferred_umls_name),
        string_split(CUI_umls, ';')                       AS CUI_umls,
        string_split(preferred_umls_name, ';')             AS preferred_umls_name,
        string_split(condition_cui, ';')                   AS condition_cui,
        string_split(condition_preferred_umls_name, ';')   AS condition_preferred_umls_name
    FROM read_csv('drugs/trials_umls_extended.tsv', delim='\t', header=true, auto_detect=true)
"""
con.execute(create_trial_sql)

<_duckdb.DuckDBPyConnection at 0x113620bb0>

In [8]:
# Every remaining table is a straight `SELECT *` copy of one source file, so the
# table name -> source expression pairs are declared once and created in a loop
# instead of repeating the same statement five times.
# JSON files are read through read_json_auto; TSV files are referenced by path.
TABLE_SOURCES = {
    "Drug": "read_json_auto('drugs/drugs_8.json')",
    "Disease": "read_json_auto('drugs/disease_8.json')",
    "MOA": "'drugs/moa_8_cui.tsv'",
    "Drug_Disease": "'drugs/cui_indication_8.tsv'",
    "Drug_MOA": "'drugs/cui_moa_8_cui.tsv'",
}

# Template reproduces the SQL text of the original hand-written statements.
CREATE_TABLE_TEMPLATE = """
CREATE TABLE {table} AS
    SELECT *
    FROM {source};
"""

# Dicts preserve insertion order, so tables are created in the order listed above.
for table_name, source_expr in TABLE_SOURCES.items():
    command = CREATE_TABLE_TEMPLATE.format(table=table_name, source=source_expr)
    con.execute(command)




<_duckdb.DuckDBPyConnection at 0x113620bb0>

In [9]:
# Close the connection now that all tables have been created.
con.close()