In [1]:
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, MetaData

# Load connection settings from ./.env into the process environment.
# NOTE(review): by default load_dotenv() keeps variables that are already set,
# and USERNAME is often pre-set by the operating system (e.g. on Windows) --
# confirm the value picked up here really is the database user.
load_dotenv(verbose=True,
            dotenv_path='./.env')

# os.environ[...] raises a KeyError naming the missing variable instead of
# silently building a URL that contains the text "None". User and password are
# percent-encoded so characters such as '@', ':' or '/' cannot break the URL.
engine = create_engine(
    'postgresql://{user}:{password}@{host}/{database}'.format(
        user=quote(os.environ["USERNAME"], safe=''),
        password=quote(os.environ["PASSWORD"], safe=''),
        host=os.environ["HOST"],
        database=os.environ["DATABASE"],
    ),
    # Connections from this engine are opened with search_path set to "de".
    connect_args={'options': '-csearch_path={}'.format('de')})

In [2]:
# Credentials must not be committed with the notebook: the URL is built from the
# USERNAME / PASSWORD / HOST / DATABASE environment variables (populated from
# ./.env by load_dotenv in the first cell). A missing variable raises KeyError.
# NOTE(review): assumes ./.env describes the server and database this cell used
# to hard-code -- confirm HOST (plus port, if it is not the default) and
# DATABASE there, and rotate the password that was previously committed.
note = create_engine(
    'postgresql://{user}:{password}@{host}/{database}'.format(
        user=quote(os.environ["USERNAME"], safe=''),
        password=quote(os.environ["PASSWORD"], safe=''),
        host=os.environ["HOST"],
        database=os.environ["DATABASE"],
    ),
    # Connections from this engine are opened with search_path set to "de".
    connect_args={'options': '-csearch_path={}'.format('de')})

# Read the whole clinical_note table into a DataFrame.
clinical_note = pd.read_sql_table('clinical_note', note)

In [3]:
# Display the first note (row 0, column 0) split into lines.
# .iloc[0, 0] is purely positional; .iloc[0][0] indexes the row Series with an
# integer key, which pandas deprecates when the columns are labelled by name.
clinical_note.iloc[0, 0].split('\n')

['Andrea7 Wolf938',
 'Race:                White',
 'Ethnicity:           Non-Hispanic',
 'Gender:              M',
 'Age:                 55',
 'Birth Date:          1965-04-22',
 'Marital Status:      M',
 '--------------------------------------------------------------------------------',
 'ALLERGIES:',
 'No Known Allergies',
 '--------------------------------------------------------------------------------',
 'ENCOUNTER',
 '2011-06-20 : Encounter at Cape Cod Vet Center : Encounter for Acute bronchitis (disorder)',
 'Type: ambulatory',
 '   ',
 '   MEDICATIONS:',
 '  2011-06-20 : Acetaminophen 325 MG Oral Tablet for Acute bronchitis (disorder)',
 '   ',
 '   CONDITIONS:',
 '  2011-06-20 : Acute bronchitis (disorder)',
 '   ',
 '   CARE PLANS:',
 '  2011-06-20 : Respiratory therapy',
 '                         Reason: Acute bronchitis (disorder)',
 '                         Activity: Recommendation to avoid exercise',
 '                         Activity: Deep breathing and coughing ex

In [20]:
import re
from datetime import datetime

# person info (the note carries no death date)
note_text = clinical_note.iloc[0, 0]  # positional access to row 0, column 0


def _header_value(text, label):
    """Return the value of a "<label>: <value>" header line of a clinical note.

    The first line that starts with ``label`` followed by a colon is used and
    everything after the colon is returned with surrounding whitespace removed,
    so multi-word values are kept whole.

    Raises:
        ValueError: if no line starts with ``label:``.
    """
    found = re.search(r'^{}:[ \t]*(.*)$'.format(re.escape(label)), text, re.MULTILINE)
    if found is None:
        raise ValueError('field {!r} not found in clinical note'.format(label))
    return found.group(1).strip()


race = _header_value(note_text, 'Race')
birthday = _header_value(note_text, 'Birth Date')
# Parsing with strptime rejects a malformed date here instead of at insert time.
birth_date = datetime.strptime(birthday, "%Y-%m-%d")
birth_year = birth_date.year
birth_month = birth_date.month
birth_day = birth_date.day
gender = _header_value(note_text, 'Gender')
ethnicity = _header_value(note_text, 'Ethnicity')

In [21]:
# visit_occurrence info
note_text = clinical_note.iloc[0, 0]  # positional access to row 0, column 0

# Matches "ENCOUNTER" and the line that follows it, up to the first character
# outside [word chars, '-', ':', space]; in the sample note that is the "(" of
# "(disorder)", so the parenthesised suffix is not part of the match.
encounter_match = re.search(r'ENCOUNTER\n[\w\d\-: ]{2,}', note_text)
if encounter_match is None:
    raise ValueError('no ENCOUNTER section found in clinical note')
encounter = encounter_match.group()

# Second whitespace-separated token is the encounter date.
visit_date = datetime.strptime(encounter.split()[1], "%Y-%m-%d")

# "<date> : Encounter at <site> : Encounter for <reason>" -- drop the two
# leading words ("Encounter at" / "Encounter for") of each colon-separated part.
encounter_parts = encounter.split(":")
site = " ".join(encounter_parts[1].split()[2:])
reason = " ".join(encounter_parts[2].split()[2:])

In [22]:
# drug_exposure info
note_text = clinical_note.iloc[0, 0]  # positional access to row 0, column 0

# "MEDICATIONS:" plus the single line after it; searched once and reused for
# both the date and the prescription text.
medication_match = re.search(r'MEDICATIONS:\n.*\n', note_text)
if medication_match is None:
    raise ValueError('no MEDICATIONS section found in clinical note')
medication_fields = medication_match.group().split(":")

date = medication_fields[1].strip()
drug_exposure_date = datetime.strptime(date, "%Y-%m-%d")
prescription = medication_fields[-1].strip()

# Raw strings: "\d" and "\w" in a plain string literal are invalid escape
# sequences and trigger a warning on current Python versions.
# drug_info is "<letters/spaces><digits> <word>", e.g. "Acetaminophen 325 MG".
drug_info = re.search(r'[a-zA-Z ]+\d+ \w+', prescription).group()
drug = re.search(r'[a-zA-Z ]+', drug_info).group().strip()
medicine = re.search(r'\d+.*', drug_info).group().split()
dose = medicine[0]  # kept as the string found in the note
unit = medicine[1]

In [23]:
# condition_occurrence info
note_text = clinical_note.iloc[0, 0]  # positional access to row 0, column 0

# Matches "CONDITIONS:" and the line after it, up to the first character outside
# [word chars, '-', ':', space]; a parenthesised suffix such as "(disorder)" is
# therefore not included.
condition_match = re.search(r'CONDITIONS:\n[\w\d\-: ]{2,}', note_text)
if condition_match is None:
    raise ValueError('no CONDITIONS section found in clinical note')
conditions = condition_match.group()

# Second whitespace-separated token is the condition date.
condition_start_date = datetime.strptime(conditions.split()[1], "%Y-%m-%d")
# TODO: only the line directly under "CONDITIONS:" is read -- decide how to
# handle notes that list several diagnoses.
condition_value = conditions.split(":")[-1].strip()

# Insert data

In [24]:
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import insert, Table
from models import *
import random

# Engine used for the inserts below. This rebinds the name `engine` that the
# first cell created with search_path "de"; from here on it uses "walker103".
# Credentials are read from the environment (loaded from ./.env in the first
# cell) rather than being written into the notebook; a missing variable raises
# KeyError.
# NOTE(review): assumes ./.env describes the server and database this cell used
# to hard-code -- confirm, and rotate the password that was previously committed.
engine = create_engine(
    'postgresql://{user}:{password}@{host}/{database}'.format(
        user=quote(os.environ["USERNAME"], safe=''),
        password=quote(os.environ["PASSWORD"], safe=''),
        host=os.environ["HOST"],
        database=os.environ["DATABASE"],
    ),
    connect_args={'options': '-csearch_path={}'.format('walker103')})

In [25]:
# Collection that the reflected Table objects are registered in.
metadata = MetaData()
# Open connection shared by the later cells.
connect = engine.connect()
# ORM session bound to the same engine.
session = Session(engine)
# Declarative base class bound to the engine.
Base = declarative_base(bind=engine)

In [26]:
# Reflect the four target tables from the database, in this order, and bind
# each one to its own name.
pers, visit, drug_table, condition = (
    Table(table_name, metadata, autoload_with=connect)
    for table_name in (
        'person',
        'visit_occurrence',
        'drug_exposure',
        'condition_occurrence',
    )
)

In [33]:
# Insert one person / visit / drug exposure / condition row built from the
# values parsed out of the note, provided the dates are plausible.
b = datetime.strptime(birthday, "%Y-%m-%d")
# The visit date and the prescription date must be on or after the patient's
# birth date (>=, so an encounter on the day of birth is accepted).
if visit_date >= b and drug_exposure_date >= b:
    # NOTE(review): ids are random integers in 1..100000, so they can collide
    # with rows already in the tables, and the visit / drug / condition rows do
    # not reference the person row -- confirm against the table definitions.
    p = insert(pers).values(person_id=random.randint(1, 100000),
                            year_of_birth=birth_year,
                            month_of_birth=birth_month,
                            day_of_birth=birth_day,
                            gender_value=gender,
                            race_value=race,
                            ethnicity_value=ethnicity)

    v = insert(visit).values(visit_occurrence_id=random.randint(1, 100000),
                             visit_start_date=visit_date,
                             care_site_nm=site,
                             visit_type_value=reason)

    d = insert(drug_table).values(drug_exposure_id=random.randint(1, 100000),
                                  drug_exposure_start_date=drug_exposure_date,
                                  drug_value=drug,
                                  dose_value=dose,
                                  unit_value=unit)

    c = insert(condition).values(condition_occurrence_id=random.randint(1, 100000),
                                 condition_start_date=condition_start_date,
                                 condition_value=condition_value)

    # Run the four statements in a single transaction on a connection taken
    # from the same engine as `connect`: they are committed together, or rolled
    # back together if any of them fails.
    with connect.engine.begin() as transaction_connection:
        for sql in [p, v, d, c]:
            transaction_connection.execute(sql)
    # Printed only after the transaction has committed.
    print('insert!')
    

insert!
