In [1]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem.
# After mounting, Drive files are reachable under /content/drive/MyDrive/.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip -q install ipython-sql SQLAlchemy psycopg2-binary "prettytable==3.6.0"
%load_ext sql
%config SqlMagic.style = 'PLAIN_COLUMNS'   # prettytable 호환 이슈 회피용


# Supabase DB 연결 확인

In [3]:
!pip -q install ipython-sql SQLAlchemy psycopg2-binary "prettytable==3.6.0"

%load_ext sql
%config SqlMagic.style = 'PLAIN_COLUMNS'

from urllib.parse import quote_plus

HOST = "aws-1-ap-northeast-2.pooler.supabase.com"   # Supabase Session Pooler 호스트
PORT = 5432                                         # 대시보드의 포트
DB   = "postgres"                                   # DB 이름
USER = "postgres.ghzfugjynevhvhhgcugw"              # 유저(프로젝트별 접미사 포함)
PWD  = quote_plus("mimic4sql")                      # 비번에 특문 있으면 반드시 인코딩

# ✅ 올바른 접속 문자열 (드라이버 + SSL 필수)
CONN = f"postgresql+psycopg2://{USER}:{PWD}@{HOST}:{PORT}/{DB}?sslmode=require"

print(CONN)   # 확인용 (앞뒤 공백/개행 없는지 확인)
%sql $CONN    # 연결

# 테스트 쿼리
%sql SELECT current_database() AS db, current_user AS usr, version() AS ver;


The sql extension is already loaded. To reload it, use:
  %reload_ext sql
postgresql+psycopg2://postgres.ghzfugjynevhvhhgcugw:mimic4sql@aws-1-ap-northeast-2.pooler.supabase.com:5432/postgres?sslmode=require
(psycopg2.errors.SyntaxError) syntax error at or near "#"
LINE 1: # 연결
        ^

[SQL: # 연결]
(Background on this error at: https://sqlalche.me/e/20/f405)
 * postgresql+psycopg2://postgres.ghzfugjynevhvhhgcugw:***@aws-1-ap-northeast-2.pooler.supabase.com:5432/postgres?sslmode=require
1 rows affected.


db,usr,ver
postgres,postgres,"PostgreSQL 17.6 on aarch64-unknown-linux-gnu, compiled by gcc (GCC) 13.2.0, 64-bit"


In [4]:
%%sql
-- Create the vector extension unless it already exists.
CREATE EXTENSION IF NOT EXISTS vector;
-- List the extension name and version from pg_extension to confirm it is installed.
SELECT extname, extversion FROM pg_extension WHERE extname='vector';


 * postgresql+psycopg2://postgres.ghzfugjynevhvhhgcugw:***@aws-1-ap-northeast-2.pooler.supabase.com:5432/postgres?sslmode=require
Done.
1 rows affected.


extname,extversion
vector,0.8.0


# DB 연결, 스키마 준비 및 CSV 파일을 테이블로 업로드

In [5]:
import os
from getpass import getpass
from urllib.parse import quote_plus

from sqlalchemy import create_engine, text

# Supabase pooler connection settings.
HOST = "aws-1-ap-northeast-2.pooler.supabase.com"
PORT = 5432
DB   = "postgres"
USER = "postgres.ghzfugjynevhvhhgcugw"

# Never hardcode the password in the notebook: read it from the environment and
# fall back to an interactive prompt (cached in this kernel's environment so the
# prompt appears at most once per session).
# NOTE(review): the password that used to be committed here should be rotated.
if "SUPABASE_DB_PASSWORD" not in os.environ:
    os.environ["SUPABASE_DB_PASSWORD"] = getpass("Supabase DB password: ")
# URL-encode the password so special characters cannot break the connection URL.
PWD = quote_plus(os.environ["SUPABASE_DB_PASSWORD"])

# SSL is required; the `options` parameter asks the server to start each session
# with search_path = mimic4, public.
CONN = (f"postgresql+psycopg2://{USER}:{PWD}@{HOST}:{PORT}/{DB}"
        "?sslmode=require&options=-csearch_path%3Dmimic4,public")
engine = create_engine(CONN)

with engine.begin() as conn:
    # Check which role the server actually sees for this connection.
    owner = conn.execute(text("SELECT current_user")).scalar()
    print("current_user =", owner)

    # 1) Create the schema (without AUTHORIZATION the current user becomes the owner).
    conn.execute(text("CREATE SCHEMA IF NOT EXISTS mimic4;"))

    # 2) (Optional) make the ownership explicit via CURRENT_USER.
    conn.execute(text("ALTER SCHEMA mimic4 OWNER TO CURRENT_USER;"))

    # 3) Set search_path on this connection as well (already requested through the
    #    connection options above; repeated as a safety net).
    conn.execute(text("SET search_path TO mimic4, public;"))


current_user = postgres


### 진행률 표시를 위한 tqdm 설치 및 함수 선언

In [6]:
!pip -q install tqdm

In [7]:
import math
from tqdm.auto import tqdm

def to_sql_with_progress(df, table_name, engine, schema="public", chunksize=50_000, method="multi"):
    """
    Upload a DataFrame to a SQL table in row chunks while showing a tqdm progress bar.

    The first chunk is written with ``if_exists="replace"`` and every following
    chunk with ``if_exists="append"``, so the target table ends up containing
    exactly the rows of ``df``. The DataFrame index is not written.

    NOTE(review): each chunk is a separate ``DataFrame.to_sql`` call, so a failure
    part-way through presumably leaves a partially filled table — confirm if
    atomicity matters.

    Args:
        df: DataFrame to upload. An empty frame is skipped (nothing is written,
            an existing table is left untouched).
        table_name: Name of the target table.
        engine: Connectable handed to ``DataFrame.to_sql`` (e.g. a SQLAlchemy engine).
        schema: Target schema name.
        chunksize: Number of rows per uploaded chunk; must be positive.
        method: Insert method forwarded to ``DataFrame.to_sql``.

    Returns:
        None.

    Raises:
        ValueError: If ``chunksize`` is zero or negative for a non-empty frame.
    """
    n = len(df)
    if n == 0:
        print(f"[{table_name}] 빈 DataFrame — 건너뜀")
        return

    # Without this guard a negative chunksize yields an empty range (nothing is
    # uploaded, yet success is reported) and zero fails with an unrelated error.
    if chunksize <= 0:
        raise ValueError(f"chunksize must be a positive integer, got {chunksize!r}")

    total_chunks = math.ceil(n / chunksize)
    chunk_starts = range(0, n, chunksize)

    progress = tqdm(chunk_starts, total=total_chunks, desc=f"Uploading {table_name}")
    for chunk_no, start in enumerate(progress):
        # iloc slicing clamps at the end of the frame, so the last chunk may be shorter.
        df.iloc[start:start + chunksize].to_sql(
            table_name, engine, schema=schema,
            # Only the very first chunk may drop and recreate the table.
            if_exists="replace" if chunk_no == 0 else "append",
            index=False, chunksize=chunksize, method=method
        )

    print(f"[{table_name}] 업로드 완료: {n:,} rows · {total_chunks} chunks")


In [8]:
# 4) Load the CSV files (this is where patients_df / discharge_df are created).
import pandas as pd

PATIENTS_CSV  = "/content/drive/MyDrive/DILAB/MARS/mimiciv_3.1/files/hosp/patients.csv"
DISCHARGE_CSV = "/content/drive/MyDrive/DILAB/MARS/mimic-iv-note_2.2/files/note/discharge.csv"

# Read every column as text and disable pandas' default NaN markers.
RAW_READ_OPTIONS = {"dtype": str, "keep_default_na": False}
patients_df = pd.read_csv(PATIENTS_CSV, **RAW_READ_OPTIONS)
discharge_df = pd.read_csv(DISCHARGE_CSV, **RAW_READ_OPTIONS)

print(f"patients rows: {len(patients_df)}  | discharge rows: {len(discharge_df)}")

# 5) Upload with a progress bar. To avoid duplicate loads, this loop is the only
#    place the raw tables are written (no direct to_sql calls).
raw_uploads = (
    (patients_df, "patients_raw", 50_000),
    (discharge_df, "discharge_raw", 1000),
)
for raw_frame, raw_table, rows_per_chunk in raw_uploads:
    to_sql_with_progress(
        raw_frame, raw_table, engine,
        schema="mimic4", chunksize=rows_per_chunk, method="multi",
    )

patients rows: 364627  | discharge rows: 331793


Uploading patients_raw:   0%|          | 0/8 [00:00<?, ?it/s]

[patients_raw] 업로드 완료: 364,627 rows · 8 chunks


Uploading discharge_raw:   0%|          | 0/332 [00:00<?, ?it/s]

[discharge_raw] 업로드 완료: 331,793 rows · 332 chunks


In [None]:
from sqlalchemy import text

# Row counts of the two uploaded raw tables, displayed as a (patients, discharge) tuple.
count_queries = (
    "SELECT COUNT(*) FROM mimic4.patients_raw",
    "SELECT COUNT(*) FROM mimic4.discharge_raw",
)
with engine.begin() as conn:
    c1, c2 = [conn.execute(text(query)).scalar() for query in count_queries]
c1, c2


In [None]:
# Probe for a PostgreSQL server on this machine:
#   1. query the service status (`|| true` keeps a non-zero exit from failing the line),
#   2. try to start the service,
#   3. ask pg_isready whether 127.0.0.1:5432 accepts connections.
# NOTE(review): the engine created earlier in this notebook targets the remote
# Supabase pooler, so this local check looks unnecessary — confirm and remove if so.
!service postgresql status || true
!service postgresql start
!pg_isready -h 127.0.0.1 -p 5432


postgresql: unrecognized service
postgresql: unrecognized service
/bin/bash: line 1: pg_isready: command not found


In [None]:
# NOTE(review): the output recorded for this cell shows a connection attempt to
# 127.0.0.1:5432, which does not match the Supabase engine created earlier in this
# notebook — re-run from a fresh kernel so `engine` is the one defined above.

# 1) Create the separate target schemas if they do not exist yet.
with engine.begin() as conn:
    conn.execute(text("CREATE SCHEMA IF NOT EXISTS hosp;"))
    conn.execute(text("CREATE SCHEMA IF NOT EXISTS note;"))

# 2) Read the CSV files, this time letting pandas infer column types.
#    - To parse date columns automatically, pass their names via parse_dates.
#    - This rebinds patients_df / discharge_df, replacing the all-text frames
#      loaded earlier in the notebook.
patients_df  = pd.read_csv(PATIENTS_CSV)      # e.g. parse_dates=['dod'] if needed
discharge_df = pd.read_csv(DISCHARGE_CSV)     # e.g. parse_dates=['charttime'] if needed

# 3) Load into hosp.patients and note.discharge.
#    Use the chunked helper instead of a single un-chunked to_sql call: the tables
#    are still replaced (first chunk) and then appended to, but rows are sent in
#    bounded batches with a progress bar, consistent with the raw-table upload.
to_sql_with_progress(patients_df,  "patients",  engine, schema="hosp", chunksize=50_000)
to_sql_with_progress(discharge_df, "discharge", engine, schema="note", chunksize=1000)

# 4) Quick verification: print the row counts and check them against the frames.
with engine.begin() as conn:
    hosp_patients_count = conn.execute(text("SELECT COUNT(*) FROM hosp.patients")).scalar()
    note_discharge_count = conn.execute(text("SELECT COUNT(*) FROM note.discharge")).scalar()
print(hosp_patients_count)
print(note_discharge_count)
assert hosp_patients_count == len(patients_df), "hosp.patients row count mismatch"
assert note_discharge_count == len(discharge_df), "note.discharge row count mismatch"


OperationalError: (psycopg2.OperationalError) connection to server at "127.0.0.1", port 5432 failed: Connection refused
	Is the server running on that host and accepting TCP/IP connections?

(Background on this error at: https://sqlalche.me/e/20/e3q8)