In [None]:
# medications.csv 읽기 (약물 복용 기록)
import pandas as pd
from langchain.schema import Document

# Load the medication records and parse the period columns as datetimes.
df = pd.read_csv("medications.csv")
df["START"] = pd.to_datetime(df["START"])
# errors="coerce" turns missing or unparseable STOP values into NaT
# instead of raising.
df["STOP"] = pd.to_datetime(df["STOP"], errors="coerce")

grouped = df.groupby("PATIENT")
# One Document per patient, holding one summary line per medication.
medication_docs = []

for patient_id, group in grouped:
    # Only the first 8 characters of the patient id are exposed in the
    # text and the metadata.
    short_id = patient_id[:8]
    text_lines = []

    for description, med_subgroup in group.groupby("DESCRIPTION"):
        earliest = med_subgroup["START"].min()
        stops = med_subgroup["STOP"]
        # A record without a STOP has no recorded end date (presumably the
        # medication is still being taken -- confirm against the dataset's
        # documentation), so a single such record makes the whole period
        # open-ended. Series.max() skips NaT, so taking the max whenever any
        # STOP exists would report the end date of an older, finished record.
        latest = None if stops.isna().any() else stops.max()
        total_dispenses = med_subgroup["DISPENSES"].sum()

        start_str = earliest.strftime("%Y-%m")
        if latest is None:
            # Open-ended period: start month followed by an empty end.
            period_str = f"{start_str} ~ "
        else:
            stop_str = latest.strftime("%Y-%m")
            # Collapse to a single month when start and stop coincide.
            period_str = start_str if start_str == stop_str else f"{start_str} ~ {stop_str}"

        text_lines.append(f"- {description}: {period_str}, 총 {total_dispenses}회")

    combined_text = f"환자 ID: {short_id}\n환자는 다음 약물을 복용했습니다:\n" + "\n".join(text_lines)
    medication_docs.append(
        Document(
            page_content=combined_text,
            metadata={"patient_id": short_id, "table": "medications"},
        )
    )

print("환자 수 (문서 수):", len(medication_docs))
# Guard the sample print: an empty CSV yields no documents, and indexing
# [0] would raise IndexError.
if medication_docs:
    print("예시 문서:\n", medication_docs[0].page_content)


In [None]:
# allergies.csv 읽기 (알러지 반응 기록)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
import pandas as pd
from typing import List
# Load the allergy records.
df = pd.read_csv("allergies.csv")


# Group the records by patient.
grouped = df.groupby("PATIENT")

# One Document per patient.
allergy_docs = []

for patient_id, group in grouped:
    # Only the first 8 characters of the patient id are exposed in the
    # text and the metadata.
    short_id = patient_id[:8]

    # One line per allergy record. Iterating the column directly avoids
    # building a full row Series per record as iterrows() does; the f-string
    # keeps the same string conversion of each value.
    text_lines = [f"{description}" for description in group["DESCRIPTION"]]

    # Assemble the full document text.
    combined_text = f"환자 ID: {short_id}\n환자는 다음과 같은 알레르기 반응을 보였습니다:\n" + "\n".join(text_lines)

    # Create the Document.
    allergy_docs.append(
        Document(
            page_content=combined_text,
            metadata={"patient_id": short_id, "table": "allergies"},
        )
    )

print("환자 수 (문서 수):", len(allergy_docs))
# Guard the sample print: an empty CSV yields no documents, and indexing
# [0] would raise IndexError.
if allergy_docs:
    print("예시 문서:\n", allergy_docs[0].page_content)