In [1]:
pip install transformers sentence-transformers faiss-cpu


Collecting sentence-transformers
  Downloading sentence_transformers-5.0.0-py3-none-any.whl.metadata (16 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0.post1-cp312-cp312-macosx_14_0_arm64.whl.metadata (5.0 kB)
Downloading sentence_transformers-5.0.0-py3-none-any.whl (470 kB)
Downloading faiss_cpu-1.11.0.post1-cp312-cp312-macosx_14_0_arm64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hInstalling collected packages: faiss-cpu, sentence-transformers
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [sentence-transformers]ence-transformers]
[1A[2KSuccessfully installed faiss-cpu-1.11.0.post1 sentence-transformers-5.0.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Small toy corpus of medical statements (stand-ins for paper abstracts, etc.).
docs = [
    "Breast cancer is a disease where malignant cells form in breast tissue.",
    "MRI is a medical imaging technique used to visualize internal structures.",
    "Immunotherapy is a type of cancer treatment that helps the immune system fight cancer."
]

# Load the sentence-embedding model.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Embed every document: one row per entry in `docs`.
# FAISS operates on C-contiguous float32 matrices, so the dtype and layout are
# made explicit here instead of relying on the encoder's default output.
doc_embeddings = np.ascontiguousarray(
    embedder.encode(docs, convert_to_numpy=True),
    dtype=np.float32,
)

# Build an exact (flat) L2-distance index sized to the embedding dimension,
# then add all document vectors; row i of the index corresponds to docs[i].
index = faiss.IndexFlatL2(doc_embeddings.shape[1])
index.add(doc_embeddings)

# The index can be persisted with: faiss.write_index(index, "medical_index.faiss")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [3]:
from transformers import pipeline

# Question to answer.
question = "What is breast cancer treatment?"

# Embed the question with the same model that embedded the documents.
q_embedding = embedder.encode([question])[0]

# Retrieve the single most similar document (top-1).
# The query is passed to FAISS as a 2-D float32 matrix of shape (1, dim).
query_matrix = np.ascontiguousarray(q_embedding, dtype=np.float32).reshape(1, -1)
D, I = index.search(query_matrix, k=1)

# FAISS uses the label -1 for "no result"; without this check a -1 would
# silently select the last entry of `docs` via negative indexing.
top_doc_idx = int(I[0][0])
if top_doc_idx < 0:
    raise ValueError("FAISS search returned no match; is the index empty?")
retrieved_doc = docs[top_doc_idx]

# Text-to-text generation pipeline used to produce the answer.
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")

# Combine the retrieved context with the question and generate an answer.
prompt = f"Answer the question based on the context:\n\nContext: {retrieved_doc}\n\nQuestion: {question}"
# The cap is given as `max_new_tokens`: the pipeline already carries a default
# max_new_tokens (256) that takes precedence over `max_length`, so passing
# max_length=100 had no effect and only triggered a warning.
response = qa_pipeline(prompt, max_new_tokens=100, do_sample=False)[0]['generated_text']

print("🧠 LLM 응답:\n", response)


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use mps:0
Both `max_new_tokens` (=256) and `max_length`(=100) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


🧠 LLM 응답:
 chemotherapy
