In [None]:
!pip install qdrant-client sentence-transformers transformers torch accelerate openai bitsandbytes

In [49]:
import json, uuid, os, torch
from typing import List, Dict, Any
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance
import openai
from google.colab import userdata

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

Device: cuda


### DB 구축하기

In [None]:
embedding_model = SentenceTransformer('dragonkue/BGE-m3-ko')

In [4]:
DB_PATH = "/content/drive/MyDrive/yonsei/YAI/qdrant_bge"
COLLECTION_NAME = "dcinside"

client = QdrantClient(path=DB_PATH)

collections = client.get_collections()
collection_names = [c.name for c in collections.collections]

if COLLECTION_NAME in collection_names:
    print(f" existing collection '{COLLECTION_NAME}'")
else:
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=1024, distance=Distance.COSINE)
    )
    print(f" new collection '{COLLECTION_NAME}' ")

 existing collection 'dcinside'


인덱스 생성하기

In [None]:
data_path = "/content/drive/MyDrive/yonsei/YAI/merged_dataset.json"

with open(data_path, 'r', encoding='utf-8') as f:
    dataset = json.load(f)

points_to_upsert = []
batch_size = 100

for idx, item in enumerate(dataset):
    main_text = item.get('main', '')
    comments = item.get('comments', [])
    comments_text = ' '.join(comments) if comments else ''

    full_text = f"{main_text} {comments_text}".strip()

    if not full_text:
        continue

    vector = embedding_model.encode(full_text).tolist()

    payload = {
        "date": item.get('date', ''),
        "main": main_text,
        "comments": comments,
        "source_url": item.get('source_url', ''),
        "gallery": item.get('gallery', ''),
        "full_text": full_text
    }

    point = PointStruct(
        id=str(uuid.uuid4()),
        vector=vector,
        payload=payload
    )
    points_to_upsert.append(point)

    if len(points_to_upsert) >= batch_size:
        client.upsert(
            collection_name=COLLECTION_NAME,
            points=points_to_upsert,
            wait=True
        )
        print(f"Indexed {idx + 1}/{len(dataset)} items...")
        points_to_upsert = []

if points_to_upsert:
    client.upsert(
        collection_name=COLLECTION_NAME,
        points=points_to_upsert,
        wait=True
    )

print(f"index completed : {len(dataset)}")

In [None]:
openai.api_key = userdata.get('OPENAI_KEY')

### RAG 함수 정의

In [5]:
def search_documents(query: str, top_k: int = 3, gallery_filter: str = None) -> List[Dict]:
    query_vector = embedding_model.encode(query).tolist()

    query_filter = None
    if gallery_filter:
        from qdrant_client.models import Filter, FieldCondition, MatchValue
        query_filter = Filter(
            must=[
                FieldCondition(
                    key="gallery",
                    match=MatchValue(value=gallery_filter)
                )
            ]
        )

    search_response = client.query_points(
        collection_name=COLLECTION_NAME,
        query=query_vector,
        query_filter=query_filter,
        limit=top_k,
        with_payload=True
    )

    results = []
    for hit in search_response.points:
        results.append({
            "score": hit.score,
            "date": hit.payload.get('date', ''),
            "main": hit.payload.get('main', ''),
            "comments": hit.payload.get('comments', []),
            "source_url": hit.payload.get('source_url', ''),
            "gallery": hit.payload.get('gallery', ''),
            "full_text": hit.payload.get('full_text', '')
        })

    return results

In [6]:
def _prepare_rag_context(query: str, top_k: int = 3, gallery_filter: str = None) -> tuple:
    retrieved_docs = search_documents(query, top_k=top_k, gallery_filter=gallery_filter)

    context_parts = []
    for i, doc in enumerate(retrieved_docs, 1):
        context_parts.append(f"[문서 {i}]")
        context_parts.append(f"날짜: {doc['date']}")
        context_parts.append(f"갤러리: {doc['gallery']}")
        context_parts.append(f"내용: {doc['main']}")
        if doc['comments']:
            context_parts.append(f"댓글: {', '.join(doc['comments'][:3])}")
        context_parts.append("")

    context = "\n".join(context_parts)
    return retrieved_docs, context

api로 데려오기

In [None]:
def generate_rag_response_api(
    query: str,
    model: str = "gpt-4o-mini",
    top_k: int = 3,
    gallery_filter: str = None,
    max_tokens: int = 512,
    temperature: float = 0.7
) -> Dict[str, Any]:

    retrieved_docs, context = _prepare_rag_context(query, top_k, gallery_filter)
    prompt = f"""다음 문서에 있는대로 질문에 답변해주세요.

{context}

질문: {query}

답변:"""

    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "당신은 제공된 문서에 있는대로 답변하는 도우미입니다."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens,
        temperature=temperature
    )
    answer = response.choices[0].message.content.strip()

    return {
        "query": query,
        "answer": answer,
        "retrieved_docs": retrieved_docs,
        "model_used": model
    }

In [None]:
!ls -lh ~/.cache/huggingface/hub | grep models

drwxr-xr-x 6 root root 4.0K Nov 21 05:35 models--dragonkue--BGE-m3-ko
drwxr-xr-x 6 root root 4.0K Nov 21 07:47 models--naver-hyperclovax--HyperCLOVAX-SEED-Text-Instruct-1.5B
drwxr-xr-x 6 root root 4.0K Nov 21 06:20 models--QuixiAI--WizardLM-7B-Uncensored


로컬 모델로 데려오기

In [None]:
# model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-1.5B"
# model_name = "EleutherAI/polyglot-ko-5.8b"
# model_name = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
# model_name = "QuixiAI/WizardLM-7B-Uncensored" 한국어를 못함
# model_name = "MLP-KTLim/llama-3-Korean-Bllossom-8B" 코랩에서 안돌아감 (양자화 필요)
# model_name = "Qwen/Qwen2.5-3B"
# model_name = "chuanli11/Llama-3.2-3B-Instruct-uncensored"
# model_name = "bartowski/Llama-3.2-3B-Instruct-uncensored-GGUF"
model_name = "s5ya/Ko-Llama-3.1-8B-Lexi-Uncensored-V2"

# 양자화 필요할 시 (코랩 기준 8B 이상)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config, # 필요할 시
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto" if device == "cuda" else None,
    low_cpu_mem_usage=True
)

print('load done !')

In [44]:
def generate_rag_response_local(
    query: str,
    top_k: int = 3,
    gallery_filter: str = None,
    max_tokens: int = 200,
    temperature: float = 0.2
) -> Dict[str, Any]:

    retrieved_docs, context = _prepare_rag_context(query, top_k, gallery_filter)

    # Prompt 구성
    prompt = f"""Answer the question based ONLY on the provided documents.
  Do not make up information.

  Documents:
  {context}

  Question: {query}

  Answer:

  Answer in Korean based on the documents:"""

    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            do_sample=True,
            top_p=0.8,
            pad_token_id=tokenizer.eos_token_id,
            repetition_penalty = 1.3
        )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    answer = generated_text.split("Answer:")[-1].strip()

    return {
        "query": query,
        "answer": answer
    }

테스트 1. 질문에 답해보거라

In [45]:
result = generate_rag_response_local("여자도 군대에 가야한다고 생각해?")
print(result)

{'query': '여자도 군대에 가야한다고 생각해?', 'answer': 'Answer the question based ONLY on the provided documents. \n  Do not make up information.\n\n  Documents:\n  [문서 1]\n날짜: 2024.05.12 13:19:15\n갤러리: \n내용: 여성은 군대 3년 보내라\n댓글: 이, ㅇㅈ\n\n[문서 2]\n날짜: 2025.03.12 23:47:04\n갤러리: \n내용: 나 여잔데 군대가고싶음\n댓글: 통일되도 여자도 군대가는 제도로 바꿔야함\n\n[문서 3]\n날짜: 2023.05.12 17:59:50\n갤러리: \n내용: 여자가 군대가는건 애 안낳아서 가는거.\n댓글: 천재인데?\n\n\n  Question: 여자도 군대에 가야한다고 생각해?\n\n  Answer in Korean based on the documents: \n\n   (no answer)    // if there is no relevant document or clear opinion expressed\n\n\n      (yes/strongly agree)\n        : \n\n\n       (somehow/agree)\n         : \n\n\n\n     (disagree/somewhat disagree)\n          : \n\n\n\n\n     (clearly/dismissively disagree)\n           : \n\n\n\n\nNote:\n\n* The response should be indicated by one of the above options only.\n* If a strong emotion like "hate" or "love" appears in any comment but it\'s related to another user and not directly answering your question then ignore t

In [None]:
def interactive_rag():
    while True:
        user_input = input("\n질문: ").strip()
        if user_input.lower() in ['quit', 'exit']:
            print("\n대화를 종료합니다.")
            break

In [None]:
interactive_rag()