In [1]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct
from qdrant_client.http import models
import uuid
from tqdm import tqdm
import json
from typing import List, Dict, Any
import re
from FlagEmbedding import BGEM3FlagModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dotenv import load_dotenv
import os

load_dotenv()

qdrant_url = os.getenv("QDRANT_URL")
api_key = os.getenv("QDRANT_API_KEY")
collection_name = os.getenv("QDRANT_COLLECTION")

### class langchain recursive+bge-m3

In [5]:
class LegalDocumentVectorDB:
    def __init__(self, qdrant_url: str, api_key: str, collection_name: str = "hukuki kararlar"):
        """
        Hukuki belgeler i√ßin vector database sƒ±nƒ±fƒ±
        
        Args:
            qdrant_url: Qdrant sunucu URL'i
            api_key: Qdrant API anahtarƒ±  
            collection_name: Collection adƒ±
        """
        self.client = QdrantClient(
            url=qdrant_url,
            api_key=api_key,
            timeout=60
        )
        self.collection_name = collection_name
        
        print("BGE-M3 modeli y√ºkleniyor...")
        self.model = BGEM3FlagModel("BAAI/bge-m3", use_fp16=True)  # fp16 hƒ±z i√ßin
        print("Model ba≈üarƒ±yla y√ºklendi!")

        self.vector_size = self.model.encode(
            ["test"], 
            return_dense=True, 
            return_sparse=False, 
            return_colbert_vecs=False
        )["dense_vecs"].shape[1]
        print(f"Vector boyutu: {self.vector_size}")

    def create_collection(self, recreate: bool = False):
        """Collection olu≈ütur"""
        try:
            if recreate:
                self.client.delete_collection(collection_name=self.collection_name)
                print(f"Eski collection '{self.collection_name}' silindi.")
        except:
            pass
        
        try:
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(
                    size=self.vector_size, 
                    distance=Distance.COSINE
                )
            )
            print(f"Collection '{self.collection_name}' olu≈üturuldu.")
        except Exception as e:
            if "already exists" in str(e).lower():
                print(f"Collection '{self.collection_name}' zaten mevcut.")
            else:
                raise e

    def clean_text(self, text: str) -> str:
        """Metni temizle"""
        if pd.isna(text):
            return ""
        
        # Encoding sorunlarƒ±nƒ± d√ºzelt
        text = str(text)
        replacements = {
            '√É¬§': '√§', '√É¬∂': '√∂', '√É¬º': '√º', '√É≈∏': '√ü',
            '√É‚Ä°': '√á', '√Ñ¬±': 'ƒ±', '√Ñ¬∞': 'ƒ∞', '√Ö≈∏': '≈ü',
            '√Ñ\x9f': 'ƒü', '√É¬ß': '√ß', '√É¬∂': '√∂', '√É¬º': '√º'
        }
        
        for old, new in replacements.items():
            text = text.replace(old, new)
        
        # Fazla bo≈üluklarƒ± temizle
        text = re.sub(r'\s+', ' ', text)
        text = text.strip()
        
        return text

    def process_csv(self, csv_path: str) -> pd.DataFrame:
        """CSV dosyasƒ±nƒ± i≈üle"""
        print(f"CSV dosyasƒ± okunuyor: {csv_path}")
        
        # Dosya varlƒ±ƒüƒ±nƒ± kontrol et
        import os
        if not os.path.exists(csv_path):
            print(f"‚ùå HATA: Dosya bulunamadƒ±: {csv_path}")
            return None
        
        try:
            # Encoding denemesi
            try:
                df = pd.read_csv(csv_path, encoding='utf-8')
                print("‚úÖ UTF-8 encoding ile ba≈üarƒ±yla okundu")
            except UnicodeDecodeError:
                df = pd.read_csv(csv_path, encoding='latin-1')
                print("‚úÖ Latin-1 encoding ile ba≈üarƒ±yla okundu")
        except Exception as e:
            print(f"‚ùå CSV okuma hatasƒ±: {e}")
            return None
        
        print(f"Toplam satƒ±r sayƒ±sƒ±: {len(df)}")
        print(f"S√ºtunlar: {df.columns.tolist()}")
        
        # Bo≈ü chunk_text'leri filtrele
        initial_count = len(df)
        df = df.dropna(subset=['chunk_text'])
        df = df[df['chunk_text'].str.strip() != '']
        final_count = len(df)
        
        print(f"Bo≈ü metin filtrelemesi: {initial_count} -> {final_count}")
        
        # Metinleri temizle
        df['chunk_text_clean'] = df['chunk_text'].apply(self.clean_text)
        
        return df

    def create_embeddings_batch(self, texts: List[str], batch_size: int = 32) -> List[List[float]]:
        """Metinleri batch halinde embedding'e √ßevir"""
        embeddings = []
        
        for i in tqdm(range(0, len(texts), batch_size), desc="Embedding olu≈üturuluyor"):
            batch = texts[i:i+batch_size]
            batch_embeddings = self.model.encode(batch)["dense_vecs"]

            embeddings.extend(batch_embeddings.tolist())
            #print(embeddings)
        
        return embeddings

    def upload_to_qdrant(self, df: pd.DataFrame, batch_size: int = 100):
        """DataFrame'i Qdrant'a y√ºkle"""
        print("Qdrant'a y√ºkleme ba≈ülƒ±yor...")
        
        # Embeddings olu≈ütur
        texts = df['chunk_text_clean'].tolist()
        embeddings = self.create_embeddings_batch(texts, batch_size=32)
        
        # Points olu≈ütur
        points = []
        for idx, (_, row) in enumerate(df.iterrows()):
            payload = {
            "document_id": str(row['_id']),
            "location": str(row['location']),
            "dates": str(row['extractedDates']),
            "esas_no": str(row['esasNo']),
            "karar_no": str(row['kararNo']),
            "esas_no_num": str(row['esasNo_num']),
            "esas_no_tip": str(row['esasNo_tip']),
            "karar_no_num": str(row['kararNo_num']),
            "karar_no_tip": str(row['kararNo_tip']),
            "chunk_id": str(row['chunk_id']),
            "chunk_text": str(row['chunk_text']),
            "token_count": int(row['token_count']),
            "num_sentences": int(row['num_sentences'])
        }

            
            point = PointStruct(
                id=str(uuid.uuid4()),
                vector=embeddings[idx],
                payload=payload
            )
            points.append(point)
        
        # Batch halinde y√ºkle
        for i in tqdm(range(0, len(points), batch_size), desc="Qdrant'a y√ºkleniyor"):
            batch_points = points[i:i+batch_size]
            
            try:
                self.client.upsert(
                    collection_name=self.collection_name,
                    points=batch_points
                )
            except Exception as e:
                print(f"Y√ºkleme hatasƒ± (batch {i//batch_size + 1}): {e}")
                continue
        
        print(f"Toplam {len(points)} dok√ºman ba≈üarƒ±yla y√ºklendi!")

    def search(self, query: str, limit: int = 5, score_threshold: float = 0.5) -> List[Dict[str, Any]]:
        """Semantik arama yap"""
        print(f"Arama yapƒ±lƒ±yor: '{query}'")
        
        # Query embedding'i olu≈ütur
        query_embedding = self.model.encode([query])["dense_vecs"][0].tolist()
        
        # Arama yap (query_points kullan)
        search_results = self.client.query_points(
            collection_name=self.collection_name,
            query=query_embedding,
            limit=limit,
            score_threshold=score_threshold
        )
        
        results = []
        for result in search_results.points:
            results.append({
                "score": result.score,
                "payload": result.payload
            })
        
        return results

    def advanced_search(self, query: str, filters: Dict = None, limit: int = 5) -> List[Dict[str, Any]]:
        """Geli≈ümi≈ü filtreleme ile arama"""
        embedding_output = self.model.encode([query], convert_to_numpy=True)
        query_embedding = embedding_output["dense_vecs"][0].tolist()
        
        # Filter olu≈ütur
        filter_conditions = None
        if filters:
            conditions = []
            
            # if 'daire' in filters:
            #     conditions.append(models.FieldCondition(
            #         key="daire",
            #         match=models.MatchValue(value=filters['daire'])
            #     ))
            
            if 'kararNo_tip' in filters:
                conditions.append(models.FieldCondition(
                    key="kararNo_tip", 
                    match=models.MatchValue(value=filters['kararNo_tip'])
                ))
            
            if 'year' in filters:
                conditions.append(models.FieldCondition(
                    key="dates",
                    match=models.MatchText(text=str(filters['year']))
                ))
            
            if conditions:
                filter_conditions = models.Filter(must=conditions)
        
        # query_points kullan
        search_results = self.client.query_points(
            collection_name=self.collection_name,
            query=query_embedding,
            query_filter=filter_conditions,
            limit=limit
        )
        
        results = []
        for result in search_results.points:
            results.append({
                "score": result.score,
                "payload": result.payload
            })
        
        return results

    def get_collection_info(self):
        """Collection bilgilerini getir"""
        try:
            info = self.client.get_collection(collection_name=self.collection_name)
            result = {
                "status": str(info.status),
                "vectors_count": info.vectors_count if hasattr(info, 'vectors_count') else 0,
            }
            
            # Mevcut attributelarƒ± kontrol et ve ekle
            if hasattr(info, 'segments_count'):
                result["segments_count"] = info.segments_count
            if hasattr(info, 'indexed_vectors_count'):
                result["indexed_vectors_count"] = info.indexed_vectors_count
            if hasattr(info, 'points_count'):
                result["points_count"] = info.points_count
                
            return result
        except Exception as e:
            return {"error": str(e)}

### main

In [None]:
def main():
    """Ana fonksiyon"""
    # Konfig√ºrasyon
    QDRANT_URL = qdrant_url
    API_KEY = api_key  # Buraya ger√ßek API anahtarƒ±nƒ±zƒ± yazƒ±n
    CSV_FILE = "/home/yapayzeka/ahsen_bulbul/model/chonkie/semantic/semantic_with_metadata.csv"  # CSV dosya yolu
    
    # Vector DB instance olu≈ütur
    db = LegalDocumentVectorDB(
        qdrant_url=QDRANT_URL,
        api_key=API_KEY,
        collection_name="hukuki_kararlar"
    )
    
    # Collection olu≈ütur
    db.create_collection(recreate=True)
    
    # CSV'yi i≈üle ve y√ºkle
    df = db.process_csv(CSV_FILE)
    if df is not None:
        db.upload_to_qdrant(df, batch_size=50)
    
    # Collection bilgilerini g√∂ster
    info = db.get_collection_info()
    print("\n=== Collection Bilgileri ===")
    print(json.dumps(info, indent=2, ensure_ascii=False))
    
    # √ñrnek aramalar
    print("\n=== √ñrnek Aramalar ===")
    
    # Basit arama (score threshold d√º≈ü√ºr√ºld√º)
    results = db.search("ihtiyati √∂nlem tazminat", limit=3, score_threshold=0.6)
    print(f"\n1. Arama: 'ihtiyati √∂nlem tazminat' - {len(results)} sonu√ß")
    for i, result in enumerate(results, 1):
        print(f"\n{i}. Sonu√ß (Skor: {result['score']:.3f})")
        #print(f"   Daire: {result['payload']['daire']}")
        # print(f"   Esas No: {result['payload']['esasNo_num']}")
        # print(f"   Karar No: {result['payload']['kararNo_num']}")
        print(f"   Metin: {result['payload']['chunk_text'][:200]}...")
    
    # Filtreli arama
    filters = { "karar_turu": "RED"}
    results2 = db.advanced_search("mahkeme kararƒ±", filters=filters, limit=2)
    print(f"\n2. Filtreli Arama: '6. Hukuk Dairesi + RED' - {len(results2)} sonu√ß")
    for i, result in enumerate(results2, 1):
        print(f"\n{i}. Sonu√ß (Skor: {result['score']:.3f})")
        # print(f"   Esas No: {result['payload']['esasNo_num']}")
        print(f"   Metin: {result['payload']['chunk_text'][:150]}...")

if __name__ == "__main__":
    # Interaktif kullanƒ±m i√ßin ayrƒ± class
    class InteractiveLegalSearch:
        def __init__(self, qdrant_url: str, api_key: str):
            self.db = LegalDocumentVectorDB(qdrant_url, api_key)
        
        def setup_database(self, csv_file: str):
            """Database'i kur"""
            print("Database kurulumu ba≈ülƒ±yor...")
            self.db.create_collection(recreate=True)
            
            df = self.db.process_csv(csv_file)
            if df is not None:
                self.db.upload_to_qdrant(df)
                print("Database kurulumu tamamlandƒ±!")
                return True
            return False
        
        def interactive_search(self):
            """Interaktif arama"""
            while True:
                print("\n" + "="*50)
                print("HUKUKƒ∞ KARAR ARAMA Sƒ∞STEMƒ∞")
                print("="*50)
                
                query = input("\nAramak istediƒüiniz metni girin (√ßƒ±kmak i√ßin 'q'): ")
                if query.lower() == 'q':
                    break
                
                try:
                    limit = int(input("Ka√ß sonu√ß g√∂sterilsin? (varsayƒ±lan 5): ") or "5")
                except:
                    limit = 5
                
                results = self.db.search(query, limit=limit)
                
                if not results:
                    print("‚ùå Sonu√ß bulunamadƒ±.")
                    continue
                
                print(f"\nüîç '{query}' i√ßin {len(results)} sonu√ß bulundu:")
                print("-" * 50)
                
                for i, result in enumerate(results, 1):
                    payload = result['payload']
                    print(f"\nüìÑ {i}. SONU√á (Benzerlik: {result['score']:.3f})")
                    #print(f"   üìÇ Daire: {payload['daire']}")
                    print(f"   üìã Esas No: {payload['esasNo_num']}")
                    print(f"   ‚öñÔ∏è Karar No: {payload['kararNo_num']}")
                    # print(f"   üèõÔ∏è Mahkeme: {payload['mahkeme']}")
                    print(f"   üìÖ Tarihler: {payload['dates']}")
                    # print(f"   üìù Chunk: {payload['chunk_index']}/{payload['total_chunks']}")
                    print(f"   üìÑ Metin √ñnizleme:")
                    print(f"      {payload['chunk_text'][:300]}...")
                    print("-" * 30)
    
    # Ana √ßalƒ±≈ütƒ±rma
    main()

BGE-M3 modeli y√ºkleniyor...


Fetching 30 files: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 30/30 [00:00<00:00, 226719.14it/s]


Model ba≈üarƒ±yla y√ºklendi!


You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Vector boyutu: 1024
Eski collection 'hukuki_kararlar' silindi.
Collection 'hukuki_kararlar' olu≈üturuldu.
CSV dosyasƒ± okunuyor: /home/yapayzeka/ahsen_bulbul/model/chonkie/semantic/semantic_with_metadata.csv
‚úÖ UTF-8 encoding ile ba≈üarƒ±yla okundu
Toplam satƒ±r sayƒ±sƒ±: 105
S√ºtunlar: ['_id', 'location', 'extractedDates', 'esasNo', 'kararNo', 'esasNo_num', 'esasNo_tip', 'kararNo_num', 'kararNo_tip', 'chunk_id', 'chunk_text', 'token_count', 'num_sentences']
Bo≈ü metin filtrelemesi: 105 -> 105
Qdrant'a y√ºkleme ba≈ülƒ±yor...


Embedding olu≈üturuluyor: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4/4 [00:01<00:00,  3.93it/s]
Qdrant'a y√ºkleniyor: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3/3 [00:00<00:00, 21.74it/s]


Toplam 105 dok√ºman ba≈üarƒ±yla y√ºklendi!

=== Collection Bilgileri ===
{
  "status": "green",
  "vectors_count": null,
  "segments_count": 8,
  "indexed_vectors_count": 0,
  "points_count": 105
}

=== √ñrnek Aramalar ===
Arama yapƒ±lƒ±yor: 'ihtiyati √∂nlem tazminat'

1. Arama: 'ihtiyati √∂nlem tazminat' - 3 sonu√ß

1. Sonu√ß (Skor: 0.634)


KeyError: 'kararNo_num'

### chonkie semantic+bge-m3

In [None]:
class LegalDocumentVectorDB:
    def __init__(self, qdrant_url: str, api_key: str, collection_name: str = "hukuki kararlar"):
        """
        Hukuki belgeler i√ßin vector database sƒ±nƒ±fƒ±
        
        Args:
            qdrant_url: Qdrant sunucu URL'i
            api_key: Qdrant API anahtarƒ±  
            collection_name: Collection adƒ±
        """
        self.client = QdrantClient(
            url=qdrant_url,
            api_key=api_key,
            timeout=60
        )
        self.collection_name = collection_name
        
        print("BGE-M3 modeli y√ºkleniyor...")
        self.model = BGEM3FlagModel("BAAI/bge-m3", use_fp16=True)  # fp16 hƒ±z i√ßin
        print("Model ba≈üarƒ±yla y√ºklendi!")

        self.vector_size = self.model.encode(
            ["test"], 
            return_dense=True, 
            return_sparse=False, 
            return_colbert_vecs=False
        )["dense_vecs"].shape[1]
        print(f"Vector boyutu: {self.vector_size}")

    def create_collection(self, recreate: bool = False):
        """Collection olu≈ütur"""
        try:
            if recreate:
                self.client.delete_collection(collection_name=self.collection_name)
                print(f"Eski collection '{self.collection_name}' silindi.")
        except:
            pass
        
        try:
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(
                    size=self.vector_size, 
                    distance=Distance.COSINE
                )
            )
            print(f"Collection '{self.collection_name}' olu≈üturuldu.")
        except Exception as e:
            if "already exists" in str(e).lower():
                print(f"Collection '{self.collection_name}' zaten mevcut.")
            else:
                raise e

    def clean_text(self, text: str) -> str:
        """Metni temizle"""
        if pd.isna(text):
            return ""
        
        # Encoding sorunlarƒ±nƒ± d√ºzelt
        text = str(text)
        replacements = {
            '√É¬§': '√§', '√É¬∂': '√∂', '√É¬º': '√º', '√É≈∏': '√ü',
            '√É‚Ä°': '√á', '√Ñ¬±': 'ƒ±', '√Ñ¬∞': 'ƒ∞', '√Ö≈∏': '≈ü',
            '√Ñ\x9f': 'ƒü', '√É¬ß': '√ß', '√É¬∂': '√∂', '√É¬º': '√º'
        }
        
        for old, new in replacements.items():
            text = text.replace(old, new)
        
        # Fazla bo≈üluklarƒ± temizle
        text = re.sub(r'\s+', ' ', text)
        text = text.strip()
        
        return text

    def process_csv(self, csv_path: str) -> pd.DataFrame:
        """CSV dosyasƒ±nƒ± i≈üle"""
        print(f"CSV dosyasƒ± okunuyor: {csv_path}")
        
        # Dosya varlƒ±ƒüƒ±nƒ± kontrol et
        import os
        if not os.path.exists(csv_path):
            print(f"‚ùå HATA: Dosya bulunamadƒ±: {csv_path}")
            return None
        
        try:
            # Encoding denemesi
            try:
                df = pd.read_csv(csv_path, encoding='utf-8')
                print("‚úÖ UTF-8 encoding ile ba≈üarƒ±yla okundu")
            except UnicodeDecodeError:
                df = pd.read_csv(csv_path, encoding='latin-1')
                print("‚úÖ Latin-1 encoding ile ba≈üarƒ±yla okundu")
        except Exception as e:
            print(f"‚ùå CSV okuma hatasƒ±: {e}")
            return None
        
        print(f"Toplam satƒ±r sayƒ±sƒ±: {len(df)}")
        print(f"S√ºtunlar: {df.columns.tolist()}")
        
        # Bo≈ü chunk_text'leri filtrele
        initial_count = len(df)
        df = df.dropna(subset=['chunk_text'])
        df = df[df['chunk_text'].str.strip() != '']
        final_count = len(df)
        
        print(f"Bo≈ü metin filtrelemesi: {initial_count} -> {final_count}")
        
        # Metinleri temizle
        df['chunk_text_clean'] = df['chunk_text'].apply(self.clean_text)
        
        return df

    def create_embeddings_batch(self, texts: List[str], batch_size: int = 32) -> List[List[float]]:
        """Metinleri batch halinde embedding'e √ßevir"""
        embeddings = []
        
        for i in tqdm(range(0, len(texts), batch_size), desc="Embedding olu≈üturuluyor"):
            batch = texts[i:i+batch_size]
            batch_embeddings = self.model.encode(batch)["dense_vecs"]

            embeddings.extend(batch_embeddings.tolist())
            #print(embeddings)
        
        return embeddings

    def upload_to_qdrant(self, df: pd.DataFrame, batch_size: int = 100):
        """DataFrame'i Qdrant'a y√ºkle"""
        print("Qdrant'a y√ºkleme ba≈ülƒ±yor...")
        
        # Embeddings olu≈ütur
        texts = df['chunk_text_clean'].tolist()
        embeddings = self.create_embeddings_batch(texts, batch_size=32)
        
        # Points olu≈ütur
        points = []
        for idx, (_, row) in enumerate(df.iterrows()):
            payload = {
                "document_id": str(row['_id']),
                "location": str(row['location']),
                "esas_no": str(row['esasNo']),
                "karar_no": str(row['kararNo']),
                "dates": str(row['extractedDates']),
                "chunk_id": str(row['chunk_id']),
                
                "token_count": int(row['token_count']),
                "chunk_text": str(row['chunk_text_clean']),
                "num_sentences": int(row['num_sentences'])
            }
            
            point = PointStruct(
                id=str(uuid.uuid4()),
                vector=embeddings[idx],
                payload=payload
            )
            points.append(point)
        
        # Batch halinde y√ºkle
        for i in tqdm(range(0, len(points), batch_size), desc="Qdrant'a y√ºkleniyor"):
            batch_points = points[i:i+batch_size]
            
            try:
                self.client.upsert(
                    collection_name=self.collection_name,
                    points=batch_points
                )
            except Exception as e:
                print(f"Y√ºkleme hatasƒ± (batch {i//batch_size + 1}): {e}")
                continue
        
        print(f"Toplam {len(points)} dok√ºman ba≈üarƒ±yla y√ºklendi!")

    def search(self, query: str, limit: int = 5, score_threshold: float = 0.5) -> List[Dict[str, Any]]:
        """Semantik arama yap"""
        print(f"Arama yapƒ±lƒ±yor: '{query}'")
        
        # Query embedding'i olu≈ütur
        query_embedding = self.model.encode([query])["dense_vecs"][0].tolist()
        
        # Arama yap (query_points kullan)
        search_results = self.client.query_points(
            collection_name=self.collection_name,
            query=query_embedding,
            limit=limit,
            score_threshold=score_threshold
        )
        
        results = []
        for result in search_results.points:
            results.append({
                "score": result.score,
                "payload": result.payload
            })
        
        return results

    def advanced_search(self, query: str, filters: Dict = None, limit: int = 5) -> List[Dict[str, Any]]:
        """Geli≈ümi≈ü filtreleme ile arama"""
        embedding_output = self.model.encode([query], convert_to_numpy=True)
        query_embedding = embedding_output["dense_vecs"][0].tolist()
        
        # Filter olu≈ütur
        filter_conditions = None
        if filters:
            conditions = []
            
            if 'location' in filters:
                conditions.append(models.FieldCondition(
                    key="location",
                    match=models.MatchValue(value=filters['location'])
                ))
            
            if 'kararNo_num' in filters:
                conditions.append(models.FieldCondition(
                    key="kararNo_num", 
                    match=models.MatchValue(value=filters['kararNo_num'])
                ))
            
            if 'year' in filters:
                conditions.append(models.FieldCondition(
                    key="dates",
                    match=models.MatchText(text=str(filters['year']))
                ))
            
            if conditions:
                filter_conditions = models.Filter(must=conditions)
        
        # query_points kullan
        search_results = self.client.query_points(
            collection_name=self.collection_name,
            query=query_embedding,
            query_filter=filter_conditions,
            limit=limit
        )
        
        results = []
        for result in search_results.points:
            results.append({
                "score": result.score,
                "payload": result.payload
            })
        
        return results

    def get_collection_info(self):
        """Collection bilgilerini getir"""
        try:
            info = self.client.get_collection(collection_name=self.collection_name)
            result = {
                "status": str(info.status),
                "vectors_count": info.vectors_count if hasattr(info, 'vectors_count') else 0,
            }
            
            # Mevcut attributelarƒ± kontrol et ve ekle
            if hasattr(info, 'segments_count'):
                result["segments_count"] = info.segments_count
            if hasattr(info, 'indexed_vectors_count'):
                result["indexed_vectors_count"] = info.indexed_vectors_count
            if hasattr(info, 'points_count'):
                result["points_count"] = info.points_count
                
            return result
        except Exception as e:
            return {"error": str(e)}

### main

In [None]:
def main():
    """Ana fonksiyon"""
    # Konfig√ºrasyon
    QDRANT_URL = qdrant_url
    API_KEY = api_key  # Buraya ger√ßek API anahtarƒ±nƒ±zƒ± yazƒ±n
    CSV_FILE = "/home/yapayzeka/ahsen_bulbul/model/chonkie/semantic/2semantic_with_metadata.csv"  # CSV dosya yolu
    
    # Vector DB instance olu≈ütur
    db = LegalDocumentVectorDB(
        qdrant_url=QDRANT_URL,
        api_key=API_KEY,
        collection_name="hukuki_kararlar"
    )
    
    # Collection olu≈ütur
    db.create_collection(recreate=True)
    
    # CSV'yi i≈üle ve y√ºkle
    df = db.process_csv(CSV_FILE)
    if df is not None:
        db.upload_to_qdrant(df, batch_size=50)
    
    # Collection bilgilerini g√∂ster
    info = db.get_collection_info()
    print("\n=== Collection Bilgileri ===")
    print(json.dumps(info, indent=2, ensure_ascii=False))
    
    # √ñrnek aramalar
    print("\n=== √ñrnek Aramalar ===")
    
    # Basit arama (score threshold d√º≈ü√ºr√ºld√º)
    results = db.search("ihtiyati √∂nlem tazminat", limit=10, score_threshold=0.6)
    print(f"\n1. Arama: 'ihtiyati √∂nlem tazminat' - {len(results)} sonu√ß")
    for i, result in enumerate(results, 1):
        print(f"\n{i}. Sonu√ß (Skor: {result['score']:.3f})")
        print(f"   Esas No: {result['payload']['esas_no']}")
        print(f"   Karar No: {result['payload']['karar_no']}")
        print(f"   Metin: {result['payload']['chunk_text'][:200]}...")
    
    # Filtreli arama
    filters = {"location": "6. Hukuk Dairesi"}
    # , "karar_turu": "RED"}
    results2 = db.advanced_search("mahkeme kararƒ±", filters=filters, limit=10)
    print(f"\n2. Filtreli Arama: '6. Hukuk Dairesi' - {len(results2)} sonu√ß")
    for i, result in enumerate(results2, 1):
        print(f"\n{i}. Sonu√ß (Skor: {result['score']:.3f})")
        print(f"   Esas No: {result['payload']['esas_no']}")
        print(f"   Metin: {result['payload']['chunk_text'][:150]}...")

if __name__ == "__main__":
    # Interaktif kullanƒ±m i√ßin ayrƒ± class
    class InteractiveLegalSearch:
        def __init__(self, qdrant_url: str, api_key: str, collection_name:str):
            self.db = LegalDocumentVectorDB(qdrant_url, api_key,collection_name)
        
        def setup_database(self, csv_file: str):
            """Database'i kur"""
            print("Database kurulumu ba≈ülƒ±yor...")
            self.db.create_collection(recreate=True)
            
            df = self.db.process_csv(csv_file)
            if df is not None:
                self.db.upload_to_qdrant(df)
                print("Database kurulumu tamamlandƒ±!")
                return True
            return False
        
        def interactive_search(self):
            """Interaktif arama"""
            while True:
                print("\n" + "="*50)
                print("HUKUKƒ∞ KARAR ARAMA Sƒ∞STEMƒ∞")
                print("="*50)
                
                query = input("\nAramak istediƒüiniz metni girin (√ßƒ±kmak i√ßin 'q'): ")
                if query.lower() == 'q':
                    break
                
                try:
                    limit = int(input("Ka√ß sonu√ß g√∂sterilsin? (varsayƒ±lan 5): ") or "5")
                except:
                    limit = 5
                
                results = self.db.search(query, limit=limit)
                
                if not results:
                    print("‚ùå Sonu√ß bulunamadƒ±.")
                    continue
                
                print(f"\nüîç '{query}' i√ßin {len(results)} sonu√ß bulundu:")
                print("-" * 50)
                
                for i, result in enumerate(results, 1):
                    payload = result['payload']
                    print(f"\nüìÑ {i}. SONU√á (Benzerlik: {result['score']:.3f})")
                    
                    print(f"   üìã Esas No: {payload['esas_no']}")
                    print(f"   ‚öñÔ∏è Karar No: {payload['karar_no']}")
                
                    print(f"   üìÖ Tarihler: {payload['dates']}")
                    print(f"   üìù Chunk: {payload['token_count']}")
                    print(f"   üìÑ Metin √ñnizleme:")
                    print(f"      {payload['chunk_text'][:300]}...")
                    print("-" * 30)
    searcher = InteractiveLegalSearch(
    qdrant_url=qdrant_url,
    api_key=api_key,
    collection_name="hukuki_kararlar"
    )
    searcher.interactive_search()
main()

#### berturk(facia)

In [None]:
from typing import List, Dict, Any
import pandas as pd
import os, re, uuid
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams, Distance, PointStruct, FieldCondition, MatchValue, Filter


In [None]:
from dotenv import load_dotenv
import os

load_dotenv()

qdrant_url = os.getenv("QDRANT_URL")
api_key = os.getenv("QDRANT_API_KEY")
collection_name = os.getenv("QDRANT_COLLECTION")

In [None]:
class LegalDocumentVectorDB:
    def __init__(self, qdrant_url: str, api_key: str, collection_name: str = "hukuki_kararlar"):
        self.client = QdrantClient(url=qdrant_url, api_key=api_key, timeout=60)
        self.collection_name = collection_name
        
        print("BERTurk modeli y√ºkleniyor...")
        self.tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")
        self.model = AutoModel.from_pretrained("dbmdz/bert-base-turkish-cased")
        print("Model ba≈üarƒ±yla y√ºklendi!")
        
        # Embedding boyutu
        self.vector_size = self.model.config.hidden_size
        print(f"Vector boyutu: {self.vector_size}")

    def clean_text(self, text: str) -> str:
        if pd.isna(text): return ""
        text = str(text)
        replacements = {
            '√É¬§':'√§', '√É¬∂':'√∂', '√É¬º':'√º', '√É≈∏':'√ü',
            '√É‚Ä°':'√á', '√Ñ¬±':'ƒ±', '√Ñ¬∞':'ƒ∞', '√Ö≈∏':'≈ü',
            '√Ñ\x9f':'ƒü', '√É¬ß':'√ß'
        }
        for old, new in replacements.items():
            text = text.replace(old, new)
        return re.sub(r'\s+', ' ', text).strip()

    def process_csv(self, csv_path: str) -> pd.DataFrame:
        print(f"CSV okunuyor: {csv_path}")
        if not os.path.exists(csv_path):
            print(f"‚ùå Dosya bulunamadƒ±: {csv_path}")
            return None
        
        try:
            df = pd.read_csv(csv_path, encoding='utf-8')
        except UnicodeDecodeError:
            df = pd.read_csv(csv_path, encoding='latin-1')
        print(f"S√ºtunlar: {df.columns.tolist()} ‚Äì Satƒ±r: {len(df)}")
        
        df = df.dropna(subset=['chunk_text'])
        df = df[df['chunk_text'].str.strip() != '']
        df['chunk_text_clean'] = df['chunk_text'].apply(self.clean_text)
        return df

    def embed(self, texts: List[str]) -> List[List[float]]:
        inputs = self.tokenizer(texts, truncation=True, padding=True, return_tensors="pt")
        with torch.no_grad():
            outputs = self.model(**inputs)
        embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()
        return embeddings.tolist()

    def create_collection(self, recreate: bool = False):
        if recreate:
            try:
                self.client.delete_collection(collection_name=self.collection_name)
                print("Eski collection silindi.")
            except:
                pass
        try:
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(size=self.vector_size, distance=Distance.COSINE)
            )
            print("Collection olu≈üturuldu.")
        except Exception as e:
            if "already exists" in str(e).lower():
                print("Collection zaten mevcut.")
            else:
                raise

    def upload_to_qdrant(self, df: pd.DataFrame, batch_size: int = 100):
        texts = df['chunk_text_clean'].tolist()
        embeddings = self.embed(texts)
        points = [
            PointStruct(
                id=str(uuid.uuid4()),
                vector=embeddings[idx],
                payload={
                    "chunk_id": row['chunk_id'],
                    "chunk_text": row['chunk_text_clean'],
                    "other": row[['location','esasNo','kararNo']].to_dict()
                }
            ) for idx, (_, row) in enumerate(df.iterrows())
        ]
        for i in tqdm(range(0, len(points), batch_size), desc="Y√ºkleniyor"):
            self.client.upsert(collection_name=self.collection_name, points=points[i:i+batch_size])
        print(f"{len(points)} dok√ºman y√ºklendi!")

    def search(self, query: str, limit: int = 5, score_threshold: float = 0.5) -> List[Dict[str, Any]]:
        print(f"Arama: {query}")
        query_vec = self.embed([query])[0]
        search_results = self.client.search(collection_name=self.collection_name, query_vector=query_vec, limit=limit)
        return [{"score": r.score, "payload": r.payload} for r in search_results]

    def get_collection_info(self):
        info = self.client.get_collection(collection_name=self.collection_name)
        return {
            "status": str(info.status),
            "points_count": getattr(info, 'points_count', None)
        }

In [None]:
import json

def main():
    """Ana fonksiyon"""
    # Konfig√ºrasyon
    QDRANT_URL = qdrant_url
    API_KEY = api_key
    CSV_FILE = "/home/yapayzeka/ahsen_bulbul/model/chonkie/semantic/2semantic_with_metadata.csv"
    
    # Vector DB instance olu≈ütur
    db = LegalDocumentVectorDB(
        qdrant_url=QDRANT_URL,
        api_key=API_KEY,
        collection_name="hukuki_kararlar"
    )
    
    # Collection olu≈ütur
    db.create_collection(recreate=True)
    
    # CSV'yi i≈üle ve y√ºkle
    df = db.process_csv(CSV_FILE)
    if df is not None:
        db.upload_to_qdrant(df, batch_size=50)
    
    # Collection bilgilerini g√∂ster
    info = db.get_collection_info()
    print("\n=== Collection Bilgileri ===")
    print(json.dumps(info, indent=2, ensure_ascii=False))
    
    # √ñrnek aramalar
    print("\n=== √ñrnek Aramalar ===")
    
    # Basit arama
    results = db.search("ihtiyati tedbir tazminat", limit=5)
    print(f"\n1. Arama: 'ihtiyati edbir tazminat' - {len(results)} sonu√ß")
    for i, result in enumerate(results, 1):
        payload = result['payload']
        print(f"\n{i}. Sonu√ß (Skor: {result['score']:.3f})")
        print(f"   Esas No: {payload.get('esasNo')}")
        print(f"   Karar No: {payload.get('kararNo')}")
        print(f"   Metin: {payload['chunk_text'][:200]}...")

if __name__ == "__main__":
    main()


### chonkie semantic ve allminiLM12 deniyoruz

In [None]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct
from qdrant_client.http import models
import uuid
from tqdm import tqdm
import json
from typing import List, Dict, Any
import re

In [None]:
from dotenv import load_dotenv
import os

load_dotenv()

qdrant_url = os.getenv("QDRANT_URL")
api_key = os.getenv("QDRANT_API_KEY")
collection_name = os.getenv("QDRANT_COLLECTION")

In [None]:
class LegalDocumentVectorDB:
    def __init__(self, qdrant_url: str, api_key: str, collection_name: str = "hukuki kararlar"):
        """
        Hukuki belgeler i√ßin vector database sƒ±nƒ±fƒ±
        
        Args:
            qdrant_url: Qdrant sunucu URL'i
            api_key: Qdrant API anahtarƒ±  
            collection_name: Collection adƒ±
        """
        self.client = QdrantClient(
            url=qdrant_url,
            api_key=api_key,
            timeout=60
        )
        self.collection_name = collection_name
        
        # T√ºrk√ße i√ßin optimize edilmi≈ü multilingual model
        print("Sentence Transformer modeli y√ºkleniyor...")
        self.model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
        print("Model ba≈üarƒ±yla y√ºklendi!")
        
        # Vector boyutunu al
        self.vector_size = self.model.get_sentence_embedding_dimension()
        print(f"Vector boyutu: {self.vector_size}")

    def create_collection(self, recreate: bool = False):
        """Collection olu≈ütur"""
        try:
            if recreate:
                self.client.delete_collection(collection_name=self.collection_name)
                print(f"Eski collection '{self.collection_name}' silindi.")
        except:
            pass
        
        try:
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(
                    size=self.vector_size, 
                    distance=Distance.COSINE
                )
            )
            print(f"Collection '{self.collection_name}' olu≈üturuldu.")
        except Exception as e:
            if "already exists" in str(e).lower():
                print(f"Collection '{self.collection_name}' zaten mevcut.")
            else:
                raise e

    def clean_text(self, text: str) -> str:
        """Metni temizle"""
        if pd.isna(text):
            return ""
        
        # Encoding sorunlarƒ±nƒ± d√ºzelt
        text = str(text)
        replacements = {
            '√É¬§': '√§', '√É¬∂': '√∂', '√É¬º': '√º', '√É≈∏': '√ü',
            '√É‚Ä°': '√á', '√Ñ¬±': 'ƒ±', '√Ñ¬∞': 'ƒ∞', '√Ö≈∏': '≈ü',
            '√Ñ\x9f': 'ƒü', '√É¬ß': '√ß', '√É¬∂': '√∂', '√É¬º': '√º'
        }
        
        for old, new in replacements.items():
            text = text.replace(old, new)
        
        # Fazla bo≈üluklarƒ± temizle
        text = re.sub(r'\s+', ' ', text)
        text = text.strip()
        
        return text

    def process_csv(self, csv_path: str) -> pd.DataFrame:
        """CSV dosyasƒ±nƒ± i≈üle"""
        print(f"CSV dosyasƒ± okunuyor: {csv_path}")
        
        # Dosya varlƒ±ƒüƒ±nƒ± kontrol et
        import os
        if not os.path.exists(csv_path):
            print(f"‚ùå HATA: Dosya bulunamadƒ±: {csv_path}")
            return None
        
        try:
            # Encoding denemesi
            try:
                df = pd.read_csv(csv_path, encoding='utf-8')
                print("‚úÖ UTF-8 encoding ile ba≈üarƒ±yla okundu")
            except UnicodeDecodeError:
                df = pd.read_csv(csv_path, encoding='latin-1')
                print("‚úÖ Latin-1 encoding ile ba≈üarƒ±yla okundu")
        except Exception as e:
            print(f"‚ùå CSV okuma hatasƒ±: {e}")
            return None
        
        print(f"Toplam satƒ±r sayƒ±sƒ±: {len(df)}")
        print(f"S√ºtunlar: {df.columns.tolist()}")
        
        # Bo≈ü chunk_text'leri filtrele
        initial_count = len(df)
        df = df.dropna(subset=['chunk_text'])
        df = df[df['chunk_text'].str.strip() != '']
        final_count = len(df)
        
        print(f"Bo≈ü metin filtrelemesi: {initial_count} -> {final_count}")
        
        # Metinleri temizle
        df['chunk_text_clean'] = df['chunk_text'].apply(self.clean_text)
        
        return df

    def create_embeddings_batch(self, texts: List[str], batch_size: int = 32) -> List[List[float]]:
        """Metinleri batch halinde embedding'e √ßevir"""
        embeddings = []
        
        for i in tqdm(range(0, len(texts), batch_size), desc="Embedding olu≈üturuluyor"):
            batch = texts[i:i+batch_size]
            batch_embeddings = self.model.encode(
                batch, 
                convert_to_numpy=True,
                show_progress_bar=False,
                normalize_embeddings=True  # Cosine similarity i√ßin normalize et
            )
            embeddings.extend(batch_embeddings.tolist())
            print(embeddings)
        
        return embeddings

    def upload_to_qdrant(self, df: pd.DataFrame, batch_size: int = 100):
        """DataFrame'i Qdrant'a y√ºkle"""
        print("Qdrant'a y√ºkleme ba≈ülƒ±yor...")
        
        # Embeddings olu≈ütur
        texts = df['chunk_text_clean'].tolist()
        embeddings = self.create_embeddings_batch(texts, batch_size=32)
        
        # Points olu≈ütur
        points = []
        for idx, (_, row) in enumerate(df.iterrows()):
            payload = {
                "document_id": str(row['_id']),
                "location": str(row['location']),
                "esas_no": str(row['esasNo']),
                "karar_no": str(row['kararNo']),
                "dates": str(row['extractedDates']),
                "daire": str(row['daire']),
                "mahkeme": str(row['mahkeme']),
                "karar_turu": str(row['karar_turu']),
                "chunk_id": str(row['chunk_id']),
                "chunk_index": int(row['chunk_index']),
                "total_chunks": int(row['total_chunks']),
                "chunk_text": str(row['chunk_text_clean']),
                "chunk_length": int(row['chunk_length'])
            }
            
            point = PointStruct(
                id=str(uuid.uuid4()),
                vector=embeddings[idx],
                payload=payload
            )
            points.append(point)
        
        # Batch halinde y√ºkle
        for i in tqdm(range(0, len(points), batch_size), desc="Qdrant'a y√ºkleniyor"):
            batch_points = points[i:i+batch_size]
            
            try:
                self.client.upsert(
                    collection_name=self.collection_name,
                    points=batch_points
                )
            except Exception as e:
                print(f"Y√ºkleme hatasƒ± (batch {i//batch_size + 1}): {e}")
                continue
        
        print(f"Toplam {len(points)} dok√ºman ba≈üarƒ±yla y√ºklendi!")

    def search(self, query: str, limit: int = 5, score_threshold: float = 0.5) -> List[Dict[str, Any]]:
        """Semantik arama yap"""
        print(f"Arama yapƒ±lƒ±yor: '{query}'")
        
        # Query embedding'i olu≈ütur
        query_embedding = self.model.encode(
            [query], 
            convert_to_numpy=True,
            normalize_embeddings=True
        )[0].tolist()
        
        # Arama yap (query_points kullan)
        search_results = self.client.query_points(
            collection_name=self.collection_name,
            query=query_embedding,
            limit=limit,
            score_threshold=score_threshold
        )
        
        results = []
        for result in search_results.points:
            results.append({
                "score": result.score,
                "payload": result.payload
            })
        
        return results

    def advanced_search(self, query: str, filters: Dict = None, limit: int = 5) -> List[Dict[str, Any]]:
        """Geli≈ümi≈ü filtreleme ile arama"""
        query_embedding = self.model.encode(
            [query], 
            convert_to_numpy=True,
            normalize_embeddings=True
        )[0].tolist()
        
        # Filter olu≈ütur
        filter_conditions = None
        if filters:
            conditions = []
            
            if 'daire' in filters:
                conditions.append(models.FieldCondition(
                    key="daire",
                    match=models.MatchValue(value=filters['daire'])
                ))
            
            if 'karar_turu' in filters:
                conditions.append(models.FieldCondition(
                    key="karar_turu", 
                    match=models.MatchValue(value=filters['karar_turu'])
                ))
            
            if 'year' in filters:
                conditions.append(models.FieldCondition(
                    key="dates",
                    match=models.MatchText(text=str(filters['year']))
                ))
            
            if conditions:
                filter_conditions = models.Filter(must=conditions)
        
        # query_points kullan
        search_results = self.client.query_points(
            collection_name=self.collection_name,
            query=query_embedding,
            query_filter=filter_conditions,
            limit=limit
        )
        
        results = []
        for result in search_results.points:
            results.append({
                "score": result.score,
                "payload": result.payload
            })
        
        return results

    def get_collection_info(self):
        """Collection bilgilerini getir"""
        try:
            info = self.client.get_collection(collection_name=self.collection_name)
            result = {
                "status": str(info.status),
                "vectors_count": info.vectors_count if hasattr(info, 'vectors_count') else 0,
            }
            
            # Mevcut attributelarƒ± kontrol et ve ekle
            if hasattr(info, 'segments_count'):
                result["segments_count"] = info.segments_count
            if hasattr(info, 'indexed_vectors_count'):
                result["indexed_vectors_count"] = info.indexed_vectors_count
            if hasattr(info, 'points_count'):
                result["points_count"] = info.points_count
                
            return result
        except Exception as e:
            return {"error": str(e)}

### main

In [None]:
def main():
    """Ana fonksiyon"""
    # Konfig√ºrasyon
    QDRANT_URL = qdrant_url
    API_KEY = api_key   # Buraya ger√ßek API anahtarƒ±nƒ±zƒ± yazƒ±n
    CSV_FILE = "/home/yapayzeka/ahsen_bulbul/model/langchain/recursive/yargitay_chunks.csv"  # CSV dosya yolu
    
    # Vector DB instance olu≈ütur
    db = LegalDocumentVectorDB(
        qdrant_url=QDRANT_URL,
        api_key=API_KEY,
        collection_name="hukuki_kararlar"
    )
    
    # Collection olu≈ütur
    db.create_collection(recreate=True)
    
    # CSV'yi i≈üle ve y√ºkle
    df = db.process_csv(CSV_FILE)
    if df is not None:
        db.upload_to_qdrant(df, batch_size=50)
    
    # Collection bilgilerini g√∂ster
    info = db.get_collection_info()
    print("\n=== Collection Bilgileri ===")
    print(json.dumps(info, indent=2, ensure_ascii=False))
    
    # √ñrnek aramalar
    print("\n=== √ñrnek Aramalar ===")
    
    # Basit arama (score threshold d√º≈ü√ºr√ºld√º)
    results = db.search("ihtiyati tedbir tazminat", limit=3, score_threshold=0.6)
    print(f"\n1. Arama: 'ihtiyati tedbir tazminat' - {len(results)} sonu√ß")
    for i, result in enumerate(results, 1):
        print(f"\n{i}. Sonu√ß (Skor: {result['score']:.3f})")
        print(f"   Daire: {result['payload']['daire']}")
        print(f"   Esas No: {result['payload']['esas_no']}")
        print(f"   Karar No: {result['payload']['karar_no']}")
        print(f"   Metin: {result['payload']['chunk_text'][:200]}...")
    
    # Filtreli arama
    filters = {"daire": "6. Hukuk Dairesi", "karar_turu": "RED"}
    results2 = db.advanced_search("mahkeme kararƒ±", filters=filters, limit=2)
    print(f"\n2. Filtreli Arama: '6. Hukuk Dairesi + RED' - {len(results2)} sonu√ß")
    for i, result in enumerate(results2, 1):
        print(f"\n{i}. Sonu√ß (Skor: {result['score']:.3f})")
        print(f"   Esas No: {result['payload']['esas_no']}")
        print(f"   Metin: {result['payload']['chunk_text'][:150]}...")

if __name__ == "__main__":
    # Interaktif kullanƒ±m i√ßin ayrƒ± class
    class InteractiveLegalSearch:
        def __init__(self, qdrant_url: str, api_key: str):
            self.db = LegalDocumentVectorDB(qdrant_url, api_key)
        
        def setup_database(self, csv_file: str):
            """Database'i kur"""
            print("Database kurulumu ba≈ülƒ±yor...")
            self.db.create_collection(recreate=True)
            
            df = self.db.process_csv(csv_file)
            if df is not None:
                self.db.upload_to_qdrant(df)
                print("Database kurulumu tamamlandƒ±!")
                return True
            return False
        
        def interactive_search(self):
            """Interaktif arama"""
            while True:
                print("\n" + "="*50)
                print("HUKUKƒ∞ KARAR ARAMA Sƒ∞STEMƒ∞")
                print("="*50)
                
                query = input("\nAramak istediƒüiniz metni girin (√ßƒ±kmak i√ßin 'q'): ")
                if query.lower() == 'q':
                    break
                
                try:
                    limit = int(input("Ka√ß sonu√ß g√∂sterilsin? (varsayƒ±lan 5): ") or "5")
                except:
                    limit = 5
                
                results = self.db.search(query, limit=limit)
                
                if not results:
                    print("‚ùå Sonu√ß bulunamadƒ±.")
                    continue
                
                print(f"\nüîç '{query}' i√ßin {len(results)} sonu√ß bulundu:")
                print("-" * 50)
                
                for i, result in enumerate(results, 1):
                    payload = result['payload']
                    print(f"\nüìÑ {i}. SONU√á (Benzerlik: {result['score']:.3f})")
                    print(f"   üìÇ Daire: {payload['daire']}")
                    print(f"   üìã Esas No: {payload['esas_no']}")
                    print(f"   ‚öñÔ∏è Karar No: {payload['karar_no']}")
                    print(f"   üèõÔ∏è Mahkeme: {payload['mahkeme']}")
                    print(f"   üìÖ Tarihler: {payload['dates']}")
                    print(f"   üìù Chunk: {payload['chunk_index']}/{payload['total_chunks']}")
                    print(f"   üìÑ Metin √ñnizleme:")
                    print(f"      {payload['chunk_text'][:300]}...")
                    print("-" * 30)
    
    # Ana √ßalƒ±≈ütƒ±rma
    main()