In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [1]:
pip install rank_bm25

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install llama_index llama_index.embeddings.huggingface

Collecting llama_index
  Downloading llama_index-0.12.35-py3-none-any.whl.metadata (12 kB)
Collecting llama_index.embeddings.huggingface
  Downloading llama_index_embeddings_huggingface-0.5.4-py3-none-any.whl.metadata (458 bytes)
Collecting llama-index-agent-openai<0.5,>=0.4.0 (from llama_index)
  Downloading llama_index_agent_openai-0.4.7-py3-none-any.whl.metadata (438 bytes)
Collecting llama-index-cli<0.5,>=0.4.1 (from llama_index)
  Downloading llama_index_cli-0.4.1-py3-none-any.whl.metadata (1.5 kB)
Collecting llama-index-core<0.13,>=0.12.35 (from llama_index)
  Downloading llama_index_core-0.12.35-py3-none-any.whl.metadata (2.4 kB)
Collecting llama-index-embeddings-openai<0.4,>=0.3.0 (from llama_index)
  Downloading llama_index_embeddings_openai-0.3.1-py3-none-any.whl.metadata (684 bytes)
Collecting llama-index-indices-managed-llama-cloud>=0.4.0 (from llama_index)
  Downloading llama_index_indices_managed_llama_cloud-0.6.11-py3-none-any.whl.metadata (3.6 kB)
Collecting llama-index

In [1]:
import os
import json
import pandas as pd
import numpy as np
import pickle
import time
import torch
from tqdm import tqdm
from pathlib import Path

# For sparse retrieval
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from rank_bm25 import BM25Plus

# For dense retrieval
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.vector_stores import SimpleVectorStore

# Download required NLTK resources
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

print("Building Hybrid Retriever from Legal Knowledge Base")

[nltk_data] Downloading package punkt to /home/youssef/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /home/youssef/nltk_data...


Building Hybrid Retriever from Legal Knowledge Base


[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/youssef/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## 1. Load the knowledge base previously prepared

In [None]:
def load_knowledge_base(base_dir='/content/drive/MyDrive/legal-rag-assistant/knowledge_base/Codes'):
    """Load the legal knowledge base DataFrame"""
    data = []

    # Track statistics for reporting
    stats = {
        'total_files': 0,
        'processed_files': 0,
        'error_files': 0,
        'total_documents': 0,
        'codes': set()
    }

    # Check if the directory exists
    if not os.path.exists(base_dir):
        print(f"Directory does not exist: {base_dir}")
        return pd.DataFrame(), stats

    # List all code folders
    all_folders = [f for f in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, f))]
    print(f"Found {len(all_folders)} code folders: {', '.join(all_folders)}")

    for code_folder in tqdm(all_folders, desc="Processing code folders"):
        folder_path = os.path.join(base_dir, code_folder)
        if not os.path.isdir(folder_path) or code_folder == 'combined':
            continue

        stats['codes'].add(code_folder)

        all_files = [f for f in os.listdir(folder_path) if f.endswith('.json')]
        stats['total_files'] += len(all_files)

        for file in tqdm(all_files, desc=f"Processing {code_folder} files", leave=False):
            file_path = os.path.join(folder_path, file)
            with open(file_path, 'r', encoding='utf-8') as f:
                try:
                    docs = json.load(f)
                    # If it's a list of articles
                    if isinstance(docs, list):
                        for doc in docs:
                            doc['code'] = code_folder
                            doc['source_file'] = file
                            data.append(doc)
                            stats['total_documents'] += 1
                    # If it's a dict of articles
                    elif isinstance(docs, dict):
                        for k, doc in docs.items():
                            doc['code'] = code_folder
                            doc['source_file'] = file
                            data.append(doc)
                            stats['total_documents'] += 1
                    stats['processed_files'] += 1
                except Exception as e:
                    stats['error_files'] += 1
                    print(f"Error loading {file_path}: {e}")

    df = pd.DataFrame(data)

    # Check for the combined comprehensive knowledge base
    comprehensive_path = os.path.join(base_dir, "comprehensive_knowledge_base.parquet")
    if os.path.exists(comprehensive_path):
        print(f"Found comprehensive knowledge base at {comprehensive_path}")
        df_comprehensive = pd.read_parquet(comprehensive_path)
        print(f"Loading comprehensive knowledge base with {len(df_comprehensive)} documents")
        return df_comprehensive, stats

    return df, stats

# Load the knowledge base
print("Loading legal knowledge base...")
df, stats = load_knowledge_base()

print(f"\nLoaded {len(df)} legal documents from {len(stats['codes'])} legal codes")

Loading legal knowledge base...
Found 6 code folders: constitution_marocaine_2011, code_penale_2018, code_obligation_contrats_2019, code_travail_2011, code_comerce_2019, code_famille_2016


Processing code folders:   0%|          | 0/6 [00:00<?, ?it/s]
Processing constitution_marocaine_2011 files:   0%|          | 0/1 [00:00<?, ?it/s][A
                                                                                   [A
Processing code_penale_2018 files:   0%|          | 0/1 [00:00<?, ?it/s][A
                                                                        [A
Processing code_obligation_contrats_2019 files:   0%|          | 0/1 [00:00<?, ?it/s][A
                                                                                     [A
Processing code_travail_2011 files:   0%|          | 0/1 [00:00<?, ?it/s][A
                                                                         [A
Processing code_comerce_2019 files:   0%|          | 0/1 [00:00<?, ?it/s][A
                                                                         [A
Processing code_famille_2016 files:   0%|          | 0/1 [00:00<?, ?it/s][A
Processing code folders: 100%|██████████| 6/6 [0


Loaded 1285 legal documents from 6 legal codes





In [None]:
df

Unnamed: 0,code,livre,titre,chapitre,section,article_no,text,source_file
0,constitution_marocaine_2011,Préambule,,,,Préambule,Fidèle à son choix irréversible de construire ...,Constitution_marocaine_2011_full.json
1,constitution_marocaine_2011,TITRE PREMIER DISPOSITIONS GENERALES,,,,Article premier,"Le Maroc est une monarchie constitutionnelle, ...",Constitution_marocaine_2011_full.json
2,constitution_marocaine_2011,TITRE PREMIER DISPOSITIONS GENERALES,,,,Article 2,La souveraineté appartient à la Nation qui l'e...,Constitution_marocaine_2011_full.json
3,constitution_marocaine_2011,TITRE PREMIER DISPOSITIONS GENERALES,,,,Article 3,"L'Islam est la religion de l'Etat, qui garanti...",Constitution_marocaine_2011_full.json
4,constitution_marocaine_2011,TITRE PREMIER DISPOSITIONS GENERALES,,,,Article 4,L'emblème du Royaume est le drapeau rouge frap...,Constitution_marocaine_2011_full.json
...,...,...,...,...,...,...,...,...
1280,code_famille_2016,LIVRE VII: DISPOSITIONS TRANSITOIRES ET FINALES,,,,Article 396,Les délais prévus par le présent Code sont des...,code_famille_2016_full.json
1281,code_famille_2016,LIVRE VII: DISPOSITIONS TRANSITOIRES ET FINALES,,,,Article 397,Sont abrogées toutes les dispositions contrair...,code_famille_2016_full.json
1282,code_famille_2016,LIVRE VII: DISPOSITIONS TRANSITOIRES ET FINALES,,,,Article 398,"Demeurent valables, les actes de procédures ef...",code_famille_2016_full.json
1283,code_famille_2016,LIVRE VII: DISPOSITIONS TRANSITOIRES ET FINALES,,,,Article 399,Les décisions prononcées avant la date d'entré...,code_famille_2016_full.json


## 2. Prepare the corpus for both retrieval systems with enhanced metadata capture

In [None]:
def prepare_corpus(df):
    """Prepare corpus for retrieval systems by extracting text and generating IDs with complete metadata"""

    # Identify the primary text field ('article', 'text', or 'content')
    text_fields = ['article', 'text', 'content']
    primary_field = None

    for field in text_fields:
        if field in df.columns and df[field].notna().sum() > 0:
            primary_field = field
            break

    if primary_field is None:
        raise ValueError("No suitable text field found in the data")

    print(f"Using '{primary_field}' as the primary text field")

    # Generate consistent document IDs if they don't exist
    if 'id' not in df.columns:
        df['id'] = [f"doc_{i}" for i in range(len(df))]

    # Create corpus list and ID list
    corpus = []
    doc_ids = []

    # Create metadata for dense retrieval
    documents = []

    # Filter out rows with missing text
    valid_rows = df[df[primary_field].notna()]

    print(f"Found {len(valid_rows)} documents with valid text (out of {len(df)} total)")

    # Important metadata fields to preserve specifically
    important_fields = [
        'article_number', 'article_id', 'title', 'chapter', 'section',
        'reference', 'authority', 'description', 'date', 'version',
        'jurisdiction'
    ]

    # Build corpus and IDs
    for i, row in valid_rows.iterrows():
        # Get text content
        text = str(row[primary_field])

        # Skip empty texts
        if not text or len(text.strip()) < 20:  # Minimum 20 chars to be considered valid
            continue

        # Add to corpus
        corpus.append(text)
        doc_ids.append(row['id'])

        # Create comprehensive metadata dict
        metadata = {
            'id': row['id'],
            'code': row.get('code', ''),
            'source_file': row.get('source_file', '')
        }

        # Process each column in the dataframe to preserve all metadata
        for col in df.columns:
            if col not in ['id', 'code', 'source_file', primary_field]:
                if col in row and pd.notna(row[col]):
                    metadata[col] = row[col]

        # Extract article number from filename or id if not already present
        if 'article_number' not in metadata:
            # Try to extract from source file if available
            if 'source_file' in metadata:
                import re
                file_match = re.search(r'article[_\-]?(\d+[\w\-\.]*)', metadata['source_file'], re.IGNORECASE)
                if file_match:
                    metadata['article_number'] = file_match.group(1)

            # Try to extract from document ID
            if 'article_number' not in metadata:
                id_match = re.search(r'_(\d+)$', row['id'])
                if id_match:
                    metadata['article_number'] = id_match.group(1)

        # Try to find article number in text content if still not found
        if 'article_number' not in metadata:
            article_match = re.search(r'(?:Article|Art\.)\s+(\d+[\w\-\.]*)', text[:100])
            if article_match:
                metadata['article_number'] = article_match.group(1)

        # Process code name to make it more readable
        if 'code' in metadata and metadata['code']:
            metadata['code_display'] = metadata['code'].replace('_', ' ').title()

        # Create document for dense retrieval
        doc = Document(
            text=text,
            metadata=metadata
        )
        documents.append(doc)

    print(f"Prepared corpus with {len(corpus)} documents")

    # Create lookup dictionaries
    corpus_lookup = {doc_id: text for doc_id, text in zip(doc_ids, corpus)}

    # Print some sample document metadata to verify
    if documents:
        print("\nSample document metadata:")
        sample = documents[0].metadata
        for key, value in sample.items():
            print(f"  {key}: {value}")

    return {
        "corpus": corpus,
        "doc_ids": doc_ids,
        "corpus_lookup": corpus_lookup,
        "documents": documents
    }

# Prepare corpus with enhanced metadata
corpus_data = prepare_corpus(df)

Using 'text' as the primary text field
Found 1285 documents with valid text (out of 1285 total)
Prepared corpus with 1275 documents

Sample document metadata:
  id: doc_0
  code: constitution_marocaine_2011
  source_file: Constitution_marocaine_2011_full.json
  livre: Préambule
  article_no: Préambule
  article_number: 0
  code_display: Constitution Marocaine 2011


In [None]:
import pickle
import os

# Create the directory if it doesn't exist
os.makedirs("D:/a_PROJECTS/legal-rag-assistant/Models", exist_ok=True)

# Save the corpus_data dictionary to a pickle file
with open("/content/drive/MyDrive/legal-rag-assistant/knowledge_base/vector_store/corpus_lookup.pkl", "wb") as f:
    pickle.dump(corpus_data, f)

print("Corpus lookup saved successfully")

Corpus lookup saved successfully


## 3. Create BM25 Plus Sparse Retrieval Model

In [2]:
def preprocess_text(text, language='french'):
    """Preprocess text for sparse retrieval"""
    # Lowercase
    text = text.lower()

    # Tokenize
    tokens = word_tokenize(text, language=language)

    # Remove stopwords and punctuation
    french_stopwords = set(stopwords.words(language))
    tokens = [token for token in tokens if token.isalnum() and token not in french_stopwords]

    return tokens

class BM25PlusRetriever:
    """BM25 Plus based retrieval model."""
    def __init__(self):
        self.bm25 = None
        self.tokenized_corpus = None
        self.doc_ids = None

    def fit(self, corpus, doc_ids):
        """Build the BM25 index."""
        print("Tokenizing corpus for BM25 Plus...")
        self.tokenized_corpus = [preprocess_text(doc) for doc in tqdm(corpus)]
        self.doc_ids = doc_ids

        print("Building BM25 Plus index...")
        self.bm25 = BM25Plus(self.tokenized_corpus)
        print("BM25 Plus index built successfully")

    def retrieve(self, query, top_k=5):
        """Retrieve top-k relevant documents."""
        query_tokens = preprocess_text(query)
        scores = self.bm25.get_scores(query_tokens)
        top_indices = np.argsort(scores)[::-1][:top_k]

        results = [(self.doc_ids[idx], scores[idx]) for idx in top_indices]
        return results

    def save(self, path):
        """Save the model to disk."""
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'wb') as f:
            pickle.dump({
                'bm25': self.bm25,
                'doc_ids': self.doc_ids
            }, f)
        print(f"BM25 Plus model saved to {path}")

    @classmethod
    def load(cls, path):
        """Load the model from disk."""
        with open(path, 'rb') as f:
            data = pickle.load(f)
            model = cls()
            model.bm25 = data['bm25']
            model.doc_ids = data['doc_ids']
            print(f"BM25 Plus model loaded from {path}")
            return model

## Create sparse model directory
# sparse_model_dir = '/mnt/d/a_PROJECTS/legal-rag-assistant/knowledge_base/vector_store/sparse'
# os.makedirs(sparse_model_dir, exist_ok=True)

# # Build or load BM25 Plus model
# bm25_plus_path = f"{sparse_model_dir}/bm25_plus.pkl"
# if os.path.exists(bm25_plus_path):
#     print("Loading existing BM25 Plus model...")
#     sparse_model = BM25PlusRetriever.load(bm25_plus_path)
# else:
#     print("Building new BM25 Plus model...")
#     sparse_model = BM25PlusRetriever()
#     sparse_model.fit(corpus_data['corpus'], corpus_data['doc_ids'])
#     sparse_model.save(bm25_plus_path)

## 4. Create LlamaIndex Dense Retrieval Model


In [4]:
class DenseRetriever:
    """Dense retrieval model using LlamaIndex."""
    def __init__(self, embed_model_name="intfloat/multilingual-e5-large"):
        """Initialize with a multilingual embedding model that works well for French"""
        # Check if GPU is available
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device} for embeddings")

        self.embed_model = HuggingFaceEmbedding(
            model_name=embed_model_name,
            device=device
        )
        Settings.embed_model = self.embed_model
        self.index = None
        self.doc_ids = None

    def fit(self, documents):
        """Build the vector index from documents."""
        print("Building dense vector index...")
        start_time = time.time()

        # Store doc IDs for retrieval
        self.doc_ids = [doc.metadata['id'] for doc in documents]

        # Build index
        vector_store = SimpleVectorStore()
        self.index = VectorStoreIndex.from_documents(
            documents,
            vector_store=vector_store,
            show_progress=True
        )

        print(f"Vector index built in {time.time() - start_time:.2f} seconds")

    def retrieve(self, query, top_k=5):
        """Retrieve top-k relevant documents."""
        retriever = self.index.as_retriever(similarity_top_k=top_k)
        results = retriever.retrieve(query)

        retrieved_docs = []
        for node in results:
            doc_id = node.metadata["id"]
            score = node.score if hasattr(node, "score") else 0.0
            retrieved_docs.append((doc_id, score))

        return retrieved_docs

    def save(self, path):
        """Save the index to disk."""
        os.makedirs(path, exist_ok=True)
        self.index.storage_context.persist(persist_dir=path)

        # Save doc_ids separately since they're not stored in the index
        with open(os.path.join(path, "doc_ids.pkl"), "wb") as f:
            pickle.dump(self.doc_ids, f)

        print(f"Dense vector index saved to {path}")

    @classmethod
    def load(cls, path, embed_model_name="intfloat/multilingual-e5-large"):
        """Load the index from disk."""
        from llama_index.core import load_index_from_storage
        from llama_index.core import StorageContext

        if not os.path.exists(path):
            raise FileNotFoundError(f"Path not found: {path}")

        model = cls(embed_model_name=embed_model_name)
        storage_context = StorageContext.from_defaults(persist_dir=path)
        model.index = load_index_from_storage(storage_context)

        # Load doc_ids
        with open(os.path.join(path, "doc_ids.pkl"), "rb") as f:
            model.doc_ids = pickle.load(f)

        print(f"Dense vector index loaded from {path}")
        return model

# # Create dense model directory
# dense_model_dir = '/mnt/d/a_PROJECTS/legal-rag-assistant/knowledge_base/vector_store/dense'
# dense_index_path = f"{dense_model_dir}/legal_dense_index"

# # Build or load Dense model
# if os.path.exists(dense_index_path):
#     print("Loading existing Dense vector index...")
#     dense_model = DenseRetriever.load(dense_index_path)
# else:
#     print("Building new Dense vector index...")
#     dense_model = DenseRetriever()
#     dense_model.fit(corpus_data['documents'])
#     dense_model.save(dense_index_path)

## 5. Reciprocal Rank Fusion Retriever

In [5]:
class ReciprocalRankFusionRetriever:
    """Implements Reciprocal Rank Fusion for combining multiple retrieval methods."""

    def __init__(self, retrievers, k=20):
        """
        Args:
            retrievers: List of retriever models
            k: Constant to prevent items with very low ranks from having too much influence
        """
        self.retrievers = retrievers
        self.k = k
        self.name = f"RRF(k={k})"

    def retrieve(self, query, top_k=5, per_retriever_k=50):
        """Retrieve documents using RRF ranking."""
        # Get results from all retrievers
        all_results = []
        for retriever in self.retrievers:
            results = retriever.retrieve(query, top_k=per_retriever_k)
            all_results.append(results)

        # Calculate RRF scores
        rrf_scores = {}

        for result_set in all_results:
            for rank, (doc_id, _) in enumerate(result_set):
                if doc_id not in rrf_scores:
                    rrf_scores[doc_id] = 0
                # RRF formula: 1 / (k + rank)
                rrf_scores[doc_id] += 1 / (self.k + rank + 1)  # +1 because rank is 0-indexed

        # Sort by RRF score and return top-k
        sorted_results = sorted(rrf_scores.items(), key=lambda x: x[1], reverse=True)
        return sorted_results[:top_k]

### 5.1 Initialise and save the hybrid strategy

In [None]:
best_hybrid = ReciprocalRankFusionRetriever([sparse_model, dense_model], k=20)

In [None]:
# Let's save the hybrid retrievers
hybrid_model_dir = '/content/drive/MyDrive/legal-rag-assistant/hybrid-retrieval'
os.makedirs(hybrid_model_dir, exist_ok=True)

# Save configuration for hybrid models
hybrid_config = {
    'rrf': {
        'type': 'rrf',
        'k': 20
    }
}

with open(f"{hybrid_model_dir}/hybrid_config.json", 'w') as f:
    json.dump(hybrid_config, f, indent=2)

print("Saved hybrid retriever configurations")

Saved hybrid retriever configurations


### 5.2 Test the hybrid retrieval strategy

In [None]:
def retrieve_and_display_legal_documents(query, top_k=3, include_text=True, max_text_length=500):
    """
    Retrieve the top k documents for a legal question using the best hybrid retriever
    and display them in an organized way with complete metadata.

    Args:
        query: User's legal question
        top_k: Number of documents to retrieve
        include_text: Whether to include the document text in the display
        max_text_length: Maximum length of displayed text (if include_text is True)

    Returns:
        Retrieved documents (for further processing if needed)
    """
    print(f"\n{'='*80}")
    print(f"QUERY: {query}")
    print(f"{'='*80}\n")

    # Retrieve documents using the best hybrid retriever
    start_time = time.time()
    results = best_hybrid.retrieve(query, top_k=top_k)
    retrieval_time = time.time() - start_time

    print(f"Retrieved {len(results)} documents in {retrieval_time:.2f} seconds\n")

    # Process and display each document
    retrieved_docs = []
    for i, (doc_id, score) in enumerate(results):
        # Get document text
        document_text = corpus_data['corpus_lookup'].get(doc_id, "Document text not found")

        # Get document metadata
        doc_idx = corpus_data['doc_ids'].index(doc_id) if doc_id in corpus_data['doc_ids'] else -1

        if doc_idx >= 0 and doc_idx < len(corpus_data['documents']):
            metadata = corpus_data['documents'][doc_idx].metadata
        else:
            metadata = {'id': doc_id}

        # Save document info
        retrieved_docs.append({
            'id': doc_id,
            'text': document_text,
            'score': score,
            'metadata': metadata
        })

        # Get formatted code name for display
        if 'code_display' in metadata:
            code_name = metadata['code_display']
        elif 'code' in metadata:
            code_name = metadata['code'].replace('_', ' ').title()
        else:
            code_name = "Unknown Code"

        # Get article number for header
        article_info = ""
        if 'article_number' in metadata:
            article_info = f"Article {metadata['article_number']} | "
        elif 'article_id' in metadata:
            article_info = f"Article {metadata['article_id']} | "

        # Display document header with core info
        print(f"\n{'-'*80}")
        print(f"DOCUMENT {i+1} | Score: {score:.4f} | {article_info}{code_name}")
        print(f"{'-'*80}")

        # Display the most important metadata fields first
        print("SOURCE INFORMATION:")

        # Display legal code (always important)
        if 'code' in metadata:
            display_code = metadata.get('code_display', metadata['code'].replace('_', ' ').title())
            print(f"• Legal Code:        {display_code}")

        # Display article number or reference (key identifiers)
        article_shown = False
        if 'article_number' in metadata:
            print(f"• Article Number:    {metadata['article_number']}")
            article_shown = True
        if 'article_id' in metadata and not article_shown:
            print(f"• Article ID:        {metadata['article_id']}")
            article_shown = True
        if 'reference' in metadata and not article_shown:
            print(f"• Reference:         {metadata['reference']}")

        # Display section/chapter info if available
        if 'section' in metadata:
            print(f"• Section:           {metadata['section']}")
        if 'chapter' in metadata:
            print(f"• Chapter:           {metadata['chapter']}")
        if 'title' in metadata:
            print(f"• Title:             {metadata['title']}")

        # Display authority/jurisdiction
        if 'authority' in metadata:
            print(f"• Authority:         {metadata['authority']}")
        if 'jurisdiction' in metadata:
            print(f"• Jurisdiction:      {metadata['jurisdiction']}")

        # Display version/date info
        if 'version' in metadata:
            print(f"• Version:           {metadata['version']}")
        if 'date' in metadata:
            print(f"• Date:              {metadata['date']}")

        # Display description if available
        if 'description' in metadata:
            desc = metadata['description']
            if len(desc) > 100:
                desc = desc[:97] + "..."
            print(f"• Description:       {desc}")

        # Display source file info
        if 'source_file' in metadata:
            print(f"• Source File:       {metadata['source_file']}")

        # Display document ID as fallback or for reference
        print(f"• Document ID:       {doc_id}")

        # Display any remaining metadata fields
        excluded_fields = [
            'id', 'code', 'code_display', 'article_number', 'article_id', 'reference',
            'section', 'chapter', 'title', 'authority', 'jurisdiction',
            'version', 'date', 'description', 'source_file'
        ]

        other_fields = [k for k in metadata.keys() if k not in excluded_fields]

        if other_fields:
            print("\nADDITIONAL METADATA:")
            for field in other_fields:
                value = metadata[field]
                # Truncate long values
                if isinstance(value, str) and len(value) > 100:
                    value = value[:97] + "..."
                print(f"• {field.replace('_', ' ').title()}:".ljust(20) + f" {value}")

        # Display document text if requested
        if include_text:
            print("\nDOCUMENT TEXT:")
            if len(document_text) > max_text_length:
                display_text = document_text[:max_text_length] + "..."
            else:
                display_text = document_text
            print(f"{display_text}")

    # Display citation format for all documents
    print(f"\n{'='*80}")
    print("CITATIONS:")
    for i, doc in enumerate(retrieved_docs):
        citation = format_legal_citation(doc['metadata'])
        print(f"[{i+1}] {citation}")
    print(f"{'='*80}\n")

    return retrieved_docs

def format_legal_citation(metadata):
    """
    Format metadata into a proper legal citation string.
    Formats citations in a standardized format depending on available metadata.
    """
    parts = []

    # Add code/law name if available with nice formatting
    if 'code_display' in metadata:
        parts.append(metadata['code_display'])
    elif 'code' in metadata:
        parts.append(metadata['code'].replace('_', ' ').title())

    # Add article reference with proper formatting
    if 'article_number' in metadata:
        parts.append(f"Article {metadata['article_number']}")
    elif 'article_id' in metadata:
        parts.append(f"Article {metadata['article_id']}")
    elif 'reference' in metadata:
        parts.append(f"{metadata['reference']}")

    # Add section/chapter if available
    section_info = []
    if 'section' in metadata:
        section_info.append(f"Section {metadata['section']}")
    if 'chapter' in metadata:
        section_info.append(f"Chapter {metadata['chapter']}")

    if section_info:
        parts.append(", ".join(section_info))

    # Add authority/jurisdiction
    if 'authority' in metadata:
        parts.append(f"{metadata['authority']}")

    # Add date/version if available
    if 'date' in metadata:
        parts.append(f"({metadata['date']})")
    elif 'version' in metadata:
        parts.append(f"({metadata['version']})")

    # If we still don't have any parts, use ID as fallback
    if not parts and 'id' in metadata:
        parts.append(f"Document ID: {metadata['id']}")

    return " — ".join(parts)

# Example usage
test_query = "Comment fonctionne la procédure de divorce par consentement mutuel?"
retrieved_docs = retrieve_and_display_legal_documents(test_query, top_k=3)


QUERY: Comment fonctionne la procédure de divorce par consentement mutuel?

Retrieved 3 documents in 0.11 seconds


--------------------------------------------------------------------------------
DOCUMENT 1 | Score: 0.0911 | Article 1007 | Code Famille 2016
--------------------------------------------------------------------------------
SOURCE INFORMATION:
• Legal Code:        Code Famille 2016
• Article Number:    1007
• Source File:       code_famille_2016_full.json
• Document ID:       doc_1007

ADDITIONAL METADATA:
• Livre:             LIVRE II: DE LA DISSOLUTION DU PACTE DE MARIAGE ET DE SES EFFETS
• Titre:             TITRE VI: DES CATEGORIES DE DIVORCE SOUS CONTROLE JUDICIAIRE ET DE DIVORCE JUDICIAIRE
• Chapitre:          CHAPITRE II: DU DIVORCE RÉVOCABLE (RIJII) ET DU DIVORCE IRRÉVOCABLE (BAIN)
• Article No:        Article 123

DOCUMENT TEXT:
Tout divorce du fait de l'époux est révocable, à l'exception du divorce
prononcé à la suite de deux précédents divorces successifs, du 

## 6. RAG legal Piepline

In [None]:
pip install vllm

Collecting vllm
  Downloading vllm-0.8.5.post1-cp38-abi3-manylinux1_x86_64.whl.metadata (14 kB)
Collecting blake3 (from vllm)
  Downloading blake3-1.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting fastapi>=0.115.0 (from fastapi[standard]>=0.115.0->vllm)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting prometheus-fastapi-instrumentator>=7.0.0 (from vllm)
  Downloading prometheus_fastapi_instrumentator-7.1.0-py3-none-any.whl.metadata (13 kB)
Collecting lm-format-enforcer<0.11,>=0.10.11 (from vllm)
  Downloading lm_format_enforcer-0.10.11-py3-none-any.whl.metadata (17 kB)
Collecting llguidance<0.8.0,>=0.7.9 (from vllm)
  Downloading llguidance-0.7.19-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting outlines==0.1.11 (from vllm)
  Downloading outlines-0.1.11-py3-none-any.whl.metadata (17 kB)
Collecting lark==1.2.2 (from vllm)
  Downloading lark-1.2.2-py3-none-any.whl.metadata (1.8 kB)

In [7]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [6]:
import os
import time
import json
import torch
import pandas as pd
from pathlib import Path
from typing import List, Dict, Any, Tuple, Optional
import pickle

# # For hybrid retrieval
# from Models.sparse.bm25_retriever import BM25PlusRetriever
# from Models.dense.dense_retriever import DenseRetriever
# from Models.hybrid.hybrid_retriever import ReciprocalRankFusionRetriever


In [7]:
# For VLLM inference
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

INFO 05-13 00:04:04 [importing.py:53] Triton module has been replaced with a placeholder.
INFO 05-13 00:04:04 [__init__.py:239] Automatically detected platform cuda.


In [8]:
class LegalRAGPipeline:
    """
    End-to-end RAG pipeline for legal question answering using:
    - Hybrid retrieval (BM25 + Dense)
    - Fine-tuned Qwen1.5 model
    """

    def __init__(
        self,
        model_path: str = "/mnt/d/a_PROJECTS/legal-rag-assistant/FineTuned-Qwen2/qwen-legal-assistant/merged_16bit",
        sparse_model_path: str = "/mnt/d/a_PROJECTS/legal-rag-assistant/knowledge_base/vector_store/sparse/bm25_plus.pkl",
        dense_model_path: str = "/mnt/d/a_PROJECTS/legal-rag-assistant/knowledge_base/vector_store/dense/legal_dense_index",
        hybrid_config_path: str = "/mnt/d/a_PROJECTS/legal-rag-assistant/hybrid-retrieval/hybrid_config.json",
        corpus_lookup_path: str = "/mnt/d/a_PROJECTS/legal-rag-assistant/knowledge_base/vector_store/corpus_lookup.pkl",
        max_gpu_memory: float = 0.85,
        top_k: int = 3
    ):
        """Initialize the legal RAG pipeline with the specified models and parameters"""
        self.top_k = top_k

        print("Initializing Legal RAG Pipeline...")

        # Step 1: Load the lookup dictionary for document retrieval
        print("Loading corpus lookup...")
        import pickle
        with open(corpus_lookup_path, 'rb') as f:
            self.corpus_data = pickle.load(f)

        print(f"Loaded corpus with {len(self.corpus_data['doc_ids'])} documents")

        # Step 2: Load the retrieval models
        print("Loading retrieval models...")
        self._load_retrieval_models(sparse_model_path, dense_model_path, hybrid_config_path)

        # Step 3: Load the LLM for generation
        print("Loading Qwen model with VLLM...")
        self._load_llm(model_path, max_gpu_memory)

        print("Legal RAG Pipeline initialized successfully!")

    def _load_retrieval_models(self, sparse_model_path, dense_model_path, hybrid_config_path):
        """Load and configure retrieval models"""
        # Load BM25Plus retriever
        print("Loading BM25+ retriever...")
        self.sparse_model = BM25PlusRetriever.load(sparse_model_path)

        # Load Dense retriever
        print("Loading dense retriever...")
        self.dense_model = DenseRetriever.load(dense_model_path)

        # Load hybrid configuration
        with open(hybrid_config_path, 'r') as f:
            hybrid_config = json.load(f)

        # Create RRF hybrid retriever with configured k parameter
        self.hybrid_retriever = ReciprocalRankFusionRetriever(
            [self.sparse_model, self.dense_model],
            k=hybrid_config.get('rrf_k', 60)
        )

    def _load_llm(self, model_path, max_gpu_memory):
        """Load the LLM model with VLLM for efficient inference"""
        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

        # Check for GPU and determine appropriate params
        if torch.cuda.is_available():
            gpu_name = torch.cuda.get_device_name(0)
            print(f"Using GPU: {gpu_name}")

            # Lower context window on lower-end GPUs
            max_model_len = 4096

            # Load model with VLLM
            self.llm = LLM(
                model=model_path,
                tensor_parallel_size=1,  # Using single GPU
                gpu_memory_utilization=max_gpu_memory,
                max_model_len=max_model_len,
                trust_remote_code=True
            )
        else:
            print("WARNING: No GPU detected. This will be extremely slow.")
            self.llm = LLM(
                model=model_path,
                tensor_parallel_size=1,
                max_model_len=2048,  # Conservative for CPU
                trust_remote_code=True
            )

    def _create_legal_system_prompt(self):
        """Create the system prompt for the legal assistant"""
        return """You are LegalAssistant, a professional legal advisor specializing in Moroccan law.

When answering questions:
- Base your answers strictly on the provided legal context
- Cite specific articles mentioned in the context by code name and article number
- If information is insufficient, state clearly "Based on the provided context, I don't have enough information to answer this question completely" rather than guessing
- Be concise and direct, avoiding unnecessary elaboration
- Use clear language that non-lawyers can understand
- Structure complex answers with numbered points for clarity
- Maintain a professional, helpful tone throughout

Your goal is to provide accurate legal information without hallucination or speculation."""

    def retrieve_documents(self, query: str) -> List[Dict]:
        """Retrieve relevant documents using hybrid retrieval"""
        # Get document IDs and scores from the hybrid retriever
        results = self.hybrid_retriever.retrieve(query, top_k=self.top_k)

        # Format results
        documents = []
        for doc_id, score in results:
            document_text = self.corpus_data['corpus_lookup'].get(doc_id, "")

            # Find the original document to get metadata
            doc_idx = self.corpus_data['doc_ids'].index(doc_id) if doc_id in self.corpus_data['doc_ids'] else -1

            if doc_idx >= 0 and doc_idx < len(self.corpus_data['documents']):
                metadata = self.corpus_data['documents'][doc_idx].metadata
            else:
                metadata = {'id': doc_id}

            documents.append({
                'id': doc_id,
                'text': document_text,
                'score': score,
                'metadata': metadata
            })

        return documents

    def format_context(self, documents: List[Dict]) -> str:
        """Format retrieved documents into a context string for the LLM"""
        context_parts = []

        for i, doc in enumerate(documents):
            # Format article reference
            article_ref = self._format_article_reference(doc['metadata'])

            # Add to context with clear separation
            context_parts.append(f"[Document {i+1}] {article_ref}\n{doc['text']}")

        return "\n\n" + "\n\n".join(context_parts)

    def _format_article_reference(self, metadata: Dict) -> str:
        """Format article reference with code name and article number"""
        parts = []

        # Add code name with formatting
        if 'code_display' in metadata:
            parts.append(metadata['code_display'])
        elif 'code' in metadata:
            parts.append(metadata['code'].replace('_', ' ').title())

        # Add article number
        if 'article_number' in metadata:
            parts.append(f"Article {metadata['article_number']}")
        elif 'article_id' in metadata:
            parts.append(f"Article {metadata['article_id']}")
        elif 'reference' in metadata:
            parts.append(metadata['reference'])

        return " - ".join(parts) if parts else "Unknown Reference"

    def format_legal_citation(self, metadata: Dict) -> str:
        """Format metadata into a proper legal citation string"""
        parts = []

        # Add code/law name if available with nice formatting
        if 'code_display' in metadata:
            parts.append(metadata['code_display'])
        elif 'code' in metadata:
            parts.append(metadata['code'].replace('_', ' ').title())

        # Add article reference with proper formatting
        if 'article_number' in metadata:
            parts.append(f"Article {metadata['article_number']}")
        elif 'article_id' in metadata:
            parts.append(f"Article {metadata['article_id']}")
        elif 'reference' in metadata:
            parts.append(f"{metadata['reference']}")

        # Add section/chapter if available
        section_info = []
        if 'section' in metadata:
            section_info.append(f"Section {metadata['section']}")
        if 'chapter' in metadata:
            section_info.append(f"Chapter {metadata['chapter']}")

        if section_info:
            parts.append(", ".join(section_info))

        # Add authority/jurisdiction
        if 'authority' in metadata:
            parts.append(f"{metadata['authority']}")

        # Add date/version if available
        if 'date' in metadata:
            parts.append(f"({metadata['date']})")
        elif 'version' in metadata:
            parts.append(f"({metadata['version']})")

        # If we still don't have any parts, use ID as fallback
        if not parts and 'id' in metadata:
            parts.append(f"Document ID: {metadata['id']}")

        return " — ".join(parts)

    def answer_question(self, query: str, stream: bool = True) -> Tuple[str, List[Dict], float]:
        """
        Answer a legal question using RAG pipeline

        Args:
            query: The legal question to answer
            stream: Whether to stream the response (print tokens as they're generated)

        Returns:
            response: The LLM's response
            documents: The retrieved documents
            timing: Dict of timing information
        """
        timing = {}

        # Step 1: Retrieve relevant documents
        start_retrieve = time.time()
        documents = self.retrieve_documents(query)
        timing['retrieve'] = time.time() - start_retrieve

        # Step 2: Format context for the LLM
        start_format = time.time()
        context = self.format_context(documents)
        timing['format'] = time.time() - start_format

        # Step 3: Create prompt for LLM
        system_prompt = self._create_legal_system_prompt()
        prompt = f"# Question: {query}\n\n# Relevant legal context:{context}\n\nPlease answer based only on this information."

        # Create chat messages
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ]

        # Apply chat template
        text = self.tokenizer.apply_chat_template(messages, tokenize=False)

        # Step 4: Generate answer with the LLM
        sampling_params = SamplingParams(
            temperature=0.4,
            top_p=0.80,
            repetition_penalty=1.2,
            top_k=50,
            max_tokens=512,
        )

        # Start timing
        start_generate = time.time()

        # Generate with or without streaming
        if stream:
            print("\n\033[1mGenerating response...\033[0m")
            output = ""
            for output_obj in self.llm.generate(text, sampling_params=sampling_params):
                new_text = output_obj.outputs[0].text
                new_token = new_text[len(output):]
                output = new_text
                print(new_token, end="", flush=True)
            print("\n")
            response = output
        else:
            outputs = self.llm.generate(text, sampling_params=sampling_params)
            response = outputs[0].outputs[0].text

        timing['generate'] = time.time() - start_generate
        timing['total'] = timing['retrieve'] + timing['format'] + timing['generate']

        return response, documents, timing

    def display_results(self, query: str, response: str, documents: List[Dict], timing: Dict,
                        show_timing: bool = True, show_full_docs: bool = True):
        """
        Display the results in a well-formatted way

        Args:
            query: The original question
            response: The generated response
            documents: The retrieved documents
            timing: Dictionary with timing information
            show_timing: Whether to display timing information
            show_full_docs: Whether to display full document text or just snippets
        """
        # Print the query
        print(f"\n{'='*100}")
        print(f"\033[1mQUESTION:\033[0m {query}")
        print(f"{'='*100}\n")

        # Print the answer
        print(f"\033[1mANSWER:\033[0m\n{response}")
        print(f"\n{'-'*100}\n")

        # Print the sources
        print(f"\033[1mSOURCES:\033[0m\n")
        for i, doc in enumerate(documents):
            # Format citation
            citation = self.format_legal_citation(doc['metadata'])

            print(f"\033[1m[{i+1}] {citation}\033[0m")

            if show_full_docs:
                print(f"\nRelevance Score: {doc['score']:.4f}\n")
                print(doc['text'])
                print(f"\n{'-'*50}\n")
            else:
                # Show snippet
                snippet = doc['text'][:300] + "..." if len(doc['text']) > 300 else doc['text']
                print(f"{snippet}\n")

        # Print timing information if requested
        if show_timing:
            print(f"\n{'='*100}")
            print(f"\033[1mPERFORMANCE METRICS:\033[0m")
            print(f"Retrieval: {timing['retrieve']:.3f}s")
            print(f"Context formatting: {timing['format']:.3f}s")
            print(f"Response generation: {timing['generate']:.3f}s")
            print(f"Total time: {timing['total']:.3f}s")

        print(f"\n{'='*100}")

def main():
    """Interactive demo of the Legal RAG Pipeline"""
    print("\n\033[1m====== Legal RAG Assistant ======\033[0m\n")

    # Initialize the pipeline
    pipeline = LegalRAGPipeline()

    print("\nLegal RAG Assistant is ready! Ask a legal question (or type 'quit' to exit).\n")

    while True:
        query = input("\033[1mQuestion:\033[0m ")

        if query.lower() in ('quit', 'exit', 'q'):
            print("\nThank you for using Legal RAG Assistant. Goodbye!")
            break

        # Answer the question
        response, documents, timing = pipeline.answer_question(query, stream=True)

        # Display the results
        pipeline.display_results(query, response, documents, timing)

In [9]:
pipeline = LegalRAGPipeline()

Initializing Legal RAG Pipeline...
Loading corpus lookup...
Loaded corpus with 1275 documents
Loading retrieval models...
Loading BM25+ retriever...
BM25 Plus model loaded from /mnt/d/a_PROJECTS/legal-rag-assistant/knowledge_base/vector_store/sparse/bm25_plus.pkl
Loading dense retriever...
Using device: cuda for embeddings


model.safetensors:  44%|####4     | 1.79G/4.03G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/418 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/201 [00:00<?, ?B/s]

Dense vector index loaded from /mnt/d/a_PROJECTS/legal-rag-assistant/knowledge_base/vector_store/dense/legal_dense_index
Loading Qwen model with VLLM...
Using GPU: NVIDIA GeForce GTX 1650
INFO 05-13 00:06:19 [config.py:717] This model supports multiple tasks: {'generate', 'embed', 'classify', 'reward', 'score'}. Defaulting to 'generate'.
INFO 05-13 00:06:19 [llm_engine.py:240] Initializing a V0 LLM engine (v0.8.5.post1) with config: model='/mnt/d/a_PROJECTS/legal-rag-assistant/FineTuned-Qwen2/qwen-legal-assistant/merged_16bit', speculative_config=None, tokenizer='/mnt/d/a_PROJECTS/legal-rag-assistant/FineTuned-Qwen2/qwen-legal-assistant/merged_16bit', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_ea

Loading pt checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 05-13 00:07:43 [loader.py:458] Loading weights took 76.88 seconds
INFO 05-13 00:07:44 [model_runner.py:1140] Model loading took 3.4654 GiB and 80.589109 seconds
INFO 05-13 00:08:19 [worker.py:287] Memory profiling takes 34.78 seconds
INFO 05-13 00:08:19 [worker.py:287] the current vLLM instance can use total_gpu_memory (4.00GiB) x gpu_memory_utilization (0.85) = 3.40GiB
INFO 05-13 00:08:19 [worker.py:287] model weights take 3.47GiB; non_torch_memory takes -2.40GiB; PyTorch activation peak memory takes 1.40GiB; the rest of the memory reserved for KV Cache is 0.93GiB.
INFO 05-13 00:08:19 [executor_base.py:112] # cuda blocks: 319, # CPU blocks: 1365
INFO 05-13 00:08:19 [executor_base.py:117] Maximum concurrency for 4096 tokens per request: 1.25x
INFO 05-13 00:08:25 [model_runner.py:1450] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI. I

Capturing CUDA graph shapes:   0%|          | 0/35 [00:00<?, ?it/s]

INFO 05-13 00:10:01 [model_runner.py:1592] Graph capturing finished in 96 secs, took 0.00 GiB
INFO 05-13 00:10:01 [llm_engine.py:437] init engine (profile, create kv cache, warmup model) took 137.11 seconds
Legal RAG Pipeline initialized successfully!


In [10]:
query = "quelle est la sanction si un outrage à un magistrat est commis lors d'une audience au tribunal ?"

# Answer the question
response, documents, timing = pipeline.answer_question(query, stream=True)


[1mGenerating response...[0m


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

assistant
# Answer: Si vous êtes un magistrat : Vous pouvez être puni d’une amende de 250 à 500 EUR (pour toute personne) ; Vous devez payer cette amende; Vous recevez une amende de 750 à 1000 EUR (par exemple). Pour plus d’informations, voyez notre fiche « Qu’est-ce qui se passe si je suis un magistrat ? ». Si vous êtes un fonctionnaire public : Vous pouvez être puni d’un emprisonnement de 1 à 2 ans (et d’une amende de 250 à 500 EUR); Vous devez payer cette amende; Vous recevrez une amende de 750 à 1000 EUR (par exemple). Pour plus d’information, voyez notre fiche « Qu'est-ce qui se passe s’il y a un service public? » Si vous êtes un commandante : Vous pouvez être puni d’une amende de 250 à 500 EUR (pour toutes personnes); Vous devez payer cette amende; Vous recevrez une amende de 750 à 1000 EUR (par exemple). Pour plus d’informations, voyez notre fiche « Qu’est-ce qui se passe si j’ai commis un crime contre un enfant mineur ? ». Si vous êtes un militaire : Vous pouvez être puni d’une

In [11]:
query = "Quels sont les cas où une femme peut demander le divorce sans l'accord du mari ?"

# Answer the question
response, documents, timing = pipeline.answer_question(query, stream=True)


[1mGenerating response...[0m


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

KeyboardInterrupt: 

In [None]:

query = "Quelles sont les peines prévues pour le vol simple"

# Answer the question
response, documents, timing = pipeline.answer_question(query, stream=True)


[1mGenerating response...[0m


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

assistant
# Answer: Les peines prévues pour le vol simple sont différentes selon le type de vol. Pour le vol simple, les peines prévues sont les suivantes : Peine de réclusion : 2 ans à 10 ans de prison, ou 1 an à 2 ans de prison, si la peine de réclusion est inférieure à 1 an. Peine d'exécution : 1 an à 5 ans de prison, ou 1 an à 2 ans de prison, si la peine d'exécution est inférieure à 1 an. Peine d'obligeer ou de faire des actes de force : 1 an à 5 ans de prison, ou 1 an à 2 ans de prison, si la peine d'obligeer ou de faire des actes de force est inférieure à 1 an. Peine de dégradation de la personne : 1 an à 5 ans de prison, ou 1 an à 2 ans de prison, si la peine de dégradation de la personne est inférieure à 1 an. Peine de dégradation de la personne : 1 an à 5 ans de prison, ou 1 an à 2 ans de prison, si la peine de dégradation de la personne est inférieure à 1 an. Peine de dégradation de la personne : 1 an à 5 ans de prison, ou 1 an à 2 ans de prison, si la peine de dégradation d

In [None]:

query = "Quelles sont les peines prévues pour le vol simple selon le Code pénal marocain ?"

# Answer the question
response, documents, timing = pipeline.answer_question(query, stream=True)


[1mGenerating response...[0m


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

assistant
# Answer: Avant d’aller plus loin Seuls les victimes de violence sexuelle et les victimes de violence psychologique sont soumises aux peines. Les victimes de violence sexuelle sont soumises aux peines de 1 à 3 ans de prison. Les victimes de violence psychologique sont soumises aux peines de 1 à 3 ans de prison. Les victimes de violence visée aux 3 documents sont soumises aux peines de 1 à 3 ans de prison. Les victimes de violence visée aux 2 documents sont soumises aux peines de 5 à 30 ans de prison. Les victimes de violence visée aux 1 document sont soumises aux peines de 100.000 dirhams à 100.000 dirhams. Les victimes de violence visée aux 2 documents sont soumises aux peines de 100.000 dirhams à 100.000 dirhams. Les victimes de violence visée aux 3 documents sont soumises aux peines de 100.000 dirhams à 100.000 dirhams. Les victimes de violence visée aux 1 document sont soumises aux peines de 100.000 dirhams à 100.000 dirhams. Les victimes de violence visée aux 2 documents

In [None]:
# Display the results
pipeline.display_results(query, response, documents, timing)


[1mQUESTION:[0m Quelles sont les peines prévues pour le vol simple

[1mANSWER:[0m
assistant
# Answer: Les peines prévues pour le vol simple sont différentes selon le type de vol. Pour le vol simple, les peines prévues sont les suivantes : Peine de réclusion : 2 ans à 10 ans de prison, ou 1 an à 2 ans de prison, si la peine de réclusion est inférieure à 1 an. Peine d'exécution : 1 an à 5 ans de prison, ou 1 an à 2 ans de prison, si la peine d'exécution est inférieure à 1 an. Peine d'obligeer ou de faire des actes de force : 1 an à 5 ans de prison, ou 1 an à 2 ans de prison, si la peine d'obligeer ou de faire des actes de force est inférieure à 1 an. Peine de dégradation de la personne : 1 an à 5 ans de prison, ou 1 an à 2 ans de prison, si la peine de dégradation de la personne est inférieure à 1 an. Peine de dégradation de la personne : 1 an à 5 ans de prison, ou 1 an à 2 ans de prison, si la peine de dégradation de la personne est inférieure à 1 an. Peine de dégradation de la per