<a href="https://colab.research.google.com/github/hadil56165M/READ.ME/blob/main/rag.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q pypdf faiss-cpu sentence-transformers requests langchain-community

import requests
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
import tempfile
import os

# Candidate locations for the Tunisia Constitution PDF, listed in the order
# they should be attempted.
_LEGISLATION_TN_URL = "https://www.legislation.tn/sites/default/files/news/constitution-b-a-t.pdf"
_CONSTITUTE_PROJECT_URL = "https://www.constituteproject.org/constitution/Tunisia_2014.pdf"
_ACE_PROJECT_URL = "https://aceproject.org/ero-en/regions/africa/TN/tunisia-constitution-2014-english-version/view"

pdf_sources = [_LEGISLATION_TN_URL, _CONSTITUTE_PROJECT_URL, _ACE_PROJECT_URL]

def download_pdf(url):
    """Download a PDF from ``url`` and return its raw bytes.

    Args:
        url: HTTP(S) address expected to serve a PDF document.

    Returns:
        The response body as ``bytes`` when the request succeeds and the
        payload carries a PDF header; ``None`` otherwise, so the caller can
        move on to another source.
    """
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Only network/HTTP failures are treated as "source unavailable";
        # KeyboardInterrupt and programming errors still propagate.
        print(f"  Download failed: {exc}")
        return None

    content = response.content
    # A server can answer 200 with an HTML page instead of the document.
    # A real PDF carries the "%PDF-" marker near the start of the file.
    if b"%PDF-" not in content[:1024]:
        print("  Response is not a PDF, skipping.")
        return None
    return content

# Walk the candidate sources in order and stop at the first one that yields
# a non-empty download; pdf_content stays falsy when every source fails.
pdf_content = None
for source in pdf_sources:
    print(f"Trying source: {source}")
    pdf_content = download_pdf(source)
    if not pdf_content:
        continue
    print("✓ PDF downloaded successfully!")
    break

if not pdf_content:
    # Manual upload fallback. google.colab is imported lazily because it is
    # only needed (and only importable) when running inside Colab.
    print("All online sources failed. Please upload the Tunisia Constitution PDF:")
    from google.colab import files
    uploaded = files.upload()
    # files.upload() returns {filename: file bytes}; take the first PDF and
    # match the extension case-insensitively so "Constitution.PDF" works too.
    for filename, data in uploaded.items():
        if filename.lower().endswith('.pdf'):
            pdf_content = data
            break

    if not pdf_content:
        # Stop here with a clear message instead of failing later when the
        # missing content is written to the temporary file.
        raise RuntimeError(
            "No PDF could be downloaded or uploaded; cannot build the search index."
        )

# Save to a temporary file because PyPDFLoader is given a path, not bytes.
# delete=False keeps the file on disk after the `with` block closes it.
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
    tmp_file.write(pdf_content)
    tmp_path = tmp_file.name

try:
    loader = PyPDFLoader(tmp_path)
    pages = loader.load()
finally:
    # Clean up even when parsing fails, so a bad PDF does not leak a temp file.
    os.unlink(tmp_path)

# Process text: split the loaded pages into overlapping chunks
# (chunk_size=1000, chunk_overlap=200) so each indexed passage stays short.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(pages)
if not texts:
    # Nothing to index (e.g. a PDF without extractable text): fail here with
    # a clear message rather than somewhere inside the vector store.
    raise ValueError(
        "No extractable text was found in the PDF; cannot build the search index."
    )

# Create search engine: embed every chunk and store the vectors in FAISS.
# `db` is the module-level index that search_constitution() queries.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db = FAISS.from_documents(texts, embeddings)

def search_constitution(question):
    """Print the two indexed passages most similar to ``question``.

    Queries the module-level ``db`` index and writes the results to stdout;
    nothing is returned.

    Args:
        question: Free-text query to run against the index.
    """
    hits = db.similarity_search(question, k=2)
    print(f"\n🔍 Results for: '{question}'\n")

    for hit in hits:
        # The stored page value is shown incremented by one (presumably the
        # loader's page metadata is zero-based); a missing value shows as 1.
        page_label = hit.metadata.get('page', 0) + 1
        # Collapse line breaks and repeated whitespace into single spaces.
        flattened = ' '.join(hit.page_content.split())
        # Long passages are cut to a 400-character preview with an ellipsis.
        if len(flattened) > 400:
            preview = flattened[:400] + "..."
        else:
            preview = flattened
        print(f"📖 Page {page_label}:")
        print(preview)
        print("-" * 50)

# Main interface: print the banner and usage hints, one entry per line.
for _banner_line in (
    "\n" + "=" * 60,
    "🇹🇳  TUNISIA CONSTITUTION SEARCH TOOL  🇹🇳",
    "=" * 60,
    "\nAsk questions about Tunisia's Constitution!",
    "Examples:",
    "- What are the president's powers?",
    "- How are judges appointed?",
    "- What rights do citizens have?",
    "- Type 'quit' to exit",
    "\n" + "=" * 60,
):
    print(_banner_line)

# Interactive loop: read a question, run the search, repeat until the user
# quits or input is no longer available.
while True:
    try:
        question = input("\n❓ Your question: ").strip()
    except (KeyboardInterrupt, EOFError, OSError):
        # Ctrl-C, end of input, or an unreadable stdin: retrying input()
        # would fail the same way forever, so leave the loop instead.
        print("\nGoodbye! 👋")
        break

    if question.lower() in ('quit', 'exit', 'q'):
        print("Goodbye! 👋")
        break
    if not question:
        # Blank line: just prompt again.
        continue

    try:
        search_constitution(question)
    except KeyboardInterrupt:
        print("\nGoodbye! 👋")
        break
    except Exception as e:
        # A failed search is reported but must not end the session.
        print(f"Error: {e}")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.5/2.5 MB[0m [31m88.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m54.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/45.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/50.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hTrying source: https://www.legislation.tn/sites/default/files/news/constitution-b-a-t.pdf
Trying source: https://www.constituteproject.org/constitution/Tunisia_2014.pdf

  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


🇹🇳  TUNISIA CONSTITUTION SEARCH TOOL  🇹🇳

Ask questions about Tunisia's Constitution!
Examples:
- What are the president's powers?
- How are judges appointed?
- What rights do citizens have?
- Type 'quit' to exit


❓ Your question:  What are the president's powers?

🔍 Results for: 'What are the president's powers?'

📖 Page 18:
He/she also has the following powers: Dissolving the Assembly of the Representatives of the People in accordance withprovisions of the Constitution. The Assembly shall not be dissolved during the sixmonths following granting con dence to the government, or the six monthsfollowing legislative elections, or during the last six months of the presidential orparliamentary terms; Chairing the National S...
--------------------------------------------------
📖 Page 24:
The outgoing government shall continue to administer government business under thesupervision of one of its members to be selected by the Council of Ministers andnominated by the President of the Republic