In [2]:
from google.colab import drive

# Mount Google Drive; the Streamlit script written further down reads its
# model checkpoints and retrieval files from paths under this mount point.
MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!pip install -U langchain-huggingface


Collecting langchain-huggingface
  Downloading langchain_huggingface-0.3.0-py3-none-any.whl.metadata (996 bytes)
Downloading langchain_huggingface-0.3.0-py3-none-any.whl (27 kB)
Installing collected packages: langchain-huggingface
Successfully installed langchain-huggingface-0.3.0


In [4]:
!pip install faiss-cpu
!pip install transformers
!pip install sentence-transformers
!pip install numpy


Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from to

In [5]:
!pip install streamlit pyngrok transformers accelerate bitsandbytes


Collecting streamlit
  Downloading streamlit-1.46.1-py3-none-any.whl.metadata (9.0 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.11-py3-none-any.whl.metadata (9.4 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.46.1-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m95.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.11-py3-none-any.whl (25 kB)
Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl (72.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [6]:
!pip install pyngrok



In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# Never paste the authtoken into the notebook source: it ends up in the saved
# file and in shell history. Read it from the environment when available,
# otherwise prompt for it without echoing.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")

# Persists the token to the ngrok configuration file, like
# `ngrok config add-authtoken` does on the command line.
ngrok.set_auth_token(ngrok_authtoken)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [8]:
%%writefile rag_ui.py

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from rank_bm25 import BM25Okapi
import faiss
import pickle
import numpy as np

# ✅ Page configuration always comes first, before any other Streamlit call
st.set_page_config(layout="centered", page_title="نظام RAG عربي")

# === Initialise the question/answer history on the first run of the session ===
if "history" not in st.session_state:
    st.session_state["history"] = []

# === Model choices: name shown in the UI -> checkpoint directory on Drive ===
_MODEL_DIRECTORIES = (
    ("distilgpt2", "/content/drive/MyDrive/googlesmall"),
    ("aragpt2-medium", "/content/drive/MyDrive/merged_aragpt2-medium_arabic"),
    ("gptneo", "/content/drive/MyDrive/final_model_gptneo_arabic_gen"),
)
# Insertion order is kept, so the select box lists the models in this order.
model_paths = dict(_MODEL_DIRECTORIES)

# === Sidebar: model selection, retrieval/generation settings, recent history ===
with st.sidebar:
    st.header("⚙️ إعدادات")
    selected_model_name = st.selectbox("اختر النموذج:", list(model_paths))
    top_k = st.slider("عدد الوثائق المسترجعة:", min_value=1, max_value=10, value=3)
    alpha = st.slider(
        "نسبة الدمج (FAISS / BM25):",
        min_value=0.0,
        max_value=1.0,
        value=0.5,
        step=0.1,
    )
    max_tokens = st.slider("عدد الكلمات في الجواب:", min_value=10, max_value=200, value=50)

    st.markdown("---")
    st.header("🗂️ سجل الأسئلة")

    # Show at most the five latest exchanges, newest first.
    recent_exchanges = st.session_state.history[-5:]
    if not recent_exchanges:
        st.write("لا يوجد سجل بعد.")
    else:
        for position, (question, reply) in enumerate(recent_exchanges[::-1], start=1):
            st.markdown(f"**{position}. سؤال:** {question}")
            st.markdown(f"👉 {reply}")

# === Resource loading ===
@st.cache_resource
def _load_retrieval_resources():
    """Load the retrieval assets that do not depend on the selected model.

    Kept in its own cached function so that choosing another model in the
    sidebar does not reload the sentence embedder, re-read the FAISS index,
    re-unpickle the corpus and rebuild the BM25 index.

    Returns:
        tuple: ``(embedder, index, bm25, documents)``.
    """
    faiss_path = "/content/drive/MyDrive/arabic_qa_generation_faiss"

    embedder = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
    index = faiss.read_index(f"{faiss_path}/faiss.index")

    # SECURITY: pickle.load can execute arbitrary code from the file. Only
    # load a documents.pkl that you produced yourself.
    with open(f"{faiss_path}/documents.pkl", "rb") as f:
        documents = pickle.load(f)

    # BM25 is built over whitespace-split documents; queries must be split
    # the same way when they are scored against it.
    bm25 = BM25Okapi([doc.split() for doc in documents])

    return embedder, index, bm25, documents


@st.cache_resource
def load_resources(model_name):
    """Load the generator for ``model_name`` together with the retrieval assets.

    Results are cached per ``model_name`` by ``st.cache_resource``; the
    retrieval assets come from a separate cache entry shared by all models.

    Args:
        model_name: A key of the module-level ``model_paths`` mapping.

    Returns:
        tuple: ``(tokenizer, model, embedder, index, bm25, documents)``.

    Raises:
        KeyError: If ``model_name`` is not a key of ``model_paths``.
    """
    model_path = model_paths[model_name]

    model = AutoModelForCausalLM.from_pretrained(model_path).to("cpu")
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    embedder, index, bm25, documents = _load_retrieval_resources()

    return tokenizer, model, embedder, index, bm25, documents

tokenizer, model, embedder, index, bm25, documents = load_resources(selected_model_name)

# === Hybrid RAG pipeline ===
def _min_max(scores):
    """Rescale scores to the [0, 1] range; empty or constant input yields zeros."""
    scores = np.asarray(scores, dtype=np.float64)
    if scores.size == 0:
        return scores
    low, high = scores.min(), scores.max()
    if high <= low:
        return np.zeros_like(scores)
    return (scores - low) / (high - low)


def hybrid_rag_pipeline(query, top_k=3, max_new_tokens=50, alpha=0.5, max_prompt_tokens=512):
    """Answer ``query`` using hybrid (dense + BM25) retrieval and the loaded generator.

    Uses the module-level ``embedder``, ``index``, ``bm25``, ``documents``,
    ``tokenizer`` and ``model``.

    Args:
        query: The user question.
        top_k: Number of documents placed in the prompt. ``2 * top_k``
            candidates are requested from each retriever.
        max_new_tokens: Upper bound on the number of generated tokens.
        alpha: Weight of the dense score; BM25 gets ``1 - alpha``.
        max_prompt_tokens: Token budget for the whole prompt.

    Returns:
        str: ``"الإجابة: "`` followed by the first line of the generated answer.
    """
    # Never ask a retriever for more candidates than there are documents.
    n_candidates = min(top_k * 2, len(documents))
    fused = {}  # document position -> fused score

    if n_candidates > 0:
        # --- Dense retrieval ---
        query_embedding = embedder.encode([query], convert_to_numpy=True)
        raw_dense, dense_ids = index.search(query_embedding, n_candidates)
        # FAISS pads missing results with id -1, which would otherwise
        # silently select documents[-1].
        found = dense_ids[0] >= 0
        dense_ids = dense_ids[0][found]
        raw_dense = raw_dense[0][found]
        # An L2 index returns distances (smaller = closer). Flip the sign so
        # that a larger value always means a better match.
        if getattr(index, "metric_type", None) == faiss.METRIC_L2:
            raw_dense = -raw_dense
        # Both retrievers are rescaled to [0, 1] before mixing; their raw
        # scores live on unrelated scales, so alpha would be meaningless.
        for doc_id, score in zip(dense_ids, _min_max(raw_dense)):
            fused[int(doc_id)] = alpha * float(score)

        # --- Sparse retrieval ---
        raw_sparse = np.asarray(bm25.get_scores(query.split()))
        sparse_norm = _min_max(raw_sparse)
        for doc_id in np.argsort(raw_sparse)[::-1][:n_candidates]:
            doc_id = int(doc_id)
            fused[doc_id] = fused.get(doc_id, 0.0) + (1 - alpha) * float(sparse_norm[doc_id])

    # Best fused score first; identical texts are only included once.
    top_contexts = []
    for doc_id, _ in sorted(fused.items(), key=lambda item: item[1], reverse=True):
        if len(top_contexts) >= top_k:
            break
        text = documents[doc_id]
        if text not in top_contexts:
            top_contexts.append(text)
    combined_context = "\n".join(top_contexts)

    prompt_head = "السياق:\n"
    prompt_tail = f"""

السؤال: {query}
أجب على السؤال فقط، بجملة قصيرة ودقيقة. لا تكرر السؤال أو السياق.
الإجابة:"""

    # The tokenizer truncates on the right, which would cut off the question
    # and the answer marker when the context is long. Shorten the context
    # instead. The small margin absorbs token-count drift from decoding and
    # re-encoding the shortened context.
    margin = 8
    overhead = len(tokenizer(prompt_head + prompt_tail)["input_ids"])
    context_budget = max(max_prompt_tokens - overhead - margin, 0)
    context_ids = tokenizer(combined_context, add_special_tokens=False)["input_ids"]
    if len(context_ids) > context_budget:
        combined_context = tokenizer.decode(context_ids[:context_budget], skip_special_tokens=True)

    prompt = f"{prompt_head}{combined_context}{prompt_tail}"

    # truncation stays on as a safety net (e.g. for an extremely long question).
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_prompt_tokens)
    inputs = {k: v.to('cpu') for k, v in inputs.items()}

    # Fall back to the EOS id as pad id when the tokenizer defines no pad token.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id
    output = model.generate(**inputs, max_new_tokens=max_new_tokens, pad_token_id=pad_id)

    # Decode only the continuation: the generator echoes the prompt first.
    prompt_length = inputs["input_ids"].shape[1]
    generated = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Keep the first line after the last answer marker the model may have repeated.
    answer = generated.split("الإجابة:")[-1].strip().split("\n")[0].strip(" .،")
    return f"الإجابة: {answer}"

# === User input and result display ===
st.title("🧠 نظام الأسئلة والأجوبة (RAG) - استرجاع هجين")

user_question = st.text_input(
    "🖊️ أدخل سؤالك بالعربية:",
    placeholder="مثال: ما هي عاصمة المغرب؟",
)

submitted = st.button("🔍 احصل على الإجابة")
if submitted and not user_question.strip():
    # Button pressed with an empty / whitespace-only question.
    st.warning("⚠️ الرجاء إدخال سؤال صالح.")
elif submitted:
    with st.spinner("🔎 يتم توليد الإجابة..."):
        result = hybrid_rag_pipeline(
            user_question,
            top_k=top_k,
            max_new_tokens=max_tokens,
            alpha=alpha,
        )
        # Remember the exchange so the sidebar can list it.
        st.session_state.history.append((user_question, result))
    st.markdown("### 🎯 الإجابة:")
    st.success(result)


Writing rag_ui.py


In [9]:
# Terminate every process whose command line matches "ngrok" (presumably a
# tunnel left over from an earlier run, so a fresh one can be opened below).
!pkill -f ngrok


In [11]:
!pip install rank_bm25

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2


In [None]:
from pyngrok import ngrok
public_url = ngrok.connect(8501)
print("🚀 Interface Streamlit :", public_url)

!streamlit run rag_ui.py --server.enableCORS false --server.enableXsrfProtection false


🚀 Interface Streamlit : NgrokTunnel: "https://0310-34-125-114-98.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.125.114.98:8501[0m
[0m
2025-07-06 18:03:56.546218: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751825036.584660    4526 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751825036.596780    4526 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been r