<a href="https://colab.research.google.com/github/ahmedaltabie/telecom-dashboard/blob/main/NLP_final_project_(1).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

!pip install datasets transformers sentence-transformers faiss-cpu python-dotenv openai



In [2]:

import os
from google.colab import userdata

os.environ["GEMINI_API_KEY"] = userdata.get("Ahmed_Altabie")

In [3]:
import os
import re
import numpy as np
import faiss
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
# The 'openai' library is used, but configured to talk to the Gemini API
from openai import OpenAI, RateLimitError
from google.colab import userdata





In [4]:
# --- START: API KEY AND CLIENT SETUP FOR GEMINI ---

# 1) Get the Gemini API key from Colab secrets.
# IMPORTANT: Replace "GEMINI_API_KEY" with the exact name you use in Colab Secrets
import os

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

if GEMINI_API_KEY is None:
    raise ValueError("API key not found")



# 2) Initialize the OpenAI client, pointing to the Gemini API endpoint
# This tells the library to send requests to Google's service.
client = OpenAI(
    api_key=GEMINI_API_KEY,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

# --- END: API KEY AND CLIENT SETUP FOR GEMINI -

In [5]:

dataset = load_dataset("squad")
subset = dataset["train"].select(range(500))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [6]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-z0-9\s]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

texts = []
for item in subset:
    texts.append(clean_text(item["context"]))
    texts.append(clean_text(item["question"]))

texts[:2]

['architecturally the school has a catholic character atop the main buildings gold dome is a golden statue of the virgin mary immediately in front of the main building and facing it is a copper statue of christ with arms upraised with the legend venite ad me omnes next to the main building is the basilica of the sacred heart immediately behind the basilica is the grotto a marian place of prayer and reflection it is a replica of the grotto at lourdes france where the virgin mary reputedly appeared to saint bernadette soubirous in 1858 at the end of the main drive and in a direct line that connects through 3 statues and the gold dome is a simple modern stone statue of mary',
 'to whom did the virgin mary allegedly appear in 1858 in lourdes france']

In [7]:
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(texts)

index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.array(embeddings))

In [8]:
def search_docs(query, k=3):
    query = clean_text(query)
    query_embedding = embedding_model.encode([query])
    # The faiss index requires the query embedding to be 2D (batch size, dimension)
    _, indices = index.search(np.array(query_embedding).astype(np.float32), k)
    return [texts[i] for i in indices[0]]

def rag_chatbot(query):
    try:
        docs = search_docs(query)
        context = "\n".join(docs)

        prompt = f"""Context:
{context}

Question:
{query}
"""
        # --- MODEL NAME CHANGE ---
        # Changed the model name to gemini-2.5-flash, which is compatible
        # with the OpenAI chat completions API through the base_url.
        response = client.chat.completions.create(
            model="gemini-2.5-flash",
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content

    except RateLimitError:
        return "API quota exceeded. Please check billing or use another API key."
    except Exception as e:
        # Catch other potential errors, such as a bad API key or endpoint issue
        return f"An API Error Occurred: {e}"


In [9]:
while True:
    user_input = input("You: ")
    if user_input.lower() in ["exit", "quit"]:
        print("Bot: Goodbye!")
        break

    answer = rag_chatbot(user_input)
    print("Bot:", answer)

KeyboardInterrupt: Interrupted by user