In [None]:
import requests
from bs4 import BeautifulSoup
import os
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from langchain.document_loaders import PyPDFLoader, CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq

def get_data_from_website(url, timeout=30):
    """Scrape Elementor tab titles/contents from ``url`` into data/tab_data.json.

    Every ``div.elementor-tab-title`` is paired with the first
    ``div.elementor-tab-content`` carrying the same ``data-tab`` value, and the
    resulting ``{title: content}`` mapping is written as UTF-8 JSON.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        None. On an HTTP error status a message is printed and nothing is
        written, so an existing data/tab_data.json is left untouched.

    Raises:
        requests.exceptions.RequestException: on connection failure or timeout.
    """
    # Without a timeout a stalled server would block the kernel indefinitely.
    response = requests.get(url, timeout=timeout)

    # Any 4xx/5xx body is an error page, not the policies page; parsing it would
    # overwrite the saved data with an empty mapping.
    if not response.ok:
        if response.status_code >= 500:
            print("Server error")
        else:
            print(f"Request failed (HTTP {response.status_code})")
        return

    soup = BeautifulSoup(response.content, 'html.parser')

    tab_data = {}
    for title_div in soup.find_all("div", class_="elementor-tab-title"):
        tab_id = title_div.get("data-tab")
        tab_title = title_div.get_text(strip=True)

        # Without a data-tab id there is nothing to pair the title with; a
        # lookup on None would not be constrained to one specific tab.
        matching_content = None
        if tab_id is not None:
            matching_content = soup.find(
                "div", class_="elementor-tab-content", attrs={"data-tab": tab_id}
            )
        tab_content = matching_content.get_text(separator="\n", strip=True) if matching_content else ""

        # NOTE(review): titles are the dict keys, so two tabs with identical
        # text collapse into one entry (last one wins) - confirm acceptable.
        tab_data[tab_title] = tab_content

    # Make sure the output folder exists before writing.
    os.makedirs("data", exist_ok=True)

    with open("data/tab_data.json", "w", encoding="utf-8") as f:
        json.dump(tab_data, f, ensure_ascii=False, indent=2)

    print("Data saved to data/tab_data.json")


In [13]:
# Scrape the academic-policies page; the result is written to data/tab_data.json.
get_data_from_website("https://www.dinecollege.edu/academics/academic-policies/")

Data saved to data/tab_data.json


In [15]:
# Read the scraped {title: content} mapping back from disk.
with open('data/tab_data.json', encoding='utf-8') as json_file:
    tab_data = json.load(json_file)

# The titles, kept in file order, are the reverse-lookup table: position i in
# `metadata` names the tab whose text sits at position i in `documents`.
metadata = [title for title in tab_data]
documents = [f"{title}: {tab_data[title]}" for title in metadata]

# Sentence-embedding model shared by the indexing and query cells.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [16]:
# Embed every document as a NumPy array (one vector per entry in `documents`).
embeddings = model.encode(
    documents,
    show_progress_bar=True,
    convert_to_numpy=True,
)

# The second axis of the embedding matrix fixes the index dimensionality.
embedding_dim = embeddings.shape[1]

# Flat index that ranks neighbours by L2 distance.
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)

# Persist the index so it can be reloaded without re-embedding.
faiss.write_index(index, 'data/faiss_index.idx')

Batches: 100%|██████████| 1/1 [00:00<00:00,  8.37it/s]


In [17]:
# Store the titles in index order so a FAISS row number maps back to its tab.
metadata_json = json.dumps(metadata, ensure_ascii=False, indent=2)
with open('data/faiss_metadata.json', 'w', encoding='utf-8') as metadata_file:
    metadata_file.write(metadata_json)

print("✅ FAISS index and metadata saved.")

✅ FAISS index and metadata saved.


In [18]:
# Restore the persisted search artefacts: the FAISS index first ...
index = faiss.read_index('data/faiss_index.idx')

# ... then the row-number -> title lookup table ...
with open('data/faiss_metadata.json', encoding='utf-8') as metadata_file:
    metadata = json.loads(metadata_file.read())

# ... and finally the full tab texts, needed to show content for each hit.
with open('data/tab_data.json', encoding='utf-8') as tab_file:
    tab_data = json.loads(tab_file.read())

def search_query(user_query, top_k=2):
    """Print the stored tabs closest to ``user_query``.

    Relies on the notebook globals ``model`` (embedding model), ``index``
    (FAISS index), ``metadata`` (titles in index order) and ``tab_data``
    (title -> content).

    Args:
        user_query: Free-text question to embed and look up.
        top_k: Number of neighbours requested from the FAISS index.

    Returns:
        None; the ranked matches are printed.
    """
    # Convert query to embedding
    query_vector = model.encode([user_query], convert_to_numpy=True)

    # Perform similarity search
    distances, indices = index.search(query_vector, top_k)

    # FAISS pads the result with label -1 when the index holds fewer than
    # top_k vectors; left unfiltered, metadata[-1] would silently report the
    # last tab as a match.
    hits = [(idx, dist) for idx, dist in zip(indices[0], distances[0]) if idx >= 0]

    print(f"\n🔍 Top {top_k} Results for: '{user_query}'\n")
    for rank, (idx, score) in enumerate(hits, start=1):
        title = metadata[idx]
        content = tab_data[title]
        print(f"{rank}. Title: {title}")
        # `score` is the distance FAISS reports (the index was built as
        # IndexFlatL2), so a smaller value means a closer match.
        print(f"   Score: {score:.4f}")
        print(f"   Content:\n{content[:300]}{'...' if len(content) > 300 else ''}\n")

# Example: print the closest tabs for an academic-appeals question (default top_k=2)
search_query("Want to know about the academic appeals.")


🔍 Top 2 Results for: 'Want to know about the academic appeals.'

1. Title: Grades
   Score: 0.9125
   Content:
General Grade Appeal
Grades are determined solely by the individual faculty who taught the course for the session(s) or the semester(s). A student who wishes to contest a grade must first attempt to resolve the matter with the course faculty.
If the matter cannot be resolved with the instructor, the...

2. Title: Academics
   Score: 1.0904
   Content:
Academic Appeals
Students placed on academic probation or suspension may appeal to the Academic Standards Committee by filing an appeal form with the Office of the Registrar. The student has the right to appeal any action affecting their academic status by obtaining the appropriate form from the Off...



In [None]:
# Chat model handle used by generate_answer() to produce the final answer.
# NOTE(review): max_tokens=4192 is unusual next to the "8192" in the model
# name - confirm it is not a typo for 4096.
llm = ChatGroq(
    model="Llama3-8b-8192",
    temperature=0,
    max_tokens=4192,
    timeout=30,
    max_retries=2,
)

In [23]:
def generate_answer(user_query, retrieved_titles, tab_data):
    """Ask the notebook-level ``llm`` to answer ``user_query`` from selected tabs.

    Args:
        user_query: The question to answer.
        retrieved_titles: Tab titles to use as context; titles that are not
            keys of ``tab_data`` are skipped.
        tab_data: Mapping of tab title -> tab text.

    Returns:
        Whatever ``llm.invoke`` returns for the assembled prompt.
    """
    # Gather "<title>: <text>" sections for the titles that are actually known.
    sections = []
    for title in retrieved_titles:
        if title in tab_data:
            sections.append(f"{title}: {tab_data[title]}")
    context = "\n\n".join(sections)

    # Prompt layout (including its indentation) is part of what the model sees.
    llm_prompt = f"""
    Based on the following information, answer the question.

    Information:
    {context}

    Question: {user_query}

    Answer in a clear, helpful, and concise manner.
    """

    return llm.invoke(llm_prompt)


In [26]:
def search_and_generate(user_query, top_k=3):
    """Retrieve the closest tabs for ``user_query`` and print an LLM answer.

    Relies on the notebook globals ``model``, ``index``, ``metadata`` and
    ``tab_data``, and on ``generate_answer`` for the LLM call.

    Args:
        user_query: Free-text question.
        top_k: Number of neighbours requested from the FAISS index.

    Returns:
        None; the answer text is printed.
    """
    query_vector = model.encode([user_query], convert_to_numpy=True)
    _, indices = index.search(query_vector, top_k)

    # FAISS pads the result with label -1 when the index holds fewer than
    # top_k vectors; metadata[-1] would otherwise feed the last tab into the
    # context as if it had been retrieved.
    retrieved_titles = [metadata[idx] for idx in indices[0] if idx >= 0]

    # Generate the answer using LLaMA (ChatGroq)
    answer = generate_answer(user_query, retrieved_titles, tab_data)

    print(f"\n🧠 Answer:\n{answer.content}")


In [27]:
# Retrieval + generation for the same academic-appeals question (default top_k=3).
search_and_generate("Want to know about the academic appeals.")


🧠 Answer:
According to the provided information, the academic appeals process at Diné College is as follows:

* If a student wishes to contest a grade, they must first attempt to resolve the matter with the course faculty.
* If the matter cannot be resolved with the instructor, the student may appeal to the appropriate Dean of School. The student must provide evidence as to why the grade posted by the faculty is an error.
* If the matter is not resolved with the Dean of School, the student may appeal a final time to the Academic Standards Committee.
* The decision of the Academic Standards Committee is final.

Additionally, students placed on academic probation or suspension may appeal to the Academic Standards Committee by filing an appeal form with the Office of the Registrar.

It's also important to note that students have the right to appeal any action affecting their academic status by obtaining the appropriate form from the Office of the Registrar: Appeal of Suspension, Appeal o