In [None]:
# !pip install langchain faiss-cpu sentence-transformers groq

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB

In [None]:
# !pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.51 (from langchain-community)
  Downloading langchain_core-0.3.52-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain<1.0.0,>=0.3.23 (from langchain-community)
  Downloading langchain-0.3.23-py3-none-any.whl.metadata (7.8 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.8 (from langchain<1.0.0,>=0.3.23->langchain-community)
  Downloading langchain_text_splitters-0.3.8-py3-none-any.whl.metadata (1.9 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading

In [1]:
import os
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from sentence_transformers import CrossEncoder
from groq import Groq

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# === PATH TO FOLDER ===
# Directory holding the scraped .txt files. The path is relative, so it is
# resolved against the notebook's working directory. It is the argument passed
# to load_documents_from_folder() when the corpus is loaded.
folder_path = "Final_scraped_txts"  

# === LOAD TEXT FILES ===
def load_documents_from_folder(folder_path):
    """Load every ``.txt`` file directly inside ``folder_path`` as documents.

    Files are visited in sorted filename order so that repeated runs yield the
    documents (and therefore the chunks and the index built from them) in the
    same order; ``os.listdir`` on its own returns entries in arbitrary order.

    Args:
        folder_path: Directory to scan. Sub-directories are not searched.

    Returns:
        A flat list containing whatever ``TextLoader(...).load()`` returns for
        each matching file, read with UTF-8 encoding.
    """
    documents = []
    for file_name in sorted(os.listdir(folder_path)):
        # Only plain-text files are part of the corpus; skip everything else.
        if not file_name.endswith(".txt"):
            continue
        file_path = os.path.join(folder_path, file_name)
        documents.extend(TextLoader(file_path, encoding='utf-8').load())
    return documents


In [3]:
# === SPLIT INTO CHUNKS ===
def split_documents(docs, chunk_size=1000, chunk_overlap=100):
    """Break ``docs`` into overlapping chunks with a recursive character splitter.

    Args:
        docs: Documents to split.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as ``chunk_size``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as ``chunk_overlap``.

    Returns:
        The result of ``split_documents(docs)`` on the configured splitter.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    chunker = RecursiveCharacterTextSplitter(**splitter_options)
    chunks = chunker.split_documents(docs)
    return chunks


In [4]:
# === EMBEDDINGS SETUP ===
def get_embedding_model(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Create the sentence-embedding model used for indexing and querying.

    Args:
        model_name: Model identifier forwarded to ``HuggingFaceEmbeddings``.
            Defaults to the MiniLM model this notebook was built with; the
            same model must be used when building and when loading an index.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for ``model_name``.
    """
    return HuggingFaceEmbeddings(model_name=model_name)


In [5]:
# === SAVE TO FAISS ===
def save_to_faiss(split_docs, embeddings, index_path="faiss_index_luddy"):
    """Build a FAISS vector store from ``split_docs`` and write it to disk.

    Args:
        split_docs: Chunked documents to index.
        embeddings: Embedding model handed to ``FAISS.from_documents``.
        index_path: Location passed to ``save_local``.

    Returns:
        None. A confirmation line is printed once the index has been saved.
    """
    FAISS.from_documents(split_docs, embeddings).save_local(index_path)
    print(f"✅ FAISS index saved to '{index_path}'")


In [6]:
# Load every scraped page from `folder_path` and report how many documents
# were read, as a quick sanity check before chunking.
docs = load_documents_from_folder(folder_path)
print(f"📄 Loaded {len(docs)} documents.")

📄 Loaded 633 documents.


In [14]:
# Each scraped file is expected to begin with a header line of the form
# "[Source URL]: <url>". Copy that URL into the document metadata so it can be
# cited later. The prefix is checked explicitly instead of blindly dropping the
# first 14 characters, which would store garbage for files without the header.
SOURCE_URL_PREFIX = "[Source URL]: "

for doc in docs:
    first_line = doc.page_content.split("\n", 1)[0]
    if first_line.startswith(SOURCE_URL_PREFIX):
        doc.metadata['webURL'] = first_line[len(SOURCE_URL_PREFIX):].strip()
    else:
        # No recognizable header: leave 'webURL' unset so that consumers using
        # metadata.get('webURL', 'N/A') fall back to their default.
        doc.metadata.pop('webURL', None)
    

In [17]:
# Chunk the loaded documents using split_documents()' defaults
# (chunk_size=1000, chunk_overlap=100) and report the resulting chunk count.
split_docs = split_documents(docs)
print(f"Split into {len(split_docs)} chunks.")

Split into 4716 chunks.


In [18]:
# Spot-check the first ten chunks: page content plus the 'source' and 'webURL'
# metadata carried by each chunk.
split_docs[:10]

[Document(metadata={'source': 'Final_scraped_txts\\aaai.indiana.edu_.txt', 'webURL': 'https://aaai.indiana.edu/'}, page_content='[Source URL]: https://aaai.indiana.edu/'),
 Document(metadata={'source': 'Final_scraped_txts\\aaai.indiana.edu_.txt', 'webURL': 'https://aaai.indiana.edu/'}, page_content='Upcoming Events\nApr\n16\nAAAI 50th Exhibit\nApr\n17\nAAAI 50th Exhibit\nApr\n18\nAAAI 50th Exhibit\nApr\n19\nAAAI 50th Exhibit\nApr\n20\nAAAI 50th Exhibit\nApr\n21\nAAAI 50th Exhibit\nApr\n22\nAAAI 50th Exhibit\nApr\n23\nAAAI 50th Exhibit\nApr\n24\nAAAI 50th Exhibit\nApr\n25\nAAAI 50th Exhibit\nApr\n26\nAAAI 50th Exhibit\nApr\n26\nIU Soul Revue Spring Concert\nApr\n27\nAAAI 50th Exhibit\nApr\n28\nAAAI 50th Exhibit\nApr\n29\nAAAI 50th Exhibit\nApr\n30\nAAAI 50th Exhibit\nMay\n01\nAAAI 50th Exhibit\nMay\n02\nAAAI 50th Exhibit\nMay\n03\nAAAI 50th Exhibit\nMay\n03\nAfrican American Choral Ensemble Spring Concert\nMay\n04\nAAAI 50th Exhibit\nMay\n05\nAAAI 50th Exhibit\nMay\n06\nAAAI 50th Exhibi

In [19]:
# Embed every chunk and persist the FAISS index under save_to_faiss()'s default
# path ("faiss_index_luddy"); the retrieval setup cell loads it from there.
embeddings = get_embedding_model()
save_to_faiss(split_docs, embeddings)

  return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


✅ FAISS index saved to 'faiss_index_luddy'


In [20]:
# === Initializing re-ranker, embedding models and FAISS ===
# Cross-encoder used to re-score (query, chunk) pairs after retrieval.
reranker_model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

# Query-time embeddings must come from the same model that built the index, so
# reuse the notebook's factory instead of repeating the model name here.
embedding_model = get_embedding_model()

# SECURITY: FAISS.load_local() restores part of the store via pickle, which can
# execute arbitrary code. Opting in is acceptable here only because the index
# was written by this notebook; never load an index from an untrusted source.
# The flag is a boolean -- the string 'True' merely happened to be truthy.
db = FAISS.load_local("faiss_index_luddy", embedding_model, allow_dangerous_deserialization=True)

In [21]:
# === Function to perform hybrid search ===
def hybrid_search(query, top_k=20):
    """Retrieve ``top_k`` chunks, preferring semantic hits that contain the query text.

    ``top_k * 2`` candidates are fetched from the module-level vector store
    ``db``. Candidates whose content contains the whole query string
    (case-insensitive substring match) are placed first, followed by the
    remaining candidates; both groups keep the order returned by the store.

    NOTE(review): the keyword test matches the entire query verbatim, so for
    full-sentence questions it will rarely fire and the result is then purely
    semantic. Confirm whether token-level matching was intended.

    Args:
        query: Search text.
        top_k: Number of ``(document, score)`` pairs to return.

    Returns:
        A list of at most ``top_k`` ``(document, score)`` tuples.
    """
    semantic_results = db.similarity_search_with_score(query, k=top_k * 2)

    # Lower-case the query once and partition in a single pass; this replaces a
    # per-item `not in` scan that compared whole (document, score) tuples.
    needle = query.lower()
    keyword_hits = []
    semantic_only = []
    for doc, score in semantic_results:
        bucket = keyword_hits if needle in doc.page_content.lower() else semantic_only
        bucket.append((doc, score))

    return (keyword_hits + semantic_only)[:top_k]

In [38]:
# === Final answer generation function ===
def generate_llama_answer(query, top_k=5, groq_api_key=None, model="llama3-70b-8192"):
    """Answer ``query`` using retrieval, cross-encoder re-ranking and a Groq-hosted LLM.

    Steps: fetch 20 candidates via ``hybrid_search``, score each
    ``(query, chunk)`` pair with ``reranker_model``, keep the ``top_k`` best
    chunks, embed them (with their ``webURL`` metadata) in a prompt, and send
    that prompt to the chat-completions endpoint.

    Args:
        query: The user's question.
        top_k: Number of re-ranked chunks placed in the prompt context.
        groq_api_key: Forwarded as ``Groq(api_key=...)``.
            NOTE(review): with ``None`` the Groq client presumably falls back
            to the ``GROQ_API_KEY`` environment variable -- confirm in SDK docs.
        model: Groq model identifier. Defaults to the model originally
            hard-coded here; override it if that model is unavailable.

    Returns:
        The stripped text of the first completion choice, or ``""`` when the
        response carries no text content.
    """
    # Step 1: Retrieve candidate documents
    hybrid_results = hybrid_search(query, top_k=20)
    candidate_docs = [doc for doc, _ in hybrid_results]

    # Step 2: Rerank using cross-encoder (skipped when nothing was retrieved,
    # so the re-ranker is never called with an empty batch)
    if candidate_docs:
        pairs = [(query, doc.page_content) for doc in candidate_docs]
        scores = reranker_model.predict(pairs)
        reranked = sorted(zip(candidate_docs, scores), key=lambda pair: pair[1], reverse=True)
        top_docs = [doc for doc, _ in reranked[:top_k]]
    else:
        top_docs = []

    # Step 3: Create context and prompt; each chunk is prefixed with its source
    # URL so the model can cite links in the answer
    context = "\n\n".join([f"Source: {doc.metadata.get('webURL', 'N/A')} \n{doc.page_content}"   for doc in top_docs])
    prompt = f"""Answer the following question in a clear and structured format based on the provided context. Please also provide with the necessary website links. Don't mention "context" or "based on context" in the response.
            Context:{context}
            Question: {query}
            Answer:"""

    # Step 4: Generate answer using Groq's LLaMA model
    client = Groq(api_key=groq_api_key)
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )

    # The message content can be None; normalise to an empty string.
    answer = completion.choices[0].message.content
    return (answer or "").strip()

In [40]:
# The Groq API key is read from the GROQ_API_KEY environment variable so that
# no credential is stored in the notebook. A key that was ever committed in a
# notebook must be treated as leaked and revoked.
response = generate_llama_answer(
    query="What is the process to get academic advising at Luddy?",
    groq_api_key=os.environ["GROQ_API_KEY"],
)

print("Response :\n", response)

Response :
 **Getting Academic Advising at Luddy**

To get academic advising at Luddy, follow these steps:

**For Current Luddy Students:**

1. **Visit the Luddy Student Portal**: Find the advising or tutoring help you need through the portal ([Luddy Student Portal](https://luddy.indiana.edu/academics/advising.html)).
2. **Schedule an Appointment**: Meet with your academic advisor online or in-person to discuss your goals, degree requirements, and career opportunities.

**For Undergraduates Exploring Luddy Degrees:**

1. **Schedule an Appointment with Maria Clor**: Use the Student Appointment Scheduler (SAS) and search for "Maria Clor" to schedule an appointment.
2. **Indicate Intended Major and Notes**: Please indicate your intended major in the appointment reason field and any notes that you would like to discuss.

**For Graduates:**

1. **Contact Your Department**: You’ll have one or more advisors in your department. Learn more from your department about academic advising.

**For Lu

Response :

 **Getting Academic Advising at Luddy**

**Current Luddy Students**

1. Visit the Luddy Student Portal to find the advising or tutoring help you need.

**Undergraduates**

1. Schedule an appointment with Maria Clor using the Student Appointment Scheduler (SAS) and searching for "Maria Clor."
2. Indicate your intended major in the appointment reason field and any notes you would like to discuss.

**Graduates**

1. You'll have one or more advisors in your department. Learn more from your department.
2. Contact gradvise@iu.edu (in-person students) or siceooe@iu.edu (online students).

**Luddy Undergraduate Minors and Certificates**

1. Schedule an appointment with Anna Bednarski, Associate Director and Student Services Generalist.

**General Inquiries**

1. Contact Luddy undergraduate advising for any questions about Luddy or a specific program.
2. Talk to an advisor or a recruiter for more information.

Response :

 The process to get academic advising at Luddy is as follows:

**For Undergraduates:**

1. Schedule an appointment with Maria Clor using the Student Appointment Scheduler (SAS) and searching for "Maria Clor".
2. Indicate your intended major in the appointment reason field and any notes that you would like to discuss.

**For Graduates:**

1. Reach out to your department to learn more about your advisor(s).
2. Contact gradvise@iu.edu for in-person students or siceooe@iu.edu for online students.

**For Luddy Undergraduate Minors and Certificates:**

1. Schedule an appointment with Anna Bednarski, Associate Director and Student Services Generalist.

**Additional Resources:**

* Visit the Luddy Student Portal to find the advising or tutoring help you need.
* Contact undergraduate advising for any questions about Luddy or a specific program.
* Talk to an advisor or recruiter for further guidance.

# Response with links:

 Here is the process to get academic advising at Luddy:

**Step 1: Identify Your Needs**
Determine what you need help with, such as understanding degree requirements, choosing classes, or exploring career opportunities.

**Step 2: Access the Luddy Student Portal**
Visit the [Luddy Student Portal](https://luddy.indiana.edu/academics/advising.html) to find the advising or tutoring help you need.

**Step 3: Meet with an Advisor**
* **Undergraduates**: Schedule an appointment with Maria Clor using the Student Appointment Scheduler (SAS) and searching for "Maria Clor." Indicate your intended major in the appointment reason field and any notes you'd like to discuss.
* **Graduates**: Contact your department to learn more about your advisors and schedule a meeting.
* **Non-Luddy Students**: If you're majoring in a non-Luddy degree and are earning an undergraduate Luddy minor or certificate, schedule an appointment with Anna Bednarski, Associate Director and Student Services Generalist.

**Step 4: Prepare for Your Meeting**
Come prepared to discuss your goals, questions, and concerns with your advisor.

**Additional Resources**

* For career coaching and other services, visit [Luddy Career Services](https://luddy.indiana.edu/academics/advising.html#nonsice).
* For Luddy undergraduate minors and certificates, contact Anna Bednarski.
* For general advising inquiries, contact [undergraduate advising](https://informatics.indiana.edu/programs/bs-informatics/index.html).

Remember, your Luddy advisor is an expert on Luddy, IU, and you, so don't hesitate to reach out for guidance throughout your academic journey!

Response :
 **Getting Academic Advising at Luddy**

To get academic advising at Luddy, follow these steps:

**For Current Luddy Students:**

1. **Visit the Luddy Student Portal**: Find the advising or tutoring help you need through the portal ([Luddy Student Portal](https://luddy.indiana.edu/academics/advising.html)).
2. **Schedule an Appointment**: Meet with your academic advisor online or in-person to discuss your goals, degree requirements, and career opportunities.

**For Undergraduates Exploring Luddy Degrees:**

1. **Schedule an Appointment with Maria Clor**: Use the Student Appointment Scheduler (SAS) and search for "Maria Clor" to schedule an appointment.
2. **Indicate Intended Major and Notes**: Please indicate your intended major in the appointment reason field and any notes that you would like to discuss.

**For Graduates:**

1. **Contact Your Department**: You’ll have one or more advisors in your department. Learn more from your department about academic advising.

**For Luddy Undergraduate Minors and Certificates:**

1. **Schedule an Appointment with Anna Bednarski**: Contact Anna Bednarski, Associate Director and Student Services Generalist, to discuss Luddy undergraduate minors and certificates.

**General Inquiries:**

1. **Contact Luddy Advising**: Reach out to the Luddy School’s advising staff with any questions about Luddy or a specific program ([Contact Undergraduate Advising](https://informatics.indiana.edu/programs/bs-informatics/index.html)).
2. **Talk to an Advisor or Recruiter**: Discuss your academic and career goals with a Luddy advisor or recruiter.

Remember to visit the Luddy website ([Luddy School of Informatics, Computing, and Engineering](https://luddy.indiana.edu/)) for more information on academic advising and other resources.