In [1]:
# Imports
import os
import time

from dotenv import load_dotenv
from pinecone import Pinecone

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Initialize a Pinecone client with your API key
# load_dotenv() runs before the lookup so that PINECONE_API can be read through
# os.getenv. NOTE(review): presumably the key lives in a local .env file - confirm.
load_dotenv()
pineAPI = os.getenv("PINECONE_API")  # None when the variable is not set
pc = Pinecone(api_key=pineAPI)

In [4]:
# Create a dense index with integrated embedding
# The has_index guard skips creation when an index with this name is already
# present, so this cell can be re-run without recreating it.
index_name = "dense-index"
if not pc.has_index(index_name):
    pc.create_index_for_model(
        name=index_name,
        cloud="aws",
        region="us-east-1",
        embed={
            "model":"llama-text-embed-v2",
            # "chunk_text" is the record field that format_for_rag fills with the
            # document text; field_map points the model's "text" input at it.
            "field_map":{"text": "chunk_text"}
        }
    )

In [21]:
# Variables for parsing
num_docs = 0  # running count of records built; also the source of the "rec<N>" ids
labs  = ["lab0", "lab1", "lab2", "lab3", "lab4", "midterm", ]  # folders to scan; each name is also used as the record category
records = []  # accumulates the records produced for every folder


In [30]:
# parse documents
import os
import re
from typing import Dict, List, Optional

def parse_lab_file(file_path):
    """Split one Q&A text file into its labelled sections.

    The file is read as UTF-8 and searched for the headers ``Question:``,
    ``Student Answer:``, ``Instructor Answer:`` and ``Followup Discussions:``.
    Each section runs from its header up to the next expected header (or the
    end of the text) and is stripped of surrounding whitespace.

    Args:
        file_path: Path of the text file to parse.

    Returns:
        A dict with the keys ``file_name`` (base name of ``file_path``),
        ``question``, ``student_answer``, ``instructor_answer`` and
        ``followups``. A section whose header is absent maps to ``""``.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()

    # One pattern per output field; dict order fixes the order of the result keys.
    section_patterns = {
        "question": r'Question:\s*(.*?)(?=Student Answer:|$)',
        "student_answer": r'Student Answer:\s*(.*?)(?=Instructor Answer:|$)',
        "instructor_answer": r'Instructor Answer:\s*(.*?)(?=Followup Discussions:|$)',
        "followups": r'Followup Discussions:\s*(.*?)$',
    }

    document = {"file_name": os.path.basename(file_path)}
    for field, pattern in section_patterns.items():
        # DOTALL lets a section span multiple lines.
        found = re.search(pattern, raw_text, re.DOTALL)
        document[field] = found.group(1).strip() if found else ""
    return document

def process_lab_files(folder_path):
    """Parse every ``.txt`` file directly inside ``folder_path``.

    Files are visited in sorted name order. ``os.listdir`` returns entries in
    arbitrary order, and the caller assigns sequential record ids based on the
    order of this list, so sorting keeps the ids stable from run to run.

    Args:
        folder_path: Directory containing the text files.

    Returns:
        A list with one ``parse_lab_file`` result per ``.txt`` file, ordered
        by file name. Entries with any other extension are ignored.
    """
    return [
        parse_lab_file(os.path.join(folder_path, filename))
        for filename in sorted(os.listdir(folder_path))
        if filename.endswith('.txt')
    ]

def format_for_rag(parsed_data, category):
    """Turn parsed Q&A dicts into records for the vector index.

    Each record gets a sequential id ``rec<N>`` taken from the module-level
    ``num_docs`` counter, which is advanced by one per item.

    Args:
        parsed_data: Iterable of dicts with the keys ``question``,
            ``student_answer``, ``instructor_answer`` and ``followups``.
        category: Value stored under each record's ``category`` key.

    Returns:
        A list of dicts with the keys ``_id``, ``chunk_text`` and ``category``.
    """
    global num_docs

    # Every section after the first starts on a new line preceded by eight
    # spaces; that spacing is part of the stored text and is kept as-is.
    section_separator = "\n        "

    rag_documents = []
    for entry in parsed_data:
        # Claim the id before formatting so the counter advances even if a
        # key lookup below fails.
        record_id = f"rec{num_docs}"
        num_docs += 1

        chunk = section_separator.join((
            f"Question:{entry['question']}",
            f"Student Answer:{entry['student_answer']}",
            f"Instructor Answer:{entry['instructor_answer']}",
            f"Followup Discussions:{entry['followups']}",
        )).strip()

        rag_documents.append({
            "_id": record_id,
            "chunk_text": chunk,
            "category": category,
        })

    return rag_documents

# Start from a clean slate so that re-running this cell rebuilds the record
# list instead of appending duplicates with ever-increasing ids.
num_docs = 0
records = []

# Parse each folder and tag its records with the folder name as the category.
for lab in labs:
    cur_parsed = process_lab_files(lab)
    cur_raw_docs = format_for_rag(cur_parsed, lab)
    records.extend(cur_raw_docs)

print("Proccesed ", num_docs, "documents")


Proccesed  900 documents


In [31]:
# Upload the records to the index in fixed-size batches, reporting progress
# after each request.
batch_size = 96
dense_index = pc.Index(index_name)

batch_offsets = range(0, len(records), batch_size)
for batch_number, offset in enumerate(batch_offsets, start=1):
    batch = records[offset:offset + batch_size]
    dense_index.upsert_records("example-namespace", batch)
    print(f"Upserted batch {batch_number} ({len(batch)} records)")

print(f"Total records upserted: {len(records)}")

Upserted batch 1 (96 records)


PineconeApiException: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Fri, 25 Apr 2025 19:56:56 GMT', 'Content-Type': 'text/plain; charset=utf-8', 'Content-Length': '313', 'Connection': 'keep-alive', 'x-envoy-upstream-service-time': '1586', 'server': 'envoy'})
HTTP response body: {"error":{"code":"RESOURCE_EXHAUSTED","message":"{\"error\":{\"code\":\"RESOURCE_EXHAUSTED\",\"message\":\"Request failed. You've reached the max tokens 5000000 for your project for model llama-text-embed-v2 this month. To use the Inference API further please upgrade your plan.\"},\"status\":429}"},"status":429}


In [32]:
# Fetch the index statistics and print them, to check what the index
# currently holds after the upsert step.
stats = dense_index.describe_index_stats()
print(stats)

{'dimension': 1024,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'example-namespace': {'vector_count': 505}},
 'total_vector_count': 505,
 'vector_type': 'dense'}


In [44]:
from pinecone.exceptions import PineconeApiException 
def ask_query(query, debug=False):
    """Retrieve context passages for ``query`` from the Pinecone index.

    A reranked search is attempted first. Only if that request raises
    ``PineconeApiException`` is a plain search issued as a fallback, so a
    successful call embeds the query once rather than twice.

    Args:
        query: Natural-language question to search for.
        debug: When truthy, print a note if reranking failed and the plain
            search results are used instead.

    Returns:
        A list of strings, one per hit, each made of a ``Category: ...`` line
        followed by the hit's ``chunk_text``. Returns ``[]`` when the search
        fails with ``PineconeApiException``.
    """
    namespace = "example-namespace"
    search_query = {
        "top_k": 10,
        "inputs": {
            'text': query
        }
    }

    try:
        start_time = time.time()

        try:
            search_results = dense_index.search(
                namespace=namespace,
                query=search_query,
                rerank={
                    "model": "bge-reranker-v2-m3",
                    "top_n": 10,
                    "rank_fields": ["chunk_text"]
                }
            )
        except PineconeApiException:
            # Reranking is best-effort: fall back to the plain search. If that
            # also fails, the outer handler reports it and returns [].
            if debug:
                print("Reranking failed: Using standard search results instead.")
            search_results = dense_index.search(
                namespace=namespace,
                query=search_query
            )

        context_docs = [
            f"Category: {hit['fields']['category']}\n{hit['fields']['chunk_text']}"
            for hit in search_results['result']['hits']
        ]
        end_time = time.time()

        print(f"Time taken: {end_time - start_time:.4f} seconds")
        return context_docs
    except PineconeApiException as e:
        print(f"Search failed: {e}")
        return []


# Example: ask_query("For lab 0, It says that we have to make a temporary file, does this imply we should delete the file before exiting helloworld.c?lab0", debug=True)


In [34]:
import openai
from openai import OpenAI
import os

In [35]:
# Build the OpenAI client used by generate_response. load_dotenv() runs first
# so that OPENAI_API_KEY can be read through os.getenv.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY") 
client = OpenAI(api_key=openai_api_key)

In [None]:
def generate_response(query, context_docs, model="gpt-3.5-turbo", temperature=0.3, max_tokens=500):
    """Ask the chat model to answer ``query`` using the retrieved documents.

    The documents are numbered and joined into a context section of the
    prompt. When no documents are supplied, the context says so explicitly
    instead of being left blank, so the model is not handed an empty context
    that it may answer from general knowledge.

    Args:
        query: The user's question.
        context_docs: List of retrieved passages (strings); may be empty.
        model: Chat model name passed to the completions call.
        temperature: Sampling temperature passed to the completions call.
        max_tokens: Completion length limit passed to the completions call.

    Returns:
        The text content of the first choice in the response.
    """
    start_time = time.time()

    # Prepare context from retrieved documents
    if context_docs:
        context = "\n\n".join(
            f"Document {position}:\n{doc}"
            for position, doc in enumerate(context_docs, start=1)
        )
    else:
        # Retrieval can legitimately come back empty (e.g. the search failed).
        context = "(No documents were retrieved.)"

    # Create prompt with context and query
    prompt = f""" You are an AI assistant for a computer science course. Use the following retrieved documents to answer the question. If you don't know the answer, just say that you don't know, don't try to make up an answer. Context:
    {context}
    Question: {query}
    Answer:
    """
    # Call OpenAI API
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant for a computer science course."},
            {"role": "user", "content": prompt}
        ],
        temperature=temperature,
        max_tokens=max_tokens
    )
    elapsed_time = time.time() - start_time
    print(f"Time taken: {elapsed_time:.4f} seconds")
    return response.choices[0].message.content

def intake_question(query):
    """Answer ``query`` end to end: retrieve context, then generate a reply.

    Retrieval runs with debug output turned off.

    Args:
        query: The user's question.

    Returns:
        Whatever ``generate_response`` returns for the question and the
        retrieved context.
    """
    retrieved_docs = ask_query(query, False)
    answer = generate_response(query, retrieved_docs)
    return answer





In [40]:
import time

# Sample run
# Reads a question interactively, answers it through intake_question, and
# reports the wall-clock time of that single call as "Latency".
query = input("Enter question:")
print("question", query)

start_time = time.time()
response = intake_question(query)
end_time = time.time()

print(response)
elapsed_time = end_time - start_time
print(f"Latency: {elapsed_time:.4f} seconds")
# query = input("Enter question:")

question what topics should i focus on to study for exam 2?
Based on the retrieved documents, the topics that you should focus on to study for exam 2 include:
- Isolation and protection
- Kernel organization
- Virtual Memory Management (VMM)
- Interrupts, concurrency, and scheduling
- User and kernel threading
- Operating Systems (OS) and networking
- Security
- File systems
- Atomicity and ordering
- Distributed systems

It is important to review lecture recordings, actively listen, take notes, and solve examples presented in the lectures. The exam will be conceptual in nature, prompting you to consider OS design alternatives and argue for or against them. The best way to prepare is to understand the mechanisms and tradeoff spaces introduced in class without needing to look them up during the exam.

Please refer to the lecture recordings and course materials for detailed explanations and examples related to these topics.
Latency: 3.1046 seconds


In [43]:
import time

# Sample run
# Same interactive flow as the earlier sample-run cell: read a question,
# answer it through intake_question, and report the call's wall-clock latency.
query = input("Enter question:")
print("question", query)

start_time = time.time()
response = intake_question(query)
end_time = time.time()

print(response)
elapsed_time = end_time - start_time
print(f"Latency: {elapsed_time:.4f} seconds")

question in lab2, when should i call lab2_report_pagefault?
Search failed: (429)
Reason: Too Many Requests
HTTP response headers: HTTPHeaderDict({'Date': 'Fri, 25 Apr 2025 20:02:36 GMT', 'Content-Type': 'text/plain; charset=utf-8', 'Content-Length': '313', 'Connection': 'keep-alive', 'x-envoy-upstream-service-time': '1917', 'server': 'envoy'})
HTTP response body: {"error":{"code":"RESOURCE_EXHAUSTED","message":"{\"error\":{\"code\":\"RESOURCE_EXHAUSTED\",\"message\":\"Request failed. You've reached the max tokens 5000000 for your project for model llama-text-embed-v2 this month. To use the Inference API further please upgrade your plan.\"},\"status\":429}"},"status":429}

You should call `lab2_report_pagefault` whenever a page fault occurs during the execution of your code in lab2. This function is typically used to report and handle page faults in operating system or memory management related tasks.
Latency: 2.9279 seconds


In [8]:
# CLEAN UP: USE TO DELETE INDEX
# Deletes the index named by index_name. Run this only when the index is no
# longer needed; the index-creation cell creates it again when it is missing.
pc.delete_index(index_name)