In [6]:
import os
from openai import OpenAI
import requests
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document, ServiceContext
# from llama_index.llms.openai import OpenAI

# Ensure the llama_index package is installed
# %pip install llama_index

# ---------------------------
# 1. Setup API Keys & Config
# ---------------------------

# Read the OpenAI key from the environment so a real secret never has to be
# pasted into (and saved with) the notebook. The placeholder string is kept as
# a fallback so the cell still runs unchanged when the variable is not set.
api_key = os.environ.get("OPENAI_API_KEY", "OPEN_AI KEY")
client = OpenAI(api_key=api_key)

# Brave Search key, resolved the same way.
# brave_search() below is only a stub, but later cells reuse this value as the
# subscription token for real Brave requests.
BRAVE_API_KEY = os.environ.get("BRAVE_API_KEY", "YOUR_BRAVE_API_KEY")


# ----------------------------------------
# 3. Optional: External Search (Brave) Demo
# ----------------------------------------

def brave_search(query: str, api_key: str = BRAVE_API_KEY) -> str:
    """
    Stand-in for an external Brave Search lookup.

    No network request is made: ``query`` and ``api_key`` are accepted but
    never used, and the same canned snippet is returned on every call.

    Illustrative sketch of what a real integration might look like
    (not exercised here; endpoint name and response schema are unverified):

        response = requests.get(
            BRAVE_SEARCH_URL,
            params={"q": query, "key": api_key}
        )
        response_data = response.json()
        top_snippets = [item["snippet"] for item in response_data["organic_results"]]
        combined_text = " ".join(top_snippets)
        return combined_text
    """
    # The two fragments are concatenated verbatim to form the simulated answer.
    canned_snippets = [
        "According to Brave Search, React and Vue are among the top front-end frameworks in 2025. ",
        "Many developers also mention SvelteKit as an emerging technology...",
    ]
    return "".join(canned_snippets)

# ---------------------------------------------------------
# 4. Orchestration: Combine RAG + External Tool + LLM Call
# ---------------------------------------------------------
def get_sources(user_goal: str) -> str:
    """
    Ask the chat model for a list of free, open-access learning sources.

    The answer is streamed: every text fragment is echoed to stdout as it
    arrives and also accumulated, so the caller receives the complete answer.

    Args:
        user_goal: What the user wants to learn; inserted into the prompt.

    Returns:
        The full text of the model's reply (empty string if nothing was
        streamed).
    """
    system_prompt = """
        You are an AI assistant that generates open source and free access sources. 
        The user has a specific goal. You have some local resources and possibly external data. 
        Combine them to produce a list of open source sources that will allow the users to learn about the user goal.
        """

    context_for_model = f"""

    USER GOAL: {user_goal}
    """

    user_prompt = """
    Please produce around 10 sources that are open source, free, and not books
    Combine them to produce a list of sources for someone to find information
    """

    # Echo the exact prompt parts so the notebook output records what was sent.
    print("calling openai...")
    print("system prompt: ", system_prompt)
    print("context for model: ", context_for_model)
    print("user prompt: ", user_prompt)
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": context_for_model + user_prompt},
        ],
        stream=True,
    )

    # The stream can only be consumed once, so collect the text while printing
    # it; returning the stream object itself would hand back an exhausted
    # iterator rather than the promised string.
    collected = []
    for chunk in response:
        if not chunk.choices:
            # Some chunks carry no choices at all; skip them instead of indexing.
            continue
        piece = chunk.choices[0].delta.content
        if piece is not None:
            print(piece, end="")
            collected.append(piece)

    return "".join(collected)
        
    

def generate_roadmap(user_goal: str, sources: list = None) -> str:
    """
    Ask the chat model for a structured, week-by-week learning roadmap.

    The reply is streamed: each fragment is printed as it arrives and the
    complete text is returned once the stream ends.

    Args:
        user_goal: What the user wants to learn; inserted into the prompt.
        sources: Optional list of sources to mention to the model. When it is
            omitted or empty, the prompt contains only the user goal.

    Returns:
        The full roadmap text produced by the model.
    """
    system_prompt = """
    You are an AI assistant that generates a structured learning roadmap. 
    The user has a specific goal. You have some local resources and possibly external data. 
    Combine them to produce a week-by-week (or step-by-step) plan with recommendations.
    """

    # Context handed to the model; kept short in this demo to limit token use.
    context_for_model = f"""

    USER GOAL: {user_goal}
    """
    if sources:
        # Only mention sources when the caller supplied some, so the prompt of
        # a sources-less call stays exactly as before.
        context_for_model += f"SOURCES: {sources}\n    "

    user_prompt = """
    Please produce a structured learning roadmap with milestones, resources, and any additional tips
    based on the context provided. 
    Format it as a week-by-week guide (or step-by-step) with recommended resources.
    """

    # Echo the exact prompt parts so the notebook output records what was sent.
    print("calling openai...")
    print("system prompt: ", system_prompt)
    print("context for model: ", context_for_model)
    print("user prompt: ", user_prompt)
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": context_for_model + user_prompt},
        ],
        stream=True,
    )

    # Print fragments as they arrive and keep them, so the function actually
    # returns the text its signature promises.
    collected = []
    for chunk in response:
        if not chunk.choices:
            # Some chunks carry no choices at all; skip them instead of indexing.
            continue
        piece = chunk.choices[0].delta.content
        if piece is not None:
            print(piece, end="")
            collected.append(piece)

    return "".join(collected)

# ----------------------------------------
# 5. Demonstrate the System in Action
# ----------------------------------------

# Simulate a user who wants to understand currency markets.
user_input = "I want to learn how currency markets work"

print('generating roadmap...')
# No curated sources exist at this point in the notebook, so pass an empty
# list explicitly; the roadmap text is streamed to the output by the call.
generate_roadmap(user_input, sources=[])
# print("=== GENERATED LEARNING ROADMAP ===\n")
# print(roadmap)

generating roadmap...
calling openai...
system prompt:  
    You are an AI assistant that generates a structured learning roadmap. 
    The user has a specific goal. You have some local resources and possibly external data. 
    Combine them to produce a week-by-week (or step-by-step) plan with recommendations.
    
context for model:  

    USER GOAL: I want to learn how currency markets work
    
user prompt:  
    Please produce a structured learning roadmap with milestones, resources, and any additional tips
    based on the context provided. 
    Format it as a week-by-week guide (or step-by-step) with recommended resources.
    
### Learning Roadmap: Understanding Currency Markets

---

#### Overview
This roadmap is designed to help you learn the fundamental concepts, mechanics, and strategies involved in currency markets over eight weeks. Each week will involve focused learning objectives, key milestones, resources, and additional tips.

---

### Week 1: Introduction to Currency

In [119]:
from openai import OpenAI
import re
import requests

import os

# `OPENAIKEY` is not defined anywhere in this notebook, so referencing it
# directly raises NameError on a fresh kernel. Honour it if the session happens
# to provide it; otherwise read the key from the OPENAI_API_KEY environment
# variable so no secret has to live in the notebook.
KEY = globals().get("OPENAIKEY") or os.environ.get("OPENAI_API_KEY")
if not KEY:
    raise RuntimeError("No OpenAI key found: set the OPENAI_API_KEY environment variable.")

client = OpenAI(api_key=KEY)

# Inputs shared by the source-gathering and roadmap cells below.
user_input = "I want to learn about distillation and how it might affect AI market space"
background = "Financial Analyist"

In [124]:
def get_sources(user_goal: str, background: str) -> list:
    """
    Ask the chat model for free learning sources and parse them into a list.

    The reply is streamed and echoed to stdout while being accumulated. The
    accumulated Markdown is then scanned for ``**Title**`` lines followed by a
    ``- **Source:** name`` line, and each pair becomes one ``"title - source"``
    entry.

    Args:
        user_goal: What the user wants to learn; inserted into the prompt.
        background: The user's background; inserted into the prompt so the
            model can judge which material is suitable.

    Returns:
        A list of ``"title - source"`` strings, in the order they appeared in
        the reply. The list is empty when the reply does not follow the
        requested format.
    """
    system_prompt = """
        You are an AI assistant that generates open source and free access sources. 
        The user has a specific goal. You have some local resources and possibly external data. 
        Combine them to produce a list of open source sources that will allow the users to learn about the user goal.
        Format the response strictly in the following Markdown structure:

        **Title of Source**  
        - **Source:** Name of Source
        - **Link:** URL (if available)
        """

    context_for_model = f"""
    USER GOAL: {user_goal}
    USER BACKGROUND: {background}
    """

    user_prompt = """
    Please produce around 10 sources that are open source, free, and not books.
    Combine them to produce a list of sources for someone to find information.
    Only return papers if the user has the background to understand them.
    """

    # Echo the exact prompt parts so the notebook output records what was sent.
    print("calling openai...")
    print("system prompt: ", system_prompt)
    print("context for model: ", context_for_model)
    print("user prompt: ", user_prompt)

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": context_for_model + user_prompt},
        ],
        temperature=0.7,
        stream=True
    )

    fragments = []
    for chunk in response:
        if not chunk.choices:
            # Some chunks carry no choices at all; skip them instead of indexing.
            continue
        piece = chunk.choices[0].delta.content
        if piece is not None:
            fragments.append(str(piece))
            print(piece, end="")
    text_response = "".join(fragments)

    # Title line, then the "- **Source:**" bullet on the following line.
    # The source may be terminated by a newline OR by the end of the reply, so
    # a final entry that is not followed by a newline is still captured.
    pattern = r"\*\*(.*?)\*\*.*?\n\s*-\s*\*\*Source:\*\*\s*(.*?)\s*(?:\n|$)"
    matches = re.findall(pattern, text_response)

    formatted_results = [f"{title.strip()} - {source.strip()}" for title, source in matches]

    final_result = "\n".join(formatted_results)
    print(final_result)

    return formatted_results

In [125]:
# Collect the parsed "title - source" entries for the configured goal and background.
source_list = get_sources(user_goal=user_input, background=background)

calling openai...
system prompt:  
        You are an AI assistant that generates open source and free access sources. 
        The user has a specific goal. You have some local resources and possibly external data. 
        Combine them to produce a list of open source sources that will allow the users to learn about the user goal.
        Format the response strictly in the following Markdown structure:

        **Title of Source**  
        - **Source:** Name of Source
        - **Link:** URL (if available)
        
context for model:  
    USER GOAL: I want to learn about distillation and how it might affect AI market space
    USER BACKGROUND: Financial Analyist
    
user prompt:  
    Please produce around 10 sources that are open source, free, and not books.
    Combine them to produce a list of sources for someone to find information.
    Only return papers if the user has the background to understand them.
    
**Understanding Distillation in Machine Learning**  
- **Source:**

In [126]:
# Bare expression: shows the list returned by get_sources as this cell's output.
source_list

['Understanding Distillation in Machine Learning - Distill.pub',
 'Knowledge Distillation: A Survey - arXiv',
 'The Future of AI: Distilling Knowledge from Large Models - Google AI Blog',
 'Model Compression and Efficiency: An Overview - Towards Data Science',
 'Distillation: A Key to the AI Revolution - Medium',
 'The Role of Distillation in AI Market Trends - MIT Technology Review',
 'Understanding Distillation: Implications for AI Development - OpenAI Blog',
 'What is Knowledge Distillation? - Analytics Vidhya',
 'Impact of Model Distillation on AI Efficiency - Towards AI',
 'A Comparative Study of Knowledge Distillation Techniques - ResearchGate']

In [211]:
import threading
import queue


def update_markdown_with_url(query, url):
    """
    Append ``(url)`` to every line of ``roadmap.md`` that contains ``query``.

    The file is read from the current working directory and rewritten only
    when at least one line matches. Leading indentation of a matching line is
    preserved; trailing whitespace is replaced by `` (url)`` plus a newline.

    Args:
        query: Text to look for in each line. An empty query is ignored,
            because the empty string is contained in every line.
        url: URL to append, in parentheses, to each matching line.

    Returns:
        None. A missing ``roadmap.md`` is treated as "nothing to update".
    """
    if not query:
        return

    try:
        with open("roadmap.md", "r") as file:
            lines = file.readlines()
    except FileNotFoundError:
        # The roadmap has not been written yet, so there is nothing to annotate.
        return

    if not any(query in line for line in lines):
        # Avoid truncating and rewriting the file when nothing would change.
        return

    with open("roadmap.md", "w") as file:
        for line in lines:
            if query in line:
                # rstrip (not strip) keeps the indentation of nested list items.
                line = line.rstrip() + f" ({url})\n"
            file.write(line)

In [174]:

# 

# def get_url_sync(query):
#     """Calls an external search API synchronously to fetch a relevant URL and saves it to a file."""
#     url = f"https://api.search.brave.com/res/v1/web/search?q={query}&count=3&summary=true"
#     headers = {
#         "Accept": "application/json",
#         "X-Subscription-Token": API_KEY
#     }
    
#     response = requests.get(url, headers=headers)
#     time.sleep(0.2)
#     if response.status_code == 200:
#         data = response.json()
#         results = data.get("web", {}).get("results", [])
#         final_url = results[0]["url"] if results else "No URL found"
#         print(f"Fetched URL: {final_url}\n")
        
#         with open("geturls.txt", mode="a") as f:
#             f.write(f"{final_url}\n")
#         update_markdown_with_url(query, final_url)
#         return final_url
#     else:
#         error_message = f"Error fetching URL: {response.status_code} {response.text}"
#         print(error_message)
        
#         with open("geturls.txt", mode="a") as f:
#             f.write(f"{error_message}\n")
        
#         return "Error fetching URL"

In [212]:
import time
import requests
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI

# Token sent as the X-Subscription-Token header in get_url_sync; it reuses the
# BRAVE_API_KEY value assigned in the first setup cell, which must have run first.
API_KEY = BRAVE_API_KEY

def embed_text(text):
    """
    Embed ``text`` with OpenAI's ``text-embedding-ada-002`` model.

    A fresh client is constructed from the module-level ``KEY`` on each call.

    Returns:
        The embedding of the first item in the API response, as a NumPy array.
    """
    embedding_client = OpenAI(api_key=KEY)
    embedding_response = embedding_client.embeddings.create(
        input=text,
        model="text-embedding-ada-002",
    )
    first_item = embedding_response.data[0]
    return np.array(first_item.embedding)

def get_url_sync(query):
    """
    Look up ``query`` on Brave Search and return the best-matching result URL.

    Up to three results are requested. The query and each result's
    "title description" text are embedded with ``embed_text``; the result
    whose embedding has the highest cosine similarity to the query wins. The
    chosen URL is appended to ``geturls.txt`` and written into ``roadmap.md``
    via ``update_markdown_with_url``.

    Args:
        query: Free-text search query. It is sent as a request parameter, so
            characters such as ``&``, ``#`` or ``?`` are escaped correctly.

    Returns:
        The best-matching URL, ``"No URL found"`` when the search returned no
        usable result, or ``"Error fetching URL"`` when the request failed or
        the API answered with a non-200 status.
    """
    try:
        response = requests.get(
            "https://api.search.brave.com/res/v1/web/search",
            # Let requests build the query string instead of pasting the raw
            # query into the URL, which breaks on reserved characters.
            params={"q": query, "count": 3, "summary": "true"},
            headers={
                "Accept": "application/json",
                "X-Subscription-Token": API_KEY,
            },
            # Without a timeout a stalled connection would block the caller forever.
            timeout=10,
        )
    except requests.RequestException as exc:
        error_message = f"Error fetching URL: {exc}"
        print(error_message)
        with open("geturls.txt", mode="a") as f:
            f.write(f"{error_message}\n")
        return "Error fetching URL"

    # Short pause after every request (kept from the original implementation;
    # presumably to stay under the API rate limit).
    time.sleep(0.2)

    if response.status_code != 200:
        error_message = f"Error fetching URL: {response.status_code} {response.text}"
        print(error_message)

        with open("geturls.txt", mode="a") as f:
            f.write(f"{error_message}\n")

        return "Error fetching URL"

    data = response.json()
    # Ignore results that carry no URL; they cannot be returned anyway.
    results = [result for result in data.get("web", {}).get("results", []) if result.get("url")]

    if not results:
        return "No URL found"

    query_embedding = embed_text(query)
    # Title and description are optional in practice, so fall back to "".
    result_texts = [
        f"{result.get('title') or ''} {result.get('description') or ''}"
        for result in results
    ]
    result_urls = [result["url"] for result in results]

    # Embed each result
    result_embeddings = np.array([embed_text(text) for text in result_texts])

    # Compute cosine similarity between the query and every candidate
    similarities = cosine_similarity([query_embedding], result_embeddings)[0]
    best_index = np.argmax(similarities)
    best_url = result_urls[best_index]

    print(f"Best Matching URL: {best_url}\n")

    with open("geturls.txt", mode="a") as f:
        f.write(f"{query}:     {best_url}\n")
    update_markdown_with_url(query, best_url)

    return best_url


In [213]:

def url_fetch_worker(url_queue, result_dict):
    """
    Consume search queries from ``url_queue`` until a ``None`` sentinel arrives.

    For every query the URL returned by ``get_url_sync`` is stored in
    ``result_dict[query]``. A failing lookup is reported and recorded as
    ``"Error fetching URL"`` instead of terminating the worker, so the
    remaining queries are still processed.

    Args:
        url_queue: ``queue.Queue`` of query strings, terminated by ``None``.
        result_dict: Mapping filled in place with ``query -> url``.
    """
    while True:
        query = url_queue.get()
        try:
            if query is None:
                break  # Stop thread gracefully
            try:
                result_dict[query] = get_url_sync(query)
            except Exception as exc:
                # One bad lookup must not kill the thread and strand the
                # queries queued behind it.
                print(f"Error fetching URL for {query!r}: {exc}")
                result_dict[query] = "Error fetching URL"
        finally:
            # Mark every retrieved item (including the sentinel) as done so a
            # caller using url_queue.join() cannot block forever.
            url_queue.task_done()

In [214]:

async def generate_roadmap(user_goal: str, background: str, sources: list) -> str:
    """
    Stream a step-by-step learning roadmap and persist it to ``roadmap.md``.

    The model's reply is printed as it arrives and appended to ``roadmap.md``
    in the current directory. Whenever a completed
    ``**Resource:** [name](link)`` entry is seen, ``name`` is handed to a
    background thread running ``url_fetch_worker``, which looks up a URL for
    it. The function returns only after that thread has finished.

    Although declared ``async``, the body contains no ``await``: the OpenAI
    call and the streaming loop run synchronously.

    Args:
        user_goal: What the user wants to learn.
        background: The user's background, used to pitch the roadmap.
        sources: Sources the model is asked to build on; interpolated into
            the prompt as-is.

    Returns:
        The complete roadmap text as streamed by the model.
    """
    system_prompt = """
    You are an AI assistant that generates a structured learning roadmap. 
    The user has a specific goal. You have some local resources and possibly external data. 
    Combine them to produce a step-by-step plan with recommendations.
    Format each step with:
    - **Objective:** The learning goal for this step.
    - **Resource:** A recommended article or video (from the provided sources).
    - **Tip:** An additional useful tip for better understanding.
    """

    context_for_model = f"""
    USER GOAL: {user_goal}
    """

    user_prompt = f"""
    Please produce a structured learning roadmap with objectives, resources, and additional tips
    based on the context provided. Use the sources in this list {sources} as base sources.
    Each step should have a clear objective, a resource (website or video), and a tip.
    Make sure the roadmap is useful for someone with a {background} background and within their capabilities.
    """

    print("Calling OpenAI...")
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": context_for_model + user_prompt},
        ],
        temperature=0.7,
        stream=True
    )

    def _append_to_markdown(text):
        """Append ``text`` to roadmap.md; a no-op for empty text."""
        if text:
            with open("roadmap.md", mode="a") as md_file_append:
                md_file_append.write(text)

    return_text = ""   # everything streamed so far (the return value)
    para_text = ""     # text since the last resource match
    md_text = ""       # text not yet written to roadmap.md
    url_queue = queue.Queue()
    result_dict = {}
    worker_thread = threading.Thread(target=url_fetch_worker, args=(url_queue, result_dict))
    worker_thread.start()

    # NOTE(review): the worker rewrites roadmap.md while this loop appends to
    # it; the two are not synchronised, so an append that lands between the
    # worker's read and write can be lost. A lock shared with
    # update_markdown_with_url would be needed to close that window.
    try:
        counter = 1
        for chunk in response:
            counter += 1
            if not chunk.choices:
                # Some chunks carry no choices at all; skip them instead of indexing.
                continue
            step_text = chunk.choices[0].delta.content
            if step_text is None:
                continue

            return_text += step_text
            para_text += step_text
            md_text += step_text
            print(step_text, end="")

            # Extract resource dynamically
            match = re.search(r"\*\*Resource:\*\* \[(.*?)\]\((.*?)\)", para_text)

            # Write in batches of chunks, and always before queueing a lookup:
            # the worker searches roadmap.md for the resource name, so that
            # line has to be on disk first.
            if match or counter % 20 == 0:
                _append_to_markdown(md_text)
                md_text = ""

            if match:
                para_text = ""  # Reset collected text for the next step
                resource_name = match.group(1)
                print(f"\nFetching URL for: {resource_name}")
                url_queue.put(resource_name)  # Add query to queue for async processing
    finally:
        # Always release the worker, even if streaming failed; otherwise the
        # thread would block on the queue forever.
        url_queue.put(None)  # Stop signal for worker thread
        worker_thread.join()  # Ensure all URLs are fetched before returning

    # Write whatever arrived after the last flush. Done after the worker has
    # stopped so this final append cannot collide with one of its rewrites.
    _append_to_markdown(md_text)

    return return_text


In [215]:
import asyncio

if __name__ == "__main__":
    # Build the coroutine first, then drive it to completion on a new event loop.
    # NOTE(review): asyncio.run() refuses to start when an event loop is already
    # running in this thread — confirm this works in the target kernel.
    roadmap_coroutine = generate_roadmap(user_input, background, source_list)
    roadmap = asyncio.run(roadmap_coroutine)
    print("\nGenerated Roadmap:\n", roadmap)

Calling OpenAI...
### Learning Roadmap: Understanding Distillation and Its Impact on the AI Market

---

#### **Step 1: Introduction to Distillation in Machine Learning**
- **Objective:** Understand the basic concept of distillation and its relevance in machine learning.
- **Resource:** [What is Knowledge Distillation? - Analytics Vidhya](https://www.analyticsvidhya.com/blog/2020/11/what-is-knowledge-distillation-in-machine-learning/)

Fetching URL for: What is Knowledge Distillation? - Analytics Vidhya
- **Tip:** Take notes on key terms and definitions as they will be crucial for understanding more advanced concepts later.

---

#### **Step 2: Deep Dive into Knowledge Distillation Techniques**
- **Objective:** Explore the different techniques and methodologies used in knowledge distillation.
- **Resource:** [Knowledge Distillation: A Survey - arXiv](https://arxiv.org/abs/2003.09103)

Fetching URL for: Knowledge Distillation: A Survey - arXiv
- **Tip:** Focus on the diagrams and exampl