In [None]:
!pip install -q google-generativeai
import google.generativeai as genai
import json
import os
import re
import time
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor
from typing import List, Dict, Callable,Optional
from google.colab import userdata
from typing import List, Dict, Optional, Any,Tuple

# Configure the Gemini client from the Colab secret store.
# On any failure API_KEY is set to None so that later code (the rate limiter,
# the model-listing helper) can detect that the client is not configured.
try:
    API_KEY = userdata.get('GOOGLE_API_KEY')
    genai.configure(api_key=API_KEY)
    print("Google Generative AI Configured Successfully.")
except userdata.SecretNotFoundError:
    # The message must name the secret that is actually looked up above.
    print("ERROR: Secret 'GOOGLE_API_KEY' not found.")
    print("Please add your Gemini API Key to Colab Secrets.")
    # Optionally, raise an error or exit if the key is critical
    API_KEY = None  # Set API_KEY to None to indicate failure
except Exception as e:
    print(f"An error occurred during genai configuration: {e}")
    API_KEY = None

# Model used by llm_call when the caller does not pass one explicitly.
DEFAULT_MODEL_NAME = "gemini-2.0-flash-thinking-exp-1219"


# Safety settings used by llm_call when the caller supplies none: one entry
# per harm category, every category sharing the same blocking threshold.
DEFAULT_SAFETY_SETTINGS = [
    {"category": harm_category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# --- Rate Limiter Class ---
class RateLimiter:
    """Sliding-window throttle for API calls.

    Keeps the wall-clock timestamps of recent calls and blocks (via
    ``time.sleep``) once ``max_calls`` of them fall inside the last
    ``time_period`` seconds. No locking is performed here.
    """

    def __init__(self, max_calls=8, time_period=60):
        """Store the window limits and start with an empty call history."""
        self.max_calls = max_calls
        self.time_period = time_period
        self.call_timestamps = []
        self.total_calls = 0
        print(f"Rate Limiter Initialized: Max {self.max_calls} calls per {self.time_period} seconds.")

    def _drop_expired(self, now):
        """Keep only the timestamps younger than ``time_period`` relative to ``now``."""
        window = timedelta(seconds=self.time_period)
        self.call_timestamps = [
            stamp for stamp in self.call_timestamps if now - stamp < window
        ]

    def wait_if_needed(self):
        """Sleep until a call slot is free, record the call, return the running total.

        Raises:
            ValueError: if the module-level ``API_KEY`` is falsy.
        """
        if not API_KEY:
            raise ValueError("Cannot make API calls: Gemini API Key not configured.")

        now = datetime.now()
        self._drop_expired(now)

        if len(self.call_timestamps) >= self.max_calls:
            # The window frees up when its oldest recorded call ages out.
            slot_free_at = min(self.call_timestamps) + timedelta(seconds=self.time_period)
            delay = (slot_free_at - now).total_seconds()

            if delay > 0:
                print(f"\n[Rate Limiter]: Limit reached ({len(self.call_timestamps)}/{self.max_calls} calls in last {self.time_period}s). Waiting {delay:.1f} seconds...")
                time.sleep(delay + 0.5)  # small safety margin on top of the computed wait

            # Re-evaluate the window against the clock after the (possible) sleep.
            self._drop_expired(datetime.now())

        # Register the call that is about to be made.
        self.call_timestamps.append(datetime.now())
        self.total_calls += 1

        return self.total_calls

# --- Instantiate the Rate Limiter (global for this script/cell)
# Built with the class defaults (8 calls per 60 seconds); llm_call routes
# every request through this single shared instance.
limiter = RateLimiter()


def _response_text_or_empty(response: Any) -> str:
    """Return ``response.text``, or "" when the accessor is absent or raises.

    ``hasattr(response, 'text')`` is not a sufficient guard: ``hasattr`` only
    swallows ``AttributeError``, while the SDK's quick accessor signals
    "no usable parts" by raising ``ValueError``.
    """
    try:
        return response.text or ""
    except (AttributeError, ValueError, IndexError):
        return ""


def llm_call(
    prompt: str,
    chat_history: Optional[List[Dict[str, Any]]] = None,
    system_prompt: str = "",
    model: Optional[str] = None,
    temperature: float = 0.1,
    safety_settings: Optional[List[Dict]] = None
) -> str:
    """Send one rate-limited request to Gemini and return the reply as text.

    Args:
        prompt: The new user message.
        chat_history: Optional earlier turns; each item must be a dict with
            'role' and 'parts' keys. The prompt is appended as a final
            'user' turn.
        system_prompt: Optional system instruction; an empty string means none.
        model: Model name; falls back to DEFAULT_MODEL_NAME when falsy.
        temperature: Sampling temperature passed to the generation config.
        safety_settings: Safety settings; falls back to
            DEFAULT_SAFETY_SETTINGS when falsy.

    Returns:
        The generated text, or a bracketed status string:
        "[ERROR: ...]", "[BLOCKED DUE TO PROMPT: ...]",
        "[BLOCKED CONTENT DUE TO: ...]", "[GENERATION STOPPED: ...]" or
        "[EMPTY RESPONSE]".

    Raises:
        ValueError: from ``limiter.wait_if_needed()`` when no API key is
            configured (that call happens outside the try block).
    """
    call_number = limiter.wait_if_needed()  # Wait if necessary and get call number

    model_to_use = model if model else DEFAULT_MODEL_NAME
    effective_safety_settings = safety_settings if safety_settings else DEFAULT_SAFETY_SETTINGS

    print(f"\n--- LLM Call #{call_number} ---")
    print(f"Model: {model_to_use}")

    try:
        model_instance = genai.GenerativeModel(
            model_name=model_to_use,
            system_instruction=system_prompt if system_prompt else None,
            safety_settings=effective_safety_settings
        )

        # Configure generation
        generation_config = genai.types.GenerationConfig(
            temperature=temperature
        )

        # Prepare the input: either the bare prompt or history + new user turn.
        generation_input: Any
        if chat_history:
            if not isinstance(chat_history, list) or not all(
                isinstance(item, dict) and 'role' in item and 'parts' in item
                for item in chat_history
            ):
                print(f"ERROR (Call #{call_number}): Invalid chat_history format. Expected List[Dict[str, Any]] with 'role' and 'parts'.")
                return "[ERROR: Invalid chat_history format]"

            # Build a new list so the caller's history is not mutated.
            generation_input = chat_history + [{"role": "user", "parts": [prompt]}]
            print(f"Using chat history with {len(chat_history)} previous turns.")
        else:
            generation_input = prompt
            print("Using single prompt (no chat history).")

        response = model_instance.generate_content(
            generation_input,
            generation_config=generation_config
        )

        print(f"--- Response Received (Call #{call_number}) ---")

        # --- Token usage (informational only) ---
        try:
            if hasattr(response, 'usage_metadata') and response.usage_metadata:
                prompt_tokens = response.usage_metadata.prompt_token_count
                completion_tokens = response.usage_metadata.candidates_token_count
                total_tokens = response.usage_metadata.total_token_count
                print(f"Token Usage (Call #{call_number}): Prompt={prompt_tokens}, Completion={completion_tokens}, Total={total_tokens}")
            else:
                print(f"Token Usage (Call #{call_number}): Not available in response.")
        except AttributeError:
            print(f"Token Usage (Call #{call_number}): 'usage_metadata' attribute not found in response object.")

        # --- Response handling ---
        response_text = ""
        finish_reason = None

        # 1. Prompt-level block reported via prompt_feedback.
        if hasattr(response, 'prompt_feedback') and response.prompt_feedback.block_reason:
            block_reason = response.prompt_feedback.block_reason.name  # Get enum name
            print(f"WARN (Call #{call_number}): Call blocked by API based on prompt. Reason: {block_reason}")
            return f"[BLOCKED DUE TO PROMPT: {block_reason}]"

        # 2. Candidate-level finish reason and content.
        if hasattr(response, 'candidates') and response.candidates:
            candidate = response.candidates[0]  # Only the first candidate is inspected

            raw_finish_reason = getattr(candidate, 'finish_reason', None)
            if raw_finish_reason is not None:
                # Fall back to str() if the value is not an enum member.
                finish_reason = getattr(raw_finish_reason, 'name', str(raw_finish_reason))

            # The SDK's "unspecified" member is named FINISH_REASON_UNSPECIFIED;
            # the short form is kept for backward compatibility.
            normal_finish_reasons = ('STOP', 'FINISH_REASON_UNSPECIFIED', 'UNSPECIFIED', None)
            if finish_reason not in normal_finish_reasons:
                print(f"WARN (Call #{call_number}): Generation stopped prematurely. Reason: {finish_reason}")
                if hasattr(candidate, 'safety_ratings') and candidate.safety_ratings:
                    safety_ratings_str = ", ".join(
                        f"{r.category.name}: {r.probability.name}" for r in candidate.safety_ratings
                    )
                    print(f"Candidate Safety Ratings: {safety_ratings_str}")
                    if finish_reason == 'SAFETY':
                        return f"[BLOCKED CONTENT DUE TO: {finish_reason}. Ratings: {safety_ratings_str}]"
                    return f"[GENERATION STOPPED: {finish_reason}. Ratings: {safety_ratings_str}]"
                return f"[GENERATION STOPPED: {finish_reason}]"

            # 3. Concatenate the text of every part of the candidate.
            content = getattr(candidate, 'content', None)
            parts = getattr(content, 'parts', None)
            if parts:
                response_text = "".join(part.text for part in parts if hasattr(part, 'text'))

        # Fallback: the top-level quick accessor, read defensively so that a
        # response without usable parts ends up as "[EMPTY RESPONSE]" below
        # instead of surfacing as an "[ERROR: ValueError ...]".
        if not response_text:
            response_text = _response_text_or_empty(response)

        # 4. Final check and return
        if response_text:
            return response_text

        print(f"WARN (Call #{call_number}): Received no text content from API, and no explicit block/stop reason identified.")
        return "[EMPTY RESPONSE]"

    except Exception as e:
        # Boundary handler: callers receive an error string rather than an exception.
        print(f"ERROR (Call #{call_number}) during Gemini API call: {e}")
        import traceback
        traceback.print_exc()  # Print full traceback for better debugging
        return f"[ERROR: {type(e).__name__} - {e}]"


# Cell-load confirmation: echoes the model llm_call uses when none is passed.
print(f"llm_call function defined. Default model: {DEFAULT_MODEL_NAME}")
def extract_xml(text: str, tag: str) -> str:
    """
    Extracts the content of the first ``<tag>...</tag>`` pair found in the text.

    Matching is case-insensitive, spans newlines, and is non-greedy (stops at
    the first closing tag). The tag name is treated literally: regex
    metacharacters in it are escaped. Opening tags that carry attributes are
    not matched.

    Args:
        text (str): The text containing the XML.
        tag (str): The XML tag to extract content from.

    Returns:
        str: The whitespace-stripped content of the tag, or an empty string
             if the text is empty or the tag is not found.
    """
    if not text:
        return ""
    # Escape the tag so names such as "a.b" cannot act as a regex pattern.
    safe_tag = re.escape(tag)
    match = re.search(rf'<{safe_tag}>(.*?)</{safe_tag}>', text, re.DOTALL | re.IGNORECASE)
    return match.group(1).strip() if match else ""

Google Generative AI Configured Successfully.
Rate Limiter Initialized: Max 8 calls per 60 seconds.
llm_call function defined. Default model: gemini-2.0-flash-thinking-exp-1219


In [None]:
import google.generativeai as genai
from typing import List, Dict, Optional, Any # Assuming these are already imported


def list_available_models() -> List[Dict[str, Any]]:
    """
    Collect and print the Gemini models that advertise 'generateContent'.

    Returns:
        List[Dict[str, Any]]: One dictionary per matching model (name,
                               display name, description, version, token
                               limits, supported generation methods).
                               An empty list when the API key is not set or
                               the listing call raises.
    """
    if not API_KEY:
        print("ERROR: Cannot list models. Google API Key not configured.")
        return []

    print("\n--- Listing Available Gemini Models ---")
    try:
        # Only models usable through generate_content are of interest here.
        catalog = [
            {
                "name": entry.name,
                "display_name": entry.display_name,
                "description": entry.description,
                "version": entry.version,
                "input_token_limit": getattr(entry, 'input_token_limit', "N/A"),
                "output_token_limit": getattr(entry, 'output_token_limit', "N/A"),
                "supported_generation_methods": entry.supported_generation_methods,
            }
            for entry in genai.list_models()
            if 'generateContent' in entry.supported_generation_methods
        ]

        if not catalog:
            print("No models found supporting 'generateContent' or an issue occurred.")
        else:
            print(f"Found {len(catalog)} models supporting 'generateContent':")
            for position, details in enumerate(catalog, start=1):
                print(f"  {position}. Name: {details['name']}")
                print(f"     Display Name: {details['display_name']}")
                print(f"     Input Tokens: {details['input_token_limit']}, Output Tokens: {details['output_token_limit']}")

    except Exception as e:
        print(f"ERROR: Could not list models. Reason: {e}")
        import traceback
        traceback.print_exc()
        return []  # Nothing is returned when the listing fails part-way

    return catalog

# --- Example of how to call this function ---
# Runs only when executed as the main module (true for a notebook cell).
if __name__ == "__main__":
    # Ensure API key is configured before calling
    if API_KEY: # Make sure API_KEY is set from your setup
        # The function prints its own summary; the returned list is kept in a
        # global so it can be inspected afterwards.
        available_models = list_available_models()
        # Optional dump of every collected field, left disabled:
        # if available_models:
        #     print("\n--- Full Model Details (Python List) ---")
        #     for model_data in available_models:
        #         print(model_data)
    else:
        print("Please ensure your GOOGLE_API_KEY is set up in Colab Secrets or your environment.")


--- Listing Available Gemini Models ---
Found 45 models supporting 'generateContent':
  1. Name: models/gemini-1.0-pro-vision-latest
     Display Name: Gemini 1.0 Pro Vision
     Input Tokens: 12288, Output Tokens: 4096
  2. Name: models/gemini-pro-vision
     Display Name: Gemini 1.0 Pro Vision
     Input Tokens: 12288, Output Tokens: 4096
  3. Name: models/gemini-1.5-pro-latest
     Display Name: Gemini 1.5 Pro Latest
     Input Tokens: 2000000, Output Tokens: 8192
  4. Name: models/gemini-1.5-pro-001
     Display Name: Gemini 1.5 Pro 001
     Input Tokens: 2000000, Output Tokens: 8192
  5. Name: models/gemini-1.5-pro-002
     Display Name: Gemini 1.5 Pro 002
     Input Tokens: 2000000, Output Tokens: 8192
  6. Name: models/gemini-1.5-pro
     Display Name: Gemini 1.5 Pro
     Input Tokens: 2000000, Output Tokens: 8192
  7. Name: models/gemini-1.5-flash-latest
     Display Name: Gemini 1.5 Flash Latest
     Input Tokens: 1000000, Output Tokens: 8192
  8. Name: models/gemini-1.5-flas

## Uploading Resources

In [None]:
from google.colab import drive
drive.mount('/content/drive')
!pip install PyPDF2
!pip install python-docx
import PyPDF2
import docx
import csv
import os
import io
import sys
import time # Import the time module


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

# Optional dependencies: when a library cannot be imported its module name is
# bound to None, which the extraction helpers and the main loop test before
# attempting that file type.
try:
    import PyPDF2
except ImportError:
    print("WARNING: PyPDF2 library not found. PDF extraction will be disabled.")
    print("         Install it using: pip install PyPDF2")
    PyPDF2 = None

try:
    import docx
except ImportError:
    print("WARNING: python-docx library not found. DOCX extraction will be disabled.")
    print("         Install it using: pip install python-docx")
    docx = None

# --- Helper Functions (keep as they are) ---

def _extract_text_from_pdf(file_content: bytes) -> Optional[str]:
    """Extract the text of every page of a PDF held in memory.

    Args:
        file_content: Raw bytes of the PDF file.

    Returns:
        The page texts joined by newlines and stripped, or None when PyPDF2
        is unavailable, the file cannot be read or decrypted with an empty
        password, or no non-whitespace text was extracted.
    """
    if not PyPDF2:
        return None
    try:
        reader = PyPDF2.PdfReader(io.BytesIO(file_content))
        if reader.is_encrypted:
            # Only PDFs that open with an empty password are supported.
            try:
                if reader.decrypt('') == PyPDF2.PasswordType.NOT_DECRYPTED:
                    return None
            except Exception:
                return None

        page_texts = []
        for page in reader.pages:
            # Best effort: a page that fails to extract is skipped, not fatal.
            try:
                page_text = page.extract_text()
            except Exception:
                continue
            if page_text:
                page_texts.append(page_text)

        # Whitespace-only output counts as "no text", so the caller reports a
        # failed extraction instead of writing an empty file.
        text = "\n".join(page_texts).strip()
        return text or None
    except Exception:
        # Any read/parse failure (including PdfReadError) is reported as None.
        return None


def _extract_text_from_docx(file_content: bytes) -> Optional[str]:
    """Extract the paragraph text of a DOCX document held in memory.

    Args:
        file_content: Raw bytes of the .docx file.

    Returns:
        The non-empty paragraphs joined by newlines and stripped, or None
        when python-docx is unavailable, the document cannot be opened, or
        it contains no non-whitespace paragraph text.
    """
    if not docx:
        return None
    try:
        document = docx.Document(io.BytesIO(file_content))
        text = "\n".join(para.text for para in document.paragraphs if para.text).strip()
        # Whitespace-only paragraphs count as "no text" rather than as an
        # empty-string success.
        return text or None
    except Exception:
        # Any failure to open or parse the document is reported as None.
        return None


def _read_text_from_txt(file_content: bytes, filename_for_log: str) -> Optional[str]:
    # (Your existing TXT reading code - no changes needed here)
    try: text = file_content.decode('utf-8'); return text
    except UnicodeDecodeError:
        try: text = file_content.decode('latin-1'); return text
        except Exception: return None
    except Exception: return None


# --- Main Processing Function ---

def extract_and_copy_text_files(input_dir: str, output_dir: str, sync_wait_seconds: float = 5) -> List[str]:
    """Convert every .pdf/.docx/.txt file under ``input_dir`` into a UTF-8 .txt file.

    The input tree is walked recursively; all outputs are written flat into
    ``output_dir`` (created if missing). When two inputs would map to the same
    output name within one run (e.g. ``a.pdf`` and ``a.txt``, or equal names
    in different subfolders), later ones get a numeric suffix (``a_1.txt``)
    instead of overwriting the earlier result.

    Args:
        input_dir: Directory to scan.
        output_dir: Directory receiving the .txt files.
        sync_wait_seconds: Pause after processing, intended to give a mounted
            drive time to sync. Values <= 0 skip the pause.

    Returns:
        Absolute paths of the files written successfully; an empty list when
        ``input_dir`` is not a directory.
    """
    if not os.path.isdir(input_dir):
        print(f"Error: Input directory not found or is not a directory: '{input_dir}'")
        return []

    os.makedirs(output_dir, exist_ok=True)
    print(f"Output directory: '{os.path.abspath(output_dir)}'")

    supported_extensions = (".pdf", ".docx", ".txt")
    successfully_processed_files = []
    claimed_output_names = set()  # output names already used during this run
    processed_count = 0
    skipped_count = 0

    print(f"\nScanning directory: '{os.path.abspath(input_dir)}'...")

    files_to_process = [
        os.path.join(root, filename)
        for root, _, files in os.walk(input_dir)
        for filename in files
    ]

    total_files = len(files_to_process)
    print(f"Found {total_files} potential files to process.")

    # --- Process each file ---
    for i, input_file_path in enumerate(files_to_process):
        filename = os.path.basename(input_file_path)
        base_name, file_extension = os.path.splitext(filename)
        file_extension = file_extension.lower()

        if file_extension not in supported_extensions:
            continue  # Unsupported types are ignored and not counted as skipped

        # Use relative path for potentially deeply nested files for cleaner logs
        try:
            relative_path = os.path.relpath(input_file_path, input_dir)
        except ValueError:
            relative_path = input_file_path
        print(f"\nProcessing file {i+1}/{total_files}: {relative_path}")

        if file_extension == ".pdf" and not PyPDF2:
            print("    - Skipping PDF: PyPDF2 library not available.")
            skipped_count += 1
            continue
        if file_extension == ".docx" and not docx:
            print("    - Skipping DOCX: python-docx library not available.")
            skipped_count += 1
            continue

        try:
            with open(input_file_path, 'rb') as f:
                file_content = f.read()
        except IOError as e:
            print(f"    - Error reading file: {e}")
            skipped_count += 1
            continue
        except Exception as e:
            print(f"    - Unexpected error reading file: {e}")
            skipped_count += 1
            continue

        full_text = None
        if file_extension == ".pdf":
            full_text = _extract_text_from_pdf(file_content)
        elif file_extension == ".docx":
            full_text = _extract_text_from_docx(file_content)
        elif file_extension == ".txt":
            full_text = _read_text_from_txt(file_content, filename)

        if full_text is None:
            print(f"    - Text extraction or reading failed for: {filename}")
            skipped_count += 1
            continue

        # Pick an output name nobody else in this run has used yet, so inputs
        # sharing a stem do not silently overwrite each other.
        output_filename = base_name + ".txt"
        suffix = 1
        while os.path.normcase(output_filename) in claimed_output_names:
            output_filename = f"{base_name}_{suffix}.txt"
            suffix += 1
        if suffix > 1:
            print(f"    - Name collision: '{base_name}.txt' already written in this run; using '{output_filename}'")
        claimed_output_names.add(os.path.normcase(output_filename))
        output_file_path = os.path.join(output_dir, output_filename)

        try:
            print(f"    Attempting to write to: {output_filename}")
            with open(output_file_path, 'w', encoding='utf-8') as f_out:
                f_out.write(full_text.strip())
                f_out.flush()  # Flush Python's internal buffer
                os.fsync(f_out.fileno())  # Ask OS to sync to disk (Drive mount)
            action = "Extracted and saved" if file_extension != ".txt" else "Processed"
            print(f"    -> {action} to: {output_filename} (Write operation completed)")
            successfully_processed_files.append(os.path.abspath(output_file_path))
            processed_count += 1
        except IOError as e:
            print(f"    - Error writing output file '{output_filename}': {e}")
            skipped_count += 1
        except Exception as e:
            print(f"    - Unexpected error writing output file '{output_filename}': {e}")
            skipped_count += 1

    print(f"\n--- Processing Complete ---")
    print(f"Successfully processed files reported by script: {processed_count}")
    print(f"Skipped/Failed files:       {skipped_count}")
    print(f"---------------------------")
    if sync_wait_seconds > 0:
        print("INFO: Waiting a few seconds for potential Drive sync...")
        time.sleep(sync_wait_seconds)  # Final delay to allow background sync
        print("INFO: Wait finished.")

    return successfully_processed_files

# --- Example Usage (Keep as is) ---
# Runs only when executed as the main module (true for a notebook cell).
# Leaves INPUT_DIRECTORY, OUTPUT_DIRECTORY and created_files defined as globals.
if __name__ == "__main__":
    INPUT_DIRECTORY = r"/content/drive/MyDrive/Research Paper - Learning Agent"
    OUTPUT_DIRECTORY = r"/content/drive/MyDrive/txt_files"

    # Abort the cell early when the source folder is missing.
    if not os.path.isdir(INPUT_DIRECTORY):
        print(f"Error: Input directory not found: '{INPUT_DIRECTORY}'")
        sys.exit(1)

    # Absolute paths of the .txt files the conversion reports as written.
    created_files = extract_and_copy_text_files(INPUT_DIRECTORY, OUTPUT_DIRECTORY)

    if created_files:
        print("\nList of created/processed TXT files reported by script:")
        # Check existence on disk *after* the final delay
        print("Verifying file existence in output directory:")
        actual_files_found = 0
        for f_path in created_files:
             exists = os.path.exists(f_path)
             print(f"- {os.path.relpath(f_path, OUTPUT_DIRECTORY)} (Exists: {exists})")
             if exists:
                 actual_files_found += 1
        print(f"\nVerification complete: {actual_files_found} files found in output directory.")

    else:
        print("\nNo TXT files were created or processed according to script logs.")


Output directory: '/content/drive/MyDrive/txt_files'

Scanning directory: '/content/drive/MyDrive/Research Paper - Learning Agent'...
Found 1 potential files to process.

Processing file 1/1: 2104.08691v2 (1).pdf
    Attempting to write to: 2104.08691v2 (1).txt
    -> Extracted and saved to: 2104.08691v2 (1).txt (Write operation completed)

--- Processing Complete ---
Successfully processed files reported by script: 1
Skipped/Failed files:       0
---------------------------
INFO: Waiting a few seconds for potential Drive sync...
INFO: Wait finished.

List of created/processed TXT files reported by script:
Verifying file existence in output directory:
- 2104.08691v2 (1).txt (Exists: True)

Verification complete: 1 files found in output directory.


In [None]:
def extract_text_from_txt_file(file_path: str) -> Optional[str]:
    """Read a UTF-8 text file and return its full content.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file content, or None (after printing an error) when the path
        does not exist or the file cannot be read or decoded.
    """
    try:
        if os.path.exists(file_path):
            with open(file_path, 'r', encoding='utf-8') as handle:
                return handle.read()

        # Path check failed: report it and signal the failure with None.
        print(f"ERROR: File not found at path: {file_path}")
        return None
    except FileNotFoundError:
        # Reached only if the file vanishes between the check and the open.
        print(f"ERROR: File not found: {file_path}")
        return None
    except Exception as e:
        print(f"ERROR: Could not read file {file_path}. Reason: {e}")
        return None



In [None]:
# Full text of the first converted file; None if it could not be read.
resource_text = (extract_text_from_txt_file(created_files[0]))

In [None]:
len(resource_text)

63856

In [None]:
created_files

['/content/drive/MyDrive/txt_files/2104.08691v2 (1).txt']

In [None]:

# NOTE(review): despite the name, this holds the absolute path of the first
# converted .txt file, not a title.
book_name = (created_files[0])

In [None]:
created_files

['/content/drive/MyDrive/txt_files/2104.08691v2 (1).txt']

## Syllabus Generation

In [None]:
# System prompt for syllabus creation/modification driven purely by the
# conversation history (no external resources). It asks for 2-5 phases of
# 2-4 lessons each and for the result to be wrapped in <syllabus> tags.
# The text contains no format placeholders.
syllabus_generator_system_prompt = """
You are an AI Syllabus Creator. Your input is a conversation history detailing a user's request for a learning plan. Your task is to analyze this entire conversation and generate or modify a syllabus based on the discussion.

**Core Instructions:**

1.  **Analyze Conversation History:**
    *   Carefully read the entire conversation history provided as context.
    *   Identify the primary learning topic, the user's stated experience level (e.g., beginner, intermediate, advanced in the subject or prerequisites), their explicit learning goals, preferred learning style (if mentioned), and any constraints.
    *   If a previous syllabus version (enclosed in `<syllabus>` tags) exists in the history, note it and any subsequent user feedback regarding it.

2.  **Mode of Operation:**
    *   **Modification:** If a recent syllabus version and specific user feedback on it are present in the conversation history, your primary goal is to modify that syllabus according to the feedback. Ensure changes are targeted and address the user's requests.
    *   **Creation:** If no prior syllabus exists or if the request is for a new one, create a syllabus from scratch based on the synthesized information from the conversation (topic, experience, goals).

3.  **Syllabus Structure and Content Design:**
    *   **Logical Phasing:** Organize the syllabus into 2 to 5 distinct learning phases. Each phase should represent a major stage in the learning progression.
    *   **Lessons per Phase:** Within each phase, define 2 to 4 specific lessons or topics.
    *   **Progressive Order:** Ensure that phases and lessons are arranged in a logical, progressive order, building complexity and knowledge incrementally.
    *   **Detailed Lesson Information:** For each lesson, provide the following details:
        *   `Topic`: A clear, concise title for the lesson.
        *   `Keywords`: A list of 3-5 key terms or concepts central to the lesson.
        *   `Objective`: A brief statement (1-2 sentences) describing what the learner should be able to do or understand after completing the lesson.
        *   `Focus`: A short description (1-2 sentences) of the main emphasis or key takeaways for that lesson.

4.  **Output Format:**
    *   Enclose the *entire* final syllabus structure within `<syllabus>` and `</syllabus>` XML tags.
    *   **Output ONLY the syllabus structure within these tags.** Do not include any other conversational text, explanations, or apologies before or after the `<syllabus>` block.

5.  **Adherence and Quality:**
    *   Strictly follow all analysis and formatting instructions.
    *   Base the syllabus *entirely* on the information gleaned from the conversation history. Do not introduce external topics or assumptions not supported by the dialogue.
    *   Ensure the syllabus is coherent, practical, and tailored to the user's expressed needs.
"""

In [None]:
# Prompt template for syllabus creation/modification that embeds the raw text
# of user-provided resources. `{resource_information_placeholder}` is the only
# brace field in the text; it is described in the prompt as a JSON object with
# `collection_type` and `resources_data` (items: `identifier`, `content`).
# NOTE(review): presumably filled via str.format or str.replace by the caller —
# confirm, since any literal braces added later would break str.format.
# NOTE(review): the opening paragraph ends with an unpunctuated sentence that
# the "Crucial Context" paragraph repeats; consider tidying the wording.
SYLLABUS_PROMPT_WITH_RAW_TEXT_COLLECTION = """
You are an AI Syllabus Creator. Your main task is to generate or modify a syllabus, incorporating insights directly from user-provided raw text learning resources. For your information the
resources won't be provided to other agents
**Crucial Context for Your Analysis:**
It is vital for you to understand that the raw text learning resources provided below are **exclusive to you** for this syllabus creation process. These resources **will not be shared with, nor will they have been pre-processed or summarized by, any other AI agents** in the system. Therefore, your thorough, independent, and comprehensive analysis of the full content of these raw texts is paramount for successfully creating a relevant and well-structured syllabus.


**You will analyze the following information:**

1.  **Conversation History:**
    *   This will be provided to you as a separate conversational context. It contains the ongoing dialogue with the user.
    *   From this history, carefully extract: the primary learning topic, the user's stated experience level, their explicit learning goals, preferred learning style (if mentioned), any constraints, and any prior syllabus versions or feedback.

2.  **User-Provided Raw Text Resources (JSON Object):**
    *   The raw text content of user-provided resources is given directly below, within the placeholder. It's a JSON object.
    *   The JSON object will have a `collection_type` field, which will be "raw_text_collection".
    *   It will also have a `resources_data` field, which is a list. **Each item in this list is an object representing a single resource and will contain:**
        *   `identifier`: A name for the resource (e.g., filename).
        *   `content`: The **full raw textual content** of that resource.
    *   If `resources_data` is empty (though `collection_type` would likely be "none" in that case from the calling script), proceed based on conversation history alone.

Resource Information (JSON Object containing raw texts):
{resource_information_placeholder}

**Core Instructions for Syllabus Generation/Modification using Raw Text Resources:**

3.  **Analyze Raw Text Resources (from the placeholder above):**
    *   For each resource object in the `resources_data` list, thoroughly analyze its raw `content`.
    *   Identify key topics, main concepts, definitions, examples, explanations, and the overall structure or flow of information within each raw text.
    *   Determine which parts of each resource are most relevant to the user's learning goals and current focus as identified from the Conversation History.

4.  **Mode of Operation:**
    *   **Modification:** If a recent syllabus and user feedback exist in the Conversation History, modify that syllabus. Directly integrate relevant information and concepts extracted from the provided raw text resources to address the feedback.
    *   **Creation:** Otherwise, create a new syllabus. Base it on the user's needs from the Conversation History, using the content of the raw text resources to structure topics, define lessons, and provide depth.

5.  **Syllabus Structure and Content Design:**
    *   **Direct Integration of Raw Text Insights:**
        *   When designing phases and lessons, draw directly from the topics and information present in the provided raw text resources. For example, if a resource details three key steps for a process, that could become a lesson or part of one.
        *   Ensure the syllabus remains primarily aligned with the user's explicit goals and experience level stated in the Conversation History. Use the raw text to *fulfill* these goals, not to deviate significantly unless the text offers a clearly superior path that still meets the core objectives.
    *   **Logical Phasing:** Organize into 2-5 distinct learning phases.
    *   **Lessons per Phase:** 2-4 specific lessons/topics per phase.
    *   **Progressive Order:** Ensure logical, incremental progression.
    *   **Detailed Lesson Information:** For each lesson:
        *   `Topic`: Clear title (potentially inspired by resource headings or sections).
        *   `Keywords`: 3-5 central terms (many might come directly from the raw text).
        *   `Objective`: What the learner will understand/do (informed by what the raw text explains).
        *   `Focus`: Main emphasis/takeaways (highlighting key points from the raw text).

6.  **Output Format:**
    *   Enclose the *entire* final syllabus structure within `<syllabus>` and `</syllabus>` XML tags.
    *   **Output ONLY the syllabus structure within these tags.** No other text.

7.  **Adherence and Quality:**
    *   Strictly follow instructions.
    *   The syllabus should be a practical learning plan based on the Conversation History, directly leveraging the content of the provided raw text resources.
    *   Produce a coherent and tailored syllabus.
"""

In [None]:
# System prompt for syllabus generation when user resources are supplied as a
# mixed/summarized collection. `generate_syllabus_from_chat` selects it when the
# combined resource text exceeds its size threshold and fills the single
# `{resource_information_placeholder}` field via `str.format`, so the text must
# not contain any other literal curly braces.
# NOTE(review): the "Crucial Context" paragraph says the resources are raw texts
# that no other agent has summarized, while the caller on this path passes the
# output of `summarize_single_resource_dynamically` -- confirm which is intended.
syllabus_generator_system_prompt_heavy_resources = """
You are an AI Syllabus Creator. Your main task is to generate or modify a syllabus, potentially incorporating insights from user-provided learning resources.
**Crucial Context for Your Analysis:**
It is vital for you to understand that the raw text learning resources provided below are **exclusive to you** for this syllabus creation process. These resources **will not be shared with, nor will they have been pre-processed or summarized by, any other AI agents** in the system. Therefore, your thorough, independent, and comprehensive analysis of the full content of these raw texts is paramount for successfully creating a relevant and well-structured syllabus.


**You will analyze the following information:**

1.  **Conversation History:**
    *   This will be provided to you as a separate conversational context. It contains the ongoing dialogue with the user.
    *   From this history, carefully extract: the primary learning topic, the user's stated experience level, their explicit learning goals, any constraints, and any prior syllabus versions or feedback.

2.  **User-Provided Resource Information (JSON Object):**
    *   This information is provided directly below, within the placeholder. It's a JSON object describing any resources the user has supplied.
    *   The JSON object will have a `collection_type` field (e.g., "raw_text_collection", "json_summary_collection", "mixed_resource_collection", or "none").
    *   It will also have a `resources_data` field, which is a list. Each item in this list is an object describing a single resource and will have its own `type` field:
        *   If `type` is "raw_content": The object will contain an "identifier" (e.g., filename) and "content" (the raw text of that resource).
        *   If `type` is "json_summary": The object will be a structured summary (e.g., with "resource_identifier", "primary_topics_relevant_to_conversation", "core_concepts_relevant_to_conversation", etc.).
    *   If `collection_type` is "none" or `resources_data` is empty, proceed based on conversation history alone.

Resource Information (JSON Object):
{resource_information_placeholder}

**Core Instructions for Syllabus Generation/Modification:**

3.  **Analyze Resource Information (if provided in the placeholder):**
    *   For each item in the `resources_data` list:
        *   If it's "raw_content", analyze the raw text to identify key topics, concepts, structure, and any parts particularly relevant to the user's goals from the Conversation History.
        *   If it's "json_summary", analyze the structured summary, paying attention to the topics, concepts, and contextual notes already extracted.
    *   Synthesize insights from ALL provided resources.

4.  **Mode of Operation:**
    *   **Modification:** If a recent syllabus and user feedback exist in the Conversation History, modify that syllabus. Integrate feedback and relevant insights from any provided Resource Information.
    *   **Creation:** Otherwise, create a new syllabus. Base it on the user's needs from the Conversation History, enriched and structured by insights from any provided Resource Information.

5.  **Syllabus Structure and Content Design:**
    *   **Integration:**
        *   If Resource Information is provided and relevant: Intelligently weave the topics, concepts, and structural ideas from the resources (raw text or summaries) into the syllabus. The syllabus should reflect the valuable content of the resources.
        *   Ensure the syllabus remains primarily aligned with the user's explicit goals and experience level stated in the Conversation History. If resources suggest a different direction, prioritize the user's conversational requests unless the resources clearly offer a better path that still meets the core goals.
    *   **Logical Phasing:** Organize into 2-5 distinct learning phases.
    *   **Lessons per Phase:** 2-4 specific lessons/topics per phase.
    *   **Progressive Order:** Ensure logical, incremental progression.
    *   **Detailed Lesson Information:** For each lesson:
        *   `Topic`: Clear title.
        *   `Keywords`: 3-5 central terms.
        *   `Objective`: What the learner will understand/do.
        *   `Focus`: Main emphasis/takeaways.

6.  **Output Format:**
    *   Enclose the *entire* final syllabus structure within `<syllabus>` and `</syllabus>` XML tags.
    *   **Output ONLY the syllabus structure within these tags.** No other text.

7.  **Adherence and Quality:**
    *   Strictly follow instructions.
    *   If Resource Information is used, ensure the syllabus reflects its content appropriately while prioritizing user requests. If no relevant resources are provided, base the syllabus entirely on the Conversation History.
    *   Produce a coherent, practical, and tailored syllabus.
"""

In [None]:
# Prompt template that asks the LLM to summarize ONE resource *in the context of
# the conversation* and to answer with a single JSON object.
# It is rendered with `str.format` by `summarize_single_resource_dynamically`,
# which supplies two fields:
#   - resource_identifier_placeholder       (appears twice in the template)
#   - learning_material_content_placeholder (the possibly truncated resource text)
# Because `str.format` is used, the template itself must not contain any other
# literal curly braces.
dynamic_resource_summarizer_prompt_template_v1 = """
You are an AI Resource Analyzer. Your task is to process the single provided 'Learning Material Excerpt'.
Your primary goal is to extract key information that is MOST RELEVANT to the ongoing 'Conversation History' provided below.
Pay special attention to any sections that appear to be a Table of Contents, chapter overview, or introduction, as these are highly valuable for understanding the resource's structure for syllabus planning.
The summary should help in creating a structured learning syllabus that directly addresses the user's current focus and needs as expressed in the conversation.

Output your analysis as a SINGLE JSON object containing the following keys:

*   "resource_identifier": (A string. Use the provided identifier: "{resource_identifier_placeholder}")
*   "primary_topics_relevant_to_conversation": (A list of strings. Identify the main topics or themes in this excerpt that directly relate to or support the topics being discussed in the 'Conversation History'.)
*   "core_concepts_relevant_to_conversation": (A list of strings. Extract key concepts, definitions, or fundamental ideas from this excerpt that are pertinent to the 'Conversation History'.)
*   "structure_or_progression_notes": (A string. Briefly describe the overall structure or flow of information in this excerpt, and note if this structure aligns well with the progression of the 'Conversation History'.)
*   "keywords_highlighted_by_conversation": (A list of strings. List important terminology from this excerpt, giving priority to keywords that have also appeared or are implied in the 'Conversation History'.)
*   "inferred_learning_objectives_for_current_focus": (A list of strings. Based on the 'Conversation History', what learning objectives from this excerpt would be most immediately beneficial or relevant to the user?)
*   "contextual_notes_for_syllabus": (A string. Provide specific observations on how this resource excerpt (or parts of it) could be directly used to address specific points, questions, or learning goals raised in the 'Conversation History'. Note any parts of the excerpt that seem less relevant to the current discussion.)

Ensure the output is ONLY the valid JSON object.

Provided Resource Identifier: {resource_identifier_placeholder}


Learning Material Excerpt to Analyze:
{learning_material_content_placeholder}
"""
def summarize_single_resource_dynamically(
    resource_content: str,
    resource_identifier: str,
    conversation_history: List[Dict[str, Any]],
    max_length: int = 100000 # Assuming your 100k char limit per resource
) -> Optional[Dict[str, Any]]:
    """
    Summarize one resource with an LLM, in the context of the conversation.

    Args:
        resource_content: Full text of the resource. Only the first
            `max_length` characters are sent to the model.
        resource_identifier: Label for the resource (e.g. a filename). It is
            embedded in the prompt and in every fallback result.
        conversation_history: Chat turns passed to `llm_call` as `chat_history`.
        max_length: Maximum number of characters of `resource_content` to send.

    Returns:
        None when `resource_content` is empty or whitespace-only.
        Otherwise a dict. On success it is the JSON object produced by the
        model (it must contain "resource_identifier"). On any failure it is a
        fallback dict with the keys "resource_identifier", "raw_summary_text",
        "is_fallback" (True) and "error".
    """
    if not resource_content or not resource_content.strip():
        print(f"Skipping empty resource: {resource_identifier}")
        return None

    def _fallback(raw_text: str, error: str) -> Dict[str, Any]:
        # Single place that defines the shape of a degraded result.
        return {
            "resource_identifier": resource_identifier,
            "raw_summary_text": raw_text,
            "is_fallback": True,
            "error": error,
        }

    truncated_content = resource_content[:max_length]
    if len(resource_content) > max_length:
        print(f"INFO: Resource '{resource_identifier}' was truncated to {max_length} characters for dynamic summarization.")

    prompt_for_summarizer = dynamic_resource_summarizer_prompt_template_v1.format(
        resource_identifier_placeholder=resource_identifier,
        learning_material_content_placeholder=truncated_content
    )

    summary_json_str = llm_call(
        prompt=prompt_for_summarizer,
        chat_history=conversation_history,
        temperature=0.4
    )

    # Other callers in this file treat the llm_call result as possibly empty
    # and as possibly carrying an "[ERROR"/"[BLOCKED" sentinel; handle both
    # here instead of crashing on `.strip()` or mislabelling them as bad JSON.
    if not isinstance(summary_json_str, str) or not summary_json_str.strip():
        print(f"WARN: Dynamic summarizer for '{resource_identifier}' returned no text.")
        return _fallback("", "EmptyResponse")
    if summary_json_str.startswith(("[ERROR", "[BLOCKED")):
        print(f"WARN: LLM call failed for '{resource_identifier}': {summary_json_str[:200]}...")
        return _fallback(summary_json_str, "LLMCallFailed")

    # Strip an optional Markdown code fence (``` or ```json) around the JSON.
    cleaned_json_str = summary_json_str.strip()
    if cleaned_json_str.startswith("```json"):
        cleaned_json_str = cleaned_json_str[len("```json"):]
    elif cleaned_json_str.startswith("```"):
        cleaned_json_str = cleaned_json_str[len("```"):]
    if cleaned_json_str.endswith("```"):
        cleaned_json_str = cleaned_json_str[:-len("```")]
    cleaned_json_str = cleaned_json_str.strip()

    try:
        try:
            summary_data = json.loads(cleaned_json_str)
        except json.JSONDecodeError:
            # The model sometimes wraps the object in prose; retry on the
            # outermost {...} span before giving up.
            start = cleaned_json_str.find("{")
            end = cleaned_json_str.rfind("}")
            if not 0 <= start < end:
                raise
            summary_data = json.loads(cleaned_json_str[start:end + 1])
    except json.JSONDecodeError:
        print(f"WARN: Could not parse JSON from dynamic summary for '{resource_identifier}'. Raw output: {summary_json_str[:200]}...")
        return _fallback(summary_json_str, "JSONDecodeError")
    except Exception as e:
        print(f"ERROR: Unexpected error during dynamic summarization for '{resource_identifier}': {e}")
        return _fallback(str(e), str(type(e).__name__))

    # Basic validation: a usable summary is a JSON object that names its resource.
    if isinstance(summary_data, dict) and "resource_identifier" in summary_data:
        return summary_data

    print(f"WARN: Dynamic summarizer for '{resource_identifier}' produced non-standard JSON. Output: {summary_json_str[:200]}...")
    return _fallback(summary_json_str, "Non-standard JSON structure")

In [None]:
# Sample transcript (single attached paper) used to exercise the syllabus
# generator by hand. Each turn is a dict with a 'role' and a 'parts' list.
conversation_history = [
    {
        'role': 'model',
        'parts': ['What Hard thing You want to learn Today'],
    },
    {
        'role': 'user',
        'parts': ['I want to understand the math part of attention is all you need. iam very week at the math part and have also attached the paper'],
    },
    {
        'role': 'model',
        'parts': ['Are you beginner or intermediate in Math and ML'],
    },
    {
        'role': 'user',
        'parts': ['Iam ok with Basic math and im intermediate in ML/Transformers '],
    },
    {
        'role': 'model',
        'parts': ['Ok generating syallabus'],
    },
]

In [None]:
# Sample transcript for the multi-resource case (paper plus an RL text).
# Each turn is a dict with a 'role' and a 'parts' list.
conversation_history_heavy = [
    {
        'role': 'model',
        'parts': ['What Hard thing You want to learn Today'],
    },
    {
        'role': 'user',
        'parts': ['I want to understand the math part of attention is all you need and also provied a txt for learning RL. iam very week at the math part and have also attached the paper and book explain the mathmatical concepts involved in it'],
    },
    {
        'role': 'model',
        'parts': ['Are you beginner or intermediate in Math and ML'],
    },
    {
        'role': 'user',
        'parts': ['Iam ok with Basic math and im intermediate in ML/Transformers '],
    },
    {
        'role': 'model',
        'parts': ['Ok generating syallabus'],
    },
]

In [None]:
#k = summarize_single_resource_dynamically(conversation_history=conversation_history,resource_content=resource_text,resource_identifier=book_name,max_length = 200000)

In [None]:
def generate_syllabus_from_chat(
    conversation_history: List[Dict[str, Any]],
    resource_list: Optional[List[str]] = None,

    model_name: Optional[str] = None
) -> str:
    """
    Generate or modify a syllabus from the conversation, optionally using resources.

    When `resource_list` is given, every path is read with
    `extract_text_from_txt_file`. If the combined text is small enough it is
    embedded verbatim in the raw-text system prompt; otherwise each resource is
    first condensed with `summarize_single_resource_dynamically` and the
    summaries are embedded in the heavy-resources system prompt. In both cases
    the resources are serialized as JSON with the `collection_type` /
    `resources_data` layout that those prompts describe.

    Args:
        conversation_history: Chat turns passed to the LLM as `chat_history`.
        resource_list: Paths of text resources to incorporate. None or an empty
            list means the syllabus is based on the conversation alone.
        model_name: Currently unused; kept for interface compatibility.

    Returns:
        The inner text of the `<syllabus>` element on success. Otherwise a
        string starting with "[ERROR" or "[BLOCKED" that describes the failure.
    """
    generation_instruction_for_llm_prompt = "Generate or modify the syllabus based on the provided conversation history and system instructions ."

    # Above this many characters of combined raw text, resources are summarized
    # instead of being embedded verbatim.
    raw_text_char_budget = 70000

    active_system_prompt_str: str
    final_user_facing_prompt_str: str = generation_instruction_for_llm_prompt

    if resource_list:
        print(f"[System: Formatting {len(resource_list)} resource summaries into the system prompt.]")
        try:
            # Keep (basename, text) pairs in a list so that two files sharing a
            # basename do not overwrite each other.
            resources = [
                (os.path.basename(path), extract_text_from_txt_file(path))
                for path in resource_list
            ]
            total_chars = sum(len(text) for _, text in resources)

            if total_chars > raw_text_char_budget:
                summaries = []
                for name, text in resources:
                    summary = summarize_single_resource_dynamically(
                        text, name, conversation_history=conversation_history
                    )
                    # The summarizer returns None for empty resources; skip those.
                    if summary is not None:
                        summaries.append({"type": "json_summary", **summary})
                payload = {
                    "collection_type": "json_summary_collection",
                    "resources_data": summaries,
                }
                prompt_template = syllabus_generator_system_prompt_heavy_resources
            else:
                payload = {
                    "collection_type": "raw_text_collection",
                    "resources_data": [
                        {"type": "raw_content", "identifier": name, "content": text}
                        for name, text in resources
                    ],
                }
                prompt_template = SYLLABUS_PROMPT_WITH_RAW_TEXT_COLLECTION

            # The prompts announce a JSON object, so send real JSON rather than
            # a Python repr.
            active_system_prompt_str = prompt_template.format(
                resource_information_placeholder=json.dumps(payload, ensure_ascii=False, indent=2)
            )
        except (TypeError, KeyError) as e: # KeyError if placeholder is wrong
            print(f"ERROR: Could not format resource_list into system prompt: {e}")
            print("[System: Falling back to default syllabus generator prompt without resources.]")
            # Fallback to default prompt if formatting fails
            active_system_prompt_str = syllabus_generator_system_prompt
    else:
        print("There are no  Resources Provided")
        active_system_prompt_str = syllabus_generator_system_prompt

    # Make the LLM call
    llm_response = llm_call(
        prompt=final_user_facing_prompt_str,
        chat_history=conversation_history,
        system_prompt=active_system_prompt_str,
        temperature=1
    )

    if llm_response and not llm_response.startswith(("[ERROR", "[BLOCKED")):
        extracted_syllabus = extract_xml(llm_response, "syllabus")
        if extracted_syllabus:
            print("--- Syllabus Extracted Successfully (from LLM response) ---")
            return extracted_syllabus

        # extract_xml found nothing; accept a response that is exactly one
        # <syllabus>...</syllabus> element and unwrap it by hand.
        stripped_response = llm_response.strip()
        if stripped_response.startswith("<syllabus>") and stripped_response.endswith("</syllabus>"):
            print("--- Syllabus Tags Found Directly (LLM response) ---")
            return stripped_response[len("<syllabus>"):-len("</syllabus>")].strip()

        print("WARN: Could not extract <syllabus> tags from LLM response.")
        print(f"Full LLM Response was:\n{llm_response[:500]}...")
        return f"[ERROR: Syllabus tags not found in response: {llm_response[:100]}...]"

    # Empty response, or one carrying an "[ERROR"/"[BLOCKED" sentinel.
    error_prefix = "[ERROR" if not llm_response or llm_response.startswith("[ERROR") else "[BLOCKED"
    print(f"ERROR: Syllabus generation LLM call failed: {llm_response if llm_response else error_prefix + ': No response]'}")
    return llm_response if llm_response else f"{error_prefix}: No response or an issue occurred]"


In [None]:
#singlefile_syllabus= generate_syllabus_from_chat(conversation_history=conversation_history)

In [None]:
#multifile_syllabus = generate_syllabus_from_chat(conversation_history=conversation_history_heavy,resource_list=created_files)

In [None]:
#print(multifile_syllabus)

In [None]:
#print(singlefile_syllabus)

In [None]:

# System prompt for the "Conversation Manager" model that drives the syllabus
# negotiation chat (passed as `system_prompt` in `negotiate_syllabus_chat_dynamic`).
# The model replies either with plain conversational text or with exactly one
# control tag; the tags named here are the ones `get_intent` scans for:
#   <request_syllabus_generation/>, <request_syllabus_modification/>,
#   <request_finalization/>, <persona/>
convo_manager_system_prompt_explicit= """
You are a helpful AI assistant acting as a 'Conversation Manager'. Your primary role is to facilitate a conversation with the user to define requirements for a learning syllabus.

**Your Core Task:** Guide the user through discussing their needs (topic, experience, goals, style). You DO NOT generate the syllabus text itself. Instead, you use specific tags to signal when external actions (generation, modification, finalization, persona selection) are needed by the system.

**Strict Operating Rules:**

1.  **Conversational Turns:** Engage in natural, helpful conversation to gather information, ask for feedback, or ask about learning style. Your responses should be conversational text ONLY, *unless* a condition below requires a tag.
2.  **Tag-Only Turns:** When specific conditions are met (see below), your *entire response* MUST consist *solely* of the designated tag. **DO NOT add ANY other text, greetings, explanations, or punctuation in a response containing a tag.**

**Tag Trigger Conditions & Required Output:**

*   **Condition:** You have gathered sufficient initial information (topic, experience, goals) to request the *first* syllabus draft.
    *   **Required Output:** `<request_syllabus_generation/>`
*   **Condition:** The user has provided feedback on an *existing* syllabus (identified by `<syllabus>` tags in the chat history), and you understand the requested changes.
    *   **Required Output:** `<request_syllabus_modification/>`
*   **Condition:** The user explicitly confirms they are satisfied with the *most recent* syllabus presented.
    *   **Required Output:** `<request_finalization/>`
*   **Condition:** You have asked the user for their preferred learning style, and the user has provided their preference.
    *   **Required Output:** `<persona/>`

**Interaction Flow (Post-System Actions & User Responses):**

*   **After Syllabus Presentation:** The system will display a syllabus (enclosed in `<syllabus>` tags, originating from the 'model' role). In your *next conversational turn* (immediately following the syllabus display), your response MUST be natural language asking the user for feedback (e.g., "Here's the syllabus draft based on our discussion. How does it look? Are there any changes you'd like?"). **Do not output a tag here.**
*   **After Finalization Signal:** Immediately *after* you have outputted the `<request_finalization/>` tag, your *very next conversational turn* MUST be to ask the user about their preferred learning style (e.g., "Great, the syllabus is finalized! To help tailor the learning experience, could you tell me how you prefer to learn? For example, do you like detailed explanations, code examples, hands-on exercises, or a combination?"). **Do not output another tag here.**
*   **After Learning Style Response:** Immediately *after* the user responds with their preferred learning style, your *very next response* MUST be **ONLY** the tag: `<persona/>`. **Do not add conversational text here.**

**Crucial Constraints:**

*   **One Action Per Turn:** A single response turn can ONLY be *either* conversational text *or* a single tag, NEVER both.
*   **Wait for System/User:** After outputting a tag (`<request_syllabus_generation/>`, `<request_syllabus_modification/>`, `<request_finalization/>`, `<persona/>`), simply stop and wait for the next system action or user input as appropriate. Do not chain conversational text after a tag in the same turn.
*   **No Explanations with Tags:** Do not explain *why* you are outputting a tag in the same turn as the tag. For example, DO NOT output: "Okay, I have enough info now. `<request_syllabus_generation/>`". The correct output is JUST: `<request_syllabus_generation/>`.
"""

# Open question: should the conversation manager be told which tags it has
# already emitted?
#
# --- Intent Recognizer prompt ---
# LLM-based classifier prompt mapping a manager message to one of the codes
# GENERATE / MODIFY / FINALIZE / CONVERSE / PERSONA. It has a single
# `{model_response}` field meant to be filled via `str.format`.
# NOTE(review): `get_intent` below classifies by tag matching without an LLM
# call, so this prompt looks unused in this part of the file -- confirm.
intent_recognizer_prompt = """
Analyze the following model message. Respond with ONLY ONE of the following codes, based on the primary action signaled by the message:

*   `GENERATE`: If the message signals a request to generate a syllabus for the first time (e.g., contains `<request_syllabus_generation/>`).
*   `MODIFY`: If the message signals a request to modify an existing syllabus (e.g., contains `<request_syllabus_modification/>`).
*   `FINALIZE`: If the message signals that the user has confirmed the syllabus and finalization is requested (e.g., contains `<request_finalization/>`).
*   `CONVERSE`: If the message is purely conversational or asking for feedback, and does not contain an action tag.
*    `PERSONA`: If the message signals that the user has provided their preferred learning style (e.g., contains `<persona/>`).

Model Message:
{model_response}
"""

# NOTE: `generate_syllabus_from_chat` (defined earlier in this notebook) is reused unchanged.
# It takes the conversation history and uses `syllabus_generator_system_prompt` when no resources are provided.

def get_intent(model_response: str) -> str:
    """
    Map a Conversation Manager reply to an intent code by looking for control tags.

    Purely string-based; no LLM call is made. Tags are checked in a fixed
    order and the first one found wins. An empty reply, or one without any
    known tag, is treated as ordinary conversation.

    Returns:
        One of "GENERATE", "MODIFY", "FINALIZE", "PERSONA" or "CONVERSE".
    """
    if not model_response:
        return "CONVERSE"

    # Ordered (tag, intent) pairs; earlier entries take precedence.
    tag_intents = (
        ("<request_syllabus_generation/>", "GENERATE"),
        ("<request_syllabus_modification/>", "MODIFY"),
        ("<request_finalization/>", "FINALIZE"),
        ("<persona/>", "PERSONA"),
    )

    text = model_response.strip()
    for tag, intent in tag_intents:
        if tag in text:
            return intent

    return "CONVERSE"

# Clear the module-level transcript (drops the sample conversation defined
# earlier) and announce that the cell finished defining get_intent.
conversation_history = list()
print("Simplified get_intent function defined.")


Simplified get_intent function defined.


In [None]:


# Prompt for the "Persona Architect" model: given the preceding conversation,
# it must write a system prompt for the AI Tutor that ends with a short intro
# phrase followed by a syllabus placeholder.
# NOTE(review): the placeholder is written with doubled braces
# ({{SYLLABUS_SECTION}}). If this text is passed through `str.format`, the model
# will see single braces; if it is sent as-is, it will see the doubled form.
# Confirm that whatever later substitutes the syllabus matches the right form.
prompt_generation = """

Your Role: You are an AI Persona Architect. Your primary function is to craft detailed, effective, and engaging system prompts for AI Tutors based on user specifications derived from the preceding conversation history and awareness of accompanying learning materials (like a syllabus).

Your Goal: To generate a system prompt for an AI Tutor that accurately reflects the user's desired teaching style, personality, depth preferences, and subject matter discussed in the conversation. This generated prompt must conclude with a simple introductory phrase followed immediately by the {{SYLLABUS_SECTION}} placeholder, where the actual learning syllabus will be inserted later.

Context You Will Use:

Conversation History: Analyze the entire preceding conversation with the user. Pay close attention to their explicit requests and implicit preferences regarding:

Teaching style (e.g., enthusiastic, patient, rigorous, Socratic).

Personality influences (e.g., specific educators like Feynman/Karpathy, general traits like humorous/formal/serious).

Focus areas (e.g., intuition, practical code, theory, problem-solving).

Interaction dynamics (e.g., level of questioning, guidance vs. direct answers).

Desired adaptability and depth control (e.g., the "Levels of Explanation" idea).

Syllabus Mention: Infer from the conversation that a specific learning syllabus will be provided to the final AI Tutor.

Your Task:

Synthesize the user's requirements from the conversation history into a coherent and actionable system prompt for the target AI Tutor.

The Generated Prompt MUST Include (in this order):

Clear Persona Definition: Start with a concise statement defining the AI Tutor's name (create one like 'Synapse', 'GuideBot', 'LearnSpark' if none is suggested), its subject specialization (inferred from the conversation/syllabus mention), and its core mission.

Core Principles Section: Detail the fundamental aspects of the tutor's personality and teaching philosophy, directly reflecting the user's preferences identified in the conversation history. Use bullet points for clarity. Incorporate specifics like desired traits, inspirational figures (and how to emulate them), and key emphasis areas.

Teaching Approach / Methodology Section: Outline the specific methods the tutor should use. This must address:

Clarity and Explanation Style (e.g., analogies, first principles).

Interaction Style (e.g., probing questions, checks for understanding, hints).

Handling Depth (e.g., adaptive levels, gauging understanding, offering detail choices).

Practical Elements (e.g., code usage, examples, tools).

Guidance vs. Direct Answers balance.

Overall Goal Statement: Include a sentence summarizing the ultimate aim of the AI Tutor (e.g., "Your goal is to foster deep understanding...").

Syllabus Introduction and Placeholder (MANDATORY LAST ELEMENT): The generated prompt must end precisely with a simple introductory phrase like "Here is the syllabus we will follow:", followed immediately by the placeholder {{SYLLABUS_SECTION}}. There should be no text, formatting, or additional instructions after this placeholder. Example ending:

...Your ultimate goal is to make learning X an exciting and rewarding journey.

Here is the syllabus we will follow:
{{SYLLABUS_SECTION}}


(Ensure the phrasing is natural and leads directly into the syllabus content).

Instructions for You (The Persona Architect):

Infer and Synthesize: Base your generated prompt solely on the preceding conversation history. Extract the user's needs accurately.

Be Specific and Actionable: Translate user preferences into clear, direct instructions for the final AI Tutor in sections 1-4.

Cohesive Persona: Ensure all parts of the generated prompt (sections 1-4) work together to create a consistent and believable tutor persona.

Strict Final Structure: Adhere strictly to placing the simple introductory phrase and the {{SYLLABUS_SECTION}} placeholder as the absolute final elements of your output. Keep the intro phrase brief and direct.

Output Format: Produce only the final, complete system prompt for the AI Tutor, ending exactly with the introductory phrase and {{SYLLABUS_SECTION}}. Do not include any explanatory text before or after the generated prompt itself.
"""
# TODO: add worked examples to this prompt.

In [None]:
# System prompt for the first-pass resource overview shown to the Conversation
# Manager. Embedding the resource excerpts directly in the system prompt is the
# most accurate approach, but the prompt grows with the amount of resource text.
# The template is rendered with `str.format` and must contain the
# `{resource_excerpts_json_placeholder}` field exactly once: every occurrence is
# replaced by the full excerpt payload, so a second one doubles the prompt size.
INITIAL_SUMMARY_PROMPT = """You are an AI Resource Analyzer. Your task is to perform a detailed initial analysis of each provided learning resource excerpt. This analysis will be provided to another AI (a Conversation Manager) to give it a comprehensive understanding of the materials a user has supplied at the beginning of a conversation about creating a learning syllabus.

**Input You Will Receive:**
The user has provided the following truncated resource excerpts, formatted as a JSON object where keys are resource identifiers (e.g., filenames) and values are the truncated text content:
{resource_excerpts_json_placeholder}

**Your Task (based SOLELY on the excerpts provided above):**

For EACH resource excerpt provided in the JSON object above:

1.  **Identify the Resource:** Clearly state which resource you are analyzing (e.g., "For Resource 'file1.txt':" or "Analysis of 'chapter_intro.pdf_excerpt':").
2.  **Analyze in Depth:** Based *only* on the provided truncated excerpt:
    *   **Primary Subject & Main Topics:** What is the main subject matter of this resource? What are the key topics or themes introduced or discussed in this excerpt?
    *   **Key Concepts/Information:** What are some of the core concepts, arguments, definitions, or significant pieces of information presented in this excerpt?
    *   **Apparent Content Type/Style (Optional Inference):** Briefly, what does the style or content suggest this resource might be (e.g., "seems like an introduction to a technical textbook," "appears to be a research paper abstract and introduction," "reads like a practical tutorial with code examples")?
3.  **Format Your Output Clearly:**
    *   Present your analysis for each resource separately. Use clear headings or delimiters for each resource (e.g., using the resource identifier).
    *   Use natural language for your analysis.
    *   The overall output should be a single text block containing the analyses for all resources.

**Overall Goal for Your Output:**
Your output should allow the Conversation Manager to quickly grasp the nature and primary content of each individual resource the user has brought to the table. This is more detailed than a simple one-line summary per resource.

**Example Snippet of Expected Output Structure:**

"Okay, I've analyzed the provided resources:

**Resource: 'Intro_to_Python_Ch1.txt'**
This excerpt appears to be from an introductory chapter on Python programming.
*   Main Topics: Basic Python syntax, variables, data types (integers, strings), and the print() function.
*   Key Information: It explains what a variable is, shows examples of assigning values, and demonstrates how to output text to the console. The style is beginner-friendly.

**Resource: 'Advanced_Algorithms_Paper_Excerpt.pdf'**
This excerpt seems to be the abstract and introduction of a research paper on advanced algorithms.
*   Main Topics: It discusses a novel approach to [specific algorithmic problem], compares it to existing methods, and outlines the paper's contributions.
*   Key Information: It mentions concepts like [Algorithm X], [Complexity Class Y], and aims to prove [Theorem Z]. The language is formal and academic.
--- (and so on for other resources) ---"

**DO NOT:**
*   Attempt to create a syllabus.
*   Merge the analyses into a single paragraph; keep resource analyses distinct.
*   Provide an extremely brief, superficial summary (go into a bit more depth per resource as outlined above).
*   Output raw JSON as your final response (though you are parsing JSON input).
"""

def resources_intro(resource_list: list) -> Optional[str]:
    """
    Ask the LLM for an initial overview of the user's resources.

    Each path in `resource_list` is read with `extract_text_from_txt_file`,
    cut to a fixed excerpt length, keyed by its base filename and sent (as a
    JSON object) inside `INITIAL_SUMMARY_PROMPT`.

    Args:
        resource_list: Paths of the text resources to introduce.

    Returns:
        The `llm_call` response, or None when `resource_list` is empty or the
        prompt could not be built (TypeError/KeyError).
    """
    if not resource_list:
        return None

    # Only the beginning of each resource is sent for this first-pass overview.
    excerpt_char_limit = 20000

    print(f"[System: Formatting {len(resource_list)} resource summaries into the system prompt.]")
    try:
        excerpts = {
            os.path.basename(path): extract_text_from_txt_file(path)[:excerpt_char_limit]
            for path in resource_list
        }
        # Report what is being sent without dumping the full text to the console.
        for name, excerpt in excerpts.items():
            print(f"[System: Resource '{name}': sending {len(excerpt)} characters.]")

        # The prompt announces a JSON object, so serialize as JSON rather than
        # relying on the Python repr of the dict.
        llm_response = llm_call(
            prompt="Generate Summary Based on the  Provided Truncated Resources",
            chat_history=[],
            system_prompt=INITIAL_SUMMARY_PROMPT.format(
                resource_excerpts_json_placeholder=json.dumps(excerpts, ensure_ascii=False, indent=2)
            ),
        )
    except (TypeError, KeyError) as e:
        print(f"ERROR: Could not format resource_list into system prompt: {e}")
        print("[Couldn't Create Initial Summary.]")
        return None

    print(llm_response)
    return llm_response


In [None]:
# Prompt asking the model to produce ONE question that elicits the user's
# preferred learning style / tutor personality, based on the conversation
# history. It contains no format fields.
learningstyle_prompt = """ You are an AI assistant specializing in understanding user learning preferences.
Your goal is to formulate a concise and engaging question for the user. This question should prompt them to describe their preferred learning style and the kind of AI tutor personality they would find most effective for the subject matter discussed in the provided conversation history.

Instructions:
1. Analyze the complete conversation history provided. Pay close attention to the finalized syllabus (if available), the user's stated goals, and the subject they want to learn.
2. Based on this analysis, craft a single, clear question.
3. The question should encourage the user to provide specific details about their preferences, going beyond generic answers. For example, it could touch upon their preferred interaction style, content format (e.g., examples, theory, analogies), pace, or the kind of feedback they find helpful.
4. Ensure the question is phrased naturally and invites a thoughtful response.
5. Output ONLY the question itself.

"""
# This is just a sample prompt; the original author's note suggests temperature 1.

In [None]:
# Module-level list of resource paths; `negotiate_syllabus_chat_dynamic` reads
# this global when deciding whether to build an initial resource summary.
# `created_files` is defined elsewhere in the notebook (presumably the paths of
# the uploaded/extracted text files -- confirm).
resource_list = created_files

In [None]:
from typing import Optional, Tuple, List, Dict, Any # Ensure typing is imported

from typing import Optional, Tuple, List, Dict, Any # Ensure typing is imported

def negotiate_syllabus_chat_dynamic(verbose: bool = True) -> Optional[Tuple[str, List[Dict[str, Any]]]]:
    """Run the interactive syllabus negotiation loop on stdin/stdout.

    Every user turn is sent to the conversation manager (``llm_call`` with
    ``convo_manager_system_prompt_explicit``). The intent that ``get_intent``
    extracts from the manager's reply then drives the loop:

    * ``GENERATE`` / ``MODIFY``: build a syllabus with
      ``generate_syllabus_from_chat``, add it to the history and ask the
      manager to request feedback from the user.
    * ``FINALIZE``: freeze the current syllabus and ask the user about their
      preferred learning style (``learningstyle_prompt``).
    * ``PERSONA``: the learning style was captured; return the result. If no
      ``FINALIZE`` was seen, the most recently generated syllabus is used.
    * ``CONVERSE`` (or anything else): no extra action.

    The module-level ``resource_list`` is read; when it is non-empty a
    resource summary from ``resources_intro`` is added to the history right
    after the first user message.

    Args:
        verbose: When true, print the dialogue, system notices and debug
            information. Control flow does not depend on this flag.

    Returns:
        ``(final_syllabus_xml, conversation_history)`` once a persona tag is
        received and a syllabus exists; ``None`` if the user quits, stdin
        reaches EOF, or an error occurs during an AI turn.
    """
    conversation_history: List[Dict[str, Any]] = []
    current_syllabus_content: Optional[str] = None  # inner content, without the <syllabus> wrapper
    final_syllabus_xml: Optional[str] = None
    finalization_requested = False  # set once a FINALIZE intent has been honoured

    # The negotiation always starts fresh with a fixed greeting.
    greeting = "Hello! What topic are you interested in learning about today?"
    if verbose:
        print("\n--- Starting Syllabus Negotiation ---")
        print(f"AI: {greeting}")
    conversation_history.append({'role': 'model', 'parts': [greeting]})

    while True:
        # --- User turn ---
        try:
            # Strip so that " quit " is recognised and whitespace-only input is skipped.
            user_input = input("You: ").strip()
        except EOFError:
            if verbose:
                print("\nAI: Session ended by user (EOF).")
            return None

        if user_input.lower() in ("quit", "exit", "bye"):
            if verbose:
                print("AI: Okay, ending the syllabus planning as requested. Goodbye!")
            return None

        if not user_input:
            continue

        conversation_history.append({'role': 'user', 'parts': [user_input]})

        # Only on the first user message (history == greeting + this message):
        # add a summary of the provided resources.
        if len(conversation_history) <= 2 and resource_list:
            initial_summary = resources_intro(resource_list)
            # resources_intro can come back empty/None on failure; never store
            # a None part in the history.
            if initial_summary:
                conversation_history.append({'role': 'model', 'parts': [initial_summary]})

        # --- AI turn ---
        try:
            manager_response = llm_call(
                prompt=user_input,
                chat_history=conversation_history,
                system_prompt=convo_manager_system_prompt_explicit
            )

            stripped_response = manager_response.strip()
            # A reply consisting solely of a self-closing tag is a control signal.
            is_tag_only = stripped_response.startswith('<') and stripped_response.endswith('/>')
            if verbose:
                if is_tag_only:
                    print(f"[System: AI signaled {stripped_response}]")
                else:
                    print(f"AI: {manager_response}")

            conversation_history.append({'role': 'model', 'parts': [manager_response]})

            intent = get_intent(manager_response)
            if verbose:
                print(f"DEBUG: Recognized Intent: '{intent}'")

            if intent in ("GENERATE", "MODIFY"):
                action = "generation" if intent == "GENERATE" else "modification"
                if verbose:
                    print(f"[System: Requesting syllabus {action}...]")
                generated_content = generate_syllabus_from_chat(conversation_history, resource_list=resource_list)

                if generated_content and not generated_content.startswith("[ERROR"):
                    current_syllabus_content = generated_content
                    syllabus_xml_for_display = f"<syllabus>\n{current_syllabus_content}\n</syllabus>"
                    if verbose:
                        print(f"\n[System presenting syllabus requested by AI]\n{syllabus_xml_for_display}\n")
                    # 'parts' is a list here, like every other history entry.
                    conversation_history.append({'role': 'model', 'parts': [syllabus_xml_for_display]})

                    # The manager sees the presented syllabus in the history
                    # and is asked to request feedback on it.
                    follow_up_prompt = "The syllabus has been presented. Ask the user for feedback."
                    manager_feedback_request = llm_call(
                        prompt=follow_up_prompt,
                        chat_history=conversation_history,
                        system_prompt=convo_manager_system_prompt_explicit)
                    conversation_history.append({'role': 'model', 'parts': [manager_feedback_request]})
                    if verbose:
                        print(f"AI: {manager_feedback_request}")
                else:
                    error_msg = f"AI: Sorry, I encountered an error during syllabus {action}. {generated_content or 'Details unknown.'}"
                    if verbose:
                        print(error_msg)
                    conversation_history.append({'role': 'model', 'parts': [error_msg]})

            elif intent == "FINALIZE":
                if verbose:
                    print("[System: Finalization requested.]")
                if current_syllabus_content:
                    final_syllabus_xml = f"<syllabus>\n{current_syllabus_content}\n</syllabus>"
                    finalization_requested = True

                    # Syllabus is frozen; now ask how the user wants to be taught.
                    follow_up_prompt = "The syllabus has been finalized . in what way you want to be teached? - asked to the user by model make it accordingly"
                    manager_feedback_request = llm_call(
                        prompt=follow_up_prompt,
                        chat_history=conversation_history,
                        system_prompt=learningstyle_prompt,
                        temperature=1)
                    conversation_history.append({'role': 'model', 'parts': [manager_feedback_request]})

                    if verbose:
                        print(f"AI: {manager_feedback_request}")
                        print("[System: Syllabus content captured. Waiting for learning style.]")
                else:
                    if verbose:
                        print("[System Warning: Finalization requested, but no syllabus content found.]")

            elif intent == "PERSONA":
                if verbose:
                    print("[System: Persona tag received.]")
                    print(f"DEBUG (PERSONA): finalization_requested = {finalization_requested}")
                    print(f"DEBUG (PERSONA): final_syllabus_xml is set = {bool(final_syllabus_xml)}")

                # The manager may skip the FINALIZE tag; fall back to the most
                # recently generated syllabus, but only if one actually exists.
                if final_syllabus_xml is None and current_syllabus_content:
                    final_syllabus_xml = f"<syllabus>\n{current_syllabus_content}\n</syllabus>"

                if final_syllabus_xml:
                    if verbose:
                        print("[System: Syllabus finalized and learning style captured. Negotiation complete.]")
                    return final_syllabus_xml, conversation_history

                if verbose:
                    print("[System Warning: Persona tag received, but no syllabus has been generated yet. Loop continues.]")

            # "CONVERSE" (and any unrecognised intent): the reply was already
            # printed above, nothing else to do.

        except Exception as e:
            # Boundary of the AI turn: report and abort the negotiation.
            if verbose:
                print(f"[System Error: An error occurred in the main loop: {e}]")
                import traceback
                traceback.print_exc()
            return None

In [None]:
#k = resources_intro(created_files)

In [None]:
# System prompt for the "Persona Architect" step. run_learning_session passes
# it as `system_prompt` to llm_call together with the negotiation history to
# obtain a tutor system prompt, then substitutes the literal text
# {{SYLLABUS_SECTION}} in the model output with the final syllabus XML.
# In the visible code this string is used as-is (no str.format), so the
# doubled braces reach the model literally and match the replace() target.
prompt_generation = """
Your Role: You are an AI Persona Architect. Your primary function is to craft detailed, effective, and engaging system prompts for AI Tutors based on user specifications derived from the preceding conversation history and awareness of accompanying learning materials (like a syllabus).

Your Goal: To generate a system prompt for an AI Tutor that accurately reflects the user's desired teaching style, personality, depth preferences, and subject matter discussed in the conversation. This generated prompt must conclude with a simple introductory phrase followed immediately by the {{SYLLABUS_SECTION}} placeholder, where the actual learning syllabus will be inserted later.

Context You Will Use:

Conversation History: Analyze the entire preceding conversation with the user. Pay close attention to their explicit requests and implicit preferences regarding:
*   Teaching style (e.g., enthusiastic, patient, rigorous, Socratic).
*   Personality influences (e.g., specific educators like Feynman/Karpathy, general traits like humorous/formal).
*   Focus areas (e.g., intuition, practical code, theory, problem-solving).
*   Interaction dynamics (e.g., level of questioning, guidance vs. direct answers).
*   Desired adaptability and depth control (e.g., the "Levels of Explanation" idea).
Syllabus Mention: Infer from the conversation that a specific learning syllabus will be provided to the final AI Tutor.

Your Task:

Synthesize the user's requirements from the conversation history into a coherent and actionable system prompt for the target AI Tutor.

The Generated Prompt MUST Include (in this order):

1.  Clear Persona Definition: Start with a concise statement defining the AI Tutor's name (create one like 'Synapse', 'GuideBot', 'LearnSpark' if none is suggested), its subject specialization (inferred from the conversation/syllabus mention), and its core mission.
2.  Core Principles Section: Detail the fundamental aspects of the tutor's personality and teaching philosophy, directly reflecting the user's preferences identified in the conversation history. Use bullet points for clarity. Incorporate specifics like desired traits, inspirational figures (and how to emulate them), and key emphasis areas.
3.  Teaching Approach / Methodology Section: Outline the specific methods the tutor should use. This must address:
    *   Clarity and Explanation Style (e.g., analogies, first principles).
    *   Interaction Style (e.g., probing questions, checks for understanding, hints).
    *   Handling Depth (e.g., adaptive levels, gauging understanding, offering detail choices).
    *   Practical Elements (e.g., code usage, examples, tools).
    *   Guidance vs. Direct Answers balance.
4.  Overall Goal Statement: Include a sentence summarizing the ultimate aim of the AI Tutor (e.g., "Your goal is to foster deep understanding...").
5.  Syllabus Introduction and Placeholder (MANDATORY LAST ELEMENT): The generated prompt must end precisely with a simple introductory phrase like "Here is the syllabus we will follow:", followed immediately by the placeholder {{SYLLABUS_SECTION}}. There should be no text, formatting, or additional instructions after this placeholder. Example ending:
    ...Your ultimate goal is to make learning X an exciting and rewarding journey.

    Here is the syllabus we will follow:
    {{SYLLABUS_SECTION}}

(Ensure the phrasing is natural and leads directly into the syllabus content).

Instructions for You (The Persona Architect):

*   Infer and Synthesize: Base your generated prompt solely on the preceding conversation history. Extract the user's needs accurately.
*   Be Specific and Actionable: Translate user preferences into clear, direct instructions for the final AI Tutor in sections 1-4.
*   Cohesive Persona: Ensure all parts of the generated prompt (sections 1-4) work together to create a consistent and believable tutor persona.
*   Strict Final Structure: Adhere strictly to placing the simple introductory phrase and the {{SYLLABUS_SECTION}} placeholder as the absolute final elements of your output. Keep the intro phrase brief and direct.
*   Output Format: Produce only the final, complete system prompt for the AI Tutor, ending exactly with the introductory phrase and {{SYLLABUS_SECTION}}. Do not include any explanatory text before or after the generated prompt itself.
"""

# --- Main Orchestration Function ---

def run_learning_session(verbose: bool = True):
    """Orchestrate the whole interactive learning session.

    Phase 1 negotiates a syllabus with ``negotiate_syllabus_chat_dynamic``.
    Phase 2 asks the model (``prompt_generation`` as system prompt, the
    negotiation history as context) to write a tutor system prompt, inserts
    the final syllabus into it, lets the tutor introduce itself and then runs
    a read/answer loop on stdin/stdout until the user quits or stdin reaches
    EOF.

    Args:
        verbose: When true, print phase banners, system notices, the generated
            tutor prompt and the tutor's answers inside the loop.

    Returns:
        None. Returns early if the syllabus negotiation did not complete.
    """
    print("--- Welcome to the AI Learning Assistant! ---")

    # 1. Negotiate the syllabus.
    if verbose:
        print("\n--- Phase 1: Planning Your Syllabus ---")
    negotiation_result = negotiate_syllabus_chat_dynamic(verbose=verbose)

    # 2. Stop if negotiation was aborted (user quit, EOF or error).
    if negotiation_result is None:
        if verbose:
            print("\n--- Syllabus planning did not complete. Exiting session. ---")
        return

    final_syllabus_xml, full_history = negotiation_result
    if verbose:
        print("\n--- Phase 1 Complete: Syllabus Finalized! ---")

    # 3. Generate the tutor's system prompt from the negotiation history.
    if verbose:
        print("\n--- Phase 2: Let's Start Learning! ---")
    generated_explainer_template = llm_call(
        prompt="",  # the system prompt is directive, no specific user prompt needed here
        chat_history=full_history,
        system_prompt=prompt_generation,
        temperature=1  # allow some creativity in phrasing but stick to instructions
    )

    syllabus_placeholder = "{{SYLLABUS_SECTION}}"
    if syllabus_placeholder in generated_explainer_template:
        final_explainer_prompt = generated_explainer_template.replace(
            syllabus_placeholder, final_syllabus_xml
        )
    else:
        # The model did not emit the placeholder. A plain replace() would be a
        # no-op and the tutor would never see the syllabus, so append it.
        if verbose:
            print("[System Warning: Generated prompt lacks the syllabus placeholder; appending the syllabus.]")
        final_explainer_prompt = (
            f"{generated_explainer_template.rstrip()}\n\n"
            f"Here is the syllabus we will follow:\n{final_syllabus_xml}"
        )

    # Debug aid: show the fully assembled tutor system prompt.
    if verbose:
        print(f"AI (Explainer): {final_explainer_prompt}")

    # The explainer history starts empty; the negotiation history is not
    # carried over (it only shaped the system prompt above).
    explainer_history = []
    manager_response = llm_call("Start intoducing your self and give a sneak peak into the syallabus to the user", system_prompt=final_explainer_prompt)
    explainer_history.append({'role': 'model', 'parts': [manager_response]})
    print(f"AI (Explainer): {manager_response}")

    # 4. Run the explainer loop.
    while True:
        try:
            # Strip so that " quit " is recognised and whitespace-only input is skipped.
            user_explainer_input = input("You (Learning): ").strip()
        except EOFError:
            if verbose:
                print("\nAI (Explainer): Session ended.")
            break

        if user_explainer_input.lower() in ("quit", "exit", "bye"):
            if verbose:
                print("AI (Explainer): Okay, ending the learning session. Goodbye!")
            break
        if not user_explainer_input:
            continue

        explainer_history.append({'role': 'user', 'parts': [user_explainer_input]})

        try:
            # Answer with the dynamically generated, syllabus-filled tutor prompt.
            explainer_response = llm_call(
                prompt=user_explainer_input,
                chat_history=explainer_history,
                system_prompt=final_explainer_prompt
            )
            # Only record/print a response that is not empty or whitespace.
            if explainer_response and explainer_response.strip():
                if verbose:
                    print(f"AI (Explainer): {explainer_response}")
                explainer_history.append({'role': 'model', 'parts': [explainer_response]})
            elif verbose:
                print("[System: Explainer produced empty response.]")

        except Exception as e:
            # Keep the session alive on a failed turn; the user can retry.
            explainer_history.append({'role': 'model', 'parts': ["[System Error during explanation. Please try again.]"]})
            if verbose:
                print(f"[System Error in Explainer: {e}]")
                print(explainer_history)  # debug dump of the conversation so far

    if verbose:
        print("\n--- Learning Session Complete ---")


# --- Main Execution Area (for the whole process) ---
# Starts the full interactive session (syllabus negotiation followed by the
# explainer loop) when this code is executed as the main module.
if __name__ == "__main__":
    # NOTE(review): no API-key check is performed here; this relies on the
    # genai configuration done at the top of the file having succeeded.
    run_learning_session(verbose=True)

--- Welcome to the AI Learning Assistant! ---

--- Phase 1: Planning Your Syllabus ---

--- Starting Syllabus Negotiation ---
AI: Hello! What topic are you interested in learning about today?
[System: Formatting 1 resource summaries into the system prompt.]
{'2104.08691v2 (1).txt': 'The Power of Scale for Parameter-Efﬁcient Prompt Tuning\nBrian Lester\x03Rami Al-Rfou Noah Constant\nGoogle Research\n{brianlester,rmyeid,nconstant}@google.com\nAbstract\nIn this work, we explore “prompt tuning,”\na simple yet effective mechanism for learn-\ning “soft prompts” to condition frozen lan-\nguage models to perform speciﬁc downstream\ntasks. Unlike the discrete text prompts used by\nGPT-3, soft prompts are learned through back-\npropagation and can be tuned to incorporate\nsignals from any number of labeled examples.\nOur end-to-end learned approach outperforms\nGPT-3’s few-shot learning by a large margin.\nMore remarkably, through ablations on model\nsize using T5, we show that prompt tuning be-