In [None]:
import os
import glob
from tqdm import tqdm
import openai
import tiktoken

In [None]:
# Setup tiktoken encoding
encoding = tiktoken.get_encoding("o200k_base")

def count_tokens(text):
    return len(encoding.encode(text))

In [None]:
# Locate every "*_summary.txt" file under the text directory, searching sub-directories too
base_path = "./text/"
summary_pattern = base_path + "**/*_summary.txt"
summary_files = sorted(glob.glob(summary_pattern, recursive=True))  # sorted so the order is stable
print(f"Found {len(summary_files)} summary files")
summary_files[:5]  # Preview the first 5 paths

In [None]:
# Read all summary files and concatenate the non-empty ones into a single text blob.
# Each included summary is preceded by a banner naming the lesson it came from.
all_summaries = []
total_tokens = 0
included_count = 0  # summaries actually added; empty files are skipped and must not be counted
summary_suffix = "_summary.txt"
banner = "=" * 80

for file in tqdm(summary_files, desc="Reading summaries"):
    with open(file, "r", encoding="utf-8") as f:
        content = f.read().strip()
    if not content:  # Only include non-empty summaries
        continue

    # Remove only the trailing suffix; str.replace() would also delete the same
    # substring if it happened to appear earlier in the file name.
    file_name = os.path.basename(file)
    if file_name.endswith(summary_suffix):
        file_name = file_name[: -len(summary_suffix)]

    all_summaries.append(f"\n\n{banner}\n")
    all_summaries.append(f"LESSON: {file_name}\n")
    all_summaries.append(f"{banner}\n\n")
    all_summaries.append(content)
    # The estimate covers the summary bodies only, not the banner lines added above.
    total_tokens += count_tokens(content)
    included_count += 1

combined_text = "".join(all_summaries)
skipped_count = len(summary_files) - included_count

# Report the number of summaries that were really included, not the number of files found.
print(f"\nTotal summaries: {included_count}")
if skipped_count:
    print(f"Skipped empty files: {skipped_count}")
print(f"Total characters: {len(combined_text):,}")
print(f"Total tokens (estimated): {total_tokens:,}")

In [None]:
# Function to create comprehensive summary
def create_comprehensive_summary(
    all_summaries_text,
    model="gpt-5.1-2025-11-13",
    temperature=0.2,
    max_completion_tokens=32768,
):
    """Ask an OpenAI chat model to merge the lesson summaries into one study guide.

    The summaries are embedded at the end of a fixed instruction prompt and sent as a
    single user message through the Chat Completions API.

    Args:
        all_summaries_text: Concatenated lesson summaries to synthesize.
        model: Model identifier passed to the API.
        temperature: Sampling temperature passed to the API.
        max_completion_tokens: Upper bound on generated tokens passed to the API.

    Returns:
        The text content of the first returned choice.

    Raises:
        ValueError: If ``all_summaries_text`` is empty or only whitespace. No request is
            sent in that case, because there would be nothing to summarize.
        RuntimeError: If the response contains no text content (for example a refusal, or
            the completion budget was used up before any visible text was produced).
    """
    # Fail before spending an API call: with no source material the model could only
    # produce content that the prompt explicitly forbids it to invent.
    if not all_summaries_text or not all_summaries_text.strip():
        raise ValueError("all_summaries_text is empty; there are no lesson summaries to synthesize")

    prompt = f"""
You are given multiple lesson summaries from a day trading course series on "Momentum Burst" by Pradeep Bond from StockBee.

Your task:
- Read through ALL the lesson summaries carefully
- Synthesize the information into ONE comprehensive, well-organized study guide
- Group related concepts together logically, even if they appeared in different lessons
- Create a clear hierarchical structure with main topics and subtopics
- Eliminate redundancy while preserving all unique insights and strategies
- Maintain all important technical details, specific examples, and psychological advice
- Include specific stock examples and price points when mentioned
- Preserve any specific numbers, percentages, or timeframes mentioned

Output requirements:
1. Start with a comprehensive Table of Contents
2. Include an Executive Summary (2-3 paragraphs) of the entire course
3. Organize the main content into logical sections:
   - Core Concepts
   - Setup Criteria and Scanning
   - Entry and Exit Strategies
   - Risk Management
   - Psychology and Discipline
   - Real-world Examples and Case Studies
   - Common Mistakes to Avoid
   (Add more sections as needed based on the content)
4. Use clear headings, subheadings, and bullet points for readability
5. End with a "Key Takeaways" section highlighting the most critical points

DO NOT:
- Invent information not present in the summaries
- Omit important strategies or concepts
- Change the meaning of technical terms or specific advice

Here are all the lesson summaries:

{all_summaries_text}
    """

    client = openai.OpenAI()

    # Report the model actually being called rather than a hard-coded name.
    print(f"Sending request to {model}...")
    print(f"Prompt tokens (estimated): {count_tokens(prompt):,}")

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=temperature,
        max_completion_tokens=max_completion_tokens
    )

    choice = response.choices[0]
    summary = choice.message.content

    # content may be missing (e.g. a refusal); returning None would only surface later as
    # an unrelated TypeError when the caller measures or writes the summary.
    if not summary:
        refusal = getattr(choice.message, "refusal", None)
        raise RuntimeError(
            "Model returned no summary text "
            f"(finish_reason={choice.finish_reason!r}, refusal={refusal!r})"
        )

    # finish_reason "length" means generation stopped at the token limit, so the study
    # guide is cut off; make that visible instead of silently returning a partial guide.
    if choice.finish_reason == "length":
        print(
            "WARNING: the response hit the max_completion_tokens limit "
            f"({max_completion_tokens:,}); the summary is incomplete."
        )

    return summary

In [None]:
# Run the synthesis over the combined lesson text, then report how large the result is
comprehensive_summary = create_comprehensive_summary(combined_text)
print("\nSummary generated successfully!")
summary_char_count = len(comprehensive_summary)
print(f"Summary length: {summary_char_count:,} characters")
summary_token_count = count_tokens(comprehensive_summary)
print(f"Summary tokens: {summary_token_count:,}")

In [None]:
# Write the generated study guide to a UTF-8 text file in the working directory
output_file = "./Momentum_Burst_Complete_Course_Summary.txt"
with open(output_file, mode="w", encoding="utf-8") as summary_out:
    summary_out.write(comprehensive_summary)

print("\nComprehensive summary saved to: " + output_file)

In [None]:
# Optional: Display a preview (at most the first 2000 characters) of the summary
preview_limit = 2000
print("\n" + "="*80)
print("PREVIEW OF COMPREHENSIVE SUMMARY")
print("="*80 + "\n")
preview_text = comprehensive_summary[:preview_limit]
# Only claim truncation when text was actually cut off; a short summary is shown in full.
if len(comprehensive_summary) > preview_limit:
    preview_text += "\n\n[...truncated...]"
print(preview_text)