In [5]:
from lib.textbook_manager import TextbookManager

# Initialize manager
textbook_manager = TextbookManager()

# Print all available textbooks


# Get a specific textbook and page
textbook_name = "Daniel V. Schroeder - An Introduction to Thermal Physics-Pearson (1999)"  # without .txt extension
passage = textbook_manager.get_page(textbook_name, page_number=211)
# Get and combine multiple consecutive pages
start_page = 211
num_pages = 3
combined_passage = None

for page_num in range(start_page, start_page + num_pages):
    current_passage = textbook_manager.get_page(textbook_name, page_number=page_num)
    if current_passage:
        if combined_passage is None:
            combined_passage = current_passage
        else:
            combined_passage.content += "\n\n" + current_passage.content
    else:
        print(f"Warning: Could not find page {page_num}")

print(f"Combined content from pages {start_page} to {start_page + num_pages - 1}:")
if passage:
    print(f"\nPage {passage.page_number} content:")
    print(passage.content)
else:
    print(f"Could not find textbook or page")


Combined content from pages 211 to 213:

Page 211 content:
approximate  the sum as an integral  
that yields  a very simple  result.
Let ’s look at some numbers.  The constant  e, which  sets the energy  scale for 
rotational  excitations,  is never  more  than a small  fraction  of an electron-volt.  For a 
CO molecule,  for instance,  e = 0.00024  eV, so that e/k = 2.8 K. Ordinarily  we are 
interested  only in temperatures  much  higher  than e/k, so the quantity  kT/e will be 
much  greater  than 1. In this case the number  of terms  that contribute  significantly
Figure  6.7. Bar-graph  representations  of the partition  sum 6.30, for two different  
temperatures.  At high temperatures  the sum can be approximated  as the area 
under  a smooth  curve.


<<<PAGE 248>>>

236 Chapter  6 Boltzmann  Statistics
to the partition  function  will be quite large, so we can, to a good approximation,  
replace  the bar graph  in Figure  6.7 with the smooth  curve. The partition  function  
is

In [7]:
from lib.question_generator import QuestionGenerator
from lib.question_generator.models import QuestionDifficulty

# Create question generator
generator = QuestionGenerator(model_name="gpt-4o-mini", verification_model="gpt-4o-mini")

# Generate questions with verification
print("Generating questions without verification...")
verified_questions = await generator.generate_questions(
    combined_passage.content,
    num_questions=10,
    difficulty=QuestionDifficulty.GRAD,  # or any other difficulty level
    verify=False,
  
)

print(f"\nGenerated {len(verified_questions)} verified questions:")
for i, qa in enumerate(verified_questions, 1):
    print(f"\n Source: {qa.source}")
    print(f"\nQuestion {i}:")
    print(qa.question)
    print("\nSolution:")
    print(qa.solution)
    
    print("-" * 80)
    



Generating questions without verification...
MODEL OUTPUT:  <question>
For a CO molecule, the constant e is approximately 0.00024 eV. Calculate the rotational partition function for a CO molecule at room temperature (300 K), first using the exact formula 6.30 and then using the approximate formula 6.31.
</question>
<solution>
\[
Z_{rot} = \frac{1}{e} \left( \frac{kT}{e} \right)^{\frac{3}{2}} \quad \text{(exact formula 6.30)}
\]
\[
Z_{rot} \approx \frac{1}{2} \frac{kT}{e} \quad \text{(approximate formula 6.31)}
\]
\text{Answer: } \boxed{\text{Exact: } Z_{rot} \text{ from 6.30} \quad \text{Approx: } Z_{rot} = \frac{1}{2} \frac{300k}{0.00024}}
\end{solution}

<question>
For an O2 molecule, the constant e is approximately 0.00018 eV. Estimate the rotational partition function for an O2 molecule at room temperature (300 K).
</question>
<solution>
\[
Z_{rot} \approx \frac{1}{2} \frac{300k}{0.00018}
\]
\text{Answer: } \boxed{Z_{rot} \approx \frac{50 \cdot 300k}{0.00018}}
\end{solution}

<ques

In [10]:
import os
textbook_names = [f.replace('.txt', '') for f in os.listdir('textbooks/txt') if f.endswith('.txt')]
textbook_names

['Number Theory, Volume I Tools and Diophantine Equations (Cohen, Henri) (Z-Library)',
 'Classical Electrodynamics, 3rd edition (John David Jackson) (Z-Library)',
 'Measure Theory (Paul R. Halmos) (Z-Library)',
 'Principles of Plasma Physics for Engineers and Scientists (Umran S. Inan, Marek Gołkowski) (Z-Library)',
 'Organic Chemistry (T. W. Graham Solomons, Craig B. Fryhle etc.) (Z-Library)',
 'ANT',
 'Asymmetric Synthesis II More Methods and Applications (Mathias Christmann, Stefan Bräse) (Z-Library)',
 '9312416 (1)',
 'Functional Analysis - Second Edition (Walter Rudin) (Z-Library)',
 'An Introduction to Homological Algebra  ( etc.) (Z-Library)',
 'Gravitation (Charles W. Misner, Kip S. Thorne etc.) (Z-Library)',
 'gtnotes',
 'geoghe',
 'TopNotes_Spring10',
 'pde_notes',
 'Condensed Matter Field Theory, Second Edition (Altland Alexander, Simons Ben D.) (Z-Library)',
 'Differential Geometry of Curves and Surfaces (Manfredo P. do Carmo) (Z-Library)',
 'Nuclear and particle physics [

In [None]:
import asyncio
from lib.question_generator import QuestionGenerator
from lib.question_generator.models import QuestionDifficulty
from lib.textbook_manager import TextbookManager
import os
# Initialize manager
textbook_manager = TextbookManager()
generator = QuestionGenerator(model_name="deepseek-chat", output_dir="generated_questions/DS-MATH3.0")

# Create list of coroutines for each page

async def process_pages(textbook_name, start_page, num_pages=3):
    """Process multiple consecutive pages and generate questions from their combined content."""
    combined_passage = None
    
    # Combine the content from multiple pages
    for page_num in range(start_page, start_page + num_pages):
        current_passage = textbook_manager.get_page(textbook_name, page_number=page_num)
        if current_passage:
            if combined_passage is None:
                combined_passage = current_passage
            else:
                combined_passage.content += "\n\n" + current_passage.content
        else:
            print(f"Warning: Could not find page {page_num} in {textbook_name}")
    
    if combined_passage is None:
        print(f"Warning: Could not load any pages starting from {start_page}")
        return []
    
    print(f"Processing pages {start_page}-{start_page + num_pages - 1}...")
    try:
        questions = await generator.generate_questions(
            combined_passage.content,
            num_questions=10,
            difficulty=QuestionDifficulty.UNDERGRAD,
            verify=False,
            src=f"{textbook_name}_pages_{start_page}-{start_page + num_pages - 1}"
        )
        print(f"✓ Completed pages {start_page}-{start_page + num_pages - 1} - generated {len(questions)} questions")
        return questions
    except Exception as e:
        print(f"Error processing pages {start_page}-{start_page + num_pages - 1}: {str(e)}")
        return []

# Get all textbook names from the TextbookManager
textbook_names = [f.replace('.txt', '') for f in os.listdir('textbooks/txt') if f.endswith('.txt')]
print(f"Found {len(textbook_names)} textbooks: {textbook_names}")

all_questions = []
PAGES_PER_GROUP = 5  # Process 3 pages at a time
BATCH_SIZE = 100  # Process 50 page groups at a time

for textbook_name in textbook_names:
    num_pages = textbook_manager.get_num_pages(textbook_name)
    print(f"\nProcessing textbook: {textbook_name}")
    print(f"Number of pages: {num_pages}")
    
    textbook_questions = []
    
    # Process pages in groups and batches
    for batch_start in range(0, num_pages, BATCH_SIZE * PAGES_PER_GROUP):
        batch_end = min(batch_start + BATCH_SIZE * PAGES_PER_GROUP, num_pages)
        print(f"\nProcessing batch of pages {batch_start} to {batch_end-1}...")
        
        # Create tasks for each group of pages
        tasks = []
        for group_start in range(batch_start, batch_end, PAGES_PER_GROUP):
            # Make sure we don't go beyond the number of pages
            if group_start < num_pages:
                # Adjust the number of pages in the last group if needed
                actual_pages = min(PAGES_PER_GROUP, num_pages - group_start)
                tasks.append(process_pages(textbook_name, group_start, actual_pages))
        
        batch_results = await asyncio.gather(*tasks)
        
        # Add batch results to textbook_questions
        for page_questions in batch_results:
            textbook_questions.extend(page_questions)
        
        print(f"Batch complete. Questions for {textbook_name} so far: {len(textbook_questions)}")
    
    all_questions.extend(textbook_questions)
    print(f"\nFinished processing {textbook_name}. Total questions: {len(textbook_questions)}")

verified_questions = all_questions
print(f"\nFinished processing all textbooks. Total questions generated: {len(verified_questions)}")

# Remove the duplicate gather code at the end
# ... existing code ...


Found 27 textbooks: ['Number Theory, Volume I Tools and Diophantine Equations (Cohen, Henri) (Z-Library)', 'Classical Electrodynamics, 3rd edition (John David Jackson) (Z-Library)', 'Measure Theory (Paul R. Halmos) (Z-Library)', 'Principles of Plasma Physics for Engineers and Scientists (Umran S. Inan, Marek Gołkowski) (Z-Library)', 'Organic Chemistry (T. W. Graham Solomons, Craig B. Fryhle etc.) (Z-Library)', 'ANT', 'Asymmetric Synthesis II More Methods and Applications (Mathias Christmann, Stefan Bräse) (Z-Library)', '9312416 (1)', 'Functional Analysis - Second Edition (Walter Rudin) (Z-Library)', 'An Introduction to Homological Algebra  ( etc.) (Z-Library)', 'Gravitation (Charles W. Misner, Kip S. Thorne etc.) (Z-Library)', 'gtnotes', 'geoghe', 'TopNotes_Spring10', 'pde_notes', 'Condensed Matter Field Theory, Second Edition (Altland Alexander, Simons Ben D.) (Z-Library)', 'Differential Geometry of Curves and Surfaces (Manfredo P. do Carmo) (Z-Library)', 'Nuclear and particle physic