In [None]:
# --- Cell 1: Imports & Install ---
# %pip install langchain-community pypdf langchain-ollama matplotlib  (Cell 6 also needs the ffmpeg executable on PATH)
import pypdf
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import os

print("Libraries imported successfully.")



In [None]:
# Cell 2: Load Multiple PDFs from Folder
import os
import pypdf

# --- SET YOUR PDF FOLDER HERE ---
PDF_FOLDER = "pdfs"

# Each successfully read PDF is stored as
# {"filename": <file name>, "text": <text extracted from all pages>}.
documents = []

# Ensure folder exists. FileNotFoundError is still an Exception subclass, so
# anything that caught the previous generic Exception keeps working.
if not os.path.isdir(PDF_FOLDER):
    raise FileNotFoundError(f"PDF folder not found: {PDF_FOLDER}")

# os.listdir() returns entries in arbitrary order; sorting makes documents[0]
# (and the preview at the end of this cell) the same file on every run.
pdf_files = sorted(
    name for name in os.listdir(PDF_FOLDER) if name.lower().endswith(".pdf")
)

print(f"Found {len(pdf_files)} PDF(s) in: {PDF_FOLDER}\n")

for pdf_file in pdf_files:
    pdf_path = os.path.join(PDF_FOLDER, pdf_file)

    print(f"Loading {pdf_path}...")

    try:
        reader = pypdf.PdfReader(pdf_path)

        # Join pages with a newline so the last word of one page is not glued
        # to the first word of the next. Pages for which extract_text()
        # returns nothing are skipped.
        page_texts = (page.extract_text() for page in reader.pages)
        full_text = "\n".join(page_text for page_text in page_texts if page_text)

        print(f"  → Extracted {len(full_text)} characters\n")

        documents.append({
            "filename": pdf_file,
            "text": full_text
        })

    except Exception as e:
        # Deliberate best-effort: one unreadable PDF is reported and skipped
        # so the remaining files are still loaded.
        print(f"Error reading {pdf_file}: {e}\n")

print("FINISHED LOADING PDFs.")
print(f"Total PDFs loaded: {len(documents)}")

# Preview first 500 chars of first PDF
if documents:
    preview_source = documents[0]["text"]
    # Only show the ellipsis when the text was actually cut off.
    preview_suffix = "..." if len(preview_source) > 500 else ""
    print("\n--- Preview of First PDF Loaded ---")
    print("Filename:", documents[0]["filename"])
    print(preview_source[:500] + preview_suffix)


In [None]:
# Cell 3: Initialize Your LOCAL LLM
# Single source of truth for the model tag, used both for the client and for
# the status message so the two cannot drift apart.
OLLAMA_MODEL = "llama3:8b"

print("\nConnecting to local model...")
llm = ChatOllama(model=OLLAMA_MODEL)
# NOTE(review): constructing ChatOllama presumably does not contact the Ollama
# server, so this message may not prove the server is reachable — confirm.
print(f"Connected to Ollama ({OLLAMA_MODEL}) successfully!")


In [None]:
# --- Cell 4: Create Your Prompt and "Chain"
# We tell the AI *what* to do.
# We create a prompt "template" with a placeholder {document_text}.
#
# The human message below is sent to the model verbatim (after the placeholder
# is filled in), so its quoting and Markdown must be well-formed: the quoted
# fallback sentence in item 3 is closed inside item 3, and every bold marker
# that is opened is also closed.

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful teacher assistant for a college student. Your task is to analyze the following document text and provide a concise, structured report for revision. Your output should be in clean Markdown. You answer all the questions according to the following numbers with numbers included.",
        ),
        (
            "human",
            """
            Here is the text from the document:
            ---
            {document_text}
            ---
            
            Please analyze this text and provide the following:
            1.  **Main Content:** A concise, 1-2 sentence summary of the document's main point.
            2.  **Important Parts (Key Takeaways):** A bulleted list of the most important concepts, names, or findings.
            3.  **Resources Mentioned:** A bulleted list of any websites, books, papers, or other resources mentioned in the text. If none, state "No resources were mentioned."
            4.  **Find Keywords:** Most important keywords that is the document focused on.
            5.  **Best Quote:** Most memorable quote in the whole documents.
            6.  **Why erosion happened according to the pdf?**
            7.  **Can you give me some examples of Monte Carlo Simulation?**
            8.  **Explain like I am five about Monte Carlo and Erosion!**
            9.  **Can you create examples python simulation for Monte Carlo simulation using the formula in the file?**
            
            """,
        ),
    ]
)

# The "chain" connects the prompt, the LLM, and an output parser:
# the filled-in prompt is passed to `llm`, and StrOutputParser is applied to
# the model's reply.
chain = prompt | llm | StrOutputParser()

print("Analyzer chain created.")

In [None]:
# --- Cell 6: Create a Scrolling MP4 from the Analysis Output ---

import matplotlib.pyplot as plt
from matplotlib import animation
import textwrap
import numpy as np

def create_scrolling_mp4(
    text,
    out_file="text_analyzer.mp4",
    fps=20,
    lines_in_window=25,
    wrap_width=90,
    fontsize=12,
    figsize=(12, 7),
    bitrate=2000
):
    """Render ``text`` as a vertically scrolling MP4 video.

    The text is split on newlines, each non-blank paragraph is wrapped to
    ``wrap_width`` characters, and every resulting line is drawn one data unit
    below the previous one. The animation then moves the visible y-range
    downwards until the last line is inside the window.

    Parameters
    ----------
    text : str
        Text to render. Blank lines are kept as vertical spacing.
    out_file : str
        Path of the video file to write.
    fps : int or float
        Frames per second of the video; must be positive.
    lines_in_window : int
        Height of the visible window, in text lines; must be positive.
    wrap_width : int
        Maximum characters per line, passed to ``textwrap.wrap``.
    fontsize : int or float
        Font size of the rendered text.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    bitrate : int
        Bitrate passed to ``animation.FFMpegWriter``.

    Raises
    ------
    ValueError
        If ``fps`` or ``lines_in_window`` is not positive.

    Notes
    -----
    The video is encoded with ``animation.FFMpegWriter``; errors raised while
    saving propagate to the caller after the figure has been closed.
    """
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps!r}")
    if lines_in_window <= 0:
        raise ValueError(f"lines_in_window must be positive, got {lines_in_window!r}")

    # Wrap long lines; blank paragraphs become empty lines so spacing survives.
    lines = []
    for paragraph in text.split("\n"):
        if paragraph.strip() == "":
            lines.append("")
        else:
            lines.extend(textwrap.wrap(paragraph, width=wrap_width))

    line_height = 1.0
    y0 = 0
    top = y0 + 0.5  # half a line of padding above the first line

    # animation settings
    seconds_per_line = 0.12
    # At low frame rates int() would truncate to 0, which made update() divide
    # by zero; always spend at least one frame per line.
    frames_per_line = max(1, int(fps * seconds_per_line))

    # Scroll one line beyond len(lines) - lines_in_window and include the end
    # position as a frame: because of the half-line top padding, stopping
    # earlier leaves the last line at or below the window's bottom edge.
    total_scroll_lines = max(0, len(lines) - lines_in_window + 1)
    total_frames = total_scroll_lines * frames_per_line + 1

    # Use a safe font that supports most symbols, but only for this figure:
    # rc_context restores the caller's rcParams afterwards instead of changing
    # the font for every later plot in the session.
    with plt.rc_context({"font.family": "DejaVu Sans"}):
        fig, ax = plt.subplots(figsize=figsize)
        try:
            ax.axis("off")

            text_objs = [
                ax.text(
                    0.01,
                    y0 - i * line_height,
                    # Escape dollar signs so a line containing an even number
                    # of "$" is drawn literally instead of being parsed as
                    # mathtext (which can fail on arbitrary generated text).
                    line.replace("$", r"\$"),
                    va="top",
                    ha="left",
                    fontsize=fontsize
                )
                for i, line in enumerate(lines)
            ]

            ax.set_xlim(0, 1)
            # Same window height as update() uses, so the view does not
            # change scale between the initial draw and the first frame.
            ax.set_ylim(top - lines_in_window * line_height, top)

            def update(frame):
                # Move the visible window down by a fraction of a line per frame.
                shift = (frame / frames_per_line) * line_height
                new_top = top - shift
                ax.set_ylim(new_top - lines_in_window * line_height, new_top)
                return text_objs

            ani = animation.FuncAnimation(fig, update, frames=total_frames, blit=False)

            writer = animation.FFMpegWriter(fps=fps, bitrate=bitrate)
            ani.save(out_file, writer=writer)
        finally:
            # Close the figure even when saving fails, so it is neither leaked
            # nor left open in the session.
            plt.close(fig)

    print(f"✔ MP4 saved to: {out_file}")


# FUNCTION
# No cell in this notebook assigns `response` (there is no Cell 5), so on a
# fresh kernel the call below used to fail with NameError. Produce the analysis
# here when it is missing; an already-existing `response` is used unchanged.
if "response" not in globals():
    if not documents:
        raise RuntimeError("No PDF text was loaded, so there is nothing to analyze.")
    # NOTE(review): assumes the report should cover all loaded PDFs in a single
    # prompt (the questions refer to both erosion and Monte Carlo) — confirm.
    combined_text = "\n\n".join(doc["text"] for doc in documents)
    response = chain.invoke({"document_text": combined_text})

create_scrolling_mp4(response, out_file="your_project_video.mp4")
print("Done!")
