In [2]:
# Base directory: the source PDF is read from here and extracted images are written here.
path = "data/"

In [68]:
# Extract images, tables, and chunk text
from unstructured.partition.pdf import partition_pdf

# Partition the PDF with the "hi_res" strategy, asking for table structure
# and for embedded images to be extracted into `path`.
# Chunking stays disabled; the settings that were tried and switched off are:
#   chunking_strategy="by_title", max_characters=8000,
#   new_after_n_chars=3800, combine_text_under_n_chars=2000
raw_pdf_elements = partition_pdf(
    strategy="hi_res",
    filename=f"{path}physics_example.pdf",
    infer_table_structure=True,
    extract_images_in_pdf=True,
    image_output_dir_path=path,
)

In [101]:
from unstructured.cleaners.core import clean

# Sort the parsed elements into buckets keyed on the class path that shows up
# in each element's type string. `formulas` is declared but nothing in this
# cell fills it.
tables, formulas, texts, titles, everything = [], [], [], [], []

# Checked in order; the first marker found in the type string wins.
routing = (
    ("unstructured.documents.elements.Table", tables),
    ("unstructured.documents.elements.CompositeElement", texts),
    ("unstructured.documents.elements.Title", titles),
)

for element in raw_pdf_elements:
    type_label = str(type(element))
    content = str(element)
    for marker, bucket in routing:
        if marker in type_label:
            bucket.append(content)
            break
    # Regardless of its type, every element also lands in the full-document list.
    everything.append(content)


print(titles)
print(" ".join(everything))

['Problem Set 1', '1. Car and Bicycle Rider', '2. Elevator Trip', '3. Rocket Launch', '4. Throw and Catch', '5. Vertical Collision']
8.01x Classical Mechanics, Fall 2016 Massachusetts Institute of Technology Problem Set 1 1. Car and Bicycle Rider A car is driving along a straight line with a speed v0. At time t = 0 the car is at the origin. At a later instant of time t = t1 the car starts to slow down until it stops at a time t = t2. The acceleration of the car as a function of time is given by a= 0 0<t<t, co —c(t — t1) ty <t<tg (cid:26) where c is a positive constant which has dimensions of acceleration per unit time. (a) Find vc(t) and xc(t), the x-component of the velocity and the position of the car as a function of time. Express your answer in terms of some or all of the following variables: v0, c, t, t1 and t2. (b) A bicycle rider is riding at a constant speed of vb and at t = 0 is 17 m behind the car. The cyclist reaches the car when the car just comes to rest. The car is moving

In [82]:
import io
import numpy as np
import base64
from io import BytesIO
from PIL import Image


def resize_base64_image(base64_string, size=(128, 128)):
    """
    Scale a Base64-encoded image to the requested dimensions.

    Args:
    base64_string (str): The source image, Base64 encoded.
    size (tuple): Target size as (width, height).

    Returns:
    str: The resized image, Base64 encoded, saved in the format that was
    detected on the source image.
    """
    source = Image.open(io.BytesIO(base64.b64decode(base64_string)))
    scaled = source.resize(size, Image.LANCZOS)

    # Write the scaled image into memory using the source image's format.
    out = io.BytesIO()
    scaled.save(out, format=source.format)

    encoded = base64.b64encode(out.getvalue())
    return encoded.decode("utf-8")


def is_base64(s):
    """Check if a string is a non-empty, canonically encoded Base64 string.

    Args:
        s: Candidate value. Anything that is not a ``str`` is rejected.

    Returns:
        bool: True only when ``s`` is a non-empty string that decodes as
        Base64 and re-encodes to exactly the same characters.
    """
    # An empty string round-trips trivially but carries no payload, so it must
    # not be reported as an encoded image/document.
    if not isinstance(s, str) or not s:
        return False
    try:
        decoded = base64.b64decode(s, validate=True)
    except ValueError:
        # binascii.Error (bad alphabet or padding) is a ValueError subclass,
        # and non-ASCII text is rejected with a plain ValueError.
        return False
    # The round-trip comparison rejects non-canonical encodings that still decode.
    return base64.b64encode(decoded) == s.encode("ascii")


def split_image_text_types(docs):
    """Split retrieved documents into Base64 images and plain texts.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        dict: ``{"images": [...], "texts": [...]}``. ``images`` holds the
        resized Base64 strings; ``texts`` holds every other content string,
        including Base64-looking content that could not be opened as an image.
    """
    images = []
    text = []
    for doc in docs:
        content = doc.page_content  # Extract Document contents
        resized = None
        if is_base64(content):
            try:
                # Resize image to avoid OAI server error
                resized = resize_base64_image(content, size=(250, 250))
            except OSError:
                # Short alphanumeric text can be valid Base64 without being an
                # image; PIL signals that with an OSError subclass. Keep such
                # content as text instead of aborting the whole retrieval.
                resized = None
        if resized is None:
            text.append(content)
        else:
            images.append(resized)  # base64 encoded str
    return {"images": images, "texts": text}

In [83]:
import os
import uuid
import chromadb
import numpy as np
from PIL import Image as _PILImage
from langchain.vectorstores import Chroma
from langchain_experimental.open_clip import OpenCLIPEmbeddings

# Chroma collection that embeds both images and text with OpenCLIP.
vectorstore = Chroma(
    embedding_function=OpenCLIPEmbeddings(),
    collection_name="cyrus_rag_clip",
)


# Paths of every ".jpg" entry found directly inside `path`, in sorted order.
image_uris = sorted(
    os.path.join(path, entry)
    for entry in os.listdir(path)
    if entry.endswith(".jpg")
)

# Index the images first, then all of the extracted text elements.
vectorstore.add_images(uris=image_uris)
vectorstore.add_texts(texts=everything)

# Retriever with the vector store's default settings.
retriever = vectorstore.as_retriever()

In [88]:
from operator import itemgetter
from langchain.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.schema.messages import HumanMessage, SystemMessage


def prompt_func(data_dict):
    """Build the multimodal chat prompt from the retrieved context.

    Args:
        data_dict (dict): Must provide ``data_dict["context"]["texts"]`` (list
            of str) and ``data_dict["context"]["images"]`` (list of Base64
            strings). Other keys, such as "question", are not read.

    Returns:
        list: A single-element list holding one HumanMessage. Its content is
        an optional image part (only the first retrieved image is attached)
        followed by the instruction text, with the retrieved texts placed
        inside the triple-backtick fence.
    """
    # Joining the context texts into a single string
    formatted_texts = "\n".join(data_dict["context"]["texts"])
    messages = []

    # Adding image(s) to the messages if present
    if data_dict["context"]["images"]:
        image_message = {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{data_dict['context']['images'][0]}"
            },
        }
        messages.append(image_message)

    # Adding the text message for analysis.
    # The fenced block carries the retrieved texts. It used to interpolate the
    # notebook-global list `everything`, which injected a Python list repr and
    # ignored what the retriever returned.
    text_message = {
        "type": "text",
        "text": (
            "As an experienced university tutor in the fields of mathematics, computer science, and philosophy, your task is to provide concise yet comprehensive summaries of PDF documents. These documents, rich with texts, graphs, tables, and images, aim to elucidate complex concepts across these disciplines. Your summaries should distill the essence of each section, ensuring that key concepts and explanations are retained efficiently.\n\n"
            "The summarization process will involve interpreting different sections of the document, including any visual aids like graphs or images that enhance understanding. This task will be supported by a combination of RAG (Retrieval-Augmented Generation) and a vector database, offering contextual insights and efficient information retrieval.\n\n"
            "Please use your expertise to:\n"
            "- Summarize each section of the provided text in ``` in a concise manner, focusing on essential concepts and information.\n"
            "- Explain or interpret any graphs, tables, or images included in the document, highlighting how they contribute to the understanding of the text.\n"
            "- Ensure that the summaries maintain a high level of detail, facilitating effective learning and preparation for academic assessments like tests.\n"
            "- Utilize the RAG and vector database to enhance the accuracy and relevance of your summaries.\n\n"
            "```\n"
            f"{formatted_texts}\n"
            "```\n\n"
        ),
    }
    messages.append(text_message)

    return [HumanMessage(content=messages)]


# Chat model used by the RAG chain below: temperature 0, replies capped at
# 1024 tokens. The model name suggests a vision-capable variant, which the
# image part produced by prompt_func relies on — TODO confirm availability.
model = ChatOpenAI(temperature=0, model="gpt-4-vision-preview", max_tokens=1024)

# RAG pipeline
# The leading dict fans the single chain input out into two keys:
#   "context"  - the retriever's results, split into images and texts
#   "question" - the chain input passed through unchanged
# prompt_func turns that dict into chat messages, the model answers, and the
# output parser reduces the reply to a string.
# NOTE(review): `model` and `chain` are both rebound in a later cell of this
# notebook, so which objects a later `chain.invoke(...)` uses depends on the
# order in which the cells were executed.
chain = (
    {
        "context": retriever | RunnableLambda(split_image_text_types),
        "question": RunnablePassthrough(),
    }
    | RunnableLambda(prompt_func)
    | model
    | StrOutputParser()
)

In [99]:
from langchain.prompts import ChatPromptTemplate

# Prompt for a bullet-point markdown summary; the document text is substituted
# for {text} inside the triple-backtick fence. Two typos in the task list were
# corrected ("Summarizethe" and "Focuse"); the rest of the wording is unchanged.
template = """
            As an experienced university tutor in the fields of mathematics, computer science, and philosophy, your task is to provide concise yet comprehensive summaries of PDF documents. These documents, rich with texts, graphs, tables, and images, aim to elucidate complex concepts across these disciplines. Your summaries should distill the essence of each section, ensuring that key concepts and explanations are retained efficiently.
            You are also aware that no tokens of a prompt or a response should be wasted. 

            Please work on the following tasks:
            - Summarize the text provided in ``` using bullet points of the most important concepts
            - Focus on essential concepts and information.
            - Explain how bullet points relate to each other.
            - Ensure that the summaries maintain a high level of detail, facilitating effective learning and preparation for academic assessments like tests.
            - Don't use any equations or variables
            - Finally you return everything formatted as markdown
            ```
            {text}
            ```
            
            This task is really important for me. I am studying for finals and your summary is crucial for my success.
            """

prompt = ChatPromptTemplate.from_template(template)

model = ChatOpenAI(temperature=0, model="gpt-4-1106-preview", max_tokens=1024)

# Text-only chain: the raw chain input becomes {text}, then prompt -> model -> string.
# Note that this rebinds `model` and `chain`, replacing the RAG objects built
# in the earlier cell.
chain = {"text": RunnablePassthrough()} | prompt | model | StrOutputParser()

In [103]:
from langchain.pydantic_v1 import BaseModel, Field
from typing import Optional

from langchain.chains.openai_functions import (
    create_openai_fn_chain,
    create_structured_output_chain,
    create_openai_fn_runnable,
    create_structured_output_runnable,
)
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate


# Schema handed to create_structured_output_runnable: the three fields the
# model has to fill in for one PDF. All three are required (`...` default).
# NOTE(review): the class docstring and the Field descriptions are presumably
# forwarded to the model as part of the schema, so treat them as prompt text
# when editing — verify against the LangChain version in use.
class PdfSummary(BaseModel):
    """Summary and meta data of a PDF document."""

    # Title of the document.
    title: str = Field(..., description="Title of the PDF document.")
    # University class the document belongs to; the invoking cell passes the
    # list of allowed values through the prompt's {classes} slot.
    classification: str = Field(
        ..., description="Uni class where the PDF document was handed out."
    )
    # Free-text summary of the document.
    summary: str = Field(..., description="Summary of the PDF document.")


# Deterministic (temperature 0) chat model for structured extraction.
llm = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)

# System role plus four human turns: the input text, the allowed classes,
# a formatting reminder, and a motivation line.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a world class algorithm for extracting information in structured formats. You know a lot about different classes at university and you are able to summarize and categorize complex texts.",
        ),
        (
            "human",
            "Use the given format to extract information from the following input: {input}",
        ),
        (
            "human",
            "Choose a classification from the following list: {classes}",
        ),
        ("human", "Make sure to answer in the correct format and dont repeat information in the summary that is present in title or classification."),
        (
            "human",
            "I am studying for finals and your summary is crucial for my success.",
        ),
    ]
)

runnable = create_structured_output_runnable(PdfSummary, llm, prompt)

# `everything` is a list of element strings. Formatting the list itself into
# {input} would insert its Python repr (brackets, quotes, escape sequences),
# so join it into one plain string, the same way the categorisation cell
# prints it. The invoke call stays the last expression so its result is shown.
runnable.invoke(
    {
        "input": " ".join(everything),
        "classes": ["physics", "mathematics", "computer science"],
    }
)

PdfSummary(title='8.01x Classical Mechanics, Fall 2016: Problem Set 1', classification='physics', summary='This document is a problem set from the 8.01x Classical Mechanics course offered in Fall 2016 by MIT. It includes a series of problems that cover topics such as kinematics and dynamics of motion. The problems are designed to apply concepts of velocity, acceleration, and forces in various scenarios including a car and bicycle rider, an elevator trip, a rocket launch, a throw and catch situation, and a vertical collision. Students are required to find expressions for velocities, positions, accelerations, and other physical quantities using given variables and constants. The problems encourage the use of equations of motion and the understanding of graphical representations of physical situations.')

In [100]:
# Pass one plain string rather than the list itself: a list would be formatted
# into the prompt as its Python repr (brackets, quotes, escape sequences).
chain.invoke(" ".join(everything))

"### 1. Car and Bicycle Rider\n- **Car's Motion**: A car moves with initial speed `v0`, starts decelerating at `t1` with a constant deceleration rate `c` until it stops at `t2`.\n- **Velocity and Position**: The task is to find expressions for the car's velocity `vc(t)` and position `xc(t)` over time without using equations.\n- **Bicycle Rider**: A cyclist, starting 17m behind the car and moving at a constant speed `vb`, catches up to the car right when it stops.\n- **Cyclist's Speed**: Calculate the cyclist's speed `vb` given `v0 = 12 m/s`, `t1 = 1 s`, `c = 6 m/s^3`, and `t2` when the car stops.\n\n### 2. Elevator Trip\n- **Elevator's Motion**: An elevator accelerates for time `T`, moves at constant speed for `4T`, then decelerates for `T` to reach the sixth floor at height `h`.\n- **Velocity Sketch**: A qualitative sketch of the elevator's velocity `v(t)` over time is required.\n- **Acceleration**: Determine the magnitude of acceleration `a` in terms of `h` and `T` without using equa