<a target="_blank" href="https://colab.research.google.com/github/UpstageAI/cookbook/blob/main/upstage.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>


# Upstage APIs

In this notebook, we'll use the Solar APIs provided by Upstage.
Solar provides LLM, Embedding, and Layout Analysis apis and is integrated with langchain and llamaindex.

## APIs

The APIs that Upstage currently offers.

1. Chat
2. Translation
3. Groundedness Check
4. Function Calling
5. Embedding
6. Document OCR
7. Layout Analysis
8. Key Information Extraction

In [16]:
# @title Install requirements
# @title First, create your upstage api key from upstage console. and set UPSTAGE_API_KEY environ variable.
!pip install -q llama_index_llms_upstage==0.1.3 llama_index_embeddings_upstage llama_index
!pip install -q langchain langchain_upstage==0.1.8rc0
!pip install -q python-dotenv


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [1]:
!pip install -q langchain langchain_upstage==0.1.8rc0


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [1]:
# @title set API key
import os
import getpass
from pprint import pprint
import warnings

warnings.filterwarnings("ignore")

from IPython import get_ipython

if "google.colab" in str(get_ipython()):
    # Running in Google Colab. Please set the UPSTAGE_API_KEY in the Colab Secrets
    from google.colab import userdata
    os.environ["UPSTAGE_API_KEY"] = userdata.get("UPSTAGE_API_KEY")
else:
    # Running locally. Please set the UPSTAGE_API_KEY in the .env file
    from dotenv import load_dotenv

    load_dotenv()

if "UPSTAGE_API_KEY" not in os.environ:
    os.environ["UPSTAGE_API_KEY"] = getpass.getpass("Enter your Upstage API key: ")


## Chat

In [3]:
# @title Chat with http request
import requests
import json

user_query = "Hi, how are you?"  # @param {type:"string"}

url = "https://api.upstage.ai/v1/solar/chat/completions"
headers = {"Authorization": f"Bearer 333", "Content-Type": "application/json"}
data = {
    "model": "solar-1-mini-chat",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_query},
    ],
    "temperature": 0.7,
    "top_p": 0.1,
    "stream": False,
}

response = requests.post(url, headers=headers, data=json.dumps(data))
pprint(response.json())

{'message': 'Unauthorized'}


In [12]:
from openai import OpenAI
from httpx import Timeout

client = OpenAI(
    base_url="https://api.upstage.ai/v1/solar",
    api_key="invalid_key",
    max_retries=1,
    timeout=Timeout(connect=10.0, read=300.0, write=20.0, pool=315.0),
)
# client.chat.completions.create(
#                 messages=[{"role": "user", "content": "ping"}],
#                 model="dsad",
#                 temperature=0,
#                 max_tokens=10,
#                 stream=False
# )

In [16]:
# @title Chat with langchain
from langchain_upstage import ChatUpstage
from langchain_core.messages import HumanMessage, SystemMessage

user_query = "Hi, how are you?"  # @param {type:"string"}

chat = ChatUpstage()

messages = [
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content=user_query),
]
response = chat.invoke(messages)
pprint(response.to_json())

{'id': ['langchain', 'schema', 'messages', 'AIMessage'],
 'kwargs': {'content': "Hello! I'm doing well, thank you for asking. How about "
                       'you?',
            'id': 'run-9d2613a1-acc8-4a87-bb4f-41b7bd0d8c8e-0',
            'invalid_tool_calls': [],
            'response_metadata': {'finish_reason': 'stop',
                                  'logprobs': None,
                                  'model_name': 'solar-1-mini-chat',
                                  'system_fingerprint': None,
                                  'token_usage': {'completion_tokens': 18,
                                                  'prompt_tokens': 28,
                                                  'total_tokens': 46}},
            'tool_calls': [],
            'type': 'ai',
            'usage_metadata': {'input_tokens': 28,
                               'output_tokens': 18,
                               'total_tokens': 46}},
 'lc': 1,
 'type': 'constructor'}


In [17]:
# @title Chat with llama-index
from llama_index.llms.upstage import Upstage
from llama_index.core.llms import ChatMessage

user_query = "Hi, how are you?"  # @param {type:"string"}

llm = Upstage()

response = llm.chat(
    messages=[
        ChatMessage(role="system", content="You are a helpful assistant."),
        ChatMessage(role="user", content=user_query),
    ]
)

pprint(response.raw)

{'choices': [Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Hello! I'm doing well, thank you for asking. How about you?", role='assistant', function_call=None, tool_calls=None))],
 'created': 1721885119,
 'id': '372c98e6-4e06-4d68-9b8d-c43781501e64',
 'model': 'solar-1-mini-chat-240612',
 'object': 'chat.completion',
 'system_fingerprint': None,
 'usage': CompletionUsage(completion_tokens=18, prompt_tokens=28, total_tokens=46)}


# Translation

In the translation API, you can set the translation style by first entering a sample translation message.


In [18]:
# @title Translation with http request
import requests
import json

translation_example_user = "아버지가방에들어가셨다."  # @param {type:"string"}
translation_example_assistant = "Father went into his room."  # @param {type:"string"}
translation_query = "엄마도들어가셨다."  # @param {type:"string"}

url = "https://api.upstage.ai/v1/solar/chat/completions"
headers = {
    "Authorization": f"Bearer {UPSTAGE_API_KEY}",
    "Content-Type": "application/json",
}
data = {
    "model": "solar-1-mini-translate-koen",
    "messages": [
        {"role": "user", "content": translation_example_user},
        {"role": "assistant", "content": translation_example_assistant},
        {"role": "user", "content": translation_query},
    ],
}

response = requests.post(url, headers=headers, data=json.dumps(data))
pprint(response.json())

{'choices': [{'finish_reason': 'stop',
              'index': 0,
              'logprobs': None,
              'message': {'content': 'Mom went in too.', 'role': 'assistant'}}],
 'created': 1721885121,
 'id': '038e3dac-0501-4774-b53e-0a3fb5ba691a',
 'model': 'solar-1-mini-translate-koen-240507',
 'object': 'chat.completion',
 'system_fingerprint': None,
 'usage': {'completion_tokens': 7, 'prompt_tokens': 56, 'total_tokens': 63}}


In [19]:
# @title Translation with langchain
from langchain_upstage import ChatUpstage
from langchain_core.messages import HumanMessage, AIMessage

translation_example_user = "아버지가방에들어가셨다."  # @param {type:"string"}
translation_example_assistant = "Father went into his room."  # @param {type:"string"}
translation_query = "엄마도들어가셨다."  # @param {type:"string"}


chat = ChatUpstage(model="solar-1-mini-translate-koen")

messages = [
    HumanMessage(content=translation_example_user),
    AIMessage(content=translation_example_assistant),
    HumanMessage(content=translation_query),
]
response = chat.invoke(messages)
pprint(json.loads(response.json()))

{'additional_kwargs': {},
 'content': 'Mom went in too.',
 'example': False,
 'id': 'run-2a886281-9aa3-4503-9ea0-16cca1df8ced-0',
 'invalid_tool_calls': [],
 'name': None,
 'response_metadata': {'finish_reason': 'stop',
                       'logprobs': None,
                       'model_name': 'solar-1-mini-translate-koen',
                       'system_fingerprint': None,
                       'token_usage': {'completion_tokens': 7,
                                       'prompt_tokens': 56,
                                       'total_tokens': 63}},
 'tool_calls': [],
 'type': 'ai',
 'usage_metadata': {'input_tokens': 56, 'output_tokens': 7, 'total_tokens': 63}}


In [20]:
# @title Translation with llama-index
from llama_index.llms.upstage import Upstage
from llama_index.core.llms import ChatMessage

translation_example_user = "아버지가방에들어가셨다."  # @param {type:"string"}
translation_example_assistant = "Father went into his room."  # @param {type:"string"}
translation_query = "엄마도들어가셨다."  # @param {type:"string"}


llm = Upstage(model="solar-1-mini-translate-koen")
response = llm.chat(
    messages=[
        ChatMessage(role="user", content=translation_example_user),
        ChatMessage(role="assistant", content=translation_example_assistant),
        ChatMessage(role="user", content=translation_query),
    ]
)

print(response)

assistant: Mom went in too.


# Groundedness Check

Large Language Models (LLMs) are capable of generating elaborate, information-rich texts, but they are prone to hallucinations -- they can produce factually incorrect (i.e., ungrounded) responses. A popular approach to overcoming this limitation of LLMs is to provide chunks of text, often called "contexts," which LLMs can use as a point of reference to generate factually correct outputs. This approach is known as Retrieval-Augmented Generation, or RAG.

However, RAG does not always guarantee truthful answers from LLMs. Therefore, an additional step is required to check whether a model-generated output is indeed grounded in a given context. The Groundedness Check API is specifically designed for this purpose: to check the groundedness of an assistant's response to a context provided by a user. Given two messages – a user-provided context and a model response – the API will return whether the response is grounded, not grounded, or if it is unsure about the groundedness of the response to the context.

In [21]:
# @title GroundednessCheck with http request
from openai import OpenAI

user_message = "Mauna Kea is an inactive volcano on the island of Hawaiʻi. Its peak is 4,207.3 m above sea level, making it the highest point in Hawaii and second-highest peak of an island on Earth."  # @param {type:"string"}
assistant_message = "Mauna Kea is 5,207.3 meters tall."  # @param {type:"string"}

client = OpenAI(api_key=UPSTAGE_API_KEY, base_url="https://api.upstage.ai/v1/solar")

response = client.chat.completions.create(
    model="solar-1-mini-groundedness-check",
    messages=[
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": assistant_message},
    ],
)

pprint(json.loads(response.json()))

{'choices': [{'finish_reason': 'stop',
              'index': 0,
              'logprobs': None,
              'message': {'content': 'notGrounded',
                          'function_call': None,
                          'role': 'assistant',
                          'tool_calls': None}}],
 'created': 1721885127,
 'id': '4a4a4431-0e03-43d6-a6ad-a598d15eb52a',
 'model': 'solar-1-mini-groundedness-check-240502',
 'object': 'chat.completion',
 'system_fingerprint': '',
 'usage': {'completion_tokens': 5, 'prompt_tokens': 198, 'total_tokens': 203}}


In [22]:
# @title GroundednessCheck with langchain
import os
from langchain_upstage import UpstageGroundednessCheck

user_message = "Mauna Kea is an inactive volcano on the island of Hawaiʻi. Its peak is 4,207.3 m above sea level, making it the highest point in Hawaii and second-highest peak of an island on Earth."  # @param {type:"string"}
assistant_message = "Mauna Kea is 5,207.3 meters tall."  # @param {type:"string"}

groundedness_check = UpstageGroundednessCheck()

request_input = {
    "context": user_message,
    "answer": assistant_message,
}
response = groundedness_check.invoke(request_input)
print(response)

notGrounded


In [23]:
# @title GroundednessCheck with llama-index
from llama_index.llms.upstage import Upstage
from llama_index.core.llms import ChatMessage

user_message = "Mauna Kea is an inactive volcano on the island of Hawaiʻi. Its peak is 4,207.3 m above sea level, making it the highest point in Hawaii and second-highest peak of an island on Earth."  # @param {type:"string"}
assistant_message = "Mauna Kea is 5,207.3 meters tall."  # @param {type:"string"}


llm = Upstage(model="solar-1-mini-groundedness-check")
response = llm.chat(
    messages=[
        ChatMessage(role="user", content=user_message),
        ChatMessage(role="assistant", content=assistant_message),
    ]
)

print(response)

assistant: notGrounded


# Function Calling

A function calling occurs when you interact with the Chat API to communicate with a Language Learning Model (LLM). Within the tool array, you have the flexibility to define custom functions. This capability enables the model to dynamically generate and provide function signatures in JSON format, facilitating seamless integration with external tools and applications.

In [24]:
# @title Function Calling with http request
from openai import OpenAI
import json

user_message = "How is the weather in Seoul today?"  # @param {type:"string"}

client = OpenAI(api_key=UPSTAGE_API_KEY, base_url="https://api.upstage.ai/v1/solar")


# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
def get_current_weather(location, unit="fahrenheit"):
    """Get the current weather in a given location"""
    if "seoul" in location.lower():
        return json.dumps({"location": "Seoul", "temperature": "10", "unit": unit})
    elif "san francisco" in location.lower():
        return json.dumps(
            {"location": "San Francisco", "temperature": "72", "unit": unit}
        )
    elif "paris" in location.lower():
        return json.dumps({"location": "Paris", "temperature": "22", "unit": unit})
    else:
        return json.dumps({"location": location, "temperature": "unknown"})


def run_conversation():
    # Step 1: send the conversation and available functions to the model
    messages = [
        {
            "role": "user",
            "content": user_message,
        }
    ]
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    response = client.chat.completions.create(
        model="solar-1-mini-chat",
        messages=messages,
        tools=tools,
        tool_choice="auto",  # auto is default, but we'll be explicit
    )
    response_message = response.choices[0].message
    tool_calls = response_message.tool_calls

    # Step 2: check if the model wanted to call a function
    if tool_calls:
        # Step 3: call the function
        # Note: the JSON response may not always be valid; be sure to handle errors
        available_functions = {
            "get_current_weather": get_current_weather,
        }  # only one function in this example, but you can have multiple
        messages.append(response_message)  # extend conversation with assistant's reply
        # Step 4: send the info for each function call and function response to the model
        for tool_call in tool_calls:
            function_name = tool_call.function.name
            function_to_call = available_functions[function_name]
            function_args = json.loads(tool_call.function.arguments)
            function_response = function_to_call(
                location=function_args.get("location"),
                unit=function_args.get("unit"),
            )
            messages.append(
                {
                    "tool_call_id": tool_call.id,
                    "role": "tool",
                    "name": function_name,
                    "content": function_response,
                }
            )  # extend conversation with function response
        second_response = client.chat.completions.create(
            model="solar-1-mini-chat",
            messages=messages,
        )  # get a new response from the model where it can see the function response
        return second_response


response = run_conversation()
pprint(json.loads(response.json()))

{'choices': [{'finish_reason': 'stop',
              'index': 0,
              'logprobs': None,
              'message': {'content': 'The current weather in Seoul is 10 '
                                     'degrees Celsius.',
                          'function_call': None,
                          'role': 'assistant',
                          'tool_calls': None}}],
 'created': 1721885133,
 'id': 'b805e7d3-3fac-4741-8246-f85ac6132af2',
 'model': 'solar-1-mini-chat-240612',
 'object': 'chat.completion',
 'system_fingerprint': None,
 'usage': {'completion_tokens': 16, 'prompt_tokens': 640, 'total_tokens': 656}}


In [25]:
# @title Fwith langchain
from langchain_upstage import ChatUpstage
from langchain.tools import tool
import json

user_message = "How is the weather in Seoul today?"  # @param {type:"string"}


# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
@tool
def get_current_weather(location, unit="fahrenheit"):
    """
    return location's weather information
    """
    weather_data = {
        "San Francisco": {"celsius": "15°C", "fahrenheit": "59°F"},
        "Seoul": {"celsius": "16°C", "fahrenheit": "61°F"},
        "Paris": {"celsius": "11°C", "fahrenheit": "52°F"},
    }
    return f"The weather in {location} is {weather_data[location][unit]}."


available_functions = {"get_current_weather": get_current_weather}

llm = ChatUpstage()

tools = [get_current_weather]
llm_with_tools = llm.bind_tools(tools)


# Step 1: send the conversation and available functions to the model
messages = [{"role": "user", "content": user_message}]
response = llm_with_tools.invoke(messages)

# Step 2: check if the model wanted to call a function
if response.tool_calls:
    tool_call = response.tool_calls[0]

    # Step 3: call the function
    function_name = tool_call["name"]
    function_to_call = available_functions[function_name]
    function_args = tool_call["args"]
    # Step 4: send the info for each function call and function response to the model
    function_response = function_to_call.invoke(function_args)

    print(function_response)

The weather in Seoul is 61°F.


In [49]:
import llama_index

print(llama_index.__spec__)
dir(llama_index)

ModuleSpec(name='llama_index', loader=<_frozen_importlib_external._NamespaceLoader object at 0x10e8dbd60>, submodule_search_locations=_NamespacePath(['/Users/juhyung/upstage/projects/upstage-cookbook/.venv/lib/python3.10/site-packages/llama_index']))


['__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'core',
 'embeddings',
 'llms',
 'readers']

In [50]:
# @title Function Calling with llama-index
import json
from typing import Sequence, List

from llama_index.llms.upstage import Upstage
from llama_index.core.llms import ChatMessage
from llama_index.core.tools import BaseTool, FunctionTool
from llama_index.core.agent import ReActAgent

user_message = "How is the weather in Seoul today?"  # @param {type:"string"}


# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
def get_current_weather(location, unit="fahrenheit"):
    """
    return location's weather information
    """
    weather_data = {
        "San Francisco": {"celsius": "15°C", "fahrenheit": "59°F"},
        "Seoul": {"celsius": "16°C", "fahrenheit": "61°F"},
        "Paris": {"celsius": "11°C", "fahrenheit": "52°F"},
    }
    return f"The weather in {location} is {weather_data[location][unit]}."


tool = FunctionTool.from_defaults(fn=get_current_weather)
llm = Upstage()

agent = ReActAgent.from_tools(
    tools=[tool],
    llm=llm,
)

response = agent.chat(user_message)
print(response)

The weather in Seoul is 61°F.


# Embedding

Embed any text with Solar Embeddings API.

The embeddings API converts text into numbers that computers can understand. Imagine converting a sentence into a list of numbers, each capturing a piece of the sentence's meaning. This makes it easier for machines to do tasks like finding similar texts, sorting information, or even answering questions.

Solar Embeddings API features dual models, solar-embedding-1-large-query for user queries and solar-embedding-1-large-passage for document embedding, within a unified vector space, designed to enhance text processing tasks with a focus on performance.

For developers building search engines or retrieval systems, solar-embedding-1-large-passage is ideal for initially embedding the searchable content. Upon user query submission, leveraging solar-embedding-1-large-query facilitates efficient and accurate matching of queries with the embedded content, thereby optimizing the information retrieval process.

In [28]:
# @title Embedding with http request
import numpy as np
from openai import OpenAI

query = "What makes Solar LLM small yet effective?"  # @param type:{"string"}
passage = "SOLAR 10.7B: Scaling Large Language Models with Simple yet Effective Depth Up-Scaling. DUS is simple yet effective in scaling up high performance LLMs from small ones."  # @param type:{"string"}

client = OpenAI(api_key=UPSTAGE_API_KEY, base_url="https://api.upstage.ai/v1/solar")

query_result = (
    client.embeddings.create(model="solar-embedding-1-large-query", input=query)
    .data[0]
    .embedding
)

document_result = (
    client.embeddings.create(model="solar-embedding-1-large-passage", input=passage)
    .data[0]
    .embedding
)

similarity = np.dot(np.array(query_result), np.array(document_result))
print(f"Similarity between query and document: {similarity}")

Similarity between query and document: 0.39890187266794896


In [29]:
# @title Embedding with langchain
from langchain_upstage import UpstageEmbeddings

query = "What makes Solar LLM small yet effective?"  # @param type:{"string"}
passage1 = "SOLAR 10.7B: Scaling Large Language Models with Simple yet Effective Depth Up-Scaling."  # @param type:{"string"}
passage2 = "DUS is simple yet effective in scaling up high performance LLMs from small ones."  # @param type:{"string"}

embeddings = UpstageEmbeddings(model="solar-embedding-1-large")

doc_result = embeddings.embed_documents([passage1, passage2])

query_result = embeddings.embed_query(query)
similarity = np.dot(np.array(query_result), np.array(document_result))
print(f"Similarity between query and document: {similarity}")

Similarity between query and document: 0.39890187266794896


In [30]:
# @title Embedding with llama-index
from llama_index.embeddings.upstage import UpstageEmbedding

query = "What makes Solar LLM small yet effective?"  # @param type:{"string"}
passage1 = "SOLAR 10.7B: Scaling Large Language Models with Simple yet Effective Depth Up-Scaling."  # @param type:{"string"}
passage2 = "DUS is simple yet effective in scaling up high performance LLMs from small ones."  # @param type:{"string"}

embeddings = UpstageEmbedding(model="solar-embedding-1-large")

doc_result = embeddings.get_text_embedding_batch([passage1, passage2])

query_result = embeddings.get_query_embedding(query)
similarity = np.dot(np.array(query_result), np.array(document_result))
print(f"Similarity between query and document: {similarity}")

Similarity between query and document: 0.39890187266794896


# Layout Analysis

Detect document elements from any document including tables and figures.

*Example Image*

![invoice.png](./data/invoice.png)

In [31]:
# @title Download example image file
import os

image_url = "https://github.com/UpstageAI/cookbook/blob/main/data/invoice.png?raw=true"  # @param {type:"string"}
filename = "data/invoice.png"  # @param {type:"string"}

if not os.path.exists("data"):
    os.mkdir("data")

response = requests.get(image_url)
if response.status_code == 200:
    with open(filename, "wb") as f:
        f.write(response.content)
    print(f"Image successfully downloaded: {filename}")
else:
    print("Image couldn't be retrieved")

Image successfully downloaded: data/invoice.png


In [32]:
# @title Layout Analysis with http request
import requests

filename = "data/invoice.png"  # @param {type:"string"}

url = "https://api.upstage.ai/v1/document-ai/layout-analysis"
headers = {"Authorization": f"Bearer {UPSTAGE_API_KEY}"}
files = {"document": open(filename, "rb")}
data = {"ocr": True}
response = requests.post(url, headers=headers, files=files, data=data)
pprint(response.json())

{'api': '1.1',
 'billed_pages': 1,
 'elements': [{'bounding_box': [{'x': 93, 'y': 75},
                                {'x': 335, 'y': 75},
                                {'x': 335, 'y': 129},
                                {'x': 93, 'y': 129}],
               'category': 'heading1',
               'html': "<h1 id='0' style='font-size:22px'>INVOICE</h1>",
               'id': 0,
               'page': 1,
               'text': 'INVOICE'},
              {'bounding_box': [{'x': 806, 'y': 104},
                                {'x': 1310, 'y': 104},
                                {'x': 1310, 'y': 139},
                                {'x': 806, 'y': 139}],
               'category': 'heading1',
               'html': "<br><h1 id='1' style='font-size:16px'>Invoice ID # "
                       'INV-AJ355548</h1>',
               'id': 1,
               'page': 1,
               'text': 'Invoice ID # INV-AJ355548'},
              {'bounding_box': [{'x': 807, 'y': 170},
                   

In [33]:
# @title Layout Analysis with langchain
import os
from langchain_upstage import UpstageLayoutAnalysisLoader
import json

file_path = "data/invoice.png"  # @param {type:"string"}
loader = UpstageLayoutAnalysisLoader(file_path, split="page", use_ocr=True)

pages = loader.load()  # or loader.lazy_load()
for page in pages:
    pprint(json.loads(page.json()))

{'metadata': {'page': 1},
 'page_content': "<h1 id='0' style='font-size:22px'>INVOICE</h1> <br><h1 "
                 "id='1' style='font-size:16px'>Invoice ID # INV-AJ355548</h1> "
                 "<h1 id='2' style='font-size:18px'>Invoice Date 9/7/1992</h1> "
                 "<h1 id='3' style='font-size:22px'>Service Details Form</h1> "
                 "<h1 id='4' style='font-size:20px'>Company</h1> <br><h1 "
                 "id='5' style='font-size:20px'>Upstage</h1> <p id='6' "
                 "data-category='paragraph' "
                 "style='font-size:16px'>Name<br>Lucy Park</p> <br><p id='7' "
                 "data-category='paragraph' "
                 "style='font-size:16px'>Name<br>Sung Kim</p> <p id='8' "
                 "data-category='paragraph' style='font-size:18px'>-00 "
                 "'ess<br>Gwanggyojungang-ro 338, "
                 'Gyeonggi-do,<br>Sanghyeon-dong, Suji-gu<br>Yongin-si, South '
                 "Korea</p> <br><h1 id='9' style='font-size

In [43]:
# @title Layout Analysis with langchain
import os
from langchain_upstage import UpstageLayoutAnalysisParser
import json
from langchain_core.document_loaders import BaseLoader, Blob

file_path = "data/invoice.png"  # @param {type:"string"}
blob = Blob.from_path(file_path)
loader = UpstageLayoutAnalysisParser(split="page", use_ocr=True)

pages = loader.lazy_parse(blob)  # or loader.lazy_load()
for page in pages:
    pprint(json.loads(page.json()))

{'metadata': {'page': 1},
 'page_content': "<h1 id='0' style='font-size:22px'>INVOICE</h1> <br><h1 "
                 "id='1' style='font-size:16px'>Invoice ID # INV-AJ355548</h1> "
                 "<h1 id='2' style='font-size:18px'>Invoice Date 9/7/1992</h1> "
                 "<h1 id='3' style='font-size:22px'>Service Details Form</h1> "
                 "<h1 id='4' style='font-size:20px'>Company</h1> <br><h1 "
                 "id='5' style='font-size:20px'>Upstage</h1> <p id='6' "
                 "data-category='paragraph' "
                 "style='font-size:16px'>Name<br>Lucy Park</p> <br><p id='7' "
                 "data-category='paragraph' "
                 "style='font-size:16px'>Name<br>Sung Kim</p> <p id='8' "
                 "data-category='paragraph' style='font-size:18px'>-00 "
                 "'ess<br>Gwanggyojungang-ro 338, "
                 'Gyeonggi-do,<br>Sanghyeon-dong, Suji-gu<br>Yongin-si, South '
                 "Korea</p> <br><h1 id='9' style='font-size

# Key Information Extraction

Extract key information from target documents.

Will use same invoice.png example image.

In [39]:
# @title Key Information Extraction with http request
import requests

filename = "data/invoice.png"  # @param {type:"string"}
model = "receipt-extraction"  # @param {type:"string"}

url = f"https://api.upstage.ai/v1/document-ai/extraction"
headers = {"Authorization": f"Bearer {UPSTAGE_API_KEY}"}
files = {"document": open(filename, "rb")}
data = {"model": model}
response = requests.post(url, headers=headers, files=files, data=data)
pprint(response.json())

{'apiVersion': '1.1',
 'confidence': 0.5562,
 'documentType': 'receipt',
 'fields': [{'confidence': 0.7508,
             'id': 0,
             'key': 'store.store_name',
             'refinedValue': 'Company',
             'type': 'header',
             'value': 'Company'},
            {'confidence': 0.1591,
             'id': 1,
             'key': 'store.store_registration_number',
             'refinedValue': 'Up',
             'type': 'header',
             'value': 'Up'},
            {'confidence': 0.04,
             'id': 2,
             'key': 'store.store_name',
             'refinedValue': 'Su Kim',
             'type': 'content',
             'value': 'Su Kim'},
            {'confidence': 0.9652,
             'id': 3,
             'key': 'store.store_address',
             'refinedValue': 'Lucy Park',
             'type': 'content',
             'value': 'Lucy Park'},
            {'confidence': 0.9685,
             'id': 4,
             'key': 'store.store_phone_number',
    

# Document OCR

Extract all text from any document.

In [40]:
# @title Document OCR with http request
import requests

filename = "data/invoice.png"  # @param {type:"string"}
url = "https://api.upstage.ai/v1/document-ai/ocr"
headers = {"Authorization": f"Bearer {UPSTAGE_API_KEY}"}
files = {"document": open(filename, "rb")}

response = requests.post(url, headers=headers, files=files)
pprint(response.json())

{'apiVersion': '1.1',
 'confidence': 0.9939,
 'metadata': {'pages': [{'height': 1370, 'page': 1, 'width': 1406}]},
 'mimeType': 'multipart/form-data',
 'modelVersion': 'ocr-2.2.1',
 'numBilledPages': 1,
 'pages': [{'confidence': 0.9939,
            'height': 1370,
            'id': 0,
            'text': 'INVOICE # INV-AJ355548 \n'
                    'Invoice ID \n'
                    'Invoice Date 9/7/1992 \n'
                    'Service Details Form \n'
                    'Company Name \n'
                    'Upstage Sung Kim \n'
                    "Name -00 'ess \n"
                    'Lucy Park Gwanggyojungang-ro 338, Gyeonggi-do, \n'
                    'Sanghyeon-dong, Suji-gu \n'
                    'Yongin-si, South Korea \n'
                    'Address \n'
                    '7 Pepper Wood Street, 130 Stone Corner \n'
                    'Terrace \n'
                    'Wilkes Barre, Pennsylvania, 18768 \n'
                    'United States \n'
                    '