In [1]:
! pip3 install -qU langchain-upstage

In [2]:

# Load the `dotenv` IPython extension and run its `%dotenv` magic
# (presumably python-dotenv reading a local `.env` file into the process
# environment — confirm against your setup).
%load_ext dotenv
%dotenv
# The environment is expected to provide UPSTAGE_API_KEY; no key is hardcoded in
# this notebook (presumably the Upstage clients created below read it — TODO confirm).

In [3]:
import warnings

warnings.filterwarnings("ignore")

In [4]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_upstage import ChatUpstage

# Inputs for the question-generation demo: the topic to ask about and the
# passage the question should be drawn from.
keyword = "DUS"
text = """
SOLAR 10.7B: Scaling Large Language Models with Simple yet Effective Depth Up-Scaling

We introduce SOLAR 10.7B, a large language model (LLM) with 10.7 billion parameters, 
demonstrating superior performance in various natural language processing (NLP) tasks. 
Inspired by recent efforts to efficiently up-scale LLMs, 
we present a method for scaling LLMs called depth up-scaling (DUS), 
which encompasses depthwise scaling and continued pretraining.
In contrast to other LLM up-scaling methods that use mixture-of-experts, 
DUS does not require complex changes to train and inference efficiently. 
We show experimentally that DUS is simple yet effective 
in scaling up high-performance LLMs from small ones. 
Building on the DUS model, we additionally present SOLAR 10.7B-Instruct, 
a variant fine-tuned for instruction-following capabilities, 
surpassing Mixtral-8x7B-Instruct. 
SOLAR 10.7B is publicly available under the Apache 2.0 license, 
promoting broad access and application in the LLM field.
"""

# Chat model client, constructed with no explicit arguments.
llm = ChatUpstage()

# Prompt with two placeholders, {keyword} and {text}, filled in at invoke time.
prompt_template = PromptTemplate.from_template(
    """
    Please provide one question from the following text 
    regarding "{keyword}":
    
    ---
    {text}
    """
)

# Pipeline: prompt -> chat model -> plain string.
chain = prompt_template | llm | StrOutputParser()

# Last expression of the cell, so the generated question is displayed as output.
chain.invoke(dict(keyword=keyword, text=text))

'What is the method for scaling large language models (LLMs) called, which encompasses depthwise scaling and continued pretraining, and is introduced as an alternative to methods using mixture-of-experts?'

In [6]:
from langchain_core.prompts import PromptTemplate

# Passage the model must answer from; reused by the cells below.
context = """
We introduce SOLAR 10.7B, a large language model (LLM) with 10.7 billion parameters, 
    demonstrating superior performance in various natural language processing (NLP) tasks. 
    Inspired by recent efforts to efficiently up-scale LLMs, 
    we present a method for scaling LLMs called depth up-scaling (DUS), 
    which encompasses depthwise scaling and continued pretraining.
    In contrast to other LLM up-scaling methods that use mixture-of-experts, 
    DUS does not require complex changes to train and inference efficiently. 
    We show experimentally that DUS is simple yet effective 
    in scaling up high-performance LLMs from small ones. 
    Building on the DUS model, we additionally present SOLAR 10.7B-Instruct, 
    a variant fine-tuned for instruction-following capabilities, 
    surpassing Mixtral-8x7B-Instruct. 
    SOLAR 10.7B is publicly available under the Apache 2.0 license, 
    promoting broad access and application in the LLM field.
"""

# Context-restricted QA prompt. It instructs the model to reply with a fixed
# fallback sentence when the context does not contain the answer.
# Placeholders: {question} and {Context} (note the capital C).
prompt_template = PromptTemplate.from_template(
    """
    Please provide most correct answer from the following context. 
    If the answer is not present in the context, please write "The information is not present in the context."
    ---
    Question: {question}
    ---
    Context: {Context}
    """
)

# Rebind `chain` to the QA pipeline: prompt -> chat model -> plain string.
chain = prompt_template | llm | StrOutputParser()

'SOLAR 10.7B is available under the Apache 2.0 license.'

In [7]:
# A question whose answer is stated in the context.
chain.invoke(
    {
        "question": "What is the license under which SOLAR 10.7B is available?",
        "Context": context,
    }
)

'SOLAR 10.7B is available under the Apache 2.0 license.'

In [11]:
# A question the context does not answer; the prompt asks for the fallback sentence.
chain.invoke(
    {
        "question": "Who is the created SOLAR 10.7B?",
        "Context": context,
    }
)

'The information is not present in the context.'

In [10]:
# Another question about something the context never mentions.
chain.invoke(
    {
        "question": "Did Google provide resources for the SOLAR 10.7B project?",
        "Context": context,
    }
)

'The information is not present in the context.'

In [12]:
# An open-ended question that the context does cover.
chain.invoke(
    {
        "question": "Why DUS is good",
        "Context": context,
    }
)

'DUS is good because it is a simple yet effective method for scaling up high-performance LLMs from small ones. It encompasses depthwise scaling and continued pretraining, and does not require complex changes to train and inference efficiently.'

In [14]:
# Keep the reply in `answer` and print it as plain text.
answer = chain.invoke(
    {
        "question": "What is the name of the variant fine-tuned for instruction-following capabilities?",
        "Context": context,
    }
)
print(answer)

The name of the variant fine-tuned for instruction-following capabilities is SOLAR 10.7B-Instruct.


## Groundedness Check with LangChain and Upstage
![Groundedness](./figures/gc.png)


In [16]:
# Groundedness check (GC) on an answer produced by the QA chain.
from langchain_upstage import UpstageGroundednessCheck

groundedness_check = UpstageGroundednessCheck()

# Step 1: generate an answer from the context.
answer = chain.invoke(
    dict(
        question="What is the name of the variant fine-tuned for instruction-following capabilities?",
        Context=context,
    )
)
print(answer)

# Step 2: ask the checker whether that answer is supported by the same context.
gc_result = groundedness_check.invoke(dict(context=context, answer=answer))
print(gc_result)

# Only a verdict that starts with "grounded" (case-insensitive) counts as a pass.
print(
    "✅ Groundedness check passed"
    if gc_result.lower().startswith("grounded")
    else "❌ Groundedness check failed"
)

The name of the variant fine-tuned for instruction-following capabilities is SOLAR 10.7B-Instruct.
grounded
✅ Groundedness check passed


In [18]:
# GC
from langchain_upstage import UpstageGroundednessCheck

groundedness_check = UpstageGroundednessCheck()

answer = "Solar 10.7B is available to the public with a non-commercial license."
gc_result = groundedness_check.invoke({"context": context, "answer": answer})

print(gc_result)
if gc_result.lower().startswith("grounded"):
    print("✅ Groundedness check passed")
else:
    print("❌ Groundedness check failed")

notGrounded
❌ Groundedness check failed


# Exercise

Write code that uses the groundedness check (GC) to make the LLM's answers more reliable. For example, regenerate the answer and re-run the GC until it reports a grounded result.