## LangChain Quick Start (ver. 0.1.14)

In [10]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment so the
# os.getenv(...) lookups in later cells (API key, HANA_DB_* settings) can find them.
load_dotenv()

True

In [11]:
from langchain_openai import ChatOpenAI

# Chat model reused by every chain in this notebook.
# The API key is looked up in the OPENAI_API_KEY environment variable.
llm = ChatOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    model="gpt-3.5-turbo-1106",
    temperature=0.2,
)

In [12]:
# Baseline call: the question goes straight to the model with no prompt template and no
# retrieved context. LangSmith was not part of the model's training data (per the original
# note), so this answer is not expected to be accurate.
llm.invoke("how can langsmith help with testing?")

AIMessage(content='Langsmith can help with testing by providing language testing services to assess the language proficiency of individuals or groups. This can include standardized language proficiency tests, custom language assessments, and language placement tests. Langsmith can also provide scoring and reporting services to help organizations and institutions make informed decisions based on the language test results. Additionally, Langsmith can offer test preparation materials and resources to help individuals and groups prepare for language proficiency exams.', response_metadata={'token_usage': {'completion_tokens': 84, 'prompt_tokens': 15, 'total_tokens': 99}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': 'fp_77a673219d', 'finish_reason': 'stop', 'logprobs': None}, id='run-7820d7d3-f01d-4703-ab35-fc24bdc5cdf2-0')

In [13]:
# Guide the model with a prompt template: a fixed system instruction followed by
# the conversation supplied by the caller.
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are world class technical documentation writer, so please tell me in one clear, concise sentence."),
    # Filled at invoke time with the message list supplied as "messages".
    MessagesPlaceholder(variable_name="messages"),
])

In [14]:
from langchain_core.messages import HumanMessage

# Combine the prompt and the model into a simple LLM chain.
chain = prompt | llm

# Pass the input as a dict keyed by the placeholder's variable name ("messages")
# rather than a bare list: the call then no longer relies on implicit
# single-variable coercion, and it matches the dict inputs used by the
# document-chain cells further down.
chain.invoke({"messages": [HumanMessage(content="how can langsmith help with testing?")]})

AIMessage(content='Langsmith can help with testing by providing automated testing tools and frameworks to streamline the testing process and improve the efficiency and accuracy of testing activities.', response_metadata={'token_usage': {'completion_tokens': 28, 'prompt_tokens': 38, 'total_tokens': 66}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': 'fp_77a673219d', 'finish_reason': 'stop', 'logprobs': None}, id='run-ae38e3bc-3364-4093-8b93-b4ff4b692cb5-0')

In [15]:
# Append a string output parser so the chain returns plain text instead of an AIMessage.
from langchain_core.output_parsers import StrOutputParser
output_parser = StrOutputParser()

chain_strOutput = prompt | llm | output_parser

# Pass the input as a dict keyed by the placeholder's variable name ("messages")
# rather than a bare list, so the call does not rely on implicit single-variable coercion.
chain_strOutput.invoke({"messages": [HumanMessage(content="How can langsmith help with testing?")]})

'Langsmith can help with testing by providing automated language translation and localization testing services to ensure the accuracy and quality of translated content.'

### Use SAP HANA Cloud Vector Engine

Reference: [How to use JSON files in vector stores with LangChain](https://how.wtf/how-to-use-json-files-in-vector-stores-with-langchain.html)

In [16]:
# Pip install necessary package
!pip install --upgrade --quiet  hdbcli

In [17]:
# Create a database connection to a HANA Cloud instance.
from hdbcli import dbapi
# Repeats the load_dotenv() call from the first cell before the HANA_DB_* lookups below.
load_dotenv()

# Use connection settings from the environment
connection = dbapi.connect(
    address=os.getenv("HANA_DB_ADDRESS"),
    # os.getenv returns a string (or None when the variable is unset), so the port is passed as text.
    port=os.getenv("HANA_DB_PORT"),
    user=os.getenv("HANA_DB_USER"),
    password=os.getenv("HANA_DB_PASSWORD"),
    autocommit=True,
    # NOTE(review): this looks like it turns off TLS certificate validation — acceptable
    # for a sandbox demo at most; confirm before pointing at a non-demo instance.
    sslValidateCertificate=False,
    # currentSchema="HOTEL",
)

In [32]:
from langchain_community.document_loaders import JSONLoader

# Load the bibliographic JSON export; the jq expression selects every title value.
loader = JSONLoader(
    file_path="./Data/書誌情報ダウンロード(JSON形式)_20240418134938.json",
    jq_schema=".[] | .title[].value",
    text_content=False,
)
documents = loader.load()

In [40]:
from langchain_text_splitters import CharacterTextSplitter

# Split the loaded documents into chunks (chunk_size=1000, no overlap) before embedding.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
text_chunks = text_splitter.split_documents(documents)

In [41]:
import getpass
from langchain_cohere import CohereEmbeddings

# Only prompt for the Cohere key when it is not already in the environment
# (for example, loaded from .env by load_dotenv()). This keeps "Run All" from
# blocking on input and avoids overwriting a key that is already configured.
if not os.environ.get("COHERE_API_KEY"):
    os.environ["COHERE_API_KEY"] = getpass.getpass("COHERE_API_KEY: ")

In [45]:
%%time
from langchain_openai import OpenAIEmbeddings

from langchain_community.vectorstores.hanavector import HanaDB

# embedding = OpenAIEmbeddings(model="text-embedding-3-large")
# embedding = ErnieEmbeddings()
embedding = CohereEmbeddings(model="embed-multilingual-light-v3.0")

# Access the vector DB with a new table
db = HanaDB(
    connection=connection,
    embedding=embedding,
    table_name="ITO_DEMO_RETRIEVAL_CHAIN",
)

# Delete already existing entries from the table
db.delete(filter={})

# add the loaded document chunks from the file
db.add_documents(text_chunks)

# # Create a retriever instance of the vector store
# retriever = db.as_retriever()

CPU times: user 3.15 s, sys: 518 ms, total: 3.67 s
Wall time: 36.7 s


[]

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain

# Prompt for the title-recommendation chain: {context} receives the documents passed
# under "context", and the conversation is inserted at the "messages" placeholder.
# Fixed two typos in the system text: "::" -> ":" and "\nn" -> "\n\n" (the latter put a
# stray "n" directly in front of the context).
question_answering_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Based on the following JSON file, answer up to 10 titles similar to the book title provided by the user in a list format in order of similarity:\n\n{context}, Question the title of the book.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Chain that stuffs the supplied documents into the prompt and calls the model.
document_chain = create_stuff_documents_chain(llm, question_answering_prompt)

In [None]:
# from langchain.memory import ChatMessageHistory
# question = "日本語ワープロ検定試験模擬問題集"
# chat_history = ChatMessageHistory()
# chat_history.add_user_message(question)

# document_chain.invoke(
#     {
#         "messages": chat_history.messages,
#         "context": retriever.invoke(question),
#     }
#     )

In [43]:
%%time
question = "日本語ワープロ検定試験模擬問題集"
docs = db.similarity_search(question, k=100)
for doc in docs:
    print(doc.page_content,",")

全国通訳案内士試験合格!対策地理 ,
情報セキュリティマネジメント予想問題集 ,
1週間で日商簿記3級に合格できるテキスト&問題集 : 書いて覚えて合格できる ,
地方自治法よく出る問題123問 : 頻出テーマを徹底分析/実戦力・問題対応力養成 ,
日本一わかりやすい地方創生の教科書 : 全く新しい45の新手法&新常識 : テレワーク 移住促進 インバウンド ,
地方公務員法よく出る問題108問 : 頻出テーマを徹底分析/実戦力・問題対応力養成 ,
スッキリわかるサーブレット&JSP入門 ,
THE突破ファイルマンガ推理クイズブック ,
もぐら〈仮〉 ,
中学受験をするきみへ : 決定版 : 勉強とメンタルの悩みを解決! ,
「生きる力を引き出す」住まい : 障がいをもつ方のくらしと家づくり : パーソンデザインで考えるくらしを変える福祉住環境 ,
航西日記 : パリ万国博見聞録 : 現代語訳 ,
できる江戸時代の世界を作ろう!マインクラフト和風建築わくわくスゴ技ブック ,
ムツゴロウ麻雀物語 ,
幼稚園WARS ,
仕事と人間 : 70万年のグローバル労働史 ,
打田十紀夫フィンガースタイル・ギター・メソッド : 基本演習と実践曲 ,
トキワ荘の遺伝子 : 北見けんいちが語る巨匠たちの横顔 ,
音旅のキセキ : 日本で新たな人生を見つけた音楽家の、誰も知らなかった素顔 : マーティ・フリードマン自叙伝 ,
真実の口 ,
AIが答えを出せない問いの設定力 ,
新しい英検の教科書 ,
放浪・雪の夜 : 織田作之助傑作集 ,
クイズあなたは小学5年生より賢いの? : 大人もパニックの難問に挑戦! ,
今日から始めて来月収穫!マンガでわかる!ズボラ野菜づくり ,
植物の謎 : 60のQ&Aから見える、強くて緻密な生きざま ,
肉ビジネス : 食べるのが好きな人から専門家まで楽しく読める肉の教養 ,
中学受験奇跡を引き出す合格法則 : 予約殺到の東大卒スーパー家庭教師が教える ,
エンターテインメント・ビジネス : 産業構造と契約実務 ,
オンラインセラピーの理論と実践 : インターネットを通じた個人・集団・家族・組織への介入 ,
ヤバすぎる!偉人の勉強やり方図鑑 ,
ゴルゴ13スピンオフシリーズ : さいとう・プロ作品 ,
調教師になったトップ・ジョッキー : 2500勝

### Message history

In [None]:
!pip install openai==0.28

In [None]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-memory history shared by the cells below. It is created here because the only
# other definition in the notebook sits in a commented-out cell, which left the
# following cells failing with NameError on a fresh kernel.
chat_history = ChatMessageHistory()

# Wrap the document chain so each call is tied to a message history.
# The factory ignores session_id and always returns the single history above.
# NOTE(review): question_answering_prompt only declares "messages" and "context", so the
# history injected under "chat_history" does not appear to reach the model — confirm
# whether history_messages_key should be dropped or a matching placeholder added.
chain_with_message_history = RunnableWithMessageHistory(
    document_chain,
    lambda session_id: chat_history,
    input_messages_key="messages",
    history_messages_key="chat_history",
)

In [None]:
# Record the new user turn, then run the history-aware chain on the whole conversation
# together with the documents retrieved for this question.
question = "Ito is the one who took action on these minutes. What version of LangChain did Mr. Ito use before?"
chat_history.add_user_message(question)

chain_with_message_history.invoke(
    {"messages": chat_history.messages, "context": retriever.invoke(question)},
    # The history factory (lambda session_id: chat_history) ignores the id, hence "unused".
    {"configurable": {"session_id": "unused"}},
)

In [None]:
# Follow-up turn: append it to the same history and invoke the chain again, so the
# earlier question is part of the messages sent to the model.
question = "What did I just ask you?"
chat_history.add_user_message(question)

chain_with_message_history.invoke(
    {"messages": chat_history.messages, "context": retriever.invoke(question)},
    # The history factory (lambda session_id: chat_history) ignores the id, hence "unused".
    {"configurable": {"session_id": "unused"}},
)