In [17]:
# !pip install psycopg2-binary



In [2]:
import psycopg2
import configparser

# 读取配置文件
config = configparser.ConfigParser()
config.read('db_config.ini')

# 获取数据库连接信息
db_params = config['postgresql']

# 连接到 PostgreSQL 数据库
conn = psycopg2.connect(
    dbname='postgres',  # 默认连接到 postgres 数据库
    user=db_params['user'],
    password=db_params['password'],
    host=db_params['host'],
    port=db_params['port']
)

# 创建一个游标对象
cur = conn.cursor()

# 列出所有数据库
cur.execute("SELECT datname FROM pg_database;")
databases = cur.fetchall()
print("Databases in the PostgreSQL server:")
for db in databases:
    print(db[0])

# 关闭游标和连接
cur.close()
conn.close()


Databases in the PostgreSQL server:
template0
template1
postgres
CourseMaterials
rdsadmin


In [4]:
# ! pip install pandas tiktoken langchain chromadb langchain-community openai

Collecting tiktoken
  Downloading tiktoken-0.7.0-cp38-cp38-win_amd64.whl.metadata (6.8 kB)
Collecting chromadb
  Downloading chromadb-0.5.0-py3-none-any.whl.metadata (7.3 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.1-py3-none-any.whl.metadata (4.3 kB)
Collecting chroma-hnswlib==0.7.3 (from chromadb)
  Downloading chroma_hnswlib-0.7.3-cp38-cp38-win_amd64.whl.metadata (262 bytes)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.5.0-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.18.0-cp38-cp38-win_amd64.whl.metadata (4.4 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.24.0-py3-none-any.whl.metadata (1.3 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.24.0-py3-none-any.whl.metadata (2.2 kB)
Collecting opentelemetry-instrumentation-fastapi>=0.41b0 (from chromad

In [11]:
import os
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
import openai
import configparser

# 加载配置文件
config = configparser.ConfigParser()
config.read('db_config.ini')

# 设置OpenAI API Key
openai.api_key = config['openai']['api_key']

# 读取文本文件
with open("data/course_medium.txt", "r", encoding="utf-8") as file:
    text = file.read()

# 使用LangChain的文本分割器进行分段
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512, chunk_overlap=100, length_function=len
)

sentences = text.split('。')
sentences = [sentence.strip() for sentence in sentences if sentence.strip()]

# 对每个段落进行进一步分割
split_texts = []
for sentence in sentences:
    split_texts.extend(text_splitter.split_text(sentence))

# 将分段文本存储在DataFrame中
df = pd.DataFrame({'content': split_texts})

# 初始化嵌入模型，并显式传递API密钥
embeddings = OpenAIEmbeddings(openai_api_key=openai.api_key)

# 将每个分段文本转换为向量
embeddings_list = embeddings.embed_documents(df['content'].tolist())
df['embedding'] = embeddings_list

# 创建Chroma数据库并存储向量
persist_directory = "./chroma_db"
db = Chroma(
    embedding_function=embeddings,
    persist_directory=persist_directory
)

# 添加文档到Chroma数据库
texts = df['content'].tolist()
metadatas = [{'content': text} for text in texts]
db.add_texts(texts=texts, metadatas=metadatas)

# 持久化数据库
db.persist()

# 显示前几行结果
print(df.head())


                                             content  \
0  我們要開始這個禮拜的課程這個禮拜會到佼佼作業二然後宣布作業三我們今天的內容會是網路爬蟲也會是...   
1  可以自己評估自己的實力來去把你的程式碼寫到多模組化能夠去分享給別人那上學期我是要求說他們一定...   
2  Soup等如何使用到變成最後變可以模組化成一個分享的class的樣子要怎麼寫那我們先來看一下...   
3  Super這個套件其實爬蟲的工具有超級多不是只有這一套那我教這一套算是簡單好上手那它也是有一...   
4  Source給你的Data只是它公開在網站上秀出來它期望的是人真的去跟它互動而不是機器跟它互...   

                                           embedding  
0  [0.006447721749169884, -0.004048418726103644, ...  
1  [-0.010561584497385644, 0.01106263417999642, 0...  
2  [-0.01855356271909417, -0.008329481096833914, ...  
3  [-0.0036436148085231433, -0.009813557960242894...  
4  [-0.002057345691651404, -0.0019108522518765266...  


In [12]:
# !pip install sqlalchemy




In [1]:
import psycopg2
import configparser
from sqlalchemy import create_engine, text

# 加載配置文件
config = configparser.ConfigParser()
config.read('db_config.ini')

# 讀取PostgreSQL配置
postgresql_config = {
    'host': config['postgresql']['host'],
    'user': config['postgresql']['user'],
    'password': config['postgresql']['password']
}

# 連接到PostgreSQL（不指定資料庫）
conn = psycopg2.connect(
    host=postgresql_config['host'],
    user=postgresql_config['user'],
    password=postgresql_config['password'],
    database='postgres'  # 連接到預設資料庫
)

# 啟用autocommit模式
conn.autocommit = True

# 創建游標
cur = conn.cursor()

# 檢查資料庫是否存在
cur.execute("SELECT 1 FROM pg_database WHERE datname = 'rag_db';")
exists = cur.fetchone()
if not exists:
    # 創建資料庫
    cur.execute("CREATE DATABASE rag_db;")
    print("資料庫 rag_db 已創建")
else:
    print("資料庫 rag_db 已存在")

# 關閉游標和連接
cur.close()
conn.close()

# 使用SQLAlchemy連接到新創建的資料庫並安裝pgvector擴展

# 創建SQLAlchemy引擎
connection_string = f"postgresql+psycopg2://{postgresql_config['user']}:{postgresql_config['password']}@" \
                    f"{postgresql_config['host']}/rag_db"
engine = create_engine(connection_string)

# 創建連接並運行SQL命令來安裝pgvector擴展
with engine.connect() as connection:
    connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))

print("pgvector 擴展已安裝")


資料庫 rag_db 已存在
pgvector 擴展已安裝


In [3]:
import psycopg2
import configparser
from sqlalchemy import create_engine, text as sql_text
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
import openai

# 加载配置文件
config = configparser.ConfigParser()
config.read('db_config.ini')

# 读取PostgreSQL配置
postgresql_config = {
    'host': config['postgresql']['host'],
    'user': config['postgresql']['user'],
    'password': config['postgresql']['password']
}

# 连接到PostgreSQL（不指定数据库）
conn = psycopg2.connect(
    host=postgresql_config['host'],
    user=postgresql_config['user'],
    password=postgresql_config['password'],
    database='postgres'  # 连接到默认数据库
)

# 启用autocommit模式
conn.autocommit = True

# 创建游标
cur = conn.cursor()

# 检查数据库是否存在
cur.execute("SELECT 1 FROM pg_database WHERE datname = 'rag_db';")
exists = cur.fetchone()
if not exists:
    # 创建数据库
    cur.execute("CREATE DATABASE rag_db;")
    print("数据库 rag_db 已创建")
else:
    print("数据库 rag_db 已存在")

# 关闭游标和连接
cur.close()
conn.close()

# 使用SQLAlchemy连接到新创建的数据库并安装pgvector扩展

# 创建SQLAlchemy引擎
connection_string = f"postgresql+psycopg2://{postgresql_config['user']}:{postgresql_config['password']}@" \
                    f"{postgresql_config['host']}/rag_db"
engine = create_engine(connection_string)

# 创建连接并运行SQL命令来安装pgvector扩展
with engine.connect() as connection:
    connection.execute(sql_text("CREATE EXTENSION IF NOT EXISTS vector;"))
    result = connection.execute(sql_text("SELECT extname FROM pg_extension WHERE extname = 'vector';")).fetchone()
    if result:
        print("pgvector 扩展已成功启用")
        vector_check = connection.execute(sql_text("SELECT typname FROM pg_type WHERE typname = 'vector';")).fetchone()
        if vector_check:
            print("vector 类型已成功启用")
        else:
            raise Exception("vector 类型未成功启用")
    else:
        raise Exception("pgvector 扩展未成功启用")

# 读取文本文件
with open("data/course_medium.txt", "r", encoding="utf-8") as file:
    text_content = file.read()

# 使用LangChain的文本分割器进行分段
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512, chunk_overlap=100, length_function=len
)

sentences = text_content.split('。')
sentences = [sentence.strip() for sentence in sentences if sentence.strip()]

# 对每个段落进行进一步分割
split_texts = []
for sentence in sentences:
    split_texts.extend(text_splitter.split_text(sentence))

# 将分段文本存储在DataFrame中
df = pd.DataFrame({'content': split_texts})

# 设置OpenAI API Key
openai.api_key = config['openai']['api_key']

# 初始化嵌入模型，并显式传递API密钥
embeddings = OpenAIEmbeddings(openai_api_key=openai.api_key)

# 将每个分段文本转换为向量
embeddings_list = embeddings.embed_documents(df['content'].tolist())
df['embedding'] = embeddings_list

# 将DataFrame中的数据写入到PostgreSQL数据库
with engine.connect() as connection:
    # 确保每次连接时都加载pgvector扩展
    connection.execute(sql_text("CREATE EXTENSION IF NOT EXISTS vector;"))
    # 创建表格（如果不存在）
    connection.execute(sql_text("""
    CREATE TABLE IF NOT EXISTS documents (
        id SERIAL PRIMARY KEY,
        content TEXT,
        embedding vector(1536)
    );
    """))

    # 插入数据
    for _, row in df.iterrows():
        content = row['content']
        embedding = row['embedding']
        connection.execute(
            sql_text("INSERT INTO documents (content, embedding) VALUES (:content, :embedding)"),
            {'content': content, 'embedding': embedding}
        )

print("数据已成功存储到PostgreSQL数据库中")


数据库 rag_db 已存在
pgvector 扩展已成功启用
vector 类型已成功启用
数据已成功存储到PostgreSQL数据库中


In [4]:
df

Unnamed: 0,content,embedding
0,我們要開始這個禮拜的課程這個禮拜會到佼佼作業二然後宣布作業三我們今天的內容會是網路爬蟲也會是...,"[0.006464687987765308, -0.004162107817109202, ..."
1,可以自己評估自己的實力來去把你的程式碼寫到多模組化能夠去分享給別人那上學期我是要求說他們一定...,"[-0.010561584497385644, 0.01106263417999642, 0..."
2,Soup等如何使用到變成最後變可以模組化成一個分享的class的樣子要怎麼寫那我們先來看一下...,"[-0.01855356271909417, -0.008329481096833914, ..."
3,Super這個套件其實爬蟲的工具有超級多不是只有這一套那我教這一套算是簡單好上手那它也是有一...,"[-0.0037689747831788386, -0.009944525199125087..."
4,Source給你的Data只是它公開在網站上秀出來它期望的是人真的去跟它互動而不是機器跟它互...,"[-0.002057345691651404, -0.0019108522518765266..."
5,Sussex那你就沒有辦法再往下去去寫程式你後面寫再多都沒有用所以第一件事情就是你要確定網路...,"[0.016295601156821264, -0.0056206231710190235,..."
6,200這些編碼這不是我編的這是當初定義這些通訊協定的科學家大家講好的那你只要去查網路回應碼你...,"[0.00026900039787820676, 0.003136010706772307,..."
7,39181然後3918039179就是它在產生網址換頁的時候它是一個流水號那所以你就很容易組...,"[-0.0069324893354217355, 0.003398151946759526,..."
8,這樣計數下去它也好寫可是很多網站它不會這麼好心它可能會它可能會在換頁的時候它會把網址後面的請...,"[-0.010149303162636911, -0.0001468956800642371..."
9,頁的位置然後就讓你沒有辦法拼湊出那個合理的網址所以我們今天為什麼可以用回圈訪問一頁一頁就是因...,"[-0.006891231651292923, 0.0036771130370308503,..."


# 文本切割並傳向量，儲存到postgresql

In [31]:
import psycopg2
import configparser
from sqlalchemy import create_engine, text as sql_text, inspect
import pandas as pd
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
import openai

# Load configuration file
config = configparser.ConfigParser()
config.read('db_config.ini')

# Read PostgreSQL configuration
postgresql_config = {
    'host': config['postgresql']['host'],
    'user': config['postgresql']['user'],
    'password': config['postgresql']['password']
}

# Connect to PostgreSQL (specify rag_db database)
conn = psycopg2.connect(
    host=postgresql_config['host'],
    user=postgresql_config['user'],
    password=postgresql_config['password'],
    database='rag_db'
)

# Create cursor
cur = conn.cursor()

# Execute query to see all tables
cur.execute("SELECT table_name FROM information_schema.tables WHERE table_schema='public'")
tables = cur.fetchall()
print("Tables in rag_db:", tables)

# Use SQLAlchemy to connect to rag_db database
connection_string = f"postgresql+psycopg2://{postgresql_config['user']}:{postgresql_config['password']}@" \
                    f"{postgresql_config['host']}/rag_db"
engine = create_engine(connection_string)

# Create connection and run SQL command to install pgvector extension
with engine.connect() as connection:
    connection.execute(sql_text("CREATE EXTENSION IF NOT EXISTS vector;"))
    result = connection.execute(sql_text("SELECT extname FROM pg_extension WHERE extname = 'vector';")).fetchone()
    if result:
        print("pgvector extension enabled successfully")
        vector_check = connection.execute(sql_text("SELECT typname FROM pg_type WHERE typname = 'vector';")).fetchone()
        if vector_check:
            print("vector type enabled successfully")
        else:
            raise Exception("vector type not enabled successfully")
    else:
        raise Exception("pgvector extension not enabled successfully")

# Read text file
with open("data/course_medium.txt", "r", encoding="utf-8") as file:
    text_content = file.read()

# Use LangChain's text splitter to segment text
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512, chunk_overlap=100, length_function=len
)

sentences = text_content.split('。')
sentences = [sentence.strip() for sentence in sentences if sentence.strip()]

# Further split each paragraph
split_texts = []
for sentence in sentences:
    split_texts.extend(text_splitter.split_text(sentence))

# Store segmented text in DataFrame
df = pd.DataFrame({'content': split_texts})

# Set OpenAI API Key
openai.api_key = config['openai']['api_key']

# Initialize embedding model and pass API key explicitly
embeddings = OpenAIEmbeddings(openai_api_key=openai.api_key)

# Convert each segmented text to vectors
embeddings_list = embeddings.embed_documents(df['content'].tolist())
df['embedding'] = embeddings_list

# Write data from DataFrame to PostgreSQL database
with engine.connect() as connection:
    connection.execute(sql_text("CREATE EXTENSION IF NOT EXISTS vector;"))
    connection.execute(sql_text("""
    CREATE TABLE IF NOT EXISTS documents (
        id SERIAL PRIMARY KEY,
        content TEXT,
        embedding vector(1536)
    );
    """))

    for _, row in df.iterrows():
        content = row['content']
        embedding = row['embedding']
        # Format embedding as a string with proper array format for PostgreSQL
        formatted_embedding = f'[{", ".join(map(str, embedding))}]'
        connection.execute(
            sql_text("INSERT INTO documents (content, embedding) VALUES (:content, :embedding)"),
            {'content': content, 'embedding': formatted_embedding}
        )
    connection.commit()  # Commit transaction

print("Data successfully stored in PostgreSQL database")

# Check table structure and data
inspector = inspect(engine)
tables = inspector.get_table_names()
print("Tables in rag_db:", tables)

for table_name in tables:
    print(f"\nStructure of table {table_name}:")
    columns = inspector.get_columns(table_name)
    for column in columns:
        print(column)

# Query structure and first few rows of each table
for table in tables:
    print(f"\nStructure of table {table}:")
    cur.execute(f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name='{table}'")
    columns = cur.fetchall()
    for column in columns:
        print(column)

    print(f"\nFirst 5 rows of table {table}:")
    cur.execute(f"SELECT * FROM {table} LIMIT 5")
    rows = cur.fetchall()
    for row in rows:
        print(row)

# Close cursor and connection
cur.close()
conn.close()


Tables in rag_db: [('documents',), ('langchain_pg_collection',), ('langchain_pg_embedding',)]
pgvector extension enabled successfully
vector type enabled successfully
Data successfully stored in PostgreSQL database
Tables in rag_db: ['documents', 'langchain_pg_collection', 'langchain_pg_embedding']

Structure of table documents:
{'name': 'id', 'type': INTEGER(), 'nullable': False, 'default': "nextval('documents_id_seq'::regclass)", 'autoincrement': True, 'comment': None}
{'name': 'content', 'type': TEXT(), 'nullable': True, 'default': None, 'autoincrement': False, 'comment': None}
{'name': 'embedding', 'type': Vector(dim=1536), 'nullable': True, 'default': None, 'autoincrement': False, 'comment': None}

Structure of table langchain_pg_collection:
{'name': 'name', 'type': VARCHAR(), 'nullable': True, 'default': None, 'autoincrement': False, 'comment': None}
{'name': 'cmetadata', 'type': JSON(astext_type=Text()), 'nullable': True, 'default': None, 'autoincrement': False, 'comment': None}

# 查看PostgreSQL的情況

In [22]:
import psycopg2
import configparser
from sqlalchemy import create_engine, text as sql_text, inspect
import pandas as pd
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
import openai

# 加载配置文件
config = configparser.ConfigParser()
config.read('db_config.ini')

# 读取PostgreSQL配置
postgresql_config = {
    'host': config['postgresql']['host'],
    'user': config['postgresql']['user'],
    'password': config['postgresql']['password']
}

# 连接到PostgreSQL（指定rag_db数据库）
conn = psycopg2.connect(
    host=postgresql_config['host'],
    user=postgresql_config['user'],
    password=postgresql_config['password'],
    database='rag_db'  # 连接到rag_db数据库
)
# 创建游标
cur = conn.cursor()

# 执行查询查看所有表
cur.execute("SELECT table_name FROM information_schema.tables WHERE table_schema='public'")
tables = cur.fetchall()
print("Tables in rag_db:", tables)

# 查询每个表的结构和前几行数据
for table in tables:
    table_name = table[0]
    print(f"\nStructure of table {table_name}:")
    cur.execute(f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name='{table_name}'")
    columns = cur.fetchall()
    for column in columns:
        print(column)

    print(f"\nFirst 5 rows of table {table_name}:")
    cur.execute(f"SELECT * FROM {table_name} LIMIT 5")
    rows = cur.fetchall()
    for row in rows:
        print(row)

# 关闭游标和连接a
cur.close()
conn.close()

Tables in rag_db: [('documents',)]

Structure of table documents:
('id', 'integer')
('embedding', 'USER-DEFINED')
('content', 'text')

First 5 rows of table documents:
(1, '我們要開始這個禮拜的課程這個禮拜會到佼佼作業二然後宣布作業三我們今天的內容會是網路爬蟲也會是作業三的重點所以我會把今天的整個範例講完以後你們可以選擇你自己能力內可以寫到的程度因為寫程式其實有幾個階段一個階段是第二個階段是可以跑而且內容要正確第三個階段是可以跑內容要正確之後你還要能力模組化模組化的意思就是你可以把它建成function那這個模組化呢如果你要分享給更多不同的專案使用你還要能夠建成class就好像你去應用一個模組化你還要能夠建成class今天我就會針對這四個步驟來去講程式品質的四個階段那我喜歡上課模式都是融合性的就是我剛剛在描述的這四個階段可能很多時候會變成是一門叫做軟體工程的課然後它都是在產生在用軟體工程的方式然後你會看到你會看到你會看到你會看到你會看到你會看到然後它都是在產出寫程式的結構可是如果你沒有融入一個情境跟直接帶入一個實作你看完了你覺得好像在念一本如何管理程式的書可是其實你也不知道怎麼寫那你可以自己評估自己的實力來去把你的程式碼寫到多模組化能夠去分享給別人那上學期我是要求說他們一定要寫成模組化去分享給其他同學使用後來發現還蠻多同學做不到所以我今年就放寬就是你只要能夠寫出爬蟲但是我不限定你', '[0.0064477217,-0.0040484187,0.006939223,-0.029955713,-0.012578554,0.004261834,-0.027006706,-0.009707152,-0.012248731,-0.035051808,0.0052965735,0.012158191,0.0004187462,-0.009338525,-0.0021778035,0.003886741,0.028351868,-0.016206611,0.002743677,-0.0071332366,0.009416131,0.008775886,0.007838153,-0.011246326,-0.0

# RAG

In [9]:
# !pip install langchain_postgres
# !pip install psycopg
# !pip install openai


In [42]:
import psycopg2
import configparser
from sqlalchemy import create_engine, text as sql_text
import openai
import pandas as pd
from langchain_openai import OpenAIEmbeddings
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.chat_models import ChatOpenAI

# Load configuration file
config = configparser.ConfigParser()
config.read('db_config.ini')

# Read PostgreSQL configuration
postgresql_config = {
    'host': config['postgresql']['host'],
    'user': config['postgresql']['user'],
    'password': config['postgresql']['password']
}

# Set OpenAI API Key
openai.api_key = config['openai']['api_key']

# Initialize embedding model and pass API key explicitly
embeddings = OpenAIEmbeddings(openai_api_key=openai.api_key)

# Connect to PostgreSQL (specify rag_db database)
connection_string = f"postgresql+psycopg2://{postgresql_config['user']}:{postgresql_config['password']}@" \
                    f"{postgresql_config['host']}/rag_db"
engine = create_engine(connection_string)

# Function to convert user query to embedding
def get_query_embedding(query):
    return embeddings.embed_query(query)

# Function to search the nearest neighbors in PostgreSQL
def search_documents(query_embedding):
    with engine.connect() as connection:
        # Convert the embedding list to a string format suitable for SQL
        query_embedding_str = ','.join(map(str, query_embedding))
        
        # Perform the nearest neighbor search in PostgreSQL
        search_query = f"""
        SELECT content, 1 - (embedding <=> '[{query_embedding_str}]'::vector) AS similarity
        FROM documents
        ORDER BY similarity DESC
        LIMIT 5;
        """
        result = connection.execute(sql_text(search_query))
        docs = [{'content': row[0], 'similarity': row[1]} for row in result]
    return docs

# Query example
query = "What is Task Decomposition?"
query_embedding = get_query_embedding(query)
docs = search_documents(query_embedding)

# Format the retrieved documents for display
def format_docs(docs):
    return "\n\n".join(doc['content'] for doc in docs)

# Setup Prompt and LLM
prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key=openai.api_key, temperature=0)

# Setup RAG chain
class ContextWithDocs:
    def __init__(self, docs):
        self.docs = docs

    def __call__(self, inputs):
        formatted_docs = format_docs(self.docs)
        return {"context": formatted_docs, "question": inputs}

rag_chain = (
    ContextWithDocs(docs) | prompt | llm | StrOutputParser()
)

# Test RAG chain
response = rag_chain.invoke(query)
print("Response:", response)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Response: Task Decomposition is the process of breaking down a task into smaller, more manageable subtasks. It involves dividing the task into stages or steps to make it easier to complete. By decomposing a task, individuals can focus on one aspect at a time, leading to more efficient and effective task completion.


In [43]:
query = "老師這次上課說了什麼?"
response = rag_chain.invoke(query)
print("Response:", response)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Response: 老師這次上課主要講解了網路爬蟲和程式模組化的重要性，並提到寫程式有幾個階段，包括能夠跑且內容正確、模組化建成function和class等。學生可以根據自己的能力選擇寫程式的程度，並且老師強調了程式品質的四個階段。


In [29]:
# !pip install pgvector

Collecting pgvector
  Downloading pgvector-0.2.5-py2.py3-none-any.whl.metadata (9.9 kB)
Downloading pgvector-0.2.5-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: pgvector
Successfully installed pgvector-0.2.5
