# Lesson 6: Shipping as a web API

In [1]:
import "dotenv/config";

[Module: null prototype] { default: {} }

In [2]:
import { 
  loadAndSplitChunks, 
  initializeVectorstoreWithDocuments 
} from "./lib/helpers.ts";

// Load the source material and split it into chunks, requesting a chunk size
// of 1536 with an overlap of 128 between neighbouring chunks.
// NOTE(review): what gets loaded, and whether the sizes are characters or
// tokens, is decided inside ./lib/helpers.ts — confirm there.
const splitDocs = await loadAndSplitChunks({
  chunkSize: 1536,
  chunkOverlap: 128,
});

// Build a vector store from the split chunks.
const vectorstore = await initializeVectorstoreWithDocuments({
  documents: splitDocs,
});

// Expose the vector store through its retriever interface.
// NOTE(review): `retriever` is not referenced again in the visible cells;
// presumably createDocumentRetrievalChain() builds its own — confirm.
const retriever = vectorstore.asRetriever();

In [3]:
import { 
  createDocumentRetrievalChain, 
  createRephraseQuestionChain 
} from "./lib/helpers.ts";

// Sub-chains built by the course helpers. Both are wired into
// conversationalRetrievalChain: rephraseQuestionChain supplies the
// `standalone_question` key and documentRetrievalChain supplies `context`.
const documentRetrievalChain = createDocumentRetrievalChain();
const rephraseQuestionChain = createRephraseQuestionChain();

In [4]:
import { ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";

// System prompt for the answer-generation step. `{context}` is its only
// template variable; the surrounding <context> tags delimit the retrieved
// material from the instructions.
const ANSWER_CHAIN_SYSTEM_TEMPLATE = `You are an experienced researcher,
expert at interpreting and answering questions based on provided sources.
Using the below provided context and chat history, 
answer the user's question to the best of your ability
using only the resources provided. Be verbose!

<context>
{context}
</context>`;

// Chat prompt for the final answer, in message order:
//   1. the system instructions (expects `context`),
//   2. the prior conversation turns, injected under the "history" key,
//   3. the human turn carrying `standalone_question`.
// The indentation inside the human template literal is part of the prompt text.
const answerGenerationChainPrompt = ChatPromptTemplate.fromMessages([
  ["system", ANSWER_CHAIN_SYSTEM_TEMPLATE],
  new MessagesPlaceholder("history"),
  [
    "human", 
    `Now, answer this question using the previous context and chat history:
  
    {standalone_question}`
  ]
]);

In [5]:
import { 
  RunnablePassthrough, 
  RunnableSequence 
} from "@langchain/core/runnables";
import { ChatOpenAI } from "@langchain/openai";

// End-to-end conversational retrieval pipeline. The steps run in order, each
// one receiving the previous step's output.
const conversationalRetrievalChain = RunnableSequence.from([
  // 1. Add `standalone_question` (the output of rephraseQuestionChain) to the input.
  RunnablePassthrough.assign({
    standalone_question: rephraseQuestionChain,
  }),
  // 2. Add `context` (the output of documentRetrievalChain) alongside the existing keys.
  RunnablePassthrough.assign({
    context: documentRetrievalChain,
  }),
  // 3. Render the chat prompt from `context`, `history` and `standalone_question`.
  answerGenerationChainPrompt,
  // 4. Send the rendered prompt to the chat model.
  new ChatOpenAI({ modelName: "gpt-3.5-turbo-1106" }),
]);

In [6]:
import { HttpResponseOutputParser } from "langchain/output_parsers";

// Output parser appended to the final chain so that its streamed output can be
// handed directly to `new Response(...)` as the HTTP body. The content type
// chosen here must match the "Content-Type" header the handler sends.
// "text/event-stream" is also supported
const httpResponseOutputParser = new HttpResponseOutputParser({
  contentType: "text/plain"
});

In [7]:
import { RunnableWithMessageHistory } from "@langchain/core/runnables"; 
import { ChatMessageHistory } from "langchain/stores/message/in_memory";

// A single in-memory history object for the whole process.
const messageHistory = new ChatMessageHistory();

// Wrap the conversational chain with message-history handling, then pipe the
// result through the HTTP output parser. The `_sessionId` argument is ignored
// here, so every session reads and writes the same `messageHistory`.
const finalRetrievalChain = new RunnableWithMessageHistory({
  runnable: conversationalRetrievalChain,
  getMessageHistory: (_sessionId) => messageHistory,
  // Key under which past messages reach the prompt's MessagesPlaceholder("history").
  historyMessagesKey: "history",
  // Key of the incoming user message in the chain input.
  inputMessagesKey: "question",
}).pipe(httpResponseOutputParser);

Additionally, we'll want to bear in mind that users should not share chat histories. The chain above hands the same history object to every session, so instead we should create a new history object per session:

In [8]:
/**
 * Per-session chat histories, keyed by session id.
 *
 * A Map is used rather than a plain object because session ids arrive from
 * clients: with an object, ids such as "toString", "constructor" or
 * "__proto__" would resolve to members of Object.prototype instead of being
 * treated as unknown sessions. Entries are never removed.
 */
const messageHistories = new Map<string, ChatMessageHistory>();

/**
 * Returns the chat history belonging to a session, creating and caching an
 * empty one the first time the session id is seen.
 *
 * @param sessionId Identifier of the conversation.
 * @returns The same history object on every call with the same id.
 */
const getMessageHistoryForSession = (
  sessionId: string | number,
): ChatMessageHistory => {
  // Normalise to a string so that 1 and "1" address the same session, exactly
  // as they did when histories were stored as object properties.
  const key = String(sessionId);

  let history = messageHistories.get(key);
  if (history === undefined) {
    history = new ChatMessageHistory();
    messageHistories.set(key, history);
  }
  return history;
};

We'll recreate our final chain with this new method:

In [9]:
// Rebuild the final chain so that the history is looked up per session id via
// getMessageHistoryForSession instead of being one shared object. The output is
// still piped through the HTTP output parser.
const finalRetrievalChain = new RunnableWithMessageHistory({
  runnable: conversationalRetrievalChain,
  getMessageHistory: getMessageHistoryForSession,
  // Key of the incoming user message in the chain input.
  inputMessagesKey: "question",
  // Key under which past messages reach the prompt's MessagesPlaceholder("history").
  historyMessagesKey: "history",
}).pipe(httpResponseOutputParser);

In [10]:
// Local port shared by the server (Deno.serve) and the client fetch() calls.
const port = 8087;

In [11]:
/**
 * HTTP request handler for the chat endpoint.
 *
 * Expects a JSON body of the form `{ "question": string, "session_id": string | number }`
 * and answers with the chain's output streamed as plain text.
 *
 * @param request Incoming HTTP request.
 * @returns A 200 response whose body is the streamed answer, or a 400
 *   plain-text response when the body is not valid JSON or lacks the
 *   expected fields.
 */
const handler = async (request: Request): Promise<Response> => {
  const badRequest = (message: string): Response =>
    new Response(message, {
      status: 400,
      headers: {
        "Content-Type": "text/plain"
      },
    });

  // The body is untrusted input: malformed JSON must produce a client error
  // rather than an unhandled exception.
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return badRequest("Request body must be valid JSON.");
  }

  if (typeof body !== "object" || body === null) {
    return badRequest("Request body must be a JSON object.");
  }
  const { question, session_id: sessionId } = body as Record<string, unknown>;

  if (typeof question !== "string") {
    return badRequest('"question" must be a string.');
  }
  if (
    (typeof sessionId !== "string" && typeof sessionId !== "number") ||
    sessionId === ""
  ) {
    return badRequest('"session_id" must be a non-empty string or a number.');
  }

  // The session id selects which chat history the chain reads and updates.
  const stream = await finalRetrievalChain.stream(
    { question },
    { configurable: { sessionId: String(sessionId) } },
  );

  return new Response(stream, {
    status: 200,
    headers: {
      // Matches the contentType configured on the HTTP output parser.
      "Content-Type": "text/plain"
    },
  });
};

In [12]:
// Start the HTTP server on `port`, passing every incoming request to `handler`.
// The returned server object is not stored in a variable here.
Deno.serve({ port }, handler);

Listening on http://localhost:8087/


{
  finished: Promise { <pending> },
  shutdown: [AsyncFunction: shutdown],
  ref: [Function: ref],
  unref: [Function: unref],
  [Symbol(Symbol.asyncDispose)]: [Function: [Symbol.asyncDispose]]
}

In [13]:
// Shared decoder, kept for any other cell that refers to it. readChunks()
// creates its own decoder so streaming state is never shared between readers.
const decoder = new TextDecoder();

/**
 * readChunks() reads from the provided reader and yields the decoded text of
 * each chunk as an async iterable.
 *
 * Decoding is done in streaming mode: a multi-byte UTF-8 character that is
 * split across two chunks is emitted once it is complete instead of being
 * turned into replacement characters. A chunk that ends mid-character may
 * therefore yield a shorter (possibly empty) string.
 *
 * @param reader Reader obtained from a byte stream, e.g. `response.body?.getReader()`.
 * @returns An async iterable of decoded text pieces, one per chunk read, plus
 *   a final piece if bytes were still buffered when the stream ended.
 * @throws TypeError on first iteration when `reader` is missing.
 */
function readChunks(
  reader: Pick<ReadableStreamDefaultReader<Uint8Array>, "read"> | undefined,
): AsyncIterable<string> {
  return {
    async *[Symbol.asyncIterator]() {
      if (reader == null) {
        throw new TypeError("readChunks() was called without a stream reader.");
      }

      // One decoder per iteration keeps partial-character state isolated.
      const streamDecoder = new TextDecoder();

      for (
        let result = await reader.read();
        !result.done;
        result = await reader.read()
      ) {
        // `stream: true` buffers an incomplete trailing byte sequence until
        // the next chunk supplies the rest of the character.
        yield streamDecoder.decode(result.value, { stream: true });
      }

      // Flush whatever is still buffered once the stream has ended.
      const tail = streamDecoder.decode();
      if (tail !== "") {
        yield tail;
      }
    },
  };
}

/**
 * Returns a promise that resolves after a delay.
 *
 * @param ms Delay in milliseconds. Defaults to 500, so existing `sleep()`
 *   calls behave as before.
 * @returns A promise that resolves with no value once the delay has elapsed.
 */
const sleep = (ms = 500): Promise<void> =>
  new Promise<void>((resolve) => setTimeout(resolve, ms));

In [14]:
// First turn of session "1": POST the question to the local server as JSON.
const response = await fetch(`http://localhost:${port}`, {
    method: "POST",
    headers: {
        "content-type": "application/json",
    },
    body: JSON.stringify({
        question: "What are the prerequisites for this course?",
        session_id: "1", // Should randomly generate/assign
    })
});

// response.body is a ReadableStream
// (`reader` is undefined if the response carries no body.)
const reader = response.body?.getReader();

// Print each piece of the answer as it arrives from the server.
for await (const chunk of readChunks(reader)) {
  console.log("CHUNK:", chunk);
}

// Short pause after the stream ends.
// NOTE(review): presumably lets the notebook finish flushing output — confirm.
await sleep();

CHUNK: Based o
CHUNK: n the provided co
CHUNK: ntext, the requir
CHUNK: ements for
CHUNK:  the machine learn
CHUNK: ing cour
CHUNK: se (CS229) include
CHUNK:  familiarity with basic pro
CHUNK: bability and statis
CHUNK: tics, as well as 
CHUNK: basic linear alge
CHUNK: bra. 

The instru
CHUNK: ctor mentions that
CHUNK:  the class will n
CHUNK: ot be ve
CHUNK: ry programm
CHUNK: ing intensive, althou
CHUNK: gh some programming 
CHUNK: will be involved, mostly in either MATLAB or Octave
CHUNK: . Additionally
CHUNK: , the course assumes
CHUNK:  familiarity with concepts su
CHUNK: ch as random v
CHUNK: ariables, expe
CHUNK: ctation, variance,
CHUNK:  matrix opera
CHUNK: tions (multiplication
CHUNK: , inverses), and vectors. 
CHUNK: The assumption i
CHUNK: s that students have taken 
CHUNK: undergraduate
CHUNK:  courses in statistics (such as Stat 116 
CHUNK: at Stanford
CHUNK: ) and linear algebra (such as Mat
CHUNK: h 51, 103, Math 
CHUNK: 113, or CS205
CHUNK:  at Stanford). 

The instruc

In [15]:
// Follow-up turn in the same session ("1"): the question says "them", so it
// only makes sense if the server kept the previous exchange in that
// session's history.
const response = await fetch(`http://localhost:${port}`, {
  method: "POST",
  headers: {
    "content-type": "application/json",
  },
  body: JSON.stringify({
    question: "Can you list them in bullet point format?",
    session_id: "1", // Should randomly generate/assign
  })
});

// response.body is a ReadableStream
// (`reader` is undefined if the response carries no body.)
const reader = response.body?.getReader();

// Print each piece of the answer as it arrives from the server.
for await (const chunk of readChunks(reader)) {
  console.log("CHUNK:", chunk);
}

// Short pause after the stream ends.
// NOTE(review): presumably lets the notebook finish flushing output — confirm.
await sleep();

CHUNK: The prerequisite
CHUNK: s for this machine
CHUNK:  learning course (CS2
CHUNK: 29) can b
CHUNK: e listed in bulle
CHUNK: t point format b
CHUNK: ased on the provided con
CHUNK: text and chat
CHUNK:  history:

- Fam
CHUNK: iliarity with basic
CHUNK:  probability 
CHUNK: and statist
CHUNK: ics, assuming know
CHUNK: ledge of concepts 
CHUNK: like random v
CHUNK: ariables
CHUNK: , expectation, variance
CHUNK: , and other fund
CHUNK: amental
CHUNK:  statistical concepts.
- Basic
CHUNK:  familiarity with linea
CHUNK: r algebra, incl
CHUNK: uding understanding o
CHUNK: f matrices, vectors, matri
CHUNK: x operations (m
CHUNK: ultiplic
CHUNK: ation, inverses), and i
CHUNK: deally 
CHUNK: knowledge of concepts like eigenvectors
CHUNK: .
- Some prog
CHUNK: ramming experie
CHUNK: nce, particularly in MATLAB or
CHUNK:  Octave, for
CHUNK:  the purpose of complet
CHUNK: ing assignments and proj
CHUNK: ects.
- While 
CHUNK: undergraduate
CHUNK:  courses in statistics (s
CHUNK: uch as Stat 116 a

In [16]:
// Same server, but a different session id ("2"). This checks that histories
// are isolated per session: nothing asked under session "1" should be visible.
const response = await fetch(`http://localhost:${port}`, {
  method: "POST",
  headers: {
    "content-type": "application/json",
  },
  body: JSON.stringify({
    question: "What did I just ask you?",
    session_id: "2", // Should randomly generate/assign
  })
});

// response.body is a ReadableStream
// (`reader` is undefined if the response carries no body.)
const reader = response.body?.getReader();

// Print each piece of the answer as it arrives from the server.
for await (const chunk of readChunks(reader)) {
  console.log("CHUNK:", chunk);
}

// Short pause after the stream ends.
// NOTE(review): presumably lets the notebook finish flushing output — confirm.
await sleep();

CHUNK: Based on the provi
CHUNK: ded context,
CHUNK:  you did not exp
CHUNK: licitly ask a qu
CHUNK: estion within the text
CHUNK: . Instead, the 
CHUNK: context consists of a le
CHUNK: cture from a cours
CHUNK: e, including parts where the 
CHUNK: instructor inter
CHUNK: acts with stu
CHUNK: dents and
CHUNK:  provides information about the course. Therefore, based on the provided context, it is not possible to identify the specific question you may have asked. If you have a specific question in mind, please feel free to ask it directly so that I can provide you with a relevant and accurate answer.
