# Lesson 6: Shipping as a web API

In [3]:
//import "dotenv/config";

In [4]:
import {ChatOllama} from '@langchain/ollama';
// Chat model used by the answer-generation chain. It is a ChatOllama client
// configured for the "llama3.1:latest" model and pointed at an Ollama server
// on this machine (localhost:11434 — adjust `baseUrl` if yours runs elsewhere).
const model = new ChatOllama({
    model:  "llama3.1:latest",
    baseUrl: "http://localhost:11434"
});

In [5]:
import { 
  loadAndSplitChunks, 
  initializeVectorstoreWithDocuments 
} from "./lib/helpers.ts";

// Load the source material and split it into chunks via the project helper.
// NOTE(review): the units of chunkSize/chunkOverlap are defined by the helper
// (presumably characters) — confirm in ./lib/helpers.ts.
const splitDocs = await loadAndSplitChunks({
  chunkSize: 1536,
  chunkOverlap: 128,
});

// Build a vector store from the split documents.
const vectorstore = await initializeVectorstoreWithDocuments({
  documents: splitDocs,
});

// Expose the vector store through its retriever interface.
const retriever = vectorstore.asRetriever();

In [6]:
import { 
  createDocumentRetrievalChain, 
  createRephraseQuestionChain 
} from "./lib/helpers.ts";

// Instantiate the two helper-built chains. Later in this notebook their
// outputs are assigned to the `context` and `standalone_question` keys of the
// conversational chain's input, respectively.
const documentRetrievalChain = createDocumentRetrievalChain();
const rephraseQuestionChain = createRephraseQuestionChain();

In [7]:
import { ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";

// System prompt for the answer-generation step. `{context}` is a template
// variable that is filled in when the prompt is formatted.
const ANSWER_CHAIN_SYSTEM_TEMPLATE = `You are an experienced researcher,
expert at interpreting and answering questions based on provided sources.
Using the below provided context and chat history, 
answer the user's question to the best of your ability
using only the resources provided. Be verbose!

<context>
{context}
</context>`;

// Full chat prompt, in order: the system instructions, a placeholder that
// receives the messages stored under the "history" key, and finally the human
// turn carrying the `{standalone_question}` template variable.
const answerGenerationChainPrompt = ChatPromptTemplate.fromMessages([
  ["system", ANSWER_CHAIN_SYSTEM_TEMPLATE],
  new MessagesPlaceholder("history"),
  [
    "human", 
    `Now, answer this question using the previous context and chat history:
  
    {standalone_question}`
  ]
]);

In [9]:
import { 
  RunnablePassthrough, 
  RunnableSequence 
} from "@langchain/core/runnables";
//import { ChatOpenAI } from "@langchain/openai";

// Conversational retrieval pipeline. The steps run in the listed order:
//   1. add `standalone_question` (output of rephraseQuestionChain) to the input,
//   2. add `context` (output of documentRetrievalChain) to the input,
//   3. format the answer-generation prompt from the accumulated keys,
//   4. send the formatted prompt to the chat model.
const conversationalRetrievalChain = RunnableSequence.from([
  RunnablePassthrough.assign({
    standalone_question: rephraseQuestionChain,
  }),
  RunnablePassthrough.assign({
    context: documentRetrievalChain,
  }),
  answerGenerationChainPrompt,
  model,
]);

In [10]:
import { HttpResponseOutputParser } from "langchain/output_parsers";

// Output parser piped onto the end of the final chain; its streamed output is
// what the request handler passes to `new Response(...)` as the body.
// "text/event-stream" is also supported
const httpResponseOutputParser = new HttpResponseOutputParser({
  contentType: "text/plain"
});

In [11]:
import { RunnableWithMessageHistory } from "@langchain/core/runnables"; 
import { ChatMessageHistory } from "langchain/stores/message/in_memory";

// A single in-memory chat history instance.
const messageHistory = new ChatMessageHistory();

// Wrap the conversational chain with message-history handling, then pipe the
// result through the HTTP output parser.
// NOTE: getMessageHistory ignores the session id and always returns the same
// `messageHistory` instance, so every session shares one history.
const finalRetrievalChain = new RunnableWithMessageHistory({
  runnable: conversationalRetrievalChain,
  getMessageHistory: (_sessionId) => messageHistory,
  // Key matching the MessagesPlaceholder("history") slot in the answer prompt.
  historyMessagesKey: "history",
  // Key of the input object that carries the user's message.
  inputMessagesKey: "question",
}).pipe(httpResponseOutputParser);

The chain above hands the same history object to every caller. Users should not share chat histories, so we create a separate history object for each session:

In [13]:
// Per-session chat histories, keyed by session id.
// The store is created without a prototype so that ids such as "constructor",
// "toString" or "__proto__" cannot resolve to inherited Object.prototype
// members and be returned in place of a real history.
const messageHistories: Record<string, ChatMessageHistory> = Object.create(null);

/**
 * Returns the chat history belonging to `sessionId`.
 *
 * The first call for a given id creates an empty `ChatMessageHistory` and
 * caches it; later calls with the same id return that same instance.
 *
 * @param sessionId - Identifier of the chat session.
 * @returns The history object dedicated to that session.
 */
const getMessageHistoryForSession = (sessionId: string): ChatMessageHistory => {
  const existing = messageHistories[sessionId];
  if (existing !== undefined) {
    return existing;
  }
  const created = new ChatMessageHistory();
  messageHistories[sessionId] = created;
  return created;
};

We'll recreate our final chain with this new method:

In [14]:
// Final chain with per-session histories: the history for each invocation is
// looked up through getMessageHistoryForSession, and the chain output is piped
// through the HTTP output parser.
const finalRetrievalChain = new RunnableWithMessageHistory({
  runnable: conversationalRetrievalChain,
  getMessageHistory: getMessageHistoryForSession,
  // Key of the input object that carries the user's message.
  inputMessagesKey: "question",
  // Key matching the MessagesPlaceholder("history") slot in the answer prompt.
  historyMessagesKey: "history",
}).pipe(httpResponseOutputParser);

In [15]:
// TCP port shared by the server (Deno.serve) and the client cells, which build
// their request URL from it.
const port = 8087;

In [16]:
/**
 * HTTP request handler for the chat endpoint.
 *
 * Expects a JSON body shaped like `{ question: string, session_id: string }`.
 * The question is run through `finalRetrievalChain`, with the session id passed
 * as `configurable.sessionId`, and the chain's output stream is returned as a
 * plain-text response body.
 *
 * @param request - Incoming HTTP request.
 * @returns A 200 streaming response on success, or a 400 plain-text response
 *   when the body is not valid JSON or lacks a non-empty string `question` or
 *   `session_id`.
 */
const handler = async (request: Request): Promise<Response> => {
  const badRequest = (message: string): Response =>
    new Response(message, {
      status: 400,
      headers: {
        "Content-Type": "text/plain"
      },
    });

  // request.json() rejects on an empty or malformed body; report that to the
  // client instead of letting the rejection escape the handler.
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return badRequest("Request body must be valid JSON.");
  }

  if (typeof body !== "object" || body === null) {
    return badRequest("Request body must be a JSON object.");
  }

  const { question, session_id: sessionId } = body as Record<string, unknown>;
  if (typeof question !== "string" || question.length === 0) {
    return badRequest('Field "question" must be a non-empty string.');
  }
  // Without a session id every such caller would end up on the same history key.
  if (typeof sessionId !== "string" || sessionId.length === 0) {
    return badRequest('Field "session_id" must be a non-empty string.');
  }

  const stream = await finalRetrievalChain.stream({
    question
  }, { configurable: { sessionId } });

  return new Response(stream, {
    status: 200,
    headers: {
      "Content-Type": "text/plain"
    },
  });
};

In [17]:
// Start the HTTP server on `port`, routing every incoming request to `handler`.
Deno.serve({ port }, handler);

Listening on http://0.0.0.0:8087/


{
  addr: [Object: null prototype] {
    hostname: "0.0.0.0",
    port: 8087,
    transport: "tcp"
  },
  finished: Promise { <pending> },
  shutdown: [AsyncFunction: shutdown],
  ref: [Function: ref],
  unref: [Function: unref],
  [Symbol(Symbol.asyncDispose)]: [Function: [Symbol.asyncDispose]]
}

In [18]:
// Module-level decoder. readChunks() no longer uses it (see below); it is left
// in place in case other cells reference it.
const decoder = new TextDecoder();

/**
 * readChunks() reads from the provided reader and yields the decoded text of
 * each chunk as an async iterable.
 *
 * Each iteration uses its own TextDecoder in streaming mode, so a multi-byte
 * UTF-8 character whose bytes are split across two chunks is decoded correctly
 * instead of turning into replacement characters.
 *
 * @param reader - Object with a `read()` method resolving to `{ done, value }`
 *   (for example the result of `response.body?.getReader()`). When it is
 *   `null` or `undefined`, the iterable yields nothing.
 * @returns An async iterable of decoded string chunks.
 */
function readChunks(
  reader:
    | { read(): Promise<{ done: boolean; value?: Uint8Array }> }
    | null
    | undefined,
): AsyncIterable<string> {
  return {
    async* [Symbol.asyncIterator]() {
      // No body means there is nothing to read.
      if (reader == null) {
        return;
      }
      // One decoder per iteration so buffered partial characters never leak
      // between independent streams.
      const streamDecoder = new TextDecoder();
      let readResult = await reader.read();
      while (!readResult.done) {
        // stream: true keeps an incomplete trailing byte sequence buffered
        // until the next chunk completes it.
        yield streamDecoder.decode(readResult.value, { stream: true });
        readResult = await reader.read();
      }
      // Flush whatever is still buffered once the stream has ended.
      const tail = streamDecoder.decode();
      if (tail.length > 0) {
        yield tail;
      }
    },
  };
}

/**
 * Returns a promise that resolves after a delay.
 *
 * @param ms - Delay in milliseconds; defaults to 500, so `sleep()` waits half
 *   a second.
 * @returns A promise that resolves (with no value) once the delay has elapsed.
 */
const sleep = (ms = 500): Promise<void> =>
  new Promise<void>((resolve) => setTimeout(resolve, ms));

In [19]:
// First turn of session "1": POST the question as JSON to the local server.
const response = await fetch(`http://localhost:${port}`, {
  method: "POST",
  headers: { "content-type": "application/json" },
  body: JSON.stringify({
    question: "What are the prerequisites for this course?",
    session_id: "1", // Should randomly generate/assign
  }),
});

// The response body is a ReadableStream; take its reader (undefined when the
// response has no body).
const reader = response.body?.getReader();

// Print each decoded piece as it is read.
for await (const piece of readChunks(reader)) {
  console.log("CHUNK:", piece);
}

await sleep();

CHUNK: Based
CHUNK:  on
CHUNK:  the
CHUNK:  provided
CHUNK:  sources
CHUNK: ,
CHUNK:  it
CHUNK:  appears
CHUNK:  that
CHUNK:  the
CHUNK:  instructor
CHUNK:  is
CHUNK:  emphasizing
CHUNK:  the
CHUNK:  importance
CHUNK:  of
CHUNK:  developing
CHUNK:  practical
CHUNK:  skills
CHUNK:  in
CHUNK:  machine
CHUNK:  learning
CHUNK:  beyond
CHUNK:  just
CHUNK:  memor
CHUNK: izing
CHUNK:  algorithms
CHUNK: .
CHUNK:  The
CHUNK:  context
CHUNK:  suggests
CHUNK:  that
CHUNK:  students
CHUNK:  are
CHUNK:  expected
CHUNK:  to
CHUNK:  be
CHUNK:  able
CHUNK:  to
CHUNK:  apply
CHUNK:  machine
CHUNK:  learning
CHUNK:  tools
CHUNK:  and
CHUNK:  techniques
CHUNK:  effectively
CHUNK: ,
CHUNK:  much
CHUNK:  like
CHUNK:  a
CHUNK:  master
CHUNK:  carp
CHUNK: enter
CHUNK:  uses
CHUNK:  tools
CHUNK:  to
CHUNK:  achieve
CHUNK:  specific
CHUNK:  tasks
CHUNK: .


CHUNK: Given
CHUNK:  this
CHUNK:  emphasis
CHUNK:  on
CHUNK:  application
CHUNK:  and
CHUNK:  not
CHUNK:  just
CHUNK:  theoretical
CHUNK:  knowledge
CHUNK:

In [20]:
// Follow-up turn on the same session ("1"): the question refers back to the
// previous answer, so it only makes sense with that session's history.
const response = await fetch(`http://localhost:${port}`, {
  method: "POST",
  headers: { "content-type": "application/json" },
  body: JSON.stringify({
    question: "Can you list them in bullet point format?",
    session_id: "1", // Should randomly generate/assign
  }),
});

// The response body is a ReadableStream; take its reader (undefined when the
// response has no body).
const reader = response.body?.getReader();

// Print each decoded piece as it is read.
for await (const piece of readChunks(reader)) {
  console.log("CHUNK:", piece);
}

await sleep();

CHUNK: Unfortunately
CHUNK: ,
CHUNK:  I
CHUNK:  must
CHUNK:  inform
CHUNK:  you
CHUNK:  that
CHUNK:  the
CHUNK:  provided
CHUNK:  sources
CHUNK:  do
CHUNK:  not
CHUNK:  explicitly
CHUNK:  mention
CHUNK:  the
CHUNK:  prerequisites
CHUNK:  for
CHUNK:  this
CHUNK:  course
CHUNK: .
CHUNK:  However
CHUNK: ,
CHUNK:  based
CHUNK:  on
CHUNK:  the
CHUNK:  context
CHUNK:  and
CHUNK:  chat
CHUNK:  history
CHUNK: ,
CHUNK:  it
CHUNK:  seems
CHUNK:  that
CHUNK:  students
CHUNK:  in
CHUNK:  this
CHUNK:  class
CHUNK:  have
CHUNK:  likely
CHUNK:  taken
CHUNK:  a
CHUNK:  prior
CHUNK:  course
CHUNK:  where
CHUNK:  they
CHUNK:  were
CHUNK:  introduced
CHUNK:  to
CHUNK:  some
CHUNK:  machine
CHUNK:  learning
CHUNK:  concepts
CHUNK: .


CHUNK: The
CHUNK:  instructor
CHUNK:  mentions
CHUNK:  forming
CHUNK:  project
CHUNK:  partners
CHUNK:  and
CHUNK:  study
CHUNK:  groups
CHUNK: ,
CHUNK:  which
CHUNK:  suggests
CHUNK:  that
CHUNK:  students
CHUNK:  are
CHUNK:  expected
CHUNK:  to
CHUNK:  apply
CHUNK:  theore

In [21]:
// Same kind of request, but under a different session id ("2"), so the server
// looks up a separate history for it.
const response = await fetch(`http://localhost:${port}`, {
  method: "POST",
  headers: { "content-type": "application/json" },
  body: JSON.stringify({
    question: "What did I just ask you?",
    session_id: "2", // Should randomly generate/assign
  }),
});

// The response body is a ReadableStream; take its reader (undefined when the
// response has no body).
const reader = response.body?.getReader();

// Print each decoded piece as it is read.
for await (const piece of readChunks(reader)) {
  console.log("CHUNK:", piece);
}

await sleep();

CHUNK: Unfortunately
CHUNK: ,
CHUNK:  your
CHUNK:  previous
CHUNK:  question
CHUNK:  has
CHUNK:  not
CHUNK:  been
CHUNK:  provided
CHUNK:  in
CHUNK:  the
CHUNK:  context
CHUNK: .
CHUNK:  The
CHUNK:  conversation
CHUNK:  appears
CHUNK:  to
CHUNK:  be
CHUNK:  an
CHUNK:  excerpt
CHUNK:  from
CHUNK:  a
CHUNK:  lecture
CHUNK:  on
CHUNK:  machine
CHUNK:  learning
CHUNK: ,
CHUNK:  with
CHUNK:  the
CHUNK:  instructor
CHUNK:  discussing
CHUNK:  various
CHUNK:  topics
CHUNK:  such
CHUNK:  as
CHUNK:  supervised
CHUNK:  learning
CHUNK: ,
CHUNK:  tools
CHUNK:  of
CHUNK:  carp
CHUNK: entry
CHUNK: ,
CHUNK:  and
CHUNK:  the
CHUNK:  importance
CHUNK:  of
CHUNK:  applying
CHUNK:  machine
CHUNK:  learning
CHUNK:  algorithms
CHUNK:  effectively
CHUNK: .
CHUNK:  However
CHUNK: ,
CHUNK:  I
CHUNK:  do
CHUNK:  not
CHUNK:  see
CHUNK:  any
CHUNK:  indication
CHUNK:  of
CHUNK:  your
CHUNK:  previous
CHUNK:  question
CHUNK:  being
CHUNK:  asked
CHUNK:  or
CHUNK:  answered
CHUNK:  within
CHUNK:  this
CHUNK:  conte