# Block Mappings

Generate coarse mappings between LHS and RHS texts using embeddings and vector databases.

In [None]:
import { readFileSync } from 'node:fs';

import { SystemMessage, HumanMessage } from "@langchain/core/messages";
import { ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";
import { OpenAIEmbeddings } from "@langchain/openai";
import { MemoryVectorStore } from "langchain/vectorstores/memory";

import { SERVER_DATA_DIR } from '../server/src/util/fileUtils.ts';
import { newModel } from '../server/src/agents/agent.ts';

/**
 * Splits `text` into segments separated by blank lines (runs of two or more
 * newline characters) and reports where each segment sits in the original text.
 *
 * For every returned entry, `text.substring(start, stop) === segment`, i.e.
 * `start` is inclusive and `stop` is exclusive.
 *
 * A single newline inside a segment (including one at the very end of the
 * text) is kept as part of that segment. Empty segments are never returned.
 *
 * @param text the full text to split
 * @returns the non-empty segments in document order, each with its offsets
 */
function splitText(text: string): Array<{ segment: string; start: number; stop: number }> {
  const result: Array<{ segment: string; start: number; stop: number }> = [];
  let offset = 0;
  for (const piece of text.split('\n\n')) {
    const stop = offset + piece.length;
    // A run with an odd number of newlines leaves one '\n' at the front of
    // the following piece; drop it and shift the start offset accordingly.
    const lead = piece.startsWith('\n') ? 1 : 0;
    const segment = piece.slice(lead);
    // Test for emptiness AFTER stripping, so a piece consisting only of the
    // leftover '\n' (text ending in an odd newline run) is not emitted.
    if (segment !== '') {
      result.push({ segment, start: offset + lead, stop });
    }
    offset = stop + 2; // +2 for the two-newline delimiter consumed by split
  }
  return result;
}

// LLM pipeline used to summarize a text segment into a short description.
// The prompt template just forwards the supplied messages to the model.
const prompt = ChatPromptTemplate.fromMessages([ new MessagesPlaceholder("messages") ]);
const llm = newModel("Anthropic");
const chain = prompt.pipe(llm);
// System instruction sent ahead of every segment to be summarized.
const PROMPT = "Summarize the following text into a short description in less than 10 words. The description should be a single sentence that captures the main idea of the text. The text may contain multiple paragraphs, but the description should be concise and to the point. The description should not include any specific details or examples from the text. The description should be in English and should not include any special characters or formatting.";

// Load the two texts to be mapped onto each other.
const lhsText = readFileSync(`${SERVER_DATA_DIR}/AES-md/selected-text.txt`, 'utf-8');
const rhsText = readFileSync(`${SERVER_DATA_DIR}/AES-md/pre-written.txt`, 'utf-8');

// Turn every LHS segment into a document whose metadata carries its offsets,
// so a search hit can be translated back into a range of the LHS text.
const lhsSegIndices = splitText(lhsText);
const lhsSegments = lhsSegIndices.map((piece) => ({
  pageContent: piece.segment,
  metadata: { start: piece.start, stop: piece.stop },
}));
console.log("Number of LHS segments", lhsSegments.length);

// Embed the LHS documents into an in-memory vector store.
const embeddings = new OpenAIEmbeddings({ model: "text-embedding-3-large" });
const vectorStore = new MemoryVectorStore(embeddings);
await vectorStore.addDocuments(lhsSegments);

const rhsSegIndices = splitText(rhsText);
console.log("Number of RHS segments", rhsSegIndices.length);

// Number of LHS segments to retrieve for each RHS segment.
const numResults = 2;
const mappings = [];
for (const rhsPiece of rhsSegIndices) {
  console.log("Segment:", rhsPiece.segment);

  // Ask the model for a short description of this RHS segment.
  const summaryMessages = [
    new SystemMessage(PROMPT),
    new HumanMessage(rhsPiece.segment),
  ];
  const output = await chain.invoke({ messages: summaryMessages });

  // Look up the most similar LHS segments and keep only their ranges.
  const searchResults = await vectorStore.similaritySearch(rhsPiece.segment, numResults);
  const annotation = {
    description: output.content,
    lhsRanges: searchResults.map((doc) => ({
      start: doc.metadata.start,
      end: doc.metadata.stop,
    })),
    rhsRanges: [ { start: rhsPiece.start, end: rhsPiece.stop } ],
  };

  console.log("Annotation:", JSON.stringify(annotation, null, 2));
  mappings.push(annotation);
}

Number of LHS segments 62
Number of RHS segments 33
Segment: import Std
import Init.Data.ByteArray
import VerifiedFipsCryptography.Util.HexString
Annotation: {
  "description": "Converts hexadecimal strings to byte arrays and vice versa.",
  "lhsRanges": [
    {
      "start": 31,
      "end": 161
    },
    {
      "start": 12255,
      "end": 12273
    }
  ],
  "rhsRanges": [
    {
      "start": 0,
      "end": 84
    }
  ]
}
Segment: namespace AES
Annotation: {
  "description": "AES namespace for Advanced Encryption Standard implementation.",
  "lhsRanges": [
    {
      "start": 31,
      "end": 161
    },
    {
      "start": 966,
      "end": 1560
    }
  ],
  "rhsRanges": [
    {
      "start": 86,
      "end": 99
    }
  ]
}
Segment: def testBit (b: UInt8) (i: UInt8): Bool :=
  (b >>> i) % 2 == 1
Annotation: {
  "description": "Function checks if specific bit in byte is set.",
  "lhsRanges": [
    {
      "start": 5364,
      "end": 5652
    },
    {
      "start": 5118,
     

[33m33[39m

In [5]:
// Wrap the computed mappings together with (currently empty) label lists
// and print the whole structure as pretty-printed JSON.
const annotations = { mappings, lhsLabels: [], rhsLabels: [] };
const annotationsJson = JSON.stringify(annotations, null, 2);
console.log(annotationsJson);


{
  "mappings": [
    {
      "description": "Converts hexadecimal strings to byte arrays and vice versa.",
      "lhsRanges": [
        {
          "start": 31,
          "end": 161
        },
        {
          "start": 12255,
          "end": 12273
        }
      ],
      "rhsRanges": [
        {
          "start": 0,
          "end": 84
        }
      ]
    },
    {
      "description": "AES namespace for Advanced Encryption Standard implementation.",
      "lhsRanges": [
        {
          "start": 31,
          "end": 161
        },
        {
          "start": 966,
          "end": 1560
        }
      ],
      "rhsRanges": [
        {
          "start": 86,
          "end": 99
        }
      ]
    },
    {
      "description": "Function checks if specific bit in byte is set.",
      "lhsRanges": [
        {
          "start": 5364,
          "end": 5652
        },
        {
          "start": 5118,
          "end": 5362
        }
      ],
      "rhsRanges": [
        {
   

In [None]:
import { readFileSync } from 'node:fs';
import { SERVER_DATA_DIR } from '../server/src/util/fileUtils.ts';

// Reload both texts and print the LHS characters in [31, 161), wrapped in
// "##" markers so leading/trailing whitespace is visible.
const lhsText = readFileSync(`${SERVER_DATA_DIR}/AES-md/selected-text.txt`, 'utf-8');
const rhsText = readFileSync(`${SERVER_DATA_DIR}/AES-md/pre-written.txt`, 'utf-8');
console.log(`##${lhsText.substring(31, 161)}##`);

##The general function for executing AES-128, AES-192, or AES-256 is denoted by CIPHER(); its inverse is denoted by INVCIPHER().[^1]##
