# X3DH Block Mappings

Generate coarse mappings between LHS and RHS texts of the X3DH protocol, using embeddings and vector databases.

In [5]:
import { readFileSync } from 'node:fs';

import { SystemMessage, HumanMessage } from "@langchain/core/messages";
import { ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";
import { OpenAIEmbeddings } from "@langchain/openai";
import { MemoryVectorStore } from "langchain/vectorstores/memory";

import { SERVER_DATA_DIR } from '../server/src/util/fileUtils.ts';
import { newModel } from '../server/src/agents/agent.ts';
import { splitText } from '../server/src/util/textUtils.ts';

/**
 * Flatten a chat-model message `content` value into plain text.
 *
 * LangChain message content may be a plain string or an array of content
 * parts. String content is returned unchanged; for arrays, string parts and
 * parts carrying a string `text` field are concatenated and every other part
 * is skipped.
 */
function messageContentToText(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return content == null ? "" : String(content);
  return content
    .map((part) => {
      if (typeof part === "string") return part;
      return typeof part?.text === "string" ? part.text : "";
    })
    .join("");
}

/**
 * Map every block of `fromText` onto the most similar blocks of `toText`.
 *
 * Both texts are split with `splitText`. The `toText` blocks are indexed in an
 * in-memory vector store using OpenAI "text-embedding-3-large" embeddings.
 * For each `fromText` block the chat model summarizes the block into a short
 * description, and the vector store is queried with the block's content.
 * Blocks are processed one at a time, in order, and progress is logged to the
 * console.
 *
 * @param fromText - Text whose blocks are summarized and used as queries.
 * @param toText - Text whose blocks are indexed and searched.
 * @param numMatches - Number of similar `toText` blocks requested per block.
 * @returns One entry per `fromText` block: `description` (the model's summary
 *   as a string), `fromRanges` (a single range taken from the block's
 *   `metadata.start`/`metadata.end`) and `toRanges` (the ranges of the
 *   matched `toText` blocks, in the order the search returned them).
 */
async function blockMappings(fromText: string, toText: string, numMatches: number) {
  const prompt = ChatPromptTemplate.fromMessages([ new MessagesPlaceholder("messages") ]);
  const promptText = "Summarize the following text into a short description in less than 10 words. The description should be a single sentence that captures the main idea of the text. The text may contain multiple paragraphs, but the description should be concise and to the point. The description should not include any specific details or examples from the text. The description should be in English and should not include any special characters or formatting.";
  const llm = newModel("Anthropic");
  const chain = prompt.pipe(llm);

  const embeddings = new OpenAIEmbeddings({ model: "text-embedding-3-large" });
  const vectorStore = new MemoryVectorStore(embeddings);

  // Only the target text is indexed; the source blocks act as queries.
  const toBlocks = splitText(toText);
  await vectorStore.addDocuments(toBlocks);
  console.log("Number of blocks in toText", toBlocks.length);

  const fromBlocks = splitText(fromText);
  console.log("Number of blocks in fromText", fromBlocks.length);

  const mappings = [];
  for (const {pageContent, metadata} of fromBlocks) {
    console.log("Block:", pageContent);

    // summarize a given text block into a short description
    const output = await chain.invoke({ messages: [
      new SystemMessage(promptText),
      new HumanMessage(pageContent)
    ]});
    // The model may answer with structured content parts instead of a plain
    // string; normalize so `description` is always a string in the result.
    const description = messageContentToText(output.content);
    const fromRanges = [ {start: metadata.start, end: metadata.end} ];

    // The query is the raw block text, not the generated summary.
    const searchResults = await vectorStore.similaritySearch(pageContent, numMatches);
    const toRanges = searchResults.map((doc) => ({start: doc.metadata.start, end: doc.metadata.end}));

    const mapping = { description: description, toRanges: toRanges, fromRanges: fromRanges };
    console.log("Annotation:", JSON.stringify(mapping, null, 2));
    mappings.push(mapping);
  }
  return mappings;
}


// Load both sides of the X3DH comparison from the server data directory.
const lhsText = readFileSync(`${SERVER_DATA_DIR}/X3DH/selected-text.txt`, 'utf-8');
const rhsText = readFileSync(`${SERVER_DATA_DIR}/X3DH/pre-written.txt`, 'utf-8');

// RHS blocks are the queries ("from"); each is matched to 2 LHS blocks ("to").
const blkMappings = await blockMappings(rhsText, lhsText, 2);
const mappings = [];
for (const { description, toRanges, fromRanges } of blkMappings) {
  // In this direction the "to" ranges index the LHS text and the "from" ranges the RHS text.
  mappings.push({ description, lhsRanges: toRanges, rhsRanges: fromRanges });
}
const annotations = { mappings, lhsLabels: [], rhsLabels: [] };

Number of segments in toText 119
Number of segments in fromText 70
Segment: use curve25519_dalek::{edwards::EdwardsPoint, scalar::Scalar};
use rand_core::{OsRng, RngCore}; 
use sha2::{Digest, Sha512};
// use hmac::{Hmac, Mac};
use hkdf::Hkdf;
Annotation: {
  "description": "Cryptographic operations using curve25519_dalek, random generation, and hashing.",
  "toRanges": [
    {
      "start": 2455,
      "end": 3594
    },
    {
      "start": 1778,
      "end": 1971
    }
  ],
  "fromRanges": [
    {
      "start": 0,
      "end": 166
    }
  ]
}
Segment: // X3DH parameters
const INFO: &[u8] = b"MyProtocolImplementation";
Annotation: {
  "description": "X3DH protocol implementation parameters with INFO constant.",
  "toRanges": [
    {
      "start": 1690,
      "end": 1714
    },
    {
      "start": 5806,
      "end": 5829
    }
  ],
  "fromRanges": [
    {
      "start": 168,
      "end": 235
    }
  ]
}
Segment: // Key types for the protocol
pub struct IdentityKey {
    private_key

[33m70[39m

In [4]:
// Dump the assembled annotations as JSON indented by two spaces.
{
  const prettyJson = JSON.stringify(annotations, null, 2);
  console.log(prettyJson);
}

{
  "mappings": [
    {
      "description": "Cryptographic operations using curve25519_dalek, random generation, and hashing.",
      "lhsRanges": [
        {
          "start": 2455,
          "end": 3594
        },
        {
          "start": 1778,
          "end": 1971
        }
      ],
      "rhsRanges": [
        {
          "start": 0,
          "end": 166
        }
      ]
    },
    {
      "description": "X3DH protocol implementation parameters with INFO constant.",
      "lhsRanges": [
        {
          "start": 1690,
          "end": 1714
        },
        {
          "start": 5806,
          "end": 5829
        }
      ],
      "rhsRanges": [
        {
          "start": 168,
          "end": 235
        }
      ]
    },
    {
      "description": "Cryptographic identity key with private and public components.",
      "lhsRanges": [
        {
          "start": 4542,
          "end": 4796
        },
        {
          "start": 14219,
          "end": 14440
        }


In [6]:
// Reverse direction: LHS blocks are the queries ("from"); each is matched to 2 RHS blocks ("to").
const blkMappings = await blockMappings(lhsText, rhsText, 2);
const mappings = [];
for (const { description, toRanges, fromRanges } of blkMappings) {
  // In this direction the "to" ranges index the RHS text and the "from" ranges the LHS text.
  mappings.push({ description, rhsRanges: toRanges, lhsRanges: fromRanges });
}
const annotations = { mappings, lhsLabels: [], rhsLabels: [] };

Number of segments in toText 33
Number of segments in fromText 62
Segment: # 5. Algorithm Specifications
Annotation: {
  "description": "Algorithm specifications define computational procedures for solving problems.",
  "toRanges": [
    {
      "start": 8318,
      "end": 8941
    },
    {
      "start": 86,
      "end": 99
    }
  ],
  "fromRanges": [
    {
      "start": 0,
      "end": 29
    }
  ]
}
Segment: The general function for executing AES-128, AES-192, or AES-256 is denoted by CIPHER(); its inverse is denoted by INVCIPHER().[^1]
Annotation: {
  "description": "AES encryption and decryption functions are CIPHER() and INVCIPHER().",
  "toRanges": [
    {
      "start": 10216,
      "end": 10416
    },
    {
      "start": 10822,
      "end": 10829
    }
  ],
  "fromRanges": [
    {
      "start": 31,
      "end": 161
    }
  ]
}
Segment: [^1]: Informally, these functions are sometimes called "encryption" and "decryption," but neutral terminology is appropriate because there 

[33m62[39m

In [7]:
// Show the current annotations object as JSON indented by two spaces.
{
  const serialized = JSON.stringify(annotations, null, 2);
  console.log(serialized);
}

{
  "mappings": [
    {
      "description": "Algorithm specifications define computational procedures for solving problems.",
      "rhsRanges": [
        {
          "start": 8318,
          "end": 8941
        },
        {
          "start": 86,
          "end": 99
        }
      ],
      "lhsRanges": [
        {
          "start": 0,
          "end": 29
        }
      ]
    },
    {
      "description": "AES encryption and decryption functions are CIPHER() and INVCIPHER().",
      "rhsRanges": [
        {
          "start": 10216,
          "end": 10416
        },
        {
          "start": 10822,
          "end": 10829
        }
      ],
      "lhsRanges": [
        {
          "start": 31,
          "end": 161
        }
      ]
    },
    {
      "description": "Block cipher functions serve multiple purposes beyond encryption.",
      "rhsRanges": [
        {
          "start": 8318,
          "end": 8941
        },
        {
          "start": 8943,
          "end": 9626
   

In [5]:
import { readFileSync } from 'node:fs';
import { SERVER_DATA_DIR } from '../server/src/util/fileUtils.ts';

// Load the AES-md texts and spot-check one range: print the characters at
// offsets [31, 161) of the LHS text, wrapped in "##" so the exact boundaries
// are visible.
const lhsText = readFileSync(`${SERVER_DATA_DIR}/AES-md/selected-text.txt`, 'utf-8');
const rhsText = readFileSync(`${SERVER_DATA_DIR}/AES-md/pre-written.txt`, 'utf-8');
console.log(`##${lhsText.substring(31, 161)}##`);

##The general function for executing AES-128, AES-192, or AES-256 is denoted by CIPHER(); its inverse is denoted by INVCIPHER().[^1]##


In [None]:
import { readFileSync } from 'node:fs';
// Read the AES-md category records and reshape each one into an annotation
// with a single {start, end} range.
const filePath = `../server/data/AES-md/categories.json`;
const fileContent = readFileSync(filePath, 'utf-8');
const categories = JSON.parse(fileContent);

// NOTE(review): the output saved with this cell shows a "label" key, while this
// code emits "category" — confirm which key downstream consumers expect.
const annotations = [];
for (const { description, category, start, stop } of categories) {
    // The input records name the range end "stop"; annotations name it "end".
    annotations.push({
        description,
        category,
        ranges: [{ start, end: stop }],
    });
}
console.log(JSON.stringify(annotations, null, 2));

[
  {
    "description": "Section header introducing the algorithm specifications",
    "label": "Navigation",
    "ranges": [
      {
        "start": 0,
        "end": 29
      }
    ]
  },
  {
    "description": "Introduction of the general functions CIPHER() and INVCIPHER() for AES algorithms",
    "label": "Definition",
    "ranges": [
      {
        "start": 31,
        "end": 161
      }
    ]
  },
  {
    "description": "Footnote explaining why neutral terminology (CIPHER/INVCIPHER) is used instead of encryption/decryption",
    "label": "Elaboration",
    "ranges": [
      {
        "start": 163,
        "end": 361
      }
    ]
  },
  {
    "description": "Explanation of rounds and round keys as core components of the CIPHER and INVCIPHER algorithms",
    "label": "Definition",
    "ranges": [
      {
        "start": 363,
        "end": 651
      }
    ]
  },
  {
    "description": "Definition of the KEYEXPANSION() function that generates round keys from the cipher key",
  