
list wikimedia articles?

ROUTE = /wikimedia-search.json


In [None]:
const readIndex = importer.import('load wikimedia index')

// In-memory cache of every index entry ({ offset, length, title }), filled by the first load.
let allTitles = []

// Promise for the index load that is in flight or already complete; null before the first call.
let indexLoad = null

// Lower-case the raw query and split it on commas/whitespace, dropping the empty
// strings the split produces for inputs such as "a, b" (an empty term would be
// contained in every title and silently weaken the filter).
function parseSearchTerms(search) {
  if (!search) {
    return []
  }
  return search.toLocaleLowerCase().split(/,|\s+/).filter(term => term.length > 0)
}

// A title matches when it contains all terms but at most one, and always at
// least one term, so a single-term query is a real filter.
function titleMatches(title, terms) {
  const required = Math.max(1, terms.length - 1)
  let hits = 0
  for (const term of terms) {
    if (title.includes(term)) {
      hits++
    }
  }
  return hits >= required
}

// Start the index load once and share the same promise between all callers, so a
// call that arrives while the index is still streaming waits for the full cache
// instead of searching a partially filled one.
function loadIndex(terms) {
  if (!indexLoad) {
    indexLoad = new Promise(resolve => readIndex(terms, ({ offset, length }, titles, finished) => {
      console.log('reading index:', finished, titles[0])
      for (const title of titles) {
        allTitles.push({ offset, length, title })
      }
      if (finished === true) {
        resolve()
      }
    })).catch(err => {
      // Forget the failed attempt so a later call can retry from scratch.
      allTitles = []
      indexLoad = null
      throw err
    })
  }
  return indexLoad
}

/**
 * Find index entries whose title matches a free-text query.
 *
 * The first call loads the whole index into memory through readIndex; later
 * calls are answered from that cache.
 *
 * @param {string} [search] - query; split on commas and whitespace into terms.
 * @returns {Promise<Array<{offset: number, length: number, title: string}>>}
 *   matching entries in index order; empty when the query has no terms.
 */
async function readWikimedia(search) {
  const terms = parseSearchTerms(search)

  await loadIndex(terms)

  if (terms.length === 0) {
    return []
  }

  const matches = []
  for (const entry of allTitles) {
    if (titleMatches(entry.title, terms)) {
      console.log('match found in set:', entry.title)
      matches.push(entry)
    }
  }
  return matches
}

module.exports = readWikimedia



load wikimedia index?


In [None]:
const fs = require('fs');
const path = require('path')
//const zlib = require('zlib');

const INDEX_FILE = path.join(process.env.HOME || process.env.HOMEPATH || process.env.USERPROFILE, 'Downloads', 'enwiki-20250420-pages-articles-multistream-index.txt')
const XML_FILE = path.join(process.env.HOME || process.env.HOMEPATH || process.env.USERPROFILE, 'Downloads', 'enwiki-20250420-pages-articles-multistream.xml.bz2')

// Parse one "offset:pageId:title" index line. Only the first two colons are
// separators, so a title that itself contains ':' is kept whole.
// Returns null for blank or malformed lines.
function parseIndexLine(line) {
  const firstColon = line.indexOf(':')
  const secondColon = firstColon === -1 ? -1 : line.indexOf(':', firstColon + 1)
  if (secondColon === -1) {
    return null
  }
  return {
    offset: parseInt(line.slice(0, firstColon), 10),
    title: line.slice(secondColon + 1),
  }
}

/**
 * Stream INDEX_FILE and report the titles that share each offset.
 *
 * @param {*} search - accepted for signature compatibility; not used here.
 * @param {function({length: number, offset: number}, string[], (number|boolean)): void} callback
 *   Called once per group of lines sharing an offset with the byte range
 *   (offset and distance to the next offset), the lower-cased titles, and the
 *   percentage of the index consumed so far. The final call passes `true`
 *   instead of a percentage and measures the last range against the size of
 *   XML_FILE.
 */
function readIndex(search, callback) {
  const length = fs.statSync(INDEX_FILE).size
  const stream = fs.createReadStream(INDEX_FILE, {
    highWaterMark: 64 * 1024,
    // Decode in the stream so a multi-byte character split across two chunks
    // is reassembled instead of being mangled by a per-chunk toString().
    encoding: 'utf8',
  });
  let lastOffset
  let leftover = ''
  let lastTitles = []
  let offsetCount = 0

  const handleLine = line => {
    const entry = parseIndexLine(line)
    if (!entry) {
      return
    }
    if (entry.offset && entry.offset !== lastOffset) {
      if (lastOffset) {
        callback({ length: entry.offset - lastOffset, offset: lastOffset }, lastTitles, offsetCount / length * 100.0);
      }
      lastOffset = entry.offset
      lastTitles = []
    }
    lastTitles.push(entry.title.toLocaleLowerCase())
  }

  stream.on('data', chunk => {
    // chunk is a decoded string, so count bytes explicitly for the progress figure.
    offsetCount += Buffer.byteLength(chunk, 'utf8')
    const lines = (leftover + chunk).split('\n');
    // The last piece may be an incomplete line; keep it for the next chunk.
    leftover = lines.pop();
    for (const line of lines) {
      handleLine(line)
    }
  });

  stream.on('end', () => {
    // A file that does not end with a newline leaves its last line in leftover.
    if (leftover) {
      handleLine(leftover)
      leftover = ''
    }
    callback({ length: fs.statSync(XML_FILE).size - lastOffset, offset: lastOffset }, lastTitles, true);
  });
}

module.exports = readIndex



extract wikimedia chunk?


In [None]:
const fs = require('fs');
const path = require('path')
//const zlib = require('zlib');
const { PassThrough, Readable } = require('stream');
const XmlStream = require('xml-stream');
const bz2 = require('unbzip2-stream');

const XML_FILE = path.join(process.env.HOME || process.env.HOMEPATH || process.env.USERPROFILE, 'Downloads', 'enwiki-20250420-pages-articles-multistream.xml.bz2')

// Extract and parse one chunk
/**
 * Decompress one byte range of XML_FILE and stream its <page> elements.
 *
 * The decompressed fragment is wrapped in a synthetic <root> element before it
 * is handed to the XML parser.
 *
 * @param {number} startOffset - first byte of the range.
 * @param {number} endOffset - byte just past the end of the range.
 * @param {function(Object|false): void} callback - called with each parsed page,
 *   then exactly once with `false` when the chunk is finished. The `false`
 *   call is also made when reading, decompressing or parsing fails, so a
 *   caller waiting for completion is never left hanging.
 */
function extractChunk(startOffset, endOffset, callback) {
  console.log('reading:', startOffset, endOffset)
  const fileStream = fs.createReadStream(XML_FILE, {
    start: startOffset,
    end: endOffset - 1 // end is inclusive
  });

  const decompress = bz2();
  const decompressedStream = fileStream.pipe(decompress);

  // Combine: <root> + decompressed + </root>. The opening tag is written first
  // so it is guaranteed to precede the decompressed data.
  const fullStream = new PassThrough();
  fullStream.write('<root>');
  decompressedStream.pipe(fullStream, { end: false });
  decompressedStream.on('end', () => {
    fullStream.end('</root>');
  });

  let finished = false
  // Signal completion to the caller at most once.
  const finish = () => {
    if (finished) {
      return
    }
    finished = true
    callback(false);
  }

  // Log the failure, stop reading the file, and release the caller.
  const fail = (label, err) => {
    console.error(label, err);
    fileStream.destroy();
    finish()
  }

  const xml = new XmlStream(fullStream);

  xml.on('endElement: page', page => {
    // Pages parsed after completion was signalled are dropped.
    if (!finished) {
      callback(page)
    }
  });

  xml.on('end', () => {
    console.log(`Chunk finished\n`);
    finish()
  });

  xml.on('error', err => fail('XML error:', err));
  decompress.on('error', err => fail('BZ2 decompression error:', err));
  fileStream.on('error', err => fail('File read error:', err));
}

module.exports = extractChunk



wikimedia-page.html?

ROUTE = /wikimedia-page.html


In [None]:
const extractChunk = importer.import('extract wikimedia chunk')
const { Remarkable } = require('remarkable');
const md = new Remarkable({ html: true, xhtmlOut: true, breaks: true });
const wtf = require('wtf_wikipedia')
const Mustache = require('mustache');


/**
 * Render one article from the dump as a complete HTML page.
 *
 * Reads the chunk at [offset, offset + length), picks the page whose title
 * matches `search`, parses its wikitext and renders it through the
 * 'wikimedia mustache template' and 'wikiemedia clone index' templates.
 *
 * @param {number|string} offset - byte offset of the chunk.
 * @param {number|string} length - byte length of the chunk.
 * @param {string} search - title to look for (case-insensitive).
 * @returns {Promise<string>} rendered HTML.
 * @throws {RangeError} when offset or length is not a number.
 * @throws {TypeError} when search is not a string.
 */
async function extractWikimedia(offset, length, search) {
  const start = parseInt(offset, 10)
  const size = parseInt(length, 10)
  if (Number.isNaN(start) || Number.isNaN(size)) {
    throw new RangeError('Invalid chunk range: ' + offset + ', ' + length)
  }

  // Normalise once, outside the stream callback, so a bad `search` rejects this
  // promise instead of throwing inside an event handler.
  const wanted = search.toLocaleLowerCase()

  let markdown = ''
  let exactMatch = false
  await new Promise(resolve => extractChunk(start, start + size, page => {
    if (page === false) {
      return resolve()
    }
    // Once the exact title has been seen, later partial matches must not replace it.
    if (exactMatch || typeof page.title !== 'string') {
      return
    }

    const title = page.title.toLocaleLowerCase()
    if (!title.includes(wanted)) {
      return
    }

    const text = page.revision && page.revision.text && page.revision.text['$text']
    if (typeof text !== 'string') {
      return
    }
    markdown = text
    exactMatch = title === wanted
  }))

  const doc = wtf(markdown)
  const template = importer.interpret('wikimedia mustache template').code
  const preprocessed = preprocessDoc(doc.json());
  const content = Mustache.render(template, preprocessed);
  const html = Mustache.render(importer.interpret('wikiemedia clone index').code, {
    content: content
  });
  return html
}

/**
 * Replace wiki heading markup (text wrapped in matching runs of '=') with HTML.
 *
 * Runs of two to six '=' become <h2>..<h6>; a single '=' on each side becomes <p>.
 *
 * @param {string} text - text that may contain heading markup.
 * @returns {string} text with each heading replaced by its HTML element.
 */
function convertWikiHeadingsToHTML(text) {
  const headingPattern = /(\={1,6})\s*(.*?)\s*\1/g;

  const toElement = (match, markers, inner) => {
    const depth = markers.length;
    let tag = 'p';
    if (depth >= 2 && depth <= 6) {
      tag = 'h' + depth;
    }
    return '<' + tag + '>' + inner + '</' + tag + '>';
  };

  return text.replace(headingPattern, toElement);
}

// Replace the first occurrence of `label` found in a plain-text segment with
// `anchor`, splitting that segment in place. Segments that already hold anchor
// HTML are never searched, so a later link cannot match inside an earlier href.
function linkFirstPlainOccurrence(segments, label, anchor) {
  for (let i = 0; i < segments.length; i++) {
    const segment = segments[i];
    if (segment.html) {
      continue;
    }
    const at = segment.value.indexOf(label);
    if (at === -1) {
      continue;
    }
    segments.splice(
      i,
      1,
      { html: false, value: segment.value.slice(0, at) },
      { html: true, value: anchor },
      { html: false, value: segment.value.slice(at + label.length) }
    );
    return;
  }
}

/**
 * Return a copy of a sentence whose text has headings converted, links turned
 * into anchors pointing at /wiki/<page>, and the result rendered by `md`.
 *
 * The input sentence is not modified.
 *
 * @param {{text: string, links?: Array<{page?: string, text?: string}>}} sentence
 * @returns {Object} shallow copy of `sentence` with `text` replaced by HTML.
 */
function applyLinks(sentence) {
  const segments = [{ html: false, value: convertWikiHeadingsToHTML(sentence.text || '') }];

  const links = sentence.links || [];
  for (const link of links) {
    // Without a target page there is nothing to link to; leave the text plain.
    if (link.page === undefined || link.page === null || link.page === '') {
      continue;
    }
    // A link with no display text is shown under its page name.
    const label = typeof link.text === 'string' && link.text !== '' ? link.text : String(link.page);
    const anchor = '<a href="/wiki/' + encodeURIComponent(link.page) + '">' + label + '</a>';
    linkFirstPlainOccurrence(segments, label, anchor);
  }

  const linkedText = segments.map(segment => segment.value).join('');

  const newSentence = Object.assign({}, sentence);
  newSentence.text = md.render(linkedText);

  return newSentence;
}

/**
 * Build the view model passed to the article Mustache template.
 *
 * Returns a copy of `doc` in which every sentence has been passed through
 * applyLinks, every template is reduced to { template, prop } with `prop` as
 * its JSON text, and each section's infoboxes are serialised to JSON.
 * Missing sections, paragraphs, sentences or templates are treated as empty
 * lists, and a missing or empty infobox list becomes '' so the template's
 * infobox block is not rendered for it. The input is not modified.
 *
 * @param {Object} doc - parsed document with an optional `sections` array.
 * @returns {Object} shallow copy of `doc` with a rebuilt `sections` array.
 */
function preprocessDoc(doc) {
  const newDoc = Object.assign({}, doc);

  newDoc.sections = (doc.sections || []).map(section => {
    const newSection = Object.assign({}, section);

    newSection.paragraphs = (section.paragraphs || []).map(paragraph => {
      const newParagraph = Object.assign({}, paragraph);
      newParagraph.sentences = (paragraph.sentences || []).map(sentence => applyLinks(sentence));
      return newParagraph;
    });

    newSection.templates = (section.templates || []).map(template => ({
      template: template.template,
      prop: JSON.stringify(template)
    }));

    const infoboxes = section.infoboxes;
    const hasInfoboxes = infoboxes !== undefined && !(Array.isArray(infoboxes) && infoboxes.length === 0);
    // JSON.stringify([]) is the truthy string "[]", which would render an empty infobox block.
    newSection.infoboxes = hasInfoboxes ? JSON.stringify(infoboxes) : '';

    return newSection;
  });

  return newDoc;
}


module.exports = extractWikimedia



wikimedia mustache template?


In [None]:
{{! Article body template. Rendered by extractWikimedia with the object returned by preprocessDoc(doc.json()). }}
{{! title, pageID and categories are read from the top level of that object; presumably supplied by the parsed document, confirm against its JSON output. }}
<h1>{{title}}</h1>
<p><strong>Page ID:</strong> {{pageID}}</p>

{{! The .length guard keeps the whole paragraph out of the page when the list is empty. }}
{{#categories.length}}
<p><strong>Categories:</strong> {{#categories}}<span>{{.}}</span> {{/categories}}</p>
{{/categories.length}}

{{! One iteration per entry of sections as rebuilt by preprocessDoc. }}
{{#sections}}
<h{{depth}}>{{title}}</h{{depth}}>

{{#paragraphs}}
<p>
  {{#sentences}}
  {{! text is HTML produced by md.render in applyLinks, so it is emitted unescaped. }}
  {{#text}}
  {{{.}}}
  {{/text}}
  {{/sentences}}
</p>
{{/paragraphs}}

{{! Each template entry is the pair built by preprocessDoc: its name and its JSON text. }}
{{#templates.length}}
<h4>Templates</h4>
<ul>
  {{#templates}}
  <li>{{{template}}}: {{{prop}}}</li>
  {{/templates}}
</ul>
{{/templates.length}}

{{! infoboxes is the JSON string set by preprocessDoc, emitted unescaped. }}
{{#infoboxes}}
<h4>Infoboxes</h4>
{{{infoboxes}}}
{{/infoboxes}}

{{! NOTE(review): preprocessDoc does not set references on a section; confirm where this list comes from. }}
{{#references.length}}
<h4>References</h4>
<ul>
  {{#references}}
  <li>{{type}}: {{title}}</li>
  {{/references}}
</ul>
{{/references.length}}

{{/sections}}


wikimedia service?

ROUTE[] = /wiki/:article

ROUTE[] = /wiki

ROOT = true

DEFAULT = true


In [None]:
const readWikimedia = importer.import('list wikimedia articles')
const levSearch = importer.import('search levenshtein distance')
const extractWikimedia = importer.import('wikimedia-page.html')

/**
 * Look up an article by name and return its rendered HTML page.
 *
 * Candidate titles come from readWikimedia, are ranked by levSearch on their
 * `title` key, and the top-ranked entry is rendered with extractWikimedia.
 *
 * @param {string} article - article name to search for.
 * @returns {Promise<string>} rendered HTML of the best match.
 * @throws {Error} 'Page not found: <article>' when there is no candidate or the
 *   ranking returns nothing.
 */
async function searchWikimedia(article) {
  const searchResults = await readWikimedia(article)

  if (searchResults.length === 0) {
    throw new Error('Page not found: ' + article)
  }

  const sorted = levSearch(searchResults, { keys: ['title'] }, article)

  // Guard the ranked list as well: an empty ranking would otherwise surface as
  // a TypeError on sorted[0].offset instead of a not-found error.
  const best = sorted && sorted[0]
  if (!best) {
    throw new Error('Page not found: ' + article)
  }

  const content = await extractWikimedia(best.offset, best.length, best.title)

  return content
}

module.exports = searchWikimedia



wikiemedia clone index?


In [None]:
<!DOCTYPE html>
<!-- Page shell for a rendered article. This file is itself a Mustache template:
     extractWikimedia renders it with a single value, content, which holds the
     already-rendered article HTML. -->
<html lang="en">

<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Wikiflip</title>
  <style>
    /* Page-wide defaults */
    body {
      margin: 0;
      padding: 0;
      font-family: system-ui, sans-serif;
      background: #f9f9f9;
      color: #333;
    }

    /* Banner at the top of the page */
    header {
      background: #4b8bbe;
      color: white;
      padding: 2rem 1rem;
      text-align: center;
      box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
    }

    header h1 {
      font-size: 2.5rem;
      margin: 0 0 0.5rem;
    }

    header p {
      margin: 0;
      font-size: 1rem;
      opacity: 0.9;
    }

    /* Search form, centred under the banner */
    #search-box {
      margin: 2rem auto;
      max-width: 600px;
      display: flex;
      justify-content: center;
    }

    #search-box input {
      width: 100%;
      padding: 0.75rem 1rem;
      font-size: 1.2rem;
      border: 2px solid #ccc;
      border-radius: 8px;
      outline: none;
      transition: 0.3s border;
    }

    #search-box input:focus {
      border-color: #4b8bbe;
    }

    /* Card that holds the rendered article */
    #main-content {
      max-width: 800px;
      margin: 2rem auto;
      padding: 1rem;
      background: white;
      border-radius: 12px;
      box-shadow: 0 0 12px rgba(0, 0, 0, 0.05);
    }

    /* Infobox panel shown below the article */
    .infobox {
      margin-top: 2rem;
      padding: 1rem;
      border-left: 4px solid #4b8bbe;
      background: #eef6ff;
    }

    details summary {
      cursor: pointer;
      font-weight: bold;
    }

    /* Links: underlined only on hover */
    a {
      color: #2970b8;
      text-decoration: none;
    }

    a:hover {
      text-decoration: underline;
    }
  </style>
</head>

<body>

  <header>
    <h1>Wikiflip</h1>
    <p>The slightly off-brand, slightly cooler encyclopedia clone</p>
  </header>

  <!-- Submits a GET request to /wiki with the query in the "article" parameter. -->
  <form id="search-box" method="get" action="/wiki">
    <input name="article" type="text" placeholder="Search articles...">
  </form>

  <div id="main-content">
    <!-- Mustache-rendered content goes here -->
    <!-- Triple braces insert the value without HTML escaping. -->
    {{{ content }}}

    <!-- Static placeholder: nothing in this document fills #infobox-area. -->
    <div class="infobox">
      <details open>
        <summary>🧠 Infobox Decoder</summary>
        <div id="infobox-area">
          <!-- Dynamically rendered infobox properties here -->
          <p>No infoboxes loaded yet.</p>
        </div>
      </details>
    </div>
  </div>

</body>

</html>