Releases: gremllm/lib
Releases · gremllm/lib
v0.0.2
What's Changed
- feat: ensure ffi converts to markdown by @TheOutdoorProgrammer in #7
Full Changelog: v0.0.1...v0.0.2
LLM Schema CGO Binaries
This release includes cross-platform CGO shared libraries that can be used from any language supporting C FFI.
Available Libraries:
- Linux AMD64 (.so)
- Linux ARM64 (.so)
- macOS AMD64 (.dylib)
- macOS ARM64 (.dylib)
- Windows AMD64 (.dll)
Usage Examples
Python:
#!/usr/bin/env python3
"""
Test script to verify the CGO library works via Python FFI.

Loads the shared library, calls Convert() on a small HTML document with an
empty list of elements to strip, prints the input and the output, and then
checks that the <header> and <footer> tags are absent and the main text is
still present. Exits with status 1 if any check fails.
"""
import os
import sys
from ctypes import POINTER, c_char_p, c_int, cdll

# Load the shared library
lib_path = os.path.join(os.path.dirname(__file__), '..', '..', 'build', 'libschema.so')
lib = cdll.LoadLibrary(lib_path)

# Set up function signatures
lib.Convert.argtypes = [c_char_p, POINTER(c_char_p), c_int]
# NOTE(review): with a c_char_p restype ctypes hands back a bytes copy (or
# None) and the original pointer is not available to this script, so the
# returned buffer is never released here - confirm whether the library
# expects callers to free it.
lib.Convert.restype = c_char_p

# Test HTML
test_html = b"""<!DOCTYPE html>
<html>
<head><title>Test</title></head>
<body>
<header><h1>This should be stripped</h1></header>
<nav><a href="/">Home</a></nav>
<main><p>This content should remain</p></main>
<footer><p>This should also be stripped</p></footer>
</body>
</html>"""

print("Testing Convert()...")
strings = []
arr = (c_char_p * len(strings))(*[s.encode('utf-8') for s in strings])
result = lib.Convert(test_html, arr, len(strings))

# A NULL char* is returned to Python as None; fail with a clear message
# instead of an AttributeError on .decode().
if result is None:
    print("Convert() returned NULL", file=sys.stderr)
    sys.exit(1)
result_str = result.decode('utf-8')

print("\nInput HTML:")
print(test_html.decode('utf-8'))
print("\nOutput HTML:")
print(result_str)

failures = 0

# Check that header and footer are stripped
if '<header>' not in result_str:
    print("\n✓ Header tag successfully stripped")
else:
    print("\n✗ Header tag still present")
    failures += 1

if '<footer>' not in result_str:
    print("✓ Footer tag successfully stripped")
else:
    print("✗ Footer tag still present")
    failures += 1

# Check that the text inside <main> survived the conversion
if 'This content should remain' in result_str:
    print("✓ Main content preserved")
else:
    print("✗ Main content missing")
    failures += 1

print("\n" + "="*50)
if failures > 0:
    print(f"{failures} test(s) failed")
    sys.exit(1)
print("CGO library is working correctly!")
JavaScript (Node.js):
// Test script to verify the CGO library works from Node.js via koffi.
// Calls Convert() on a small HTML document with an empty strip list, prints
// the input and output, and exits with status 1 if any check fails.
const koffi = require('koffi');

// Load the shared library
const lib = koffi.load('./build/libschema.so');

// Define function signatures - using char* for auto string conversion
// Second param is char** (array of strings), third is int (array length)
const Convert = lib.func('char* Convert(char* htmlInput, char** elementsToStrip, int elementsLen)');

// Note: Not using Free() due to koffi memory management complexity
// In production, you'd need a proper memory management strategy

// Test HTML
const htmlInput = `<!DOCTYPE html>
<html>
<head><title>Test</title></head>
<body>
<header><h1>This should be stripped</h1></header>
<nav><a href="/">Home</a></nav>
<main><p>This content should remain</p></main>
<footer><p>This should also be stripped</p></footer>
</body>
</html>`;

console.log('Testing Convert()...\n');
console.log('Input HTML:');
console.log(htmlInput);

// Call with empty array (use defaults)
const elementsToStrip = [];
const result = Convert(htmlInput, elementsToStrip, elementsToStrip.length);

// Guard against a non-string result (e.g. a NULL pointer from the library)
// so the checks below fail with a clear message instead of a TypeError.
if (typeof result !== 'string') {
  console.error('Convert() did not return a string');
  process.exit(1);
}

console.log('\nOutput HTML:');
console.log(result);

let errors = 0;

// Basic checks
if (!result.includes('<header>')) {
  console.log('\n✓ Header tag successfully stripped');
} else {
  console.log('\n✗ Header tag still present');
  errors++;
}

if (!result.includes('<footer>')) {
  console.log('✓ Footer tag successfully stripped');
} else {
  console.log('✗ Footer tag still present');
  errors++;
}

// The text inside <main> must survive the conversion
if (result.includes('This content should remain')) {
  console.log('✓ Main content preserved');
} else {
  console.log('✗ Main content missing');
  errors++;
}

if (errors > 0) {
  console.log(`\n${errors} errors found`);
  process.exit(1);
}
console.log('\nCGO library is working correctly!')
v0.0.1
What's Changed
- feat: choose to go with CGO by @TheOutdoorProgrammer in #1
- feat: initial commit by @TheOutdoorProgrammer in #2
- feat: rename the repo from schema to lib by @TheOutdoorProgrammer in #3
- feat: remove nodes from html by default and allow data-llm to choose whether to remove nodes forcibly or not by @TheOutdoorProgrammer in #4
- added script converter by @JakeNesler in #5
- added new converter + middleware config for cache by @JakeNesler in #6
New Contributors
- @TheOutdoorProgrammer made their first contribution in #1
- @JakeNesler made their first contribution in #5
Full Changelog: https://github.com/gremllm/lib/commits/v0.0.1
LLM Schema CGO Binaries
This release includes cross-platform CGO shared libraries that can be used from any language supporting C FFI.
Available Libraries:
- Linux AMD64 (.so)
- Linux ARM64 (.so)
- macOS AMD64 (.dylib)
- macOS ARM64 (.dylib)
- Windows AMD64 (.dll)
Usage Examples
Python:
#!/usr/bin/env python3
"""
Test script to verify the CGO library works via Python FFI.

Loads the shared library, calls Convert() on a small HTML document with an
empty list of elements to strip, prints the input and the output, and then
checks that the <header> and <footer> tags are absent and the <main> tag is
still present. Exits with status 1 if any check fails.
"""
import os
import sys
from ctypes import POINTER, c_char_p, c_int, cdll

# Load the shared library
lib_path = os.path.join(os.path.dirname(__file__), '..', '..', 'build', 'libschema.so')
lib = cdll.LoadLibrary(lib_path)

# Set up function signatures
lib.Convert.argtypes = [c_char_p, POINTER(c_char_p), c_int]
# NOTE(review): with a c_char_p restype ctypes hands back a bytes copy (or
# None) and the original pointer is not available to this script, so the
# returned buffer is never released here - confirm whether the library
# expects callers to free it.
lib.Convert.restype = c_char_p

# Test HTML
test_html = b"""<!DOCTYPE html>
<html>
<head><title>Test</title></head>
<body>
<header><h1>This should be stripped</h1></header>
<nav><a href="/">Home</a></nav>
<main><p>This content should remain</p></main>
<footer><p>This should also be stripped</p></footer>
</body>
</html>"""

print("Testing Convert()...")
strings = []
arr = (c_char_p * len(strings))(*[s.encode('utf-8') for s in strings])
result = lib.Convert(test_html, arr, len(strings))

# A NULL char* is returned to Python as None; fail with a clear message
# instead of an AttributeError on .decode().
if result is None:
    print("Convert() returned NULL", file=sys.stderr)
    sys.exit(1)
result_str = result.decode('utf-8')

print("\nInput HTML:")
print(test_html.decode('utf-8'))
print("\nOutput HTML:")
print(result_str)

failures = 0

# Check that header and footer are stripped
if '<header>' not in result_str:
    print("\n✓ Header tag successfully stripped")
else:
    print("\n✗ Header tag still present")
    failures += 1

if '<footer>' not in result_str:
    print("✓ Footer tag successfully stripped")
else:
    print("✗ Footer tag still present")
    failures += 1

# Check that the <main> element is kept in the output
if '<main>' in result_str:
    print("✓ Main content preserved")
else:
    print("✗ Main content missing")
    failures += 1

print("\n" + "="*50)
if failures > 0:
    print(f"{failures} test(s) failed")
    sys.exit(1)
print("CGO library is working correctly!")
JavaScript (Node.js):
// Test script to verify the CGO library works from Node.js via koffi.
// Calls Convert() on a small HTML document with an empty strip list, prints
// the input and output, and exits with status 1 if any check fails.
const koffi = require('koffi');

// Load the shared library
const lib = koffi.load('./build/libschema.so');

// Define function signatures - using char* for auto string conversion
// Second param is char** (array of strings), third is int (array length)
const Convert = lib.func('char* Convert(char* htmlInput, char** elementsToStrip, int elementsLen)');

// Note: Not using Free() due to koffi memory management complexity
// In production, you'd need a proper memory management strategy

// Test HTML
const htmlInput = `<!DOCTYPE html>
<html>
<head><title>Test</title></head>
<body>
<header><h1>This should be stripped</h1></header>
<nav><a href="/">Home</a></nav>
<main><p>This content should remain</p></main>
<footer><p>This should also be stripped</p></footer>
</body>
</html>`;

console.log('Testing Convert()...\n');
console.log('Input HTML:');
console.log(htmlInput);

// Call with empty array (use defaults)
const elementsToStrip = [];
const result = Convert(htmlInput, elementsToStrip, elementsToStrip.length);

// Guard against a non-string result (e.g. a NULL pointer from the library)
// so the checks below fail with a clear message instead of a TypeError.
if (typeof result !== 'string') {
  console.error('Convert() did not return a string');
  process.exit(1);
}

console.log('\nOutput HTML:');
console.log(result);

let errors = 0;

// Basic checks
if (!result.includes('<header>')) {
  console.log('\n✓ Header tag successfully stripped');
} else {
  console.log('\n✗ Header tag still present');
  errors++;
}

if (!result.includes('<footer>')) {
  console.log('✓ Footer tag successfully stripped');
} else {
  console.log('✗ Footer tag still present');
  errors++;
}

// The <main> element must be kept in the output
if (result.includes('<main>')) {
  console.log('✓ Main content preserved');
} else {
  console.log('✗ Main content missing');
  errors++;
}

if (errors > 0) {
  console.log(`\n${errors} errors found`);
  process.exit(1);
}

console.log('\nCGO library is working correctly!');