diff --git a/Cargo.lock b/Cargo.lock
index 5acffd0fac3baa..afc3744abd1cb6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6,7 +6,6 @@ version = 4
 name = "_base64"
 version = "0.1.0"
 dependencies = [
- "base64",
  "cpython-sys",
 ]
 
@@ -19,12 +18,6 @@ dependencies = [
  "memchr",
 ]
 
-[[package]]
-name = "base64"
-version = "0.22.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
-
 [[package]]
 name = "bindgen"
 version = "0.72.1"
diff --git a/Modules/_base64/Cargo.toml b/Modules/_base64/Cargo.toml
index 038ec4bb02eb02..0810b787ab2773 100644
--- a/Modules/_base64/Cargo.toml
+++ b/Modules/_base64/Cargo.toml
@@ -4,9 +4,8 @@ version = "0.1.0"
 edition = "2024"
 
 [dependencies]
-base64 = "0.22.1"
 cpython-sys ={ path = "../cpython-sys" }
 
 [lib]
 name = "_base64"
-crate-type = ["staticlib"]
\ No newline at end of file
+crate-type = ["staticlib"]
diff --git a/Modules/_base64/src/lib.rs b/Modules/_base64/src/lib.rs
index f9c122314a3a32..330e00de3d2525 100644
--- a/Modules/_base64/src/lib.rs
+++ b/Modules/_base64/src/lib.rs
@@ -1,39 +1,193 @@
 use std::cell::UnsafeCell;
-
-use std::ffi::CStr;
-use std::ffi::CString;
-use std::ffi::c_char;
-use std::ffi::c_int;
-use std::ffi::c_void;
+use std::ffi::{c_char, c_int, c_void};
+use std::mem::MaybeUninit;
+use std::ptr;
+use std::slice;
 
 use cpython_sys::METH_FASTCALL;
-use cpython_sys::Py_ssize_t;
 use cpython_sys::PyBytes_AsString;
-use cpython_sys::PyBytes_FromString;
+use cpython_sys::PyBytes_FromStringAndSize;
+use cpython_sys::PyBuffer_Release;
 use cpython_sys::PyMethodDef;
 use cpython_sys::PyMethodDefFuncPointer;
 use cpython_sys::PyModuleDef;
 use cpython_sys::PyModuleDef_HEAD_INIT;
 use cpython_sys::PyModuleDef_Init;
 use cpython_sys::PyObject;
+use cpython_sys::PyObject_GetBuffer;
+use cpython_sys::Py_DecRef;
+use cpython_sys::PyErr_NoMemory;
+use cpython_sys::PyErr_SetString;
+use cpython_sys::PyExc_TypeError;
+use cpython_sys::Py_buffer;
+use cpython_sys::Py_ssize_t;
+
+const PYBUF_SIMPLE: c_int = 0;
+const PAD_BYTE: u8 = b'=';
+const ENCODE_TABLE: [u8; 64] =
+    *b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/// Returns the padded base64 length for `input_len` bytes, or `None` on overflow.
+#[inline]
+fn encoded_output_len(input_len: usize) -> Option<usize> {
+    input_len
+        .checked_add(2)
+        .map(|n| n / 3)
+        .and_then(|blocks| blocks.checked_mul(4))
+}
+
+/// Encodes `input` as padded standard base64 into `output` and returns the
+/// number of bytes written. Panics if `output` is too short.
+#[inline]
+fn encode_into(input: &[u8], output: &mut [u8]) -> usize {
+    let mut src_index = 0;
+    let mut dst_index = 0;
+    let len = input.len();
+
+    while src_index + 3 <= len {
+        let chunk = (u32::from(input[src_index]) << 16)
+            | (u32::from(input[src_index + 1]) << 8)
+            | u32::from(input[src_index + 2]);
+        output[dst_index] = ENCODE_TABLE[((chunk >> 18) & 0x3f) as usize];
+        output[dst_index + 1] = ENCODE_TABLE[((chunk >> 12) & 0x3f) as usize];
+        output[dst_index + 2] = ENCODE_TABLE[((chunk >> 6) & 0x3f) as usize];
+        output[dst_index + 3] = ENCODE_TABLE[(chunk & 0x3f) as usize];
+        src_index += 3;
+        dst_index += 4;
+    }
 
-use base64::prelude::*;
+    match len - src_index {
+        0 => {}
+        1 => {
+            let chunk = u32::from(input[src_index]) << 16;
+            output[dst_index] = ENCODE_TABLE[((chunk >> 18) & 0x3f) as usize];
+            output[dst_index + 1] = ENCODE_TABLE[((chunk >> 12) & 0x3f) as usize];
+            output[dst_index + 2] = PAD_BYTE;
+            output[dst_index + 3] = PAD_BYTE;
+            dst_index += 4;
+        }
+        2 => {
+            let chunk = (u32::from(input[src_index]) << 16)
+                | (u32::from(input[src_index + 1]) << 8);
+            output[dst_index] = ENCODE_TABLE[((chunk >> 18) & 0x3f) as usize];
+            output[dst_index + 1] = ENCODE_TABLE[((chunk >> 12) & 0x3f) as usize];
+            output[dst_index + 2] = ENCODE_TABLE[((chunk >> 6) & 0x3f) as usize];
+            output[dst_index + 3] = PAD_BYTE;
+            dst_index += 4;
+        }
+        _ => unreachable!("len - src_index cannot exceed 2"),
+    }
+
+    dst_index
+}
+
+/// Owns a `Py_buffer` view and releases it with `PyBuffer_Release` on drop.
+struct BorrowedBuffer {
+    view: Py_buffer,
+}
+
+impl BorrowedBuffer {
+    unsafe fn from_object(obj: *mut PyObject) -> Result<Self, ()> {
+        let mut view = MaybeUninit::<Py_buffer>::uninit();
+        if unsafe { PyObject_GetBuffer(obj, view.as_mut_ptr(), PYBUF_SIMPLE) } != 0 {
+            return Err(());
+        }
+        Ok(Self {
+            view: unsafe { view.assume_init() },
+        })
+    }
+
+    fn len(&self) -> Py_ssize_t {
+        self.view.len
+    }
+
+    fn as_ptr(&self) -> *const u8 {
+        self.view.buf.cast::<u8>() as *const u8
+    }
+}
+
+impl Drop for BorrowedBuffer {
+    fn drop(&mut self) {
+        unsafe {
+            PyBuffer_Release(&mut self.view);
+        }
+    }
+}
 
 #[unsafe(no_mangle)]
-pub unsafe extern "C" fn standard_b64encode(
+pub unsafe extern "C" fn b64encode(
     _module: *mut PyObject,
     args: *mut *mut PyObject,
-    _nargs: Py_ssize_t,
+    nargs: Py_ssize_t,
 ) -> *mut PyObject {
-    let buff = unsafe { *args };
-    let ptr = unsafe { PyBytes_AsString(buff) };
-    if ptr.is_null() {
-        // Error handling omitted for now
-        unimplemented!("Error handling goes here...")
+    if nargs != 1 {
+        unsafe {
+            PyErr_SetString(
+                PyExc_TypeError,
+                c"b64encode() takes exactly one argument".as_ptr(),
+            );
+        }
+        return ptr::null_mut();
+    }
+
+    let source = unsafe { *args };
+    let buffer = match unsafe { BorrowedBuffer::from_object(source) } {
+        Ok(buf) => buf,
+        Err(_) => return ptr::null_mut(),
+    };
+
+    let view_len = buffer.len();
+    if view_len < 0 {
+        unsafe {
+            PyErr_SetString(
+                PyExc_TypeError,
+                c"b64encode() argument has negative length".as_ptr(),
+            );
+        }
+        return ptr::null_mut();
     }
-    let cdata = unsafe { CStr::from_ptr(ptr) };
-    let res = BASE64_STANDARD.encode(cdata.to_bytes());
-    unsafe { PyBytes_FromString(CString::new(res).unwrap().as_ptr()) }
+    let input_len = view_len as usize;
+    // A zero-length view may carry a NULL `buf`; `slice::from_raw_parts`
+    // requires a non-null pointer even when the length is zero.
+    let input: &[u8] = if input_len == 0 {
+        &[]
+    } else {
+        unsafe { slice::from_raw_parts(buffer.as_ptr(), input_len) }
+    };
+
+    let Some(output_len) = encoded_output_len(input_len) else {
+        unsafe {
+            PyErr_NoMemory();
+        }
+        return ptr::null_mut();
+    };
+
+    if output_len > isize::MAX as usize {
+        unsafe {
+            PyErr_NoMemory();
+        }
+        return ptr::null_mut();
+    }
+
+    let result = unsafe {
+        PyBytes_FromStringAndSize(ptr::null(), output_len as Py_ssize_t)
+    };
+    if result.is_null() {
+        return ptr::null_mut();
+    }
+
+    let dest_ptr = unsafe { PyBytes_AsString(result) };
+    if dest_ptr.is_null() {
+        unsafe {
+            Py_DecRef(result);
+        }
+        return ptr::null_mut();
+    }
+    let dest = unsafe { slice::from_raw_parts_mut(dest_ptr.cast::<u8>(), output_len) };
+
+    let written = encode_into(input, dest);
+    debug_assert_eq!(written, output_len);
+    result
 }
 
 #[unsafe(no_mangle)]
@@ -62,9 +216,9 @@ unsafe impl Sync for ModuleDef {}
 pub static _BASE64_MODULE_METHODS: [PyMethodDef; 2] = {
     [
         PyMethodDef {
-            ml_name: c"standard_b64encode".as_ptr() as *mut c_char,
+            ml_name: c"b64encode".as_ptr() as *mut c_char,
             ml_meth: PyMethodDefFuncPointer {
-                PyCFunctionFast: standard_b64encode,
+                PyCFunctionFast: b64encode,
             },
             ml_flags: METH_FASTCALL,
             ml_doc: c"Demo for the _base64 module".as_ptr() as *mut c_char,
diff --git a/Modules/cpython-sys/build.rs b/Modules/cpython-sys/build.rs
index b55f03c5b066b5..c45ccc0b2684c7 100644
--- a/Modules/cpython-sys/build.rs
+++ b/Modules/cpython-sys/build.rs
@@ -8,6 +8,7 @@ fn main() {
         .header("wrapper.h")
         .clang_arg(format!("-I{}", srcdir.as_os_str().to_str().unwrap()))
         .clang_arg(format!("-I{}/Include", srcdir.as_os_str().to_str().unwrap()))
+        .clang_arg(format!("-I{}/Include/internal", srcdir.as_os_str().to_str().unwrap()))
         .allowlist_function("Py.*")
         .allowlist_function("_Py.*")
         .allowlist_type("Py.*")
@@ -24,4 +25,4 @@ fn main() {
     bindings
         .write_to_file(out_path.join("bindings.rs"))
         .expect("Couldn't write bindings!");
-}
\ No newline at end of file
+}
diff --git a/Modules/makesetup b/Modules/makesetup
index b701a61a548bae..773de9117f4a22 100755
--- a/Modules/makesetup
+++ b/Modules/makesetup
@@ -83,6 +83,8 @@ case $makepre in
 '')	makepre=Makefile.pre;;
 esac
 
+UNAME_SYSTEM=`uname -s 2>/dev/null || echo unknown`
+
 # Newline for sed i and a commands
 NL='\
 '
@@ -289,7 +291,14 @@ sed -e 's/[ ]*#.*//' -e '/^[ ]*$/d' |
 			echo "$rule" >>$rulesf
 			for mod in $mods
 			do
-				custom_ldflags="-Wl,--defsym=PyInit_$mod=PyInit_$mod"
+				case $UNAME_SYSTEM in
+				Darwin*)
+					custom_ldflags="$custom_ldflags -Wl,-u,_PyInit_$mod"
+					;;
+				*)
+					custom_ldflags="$custom_ldflags -Wl,--defsym=PyInit_$mod=PyInit_$mod"
+					;;
+				esac
 			done
 		fi
 		case $doconfig in
diff --git a/Python/remote_debug.h b/Python/remote_debug.h
index e7676013197fa9..eac7f2aee132eb 100644
--- a/Python/remote_debug.h
+++ b/Python/remote_debug.h
@@ -29,6 +29,7 @@ extern "C" {
 
 #include "pyconfig.h"
 #include "internal/pycore_ceval.h"
+#include "internal/pycore_debug_offsets.h"
 
 #ifdef __linux__
 #    include <elf.h>
diff --git a/Tools/build/regen-rust-wrapper-h.py b/Tools/build/regen-rust-wrapper-h.py
index 763bf1133d4ecb..998d808ea40ac2 100644
--- a/Tools/build/regen-rust-wrapper-h.py
+++ b/Tools/build/regen-rust-wrapper-h.py
@@ -3,8 +3,11 @@
 from pathlib import Path
 
 ROOT = Path(__file__).resolve().parents[2]
-INCLUDE = ROOT / "Include"
 WRAPPER_H = ROOT / "Modules" / "cpython-sys" / "wrapper.h"
+SKIP_PREFIXES = ("cpython/",)
+SKIP_EXACT = {
+    "internal/pycore_crossinterp_data_registry.h",
+}
 
 def normalize_path(header: str) -> str:
     return re.sub(r'(:?\.\/)(:?Include\/)?', '', header)
@@ -18,7 +21,13 @@ def main(output: str = WRAPPER_H) -> None:
         f.write("#include \"Modules/expat/expat.h\"\n")
         for header in headers.split():
             normalized_path = normalize_path(header)
+            if normalized_path.startswith(SKIP_PREFIXES):
+                continue
+            if normalized_path in SKIP_EXACT:
+                continue
             f.write(f"#include \"{normalized_path}\"\n")
+            if normalized_path == "Python/remote_debug.h":
+                f.write("#undef UNUSED\n")
 
 if __name__ == "__main__":
     import sys