Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions src/notebooks/deepnote/deepnoteDataConverter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,9 @@ export class DeepnoteDataConverter {
} else if (item.mime === 'application/json') {
data['application/json'] = JSON.parse(new TextDecoder().decode(item.data));
} else if (item.mime === 'image/png') {
data['image/png'] = btoa(String.fromCharCode(...new Uint8Array(item.data)));
data['image/png'] = this.uint8ArrayToBase64(item.data);
} else if (item.mime === 'image/jpeg') {
data['image/jpeg'] = btoa(String.fromCharCode(...new Uint8Array(item.data)));
data['image/jpeg'] = this.uint8ArrayToBase64(item.data);
} else if (item.mime === 'application/vnd.deepnote.dataframe.v3+json') {
data['application/vnd.deepnote.dataframe.v3+json'] = JSON.parse(
new TextDecoder().decode(item.data)
Expand Down Expand Up @@ -523,4 +523,21 @@ export class DeepnoteDataConverter {
return new NotebookCellOutput([]);
});
}

/**
* Converts a Uint8Array to a base64 string without causing stack overflow.
* Uses chunked processing to avoid call stack limits with large arrays.
*/
private uint8ArrayToBase64(data: Uint8Array): string {
// Process in chunks to avoid stack overflow from spread operator
const chunkSize = 8192;
let binaryString = '';

for (let i = 0; i < data.length; i += chunkSize) {
const chunk = data.subarray(i, Math.min(i + chunkSize, data.length));
binaryString += String.fromCharCode(...chunk);
}

return btoa(binaryString);
}
}
49 changes: 48 additions & 1 deletion src/notebooks/deepnote/deepnoteDataConverter.unit.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { assert } from 'chai';
import { NotebookCellKind, type NotebookCellData } from 'vscode';
import { NotebookCellKind, NotebookCellOutput, NotebookCellOutputItem, type NotebookCellData } from 'vscode';

import { DeepnoteDataConverter } from './deepnoteDataConverter';
import type { DeepnoteBlock, DeepnoteOutput } from '../../platform/deepnote/deepnoteTypes';
Expand Down Expand Up @@ -797,4 +797,51 @@ suite('DeepnoteDataConverter', () => {
assert.deepStrictEqual(roundTripBlocks, originalBlocks);
});
});

suite('large data handling', () => {
test('should handle cells with large image data without stack overflow', () => {
// Create a large image data buffer (1MB) - this reproduces the bug where
// saving fails with "Maximum call stack size exceeded" because
// btoa(String.fromCharCode(...largeArray)) causes stack overflow
const largeImageSize = 1000000; // 1MB
const largeImageData = new Uint8Array(largeImageSize);

for (let i = 0; i < largeImageSize; i++) {
largeImageData[i] = i % 256;
}

const cellWithLargeImage: NotebookCellData = {
kind: NotebookCellKind.Code,
value: 'display_image()',
languageId: 'python',
metadata: {
__deepnotePocket: {
type: 'code',
sortingKey: 'a0'
},
id: 'block-1'
},
outputs: [
new NotebookCellOutput([new NotebookCellOutputItem(largeImageData, 'image/png')], {
cellId: 'block-1',
cellIndex: 0
})
]
};

// This should not throw - it should handle large data gracefully
const blocks = converter.convertCellsToBlocks([cellWithLargeImage]);

assert.strictEqual(blocks.length, 1);
assert.strictEqual(blocks[0].type, 'code');
assert.isDefined(blocks[0].outputs);
assert.strictEqual(blocks[0].outputs!.length, 1);

// Verify the output data is properly base64 encoded
const output = blocks[0].outputs![0] as { data?: Record<string, unknown> };
assert.isDefined(output.data);
assert.isDefined(output.data!['image/png']);
assert.isString(output.data!['image/png']);
});
});
});
69 changes: 58 additions & 11 deletions src/notebooks/deepnote/deepnoteSerializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,43 @@ import { l10n, workspace, type CancellationToken, type NotebookData, type Notebo
import { logger } from '../../platform/logging';
import { IDeepnoteNotebookManager } from '../types';
import { DeepnoteDataConverter } from './deepnoteDataConverter';
import type { DeepnoteFile, DeepnoteNotebook } from '../../platform/deepnote/deepnoteTypes';
import type { DeepnoteBlock, DeepnoteFile, DeepnoteNotebook } from '../../platform/deepnote/deepnoteTypes';

export { DeepnoteBlock, DeepnoteNotebook, DeepnoteOutput, DeepnoteFile } from '../../platform/deepnote/deepnoteTypes';

/**
* Deep clones an object while removing circular references.
* Uses a recursion stack pattern to only drop true cycles, preserving shared references.
*/
function cloneWithoutCircularRefs<T>(obj: T, seen = new WeakSet<object>()): T {
if (obj === null || typeof obj !== 'object') {
return obj;
}

if (seen.has(obj as object)) {
// True circular reference on the current path - drop it
return undefined as T;
}

seen.add(obj as object);

try {
if (Array.isArray(obj)) {
return obj.map((item) => cloneWithoutCircularRefs(item, seen)) as T;
}

const clone: Record<string, unknown> = {};

for (const key of Object.keys(obj as Record<string, unknown>)) {
clone[key] = cloneWithoutCircularRefs((obj as Record<string, unknown>)[key], seen);
}

return clone as T;
} finally {
seen.delete(obj as object);
}
}

/**
* Serializer for converting between Deepnote YAML files and VS Code notebook format.
* Handles reading/writing .deepnote files and manages project state persistence.
Expand Down Expand Up @@ -108,49 +141,63 @@ export class DeepnoteNotebookSerializer implements NotebookSerializer {
}

try {
logger.debug('SerializeNotebook: Starting serialization');

const projectId = data.metadata?.deepnoteProjectId;

if (!projectId) {
throw new Error('Missing Deepnote project ID in notebook metadata');
}

logger.debug(`SerializeNotebook: Project ID: ${projectId}`);

const originalProject = this.notebookManager.getOriginalProject(projectId) as DeepnoteFile | undefined;

if (!originalProject) {
throw new Error('Original Deepnote project not found. Cannot save changes.');
}

logger.debug('SerializeNotebook: Got original project');

const notebookId =
data.metadata?.deepnoteNotebookId || this.notebookManager.getTheSelectedNotebookForAProject(projectId);

if (!notebookId) {
throw new Error('Cannot determine which notebook to save');
}

const notebookIndex = originalProject.project.notebooks.findIndex(
(nb: { id: string }) => nb.id === notebookId
);
logger.debug(`SerializeNotebook: Notebook ID: ${notebookId}`);

const notebook = originalProject.project.notebooks.find((nb: { id: string }) => nb.id === notebookId);

if (notebookIndex === -1) {
if (!notebook) {
throw new Error(`Notebook with ID ${notebookId} not found in project`);
}

const updatedProject = JSON.parse(JSON.stringify(originalProject)) as DeepnoteFile;
logger.debug(`SerializeNotebook: Found notebook, converting ${data.cells.length} cells to blocks`);

// Clone blocks while removing circular references that may have been
// introduced by VS Code's notebook cell/output handling
const blocks = this.converter.convertCellsToBlocks(data.cells);

logger.debug(`SerializeNotebook: Converted to ${blocks.length} blocks, now cloning without circular refs`);

notebook.blocks = cloneWithoutCircularRefs<DeepnoteBlock[]>(blocks);

const updatedBlocks = this.converter.convertCellsToBlocks(data.cells);
logger.debug('SerializeNotebook: Cloned blocks, updating modifiedAt');

updatedProject.project.notebooks[notebookIndex].blocks = updatedBlocks;
originalProject.metadata.modifiedAt = new Date().toISOString();

updatedProject.metadata.modifiedAt = new Date().toISOString();
logger.debug('SerializeNotebook: Starting yaml.dump');

const yamlString = yaml.dump(updatedProject, {
const yamlString = yaml.dump(originalProject, {
indent: 2,
lineWidth: -1,
noRefs: true,
sortKeys: false
});

this.notebookManager.storeOriginalProject(projectId, updatedProject, notebookId);
logger.debug(`SerializeNotebook: yaml.dump complete, ${yamlString.length} chars`);

return new TextEncoder().encode(yamlString);
} catch (error) {
Expand Down
71 changes: 71 additions & 0 deletions src/notebooks/deepnote/deepnoteSerializer.unit.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,77 @@ project:
});
});

suite('circular reference handling', () => {
test('should serialize notebook with circular references in output metadata', async () => {
// Create output with circular reference - this reproduces the bug
// where saving fails with "Maximum call stack size exceeded"
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const circularOutput: any = {
output_type: 'execute_result',
execution_count: 1,
data: { 'text/plain': 'test' },
metadata: {}
};
// Create circular reference
circularOutput.metadata.self = circularOutput;

const projectWithCircularRef: DeepnoteFile = {
version: '1.0',
metadata: {
createdAt: '2023-01-01T00:00:00Z',
modifiedAt: '2023-01-02T00:00:00Z'
},
project: {
id: 'project-circular',
name: 'Circular Test',
notebooks: [
{
id: 'notebook-1',
name: 'Test Notebook',
blocks: [
{
blockGroup: 'group-1',
id: 'block-1',
content: 'test',
sortingKey: 'a0',
type: 'code',
outputs: [circularOutput]
}
],
executionMode: 'block',
isModule: false
}
],
settings: {}
}
};

manager.storeOriginalProject('project-circular', projectWithCircularRef, 'notebook-1');

const notebookData = {
cells: [
{
kind: 2, // NotebookCellKind.Code
value: 'test',
languageId: 'python',
metadata: {}
}
],
metadata: {
deepnoteProjectId: 'project-circular',
deepnoteNotebookId: 'notebook-1'
}
};

// Should successfully serialize even with circular references
const result = await serializer.serializeNotebook(notebookData as any, {} as any);

assert.instanceOf(result, Uint8Array);
const yamlString = new TextDecoder().decode(result);
assert.include(yamlString, 'project-circular');
});
});

suite('integration scenarios', () => {
test('should maintain independence between serializer instances', () => {
const manager1 = new DeepnoteNotebookManager();
Expand Down