Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow disabling storage persistence #1539

Merged
merged 7 commits into from Sep 15, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 11 additions & 0 deletions packages/memory-storage/src/memory-storage.ts
Expand Up @@ -26,6 +26,14 @@ export interface MemoryStorageOptions {
* @default process.env.DEBUG?.includes('*') ?? process.env.DEBUG?.includes('crawlee:memory-storage') ?? false
*/
writeMetadata?: boolean;

/**
* Whether the memory storage should also write its stored content to the disk.
*
* You can also disable this by setting the `CRAWLEE_NO_WRITE_TO_DISK` environment variable to any non-empty value; when set, it takes precedence over this option.
B4nan marked this conversation as resolved.
Show resolved Hide resolved
* @default true
*/
writeFilesToDisk?: boolean;
}

export class MemoryStorage implements storage.StorageClient {
Expand All @@ -34,6 +42,7 @@ export class MemoryStorage implements storage.StorageClient {
readonly keyValueStoresDirectory: string;
readonly requestQueuesDirectory: string;
readonly writeMetadata: boolean;
readonly writeFilesToDisk: boolean;

readonly keyValueStoresHandled: KeyValueStoreClient[] = [];
readonly datasetClientsHandled: DatasetClient[] = [];
Expand All @@ -43,6 +52,7 @@ export class MemoryStorage implements storage.StorageClient {
s.object({
localDataDirectory: s.string.optional,
writeMetadata: s.boolean.optional,
writeFilesToDisk: s.boolean.optional,
}).parse(options);

// v3.0.0 used `crawlee_storage` as the default, we changed this in v3.0.1 to just `storage`,
Expand All @@ -61,6 +71,7 @@ export class MemoryStorage implements storage.StorageClient {
this.keyValueStoresDirectory = resolve(this.localDataDirectory, 'key_value_stores');
this.requestQueuesDirectory = resolve(this.localDataDirectory, 'request_queues');
this.writeMetadata = options.writeMetadata ?? process.env.DEBUG?.includes('*') ?? process.env.DEBUG?.includes('crawlee:memory-storage') ?? false;
this.writeFilesToDisk = process.env.CRAWLEE_NO_WRITE_TO_DISK ? false : options.writeFilesToDisk ?? true;

initWorkerIfNeeded();
}
Expand Down
Expand Up @@ -57,6 +57,7 @@ export class DatasetCollectionClient implements storage.DatasetCollectionClient
id: datasetInfo.name ?? datasetInfo.id,
data: datasetInfo,
writeMetadata: this.client.writeMetadata,
writeFilesToDisk: this.client.writeFilesToDisk,
});

return datasetInfo;
Expand Down
2 changes: 2 additions & 0 deletions packages/memory-storage/src/resource-clients/dataset.ts
Expand Up @@ -188,6 +188,7 @@ export class DatasetClient<Data extends Dictionary = Dictionary> extends BaseCli
entityDirectory: existingStoreById.datasetDirectory,
id: existingStoreById.name ?? existingStoreById.id,
writeMetadata: this.client.writeMetadata,
writeFilesToDisk: this.client.writeFilesToDisk,
});
}

Expand Down Expand Up @@ -260,6 +261,7 @@ export class DatasetClient<Data extends Dictionary = Dictionary> extends BaseCli
entityDirectory: this.datasetDirectory,
id: this.name ?? this.id,
writeMetadata: this.client.writeMetadata,
writeFilesToDisk: this.client.writeFilesToDisk,
});
}
}
Expand Up @@ -57,6 +57,7 @@ export class KeyValueStoreCollectionClient implements storage.KeyValueStoreColle
id: kvStoreInfo.name ?? kvStoreInfo.id,
data: kvStoreInfo,
writeMetadata: this.client.writeMetadata,
writeFilesToDisk: this.client.writeFilesToDisk,
});

return kvStoreInfo;
Expand Down
Expand Up @@ -276,6 +276,7 @@ export class KeyValueStoreClient extends BaseClient {
entityDirectory: existingStoreById.keyValueStoreDirectory,
id: existingStoreById.name ?? existingStoreById.id,
writeMetadata: this.client.writeMetadata,
writeFilesToDisk: this.client.writeFilesToDisk,
});
}

Expand Down Expand Up @@ -304,6 +305,7 @@ export class KeyValueStoreClient extends BaseClient {
entityDirectory: existingStoreById.keyValueStoreDirectory,
id: existingStoreById.name ?? existingStoreById.id,
writeMetadata: this.client.writeMetadata,
writeFilesToDisk: this.client.writeFilesToDisk,
});
}
}
Expand Down Expand Up @@ -334,6 +336,7 @@ export class KeyValueStoreClient extends BaseClient {
entityDirectory: this.keyValueStoreDirectory,
id: this.name ?? this.id,
writeMetadata: this.client.writeMetadata,
writeFilesToDisk: this.client.writeFilesToDisk,
});
}
}
Expand Up @@ -57,6 +57,7 @@ export class RequestQueueCollectionClient implements storage.RequestQueueCollect
id: queueInfo.name ?? queueInfo.id,
data: queueInfo,
writeMetadata: this.client.writeMetadata,
writeFilesToDisk: this.client.writeFilesToDisk,
});

return queueInfo;
Expand Down
3 changes: 3 additions & 0 deletions packages/memory-storage/src/resource-clients/request-queue.ts
Expand Up @@ -326,6 +326,7 @@ export class RequestQueueClient extends BaseClient implements storage.RequestQue
entityDirectory: existingQueueById.requestQueueDirectory,
id: existingQueueById.name ?? existingQueueById.id,
writeMetadata: existingQueueById.client.writeMetadata,
writeFilesToDisk: existingQueueById.client.writeFilesToDisk,
});
}
}
Expand Down Expand Up @@ -361,6 +362,7 @@ export class RequestQueueClient extends BaseClient implements storage.RequestQue
entityDirectory: this.requestQueueDirectory,
id: this.name ?? this.id,
writeMetadata: this.client.writeMetadata,
writeFilesToDisk: this.client.writeFilesToDisk,
});
}

Expand All @@ -372,6 +374,7 @@ export class RequestQueueClient extends BaseClient implements storage.RequestQue
entityDirectory: this.requestQueueDirectory,
id: this.name ?? this.id,
writeMetadata: this.client.writeMetadata,
writeFilesToDisk: this.client.writeFilesToDisk,
});
}

Expand Down
3 changes: 3 additions & 0 deletions packages/memory-storage/src/utils.ts
Expand Up @@ -90,6 +90,7 @@ interface MetadataUpdate<Type extends EntityType, DataType> {
entityDirectory: string;
data: DataType;
writeMetadata: boolean;
writeFilesToDisk: boolean;
}

interface EntriesUpdate<Type extends EntityType, DataType> {
Expand All @@ -99,6 +100,7 @@ interface EntriesUpdate<Type extends EntityType, DataType> {
entityDirectory: string;
data: DataType;
writeMetadata: boolean;
writeFilesToDisk: boolean;
}

interface EntryDelete<Type extends EntityType> {
Expand All @@ -107,6 +109,7 @@ interface EntryDelete<Type extends EntityType> {
action: 'delete-entry';
entityDirectory: string;
writeMetadata: boolean;
writeFilesToDisk: boolean;
data: {
id: string;
};
Expand Down
5 changes: 5 additions & 0 deletions packages/memory-storage/src/workers/worker-utils.ts
Expand Up @@ -65,6 +65,11 @@ async function updateItems(message: WorkerUpdateEntriesMessage) {
const dir = message.entityDirectory;
await ensureDir(dir);

// Skip writing files to the disk if the client has the option set to false
if (!message.writeFilesToDisk) {
B4nan marked this conversation as resolved.
Show resolved Hide resolved
return;
}

switch (message.entityType) {
case 'requestQueues': {
// Write the entry to the file
Expand Down
11 changes: 11 additions & 0 deletions packages/memory-storage/test/__shared__.ts
@@ -0,0 +1,11 @@
import { access } from 'node:fs/promises';
import { setTimeout } from 'node:timers/promises';

/**
 * Resolves once `path` can be accessed on disk, re-checking every 50ms.
 *
 * @param path File or directory to wait for.
 * @param timeoutMs Upper bound on the total wait, in milliseconds. Prevents a path that
 * is never created from keeping the polling (and the awaiting test) alive forever.
 * @returns A promise that resolves as soon as `access(path)` succeeds.
 * @throws Error if `path` is still not accessible once `timeoutMs` has elapsed.
 */
export async function waitTillWrittenToDisk(path: string, timeoutMs = 30000): Promise<void> {
    const deadline = Date.now() + timeoutMs;

    // Iterative polling: no promise chain grows with the number of attempts.
    for (;;) {
        try {
            await access(path);
            return;
        } catch {
            // `access` rejected, so the path is not reachable yet; give up once the deadline has passed.
            if (Date.now() >= deadline) {
                throw new Error(`Timed out after ${timeoutMs}ms waiting for "${path}" to be written to disk`);
            }
        }

        await setTimeout(50);
    }
}
61 changes: 61 additions & 0 deletions packages/memory-storage/test/no-writing-to-disk.test.ts
@@ -0,0 +1,61 @@
import { readdir, rm } from 'node:fs/promises';
import { resolve } from 'node:path';
import { MemoryStorage } from '@crawlee/memory-storage';
import { waitTillWrittenToDisk } from './__shared__';

// Checks that `writeFilesToDisk: false` keeps record data off the disk, both with and without `writeMetadata`.
describe('writeFilesToDisk option', () => {
    const tmpLocation = resolve(__dirname, './tmp/no-writing-to-disk');

    /**
     * Stores a single record in a key-value store obtained through `getOrCreate()`,
     * waits until the store's directory is accessible, and returns the entries found in it.
     */
    const setRecordAndListStoreDirectory = async (client: MemoryStorage): Promise<string[]> => {
        const { id } = await client.keyValueStores().getOrCreate();
        await client.keyValueStore(id).setRecord({ key: 'foo', value: 'test' });

        const storeDirectory = resolve(client.keyValueStoresDirectory, `${id}`);
        await waitTillWrittenToDisk(storeDirectory);

        return readdir(storeDirectory);
    };

    // Remove everything the suites below may have created on disk.
    afterAll(async () => {
        await rm(tmpLocation, { force: true, recursive: true });
    });

    describe('when false and writeMetadata is also false', () => {
        const storage = new MemoryStorage({
            localDataDirectory: resolve(tmpLocation, './no-metadata'),
            writeFilesToDisk: false,
        });

        test('creating a key-value pair in a key-value store should not write data to the disk', async () => {
            const entries = await setRecordAndListStoreDirectory(storage);

            expect(entries).toHaveLength(0);
        });
    });

    describe('when false and writeMetadata is true', () => {
        const storage = new MemoryStorage({
            localDataDirectory: resolve(tmpLocation, './with-metadata'),
            writeFilesToDisk: false,
            writeMetadata: true,
        });

        test('creating a key-value pair in a key-value store should not write data to the disk, but it should write the __metadata__ file', async () => {
            const entries = await setRecordAndListStoreDirectory(storage);

            // Only the metadata file is expected; the record itself must not be persisted.
            expect(entries).toHaveLength(1);
            expect(entries).toEqual(['__metadata__.json']);
        });
    });
});
13 changes: 2 additions & 11 deletions packages/memory-storage/test/write-metadata.test.ts
@@ -1,16 +1,7 @@
import { MemoryStorage } from '@crawlee/memory-storage';
import { access, readdir, rm } from 'node:fs/promises';
import { readdir, rm } from 'node:fs/promises';
import { resolve } from 'node:path';
import { setTimeout } from 'node:timers/promises';

/**
 * Resolves once `path` can be accessed on disk, re-checking every 50ms.
 *
 * @param path File or directory to wait for.
 * @param timeoutMs Upper bound on the total wait, in milliseconds, so that a path which
 * is never created cannot keep the polling running forever.
 * @returns A promise that resolves as soon as `access(path)` succeeds.
 * @throws Error if `path` is still not accessible once `timeoutMs` has elapsed.
 */
async function waitTillWrittenToDisk(path: string, timeoutMs = 30000): Promise<void> {
    const deadline = Date.now() + timeoutMs;

    for (;;) {
        try {
            await access(path);
            return;
        } catch {
            // `access` rejected, so the path is not reachable yet; stop once the deadline has passed.
            if (Date.now() >= deadline) {
                throw new Error(`Timed out after ${timeoutMs}ms waiting for "${path}" to be written to disk`);
            }
        }

        await setTimeout(50);
    }
}
import { waitTillWrittenToDisk } from './__shared__';

describe('writeMetadata option', () => {
const tmpLocation = resolve(__dirname, './tmp/write-metadata-tests');
Expand Down