Skip to content

Commit

Permalink
feat(server)!: pgvecto.rs 0.2 and pgvector compatibility (#6785)
Browse files Browse the repository at this point in the history
* basic changes

update version check

set ef_search for clip

* pgvector compatibility

Revert "pgvector compatibility"

This reverts commit 2b66a52.

pgvector compatibility: minimal edition

pgvector startup check

* update extension at startup

* wording

shortened vector extension variable name

* nightly docker

* fixed version checks

* update tests

add tests for updating extension

remove unnecessary check

* simplify `getRuntimeConfig`

* wording

* reindex on minor version update

* 0.2 upgrade testing

update prod compose

* acquire lock for init

* wip vector down on shutdown

* use upgrade helper

* update image tag

* refine restart check

check error message

* test reindex

testing

upstream fix

formatting

fixed reindexing

* use enum in signature

* fix tests

remove unused code

* add reindexing tests

* update to official 0.2

remove alpha from version name

* add warning test if restart required

* update test image to 0.2.0

* linting and test cleanup

* formatting

* update sql

* wording

* handle setting search path for new and existing databases

* handle new db in reindex check

* fix post-update reindexing

* get dim size

* formatting

* use vbase

* handle different db name

* update sql

* linting

* fix suggested env
  • Loading branch information
mertalev committed Feb 7, 2024
1 parent b2775c4 commit 56b0643
Show file tree
Hide file tree
Showing 25 changed files with 640 additions and 236 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ jobs:
runs-on: ubuntu-latest
services:
postgres:
image: tensorchord/pgvecto-rs:pg14-v0.1.11@sha256:0335a1a22f8c5dd1b697f14f079934f5152eaaa216c09b61e293be285491f8ee
image: tensorchord/pgvecto-rs:pg14-v0.2.0
env:
POSTGRES_PASSWORD: postgres
POSTGRES_USER: postgres
Expand Down
2 changes: 1 addition & 1 deletion cli/test/e2e/setup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export default async () => {

if (process.env.DB_HOSTNAME === undefined) {
// DB hostname not set which likely means we're not running e2e through docker compose. Start a local postgres container.
const pg = await new PostgreSqlContainer('tensorchord/pgvecto-rs:pg14-v0.1.11')
const pg = await new PostgreSqlContainer('tensorchord/pgvecto-rs:pg14-v0.2.0')
.withExposedPorts(5432)
.withDatabase('immich')
.withUsername('postgres')
Expand Down
2 changes: 1 addition & 1 deletion docker/docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ services:

database:
container_name: immich_postgres
image: tensorchord/pgvecto-rs:pg14-v0.1.11@sha256:0335a1a22f8c5dd1b697f14f079934f5152eaaa216c09b61e293be285491f8ee
image: tensorchord/pgvecto-rs:pg14-v0.2.0
env_file:
- .env
environment:
Expand Down
5 changes: 3 additions & 2 deletions docker/docker-compose.prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ services:

database:
container_name: immich_postgres
image: tensorchord/pgvecto-rs:pg14-v0.1.11@sha256:0335a1a22f8c5dd1b697f14f079934f5152eaaa216c09b61e293be285491f8ee
image: tensorchord/pgvecto-rs:pg14-v0.2.0
env_file:
- .env
environment:
Expand All @@ -70,7 +70,8 @@ services:
POSTGRES_DB: ${DB_DATABASE_NAME}
volumes:
- ${UPLOAD_LOCATION}/postgres:/var/lib/postgresql/data
restart: always
ports:
- 5432:5432

volumes:
model-cache:
2 changes: 1 addition & 1 deletion docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ services:

database:
container_name: immich_postgres
image: tensorchord/pgvecto-rs:pg14-v0.1.11@sha256:0335a1a22f8c5dd1b697f14f079934f5152eaaa216c09b61e293be285491f8ee
image: tensorchord/pgvecto-rs:pg14-v0.2.0
env_file:
- .env
environment:
Expand Down
2 changes: 1 addition & 1 deletion server/e2e/api/setup.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { PostgreSqlContainer } from '@testcontainers/postgresql';

export default async () => {
const pg = await new PostgreSqlContainer('tensorchord/pgvecto-rs:pg14-v0.1.11')
const pg = await new PostgreSqlContainer('tensorchord/pgvecto-rs:pg14-v0.2.0')
.withDatabase('immich')
.withUsername('postgres')
.withPassword('postgres')
Expand Down
2 changes: 1 addition & 1 deletion server/e2e/docker-compose.server-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ services:
- database

database:
image: tensorchord/pgvecto-rs:pg14-v0.1.11@sha256:0335a1a22f8c5dd1b697f14f079934f5152eaaa216c09b61e293be285491f8ee
image: tensorchord/pgvecto-rs:pg14-v0.2.0
command: -c fsync=off -c shared_preload_libraries=vectors.so
environment:
POSTGRES_PASSWORD: postgres
Expand Down
2 changes: 1 addition & 1 deletion server/e2e/jobs/setup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export default async () => {

if (process.env.DB_HOSTNAME === undefined) {
// DB hostname not set which likely means we're not running e2e through docker compose. Start a local postgres container.
const pg = await new PostgreSqlContainer('tensorchord/pgvecto-rs:pg14-v0.1.11')
const pg = await new PostgreSqlContainer('tensorchord/pgvecto-rs:pg14-v0.2.0')
.withExposedPorts(5432)
.withDatabase('immich')
.withUsername('postgres')
Expand Down
228 changes: 151 additions & 77 deletions server/src/domain/database/database.service.spec.ts
Original file line number Diff line number Diff line change
@@ -1,154 +1,228 @@
import { DatabaseExtension, DatabaseService, IDatabaseRepository, Version } from '@app/domain';
import {
DatabaseExtension,
DatabaseService,
IDatabaseRepository,
VectorIndex,
Version,
VersionType,
} from '@app/domain';
import { ImmichLogger } from '@app/infra/logger';
import { newDatabaseRepositoryMock } from '@test';

describe(DatabaseService.name, () => {
let sut: DatabaseService;
let databaseMock: jest.Mocked<IDatabaseRepository>;
let fatalLog: jest.SpyInstance;

beforeEach(async () => {
databaseMock = newDatabaseRepositoryMock();
fatalLog = jest.spyOn(ImmichLogger.prototype, 'fatal');

sut = new DatabaseService(databaseMock);

sut.minVectorsVersion = new Version(0, 1, 1);
sut.maxVectorsVersion = new Version(0, 1, 11);
});

afterEach(() => {
fatalLog.mockRestore();
});

it('should work', () => {
expect(sut).toBeDefined();
});

describe('init', () => {
it('should resolve successfully if minimum supported PostgreSQL and vectors version are installed', async () => {
describe.each([
[{ vectorExt: DatabaseExtension.VECTORS, extName: 'pgvecto.rs', minVersion: new Version(0, 1, 1) }],
[{ vectorExt: DatabaseExtension.VECTOR, extName: 'pgvector', minVersion: new Version(0, 5, 0) }],
] as const)('init', ({ vectorExt, extName, minVersion }) => {
let fatalLog: jest.SpyInstance;
let errorLog: jest.SpyInstance;
let warnLog: jest.SpyInstance;

beforeEach(async () => {
fatalLog = jest.spyOn(ImmichLogger.prototype, 'fatal');
errorLog = jest.spyOn(ImmichLogger.prototype, 'error');
warnLog = jest.spyOn(ImmichLogger.prototype, 'warn');
databaseMock.getPreferredVectorExtension.mockReturnValue(vectorExt);
databaseMock.getExtensionVersion.mockResolvedValue(minVersion);

sut = new DatabaseService(databaseMock);

sut.minVectorVersion = minVersion;
sut.minVectorsVersion = minVersion;
sut.vectorVersionPin = VersionType.MINOR;
sut.vectorsVersionPin = VersionType.MINOR;
});

afterEach(() => {
fatalLog.mockRestore();
warnLog.mockRestore();
});

it(`should resolve successfully if minimum supported PostgreSQL and ${extName} version are installed`, async () => {
databaseMock.getPostgresVersion.mockResolvedValueOnce(new Version(14, 0, 0));
databaseMock.getExtensionVersion.mockResolvedValueOnce(new Version(0, 1, 1));

await expect(sut.init()).resolves.toBeUndefined();

expect(databaseMock.getPostgresVersion).toHaveBeenCalledTimes(2);
expect(databaseMock.createExtension).toHaveBeenCalledWith(DatabaseExtension.VECTORS);
expect(databaseMock.getPostgresVersion).toHaveBeenCalled();
expect(databaseMock.createExtension).toHaveBeenCalledWith(vectorExt);
expect(databaseMock.createExtension).toHaveBeenCalledTimes(1);
expect(databaseMock.getExtensionVersion).toHaveBeenCalledTimes(1);
expect(databaseMock.getExtensionVersion).toHaveBeenCalled();
expect(databaseMock.runMigrations).toHaveBeenCalledTimes(1);
expect(fatalLog).not.toHaveBeenCalled();
});

it('should throw an error if PostgreSQL version is below minimum supported version', async () => {
databaseMock.getPostgresVersion.mockResolvedValueOnce(new Version(13, 0, 0));

await expect(sut.init()).rejects.toThrow(/PostgreSQL version is 13/s);
await expect(sut.init()).rejects.toThrow('PostgreSQL version is 13');

expect(databaseMock.getPostgresVersion).toHaveBeenCalledTimes(1);
});

it('should resolve successfully if minimum supported vectors version is installed', async () => {
databaseMock.getExtensionVersion.mockResolvedValueOnce(new Version(0, 1, 1));

it(`should resolve successfully if minimum supported ${extName} version is installed`, async () => {
await expect(sut.init()).resolves.toBeUndefined();

expect(databaseMock.createExtension).toHaveBeenCalledWith(DatabaseExtension.VECTORS);
expect(databaseMock.createExtension).toHaveBeenCalledWith(vectorExt);
expect(databaseMock.createExtension).toHaveBeenCalledTimes(1);
expect(databaseMock.getExtensionVersion).toHaveBeenCalledTimes(1);
expect(databaseMock.runMigrations).toHaveBeenCalledTimes(1);
expect(fatalLog).not.toHaveBeenCalled();
});

it('should resolve successfully if maximum supported vectors version is installed', async () => {
databaseMock.getExtensionVersion.mockResolvedValueOnce(new Version(0, 1, 11));
it(`should throw an error if ${extName} version is not installed even after createVectorExtension`, async () => {
databaseMock.getExtensionVersion.mockResolvedValue(null);

await expect(sut.init()).resolves.toBeUndefined();
await expect(sut.init()).rejects.toThrow(`Unexpected: ${extName} extension is not installed.`);

expect(databaseMock.createExtension).toHaveBeenCalledWith(DatabaseExtension.VECTORS);
expect(databaseMock.createExtension).toHaveBeenCalledTimes(1);
expect(databaseMock.getExtensionVersion).toHaveBeenCalledTimes(1);
expect(databaseMock.runMigrations).toHaveBeenCalledTimes(1);
expect(fatalLog).not.toHaveBeenCalled();
expect(databaseMock.runMigrations).not.toHaveBeenCalled();
});

it('should throw an error if vectors version is not installed even after createVectors', async () => {
databaseMock.getExtensionVersion.mockResolvedValueOnce(null);
it(`should throw an error if ${extName} version is below minimum supported version`, async () => {
databaseMock.getExtensionVersion.mockResolvedValue(
new Version(minVersion.major, minVersion.minor - 1, minVersion.patch),
);

await expect(sut.init()).rejects.toThrow('Unexpected: The pgvecto.rs extension is not installed.');
await expect(sut.init()).rejects.toThrow(extName);

expect(databaseMock.getExtensionVersion).toHaveBeenCalledTimes(1);
expect(databaseMock.createExtension).toHaveBeenCalledTimes(1);
expect(databaseMock.runMigrations).not.toHaveBeenCalled();
});

it('should throw an error if vectors version is below minimum supported version', async () => {
databaseMock.getExtensionVersion.mockResolvedValueOnce(new Version(0, 0, 1));
it.each([
{ type: VersionType.EQUAL, max: 'no', actual: 'patch' },
{ type: VersionType.PATCH, max: 'patch', actual: 'minor' },
{ type: VersionType.MINOR, max: 'minor', actual: 'major' },
] as const)(
`should throw an error if $max upgrade from min version is allowed and ${extName} version is $actual`,
async ({ type, actual }) => {
const version = new Version(minVersion.major, minVersion.minor, minVersion.patch);
version[actual] = minVersion[actual] + 1;
databaseMock.getExtensionVersion.mockResolvedValue(version);
if (vectorExt === DatabaseExtension.VECTOR) {
sut.minVectorVersion = minVersion;
sut.vectorVersionPin = type;
} else {
sut.minVectorsVersion = minVersion;
sut.vectorsVersionPin = type;
}

await expect(sut.init()).rejects.toThrow(extName);

expect(databaseMock.runMigrations).not.toHaveBeenCalled();
},
);

await expect(sut.init()).rejects.toThrow(/('tensorchord\/pgvecto-rs:pg14-v0.1.11')/s);
it(`should throw an error if ${extName} version is a nightly`, async () => {
databaseMock.getExtensionVersion.mockResolvedValue(new Version(0, 0, 0));

expect(databaseMock.getExtensionVersion).toHaveBeenCalledTimes(1);
await expect(sut.init()).rejects.toThrow(extName);

expect(databaseMock.createExtension).toHaveBeenCalledTimes(1);
expect(databaseMock.runMigrations).not.toHaveBeenCalled();
});

it('should throw an error if vectors version is above maximum supported version', async () => {
databaseMock.getExtensionVersion.mockResolvedValueOnce(new Version(0, 1, 12));
it(`should throw error if ${extName} extension could not be created`, async () => {
databaseMock.createExtension.mockRejectedValue(new Error('Failed to create extension'));

await expect(sut.init()).rejects.toThrow(
/('DROP EXTENSION IF EXISTS vectors').*('tensorchord\/pgvecto-rs:pg14-v0\.1\.11')/s,
);
await expect(sut.init()).rejects.toThrow('Failed to create extension');

expect(databaseMock.getExtensionVersion).toHaveBeenCalledTimes(1);
expect(fatalLog).toHaveBeenCalledTimes(1);
expect(databaseMock.createExtension).toHaveBeenCalledTimes(1);
expect(databaseMock.runMigrations).not.toHaveBeenCalled();
});

it('should throw an error if vectors version is a nightly', async () => {
databaseMock.getExtensionVersion.mockResolvedValueOnce(new Version(0, 0, 0));
it(`should update ${extName} if a newer version is available`, async () => {
const version = new Version(minVersion.major, minVersion.minor + 1, minVersion.patch);
databaseMock.getAvailableExtensionVersion.mockResolvedValue(version);

await expect(sut.init()).rejects.toThrow(
/(nightly).*('DROP EXTENSION IF EXISTS vectors').*('tensorchord\/pgvecto-rs:pg14-v0\.1\.11')/s,
);
await expect(sut.init()).resolves.toBeUndefined();

expect(databaseMock.getExtensionVersion).toHaveBeenCalledTimes(1);
expect(databaseMock.createExtension).toHaveBeenCalledTimes(1);
expect(databaseMock.runMigrations).not.toHaveBeenCalled();
expect(databaseMock.updateVectorExtension).toHaveBeenCalledWith(vectorExt, version);
expect(databaseMock.updateVectorExtension).toHaveBeenCalledTimes(1);
expect(databaseMock.runMigrations).toHaveBeenCalledTimes(1);
expect(fatalLog).not.toHaveBeenCalled();
});

it('should throw error if vectors extension could not be created', async () => {
databaseMock.createExtension.mockRejectedValueOnce(new Error('Failed to create extension'));
it(`should not update ${extName} if a newer version is higher than the maximum`, async () => {
const version = new Version(minVersion.major + 1, minVersion.minor, minVersion.patch);
databaseMock.getAvailableExtensionVersion.mockResolvedValue(version);

await expect(sut.init()).rejects.toThrow('Failed to create extension');
await expect(sut.init()).resolves.toBeUndefined();

expect(fatalLog).toHaveBeenCalledTimes(1);
expect(fatalLog.mock.calls[0][0]).toMatch(/('tensorchord\/pgvecto-rs:pg14-v0\.1\.11').*(v1\.91\.0)/s);
expect(databaseMock.createExtension).toHaveBeenCalledTimes(1);
expect(databaseMock.runMigrations).not.toHaveBeenCalled();
expect(databaseMock.updateVectorExtension).not.toHaveBeenCalled();
expect(databaseMock.runMigrations).toHaveBeenCalledTimes(1);
expect(fatalLog).not.toHaveBeenCalled();
});

it.each([{ major: 14 }, { major: 15 }, { major: 16 }])(
`should suggest image with postgres $major if database is $major`,
async ({ major }) => {
databaseMock.getExtensionVersion.mockResolvedValue(new Version(0, 0, 1));
databaseMock.getPostgresVersion.mockResolvedValue(new Version(major, 0, 0));

await expect(sut.init()).rejects.toThrow(new RegExp(`tensorchord\/pgvecto-rs:pg${major}-v0\\.1\\.11`, 's'));
},
);
it(`should warn if attempted to update ${extName} and failed`, async () => {
const version = new Version(minVersion.major, minVersion.minor, minVersion.patch + 1);
databaseMock.getAvailableExtensionVersion.mockResolvedValue(version);
databaseMock.updateVectorExtension.mockRejectedValue(new Error('Failed to update extension'));

it('should not suggest image if postgres version is not in 14, 15 or 16', async () => {
databaseMock.getPostgresVersion.mockResolvedValueOnce(new Version(17, 0, 0));
databaseMock.getPostgresVersion.mockResolvedValueOnce(new Version(17, 0, 0));
await expect(sut.init()).resolves.toBeUndefined();

await expect(sut.init()).rejects.toThrow(/^(?:(?!tensorchord\/pgvecto-rs).)*$/s);
expect(warnLog).toHaveBeenCalledTimes(1);
expect(warnLog.mock.calls[0][0]).toContain(extName);
expect(errorLog).toHaveBeenCalledTimes(1);
expect(fatalLog).not.toHaveBeenCalled();
expect(databaseMock.updateVectorExtension).toHaveBeenCalledWith(vectorExt, version);
expect(databaseMock.runMigrations).toHaveBeenCalledTimes(1);
});

it('should reject and suggest the maximum supported version when unsupported pgvecto.rs version is in use', async () => {
databaseMock.getExtensionVersion.mockResolvedValue(new Version(0, 0, 1));
it(`should warn if ${extName} update requires restart`, async () => {
const version = new Version(minVersion.major, minVersion.minor, minVersion.patch + 1);
databaseMock.getAvailableExtensionVersion.mockResolvedValue(version);
databaseMock.updateVectorExtension.mockResolvedValue({ restartRequired: true });

await expect(sut.init()).rejects.toThrow(/('tensorchord\/pgvecto-rs:pg14-v0\.1\.11')/s);
await expect(sut.init()).resolves.toBeUndefined();

sut.maxVectorsVersion = new Version(0, 1, 12);
await expect(sut.init()).rejects.toThrow(/('tensorchord\/pgvecto-rs:pg14-v0\.1\.12')/s);
expect(warnLog).toHaveBeenCalledTimes(1);
expect(warnLog.mock.calls[0][0]).toContain(extName);
expect(databaseMock.updateVectorExtension).toHaveBeenCalledWith(vectorExt, version);
expect(databaseMock.runMigrations).toHaveBeenCalledTimes(1);
expect(fatalLog).not.toHaveBeenCalled();
});

it.each([{ index: VectorIndex.CLIP }, { index: VectorIndex.FACE }])(
`should reindex $index if necessary`,
async ({ index }) => {
databaseMock.shouldReindex.mockImplementation((indexArg) => Promise.resolve(indexArg === index));

await expect(sut.init()).resolves.toBeUndefined();

expect(databaseMock.shouldReindex).toHaveBeenCalledWith(index);
expect(databaseMock.shouldReindex).toHaveBeenCalledTimes(2);
expect(databaseMock.reindex).toHaveBeenCalledWith(index);
expect(databaseMock.reindex).toHaveBeenCalledTimes(1);
expect(databaseMock.runMigrations).toHaveBeenCalledTimes(1);
expect(fatalLog).not.toHaveBeenCalled();
},
);

it.each([{ index: VectorIndex.CLIP }, { index: VectorIndex.FACE }])(
`should not reindex $index if not necessary`,
async () => {
databaseMock.shouldReindex.mockResolvedValue(false);

await expect(sut.init()).resolves.toBeUndefined();

expect(databaseMock.shouldReindex).toHaveBeenCalledTimes(2);
expect(databaseMock.reindex).not.toHaveBeenCalled();
expect(databaseMock.runMigrations).toHaveBeenCalledTimes(1);
expect(fatalLog).not.toHaveBeenCalled();
},
);
});
});

0 comments on commit 56b0643

Please sign in to comment.