From d8313378798ac7635191457088b879d5c32bdd08 Mon Sep 17 00:00:00 2001 From: Jake Jarvis Date: Sat, 15 Nov 2025 18:39:19 -0500 Subject: [PATCH 01/11] feat: add favicons and screenshots tables with caching and persistence logic - Introduced new SQL tables for favicons and screenshots, including necessary constraints and indexes. - Implemented caching strategy for favicons and screenshots using Redis and Postgres, allowing for efficient retrieval and storage. - Added functions to upsert and fetch favicons and screenshots by domain ID, ensuring data integrity and expiration handling. - Enhanced service logic to persist generated assets to the database after creation. --- drizzle/0001_perpetual_wallow.sql | 29 + drizzle/meta/0001_snapshot.json | 1361 ++++++++++++++++++++++++++++ drizzle/meta/_journal.json | 7 + lib/cache.test.ts | 135 +++ lib/cache.ts | 84 +- lib/db/pglite.ts | 4 + lib/db/repos/domain-helpers.ts | 28 + lib/db/repos/favicons.test.ts | 181 ++++ lib/db/repos/favicons.ts | 29 + lib/db/repos/screenshots.test.ts | 176 ++++ lib/db/repos/screenshots.ts | 31 + lib/db/schema.ts | 39 + lib/db/ttl.ts | 10 + lib/db/zod.ts | 12 + server/services/favicon.test.ts | 23 + server/services/favicon.ts | 50 +- server/services/screenshot.test.ts | 5 + server/services/screenshot.ts | 52 +- 18 files changed, 2247 insertions(+), 9 deletions(-) create mode 100644 drizzle/0001_perpetual_wallow.sql create mode 100644 drizzle/meta/0001_snapshot.json create mode 100644 lib/db/repos/domain-helpers.ts create mode 100644 lib/db/repos/favicons.test.ts create mode 100644 lib/db/repos/favicons.ts create mode 100644 lib/db/repos/screenshots.test.ts create mode 100644 lib/db/repos/screenshots.ts diff --git a/drizzle/0001_perpetual_wallow.sql b/drizzle/0001_perpetual_wallow.sql new file mode 100644 index 00000000..e3d942c5 --- /dev/null +++ b/drizzle/0001_perpetual_wallow.sql @@ -0,0 +1,29 @@ +CREATE TABLE "favicons" ( + "domain_id" uuid PRIMARY KEY NOT NULL, + "url" text, + "pathname" text, + "size" integer NOT NULL, + "source" text, + "not_found" boolean DEFAULT false NOT NULL, + "upstream_status" integer, + "upstream_content_type" text, + "fetched_at" timestamp with time zone NOT NULL, + "expires_at" timestamp with time zone NOT NULL +); +--> statement-breakpoint +CREATE TABLE "screenshots" ( + "domain_id" uuid PRIMARY KEY NOT NULL, + "url" text, + "pathname" text, + "width" integer NOT NULL, + "height" integer NOT NULL, + "source" text, + "not_found" boolean DEFAULT false NOT NULL, + "fetched_at" timestamp with time zone NOT NULL, + "expires_at" timestamp with time zone NOT NULL +); +--> statement-breakpoint +ALTER TABLE "favicons" ADD CONSTRAINT "favicons_domain_id_domains_id_fk" FOREIGN KEY ("domain_id") REFERENCES "public"."domains"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint +ALTER TABLE "screenshots" ADD CONSTRAINT "screenshots_domain_id_domains_id_fk" FOREIGN KEY ("domain_id") REFERENCES "public"."domains"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint +CREATE INDEX "i_favicons_expires" ON "favicons" USING btree ("expires_at");--> statement-breakpoint +CREATE INDEX "i_screenshots_expires" ON "screenshots" USING btree ("expires_at"); \ No newline at end of file diff --git a/drizzle/meta/0001_snapshot.json b/drizzle/meta/0001_snapshot.json new file mode 100644 index 00000000..1cdd0bd6 --- /dev/null +++ b/drizzle/meta/0001_snapshot.json @@ -0,0 +1,1361 @@ +{ + "id": "3b7362f6-a42e-44e4-a133-b64aa95cc140", + "prevId": "69e923f5-30d7-4e8c-b018-56b429c7534b", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.certificates": { + "name": "certificates", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "domain_id": { + "name": "domain_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "issuer": { + "name": "issuer", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "subject": { + "name": "subject", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "alt_names": { + "name": "alt_names", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'[]'::jsonb" + }, + "valid_from": { + "name": "valid_from", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "valid_to": { + "name": "valid_to", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "ca_provider_id": { + "name": "ca_provider_id", + "type": "uuid", + "primaryKey": false, + "notNull": false + }, + "fetched_at": { + "name": "fetched_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "i_certs_domain": { + "name": "i_certs_domain", + "columns": [ + { + "expression": "domain_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "i_certs_valid_to": { + "name": "i_certs_valid_to", + "columns": [ + { + "expression": "valid_to", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "i_certs_expires": { + "name": "i_certs_expires", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "certificates_domain_id_domains_id_fk": { + "name": "certificates_domain_id_domains_id_fk", + "tableFrom": "certificates", + "tableTo": "domains", + "columnsFrom": [ + "domain_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "certificates_ca_provider_id_providers_id_fk": { + "name": "certificates_ca_provider_id_providers_id_fk", + "tableFrom": "certificates", + "tableTo": "providers", + "columnsFrom": [ + "ca_provider_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": { + "ck_cert_valid_window": { + "name": "ck_cert_valid_window", + "value": "\"certificates\".\"valid_to\" >= \"certificates\".\"valid_from\"" + } + }, + "isRLSEnabled": false + }, + "public.dns_records": { + "name": "dns_records", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "domain_id": { + "name": "domain_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "type": { + "name": "type", + "type": "dns_record_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "value": { + "name": "value", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "ttl": { + "name": "ttl", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "priority": { + "name": "priority", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "is_cloudflare": { + "name": "is_cloudflare", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "resolver": { + "name": "resolver", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "fetched_at": { + "name": "fetched_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "i_dns_domain_type": { + "name": "i_dns_domain_type", + "columns": [ + { + "expression": "domain_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "i_dns_type_value": { + "name": "i_dns_type_value", + "columns": [ + { + "expression": "type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "value", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "i_dns_expires": { + "name": "i_dns_expires", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "dns_records_domain_id_domains_id_fk": { + "name": "dns_records_domain_id_domains_id_fk", + "tableFrom": "dns_records", + "tableTo": "domains", + "columnsFrom": [ + "domain_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "u_dns_record": { + "name": "u_dns_record", + "nullsNotDistinct": false, + "columns": [ + "domain_id", + "type", + "name", + "value", + "priority" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.domains": { + "name": "domains", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "tld": { + "name": "tld", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "unicode_name": { + "name": "unicode_name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "last_accessed_at": { + "name": "last_accessed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "i_domains_tld": { + "name": "i_domains_tld", + "columns": [ + { + "expression": "tld", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "i_domains_last_accessed": { + "name": "i_domains_last_accessed", + "columns": [ + { + "expression": "last_accessed_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "u_domains_name": { + "name": "u_domains_name", + "nullsNotDistinct": false, + "columns": [ + "name" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.favicons": { + "name": "favicons", + "schema": "", + "columns": { + "domain_id": { + "name": "domain_id", + "type": "uuid", + "primaryKey": true, + "notNull": true + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "pathname": { + "name": "pathname", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "size": { + "name": "size", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "source": { + "name": "source", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "not_found": { + "name": "not_found", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "upstream_status": { + "name": "upstream_status", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "upstream_content_type": { + "name": "upstream_content_type", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "fetched_at": { + "name": "fetched_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "i_favicons_expires": { + "name": "i_favicons_expires", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "favicons_domain_id_domains_id_fk": { + "name": "favicons_domain_id_domains_id_fk", + "tableFrom": "favicons", + "tableTo": "domains", + "columnsFrom": [ + "domain_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.hosting": { + "name": "hosting", + "schema": "", + "columns": { + "domain_id": { + "name": "domain_id", + "type": "uuid", + "primaryKey": true, + "notNull": true + }, + "hosting_provider_id": { + "name": "hosting_provider_id", + "type": "uuid", + "primaryKey": false, + "notNull": false + }, + "email_provider_id": { + "name": "email_provider_id", + "type": "uuid", + "primaryKey": false, + "notNull": false + }, + "dns_provider_id": { + "name": "dns_provider_id", + "type": "uuid", + "primaryKey": false, + "notNull": false + }, + "geo_city": { + "name": "geo_city", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "geo_region": { + "name": "geo_region", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "geo_country": { + "name": "geo_country", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "geo_country_code": { + "name": "geo_country_code", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "geo_lat": { + "name": "geo_lat", + "type": "double precision", + "primaryKey": false, + "notNull": false + }, + "geo_lon": { + "name": "geo_lon", + "type": "double precision", + "primaryKey": false, + "notNull": false + }, + "fetched_at": { + "name": "fetched_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "i_hosting_providers": { + "name": "i_hosting_providers", + "columns": [ + { + "expression": "hosting_provider_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "email_provider_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "dns_provider_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "hosting_domain_id_domains_id_fk": { + "name": "hosting_domain_id_domains_id_fk", + "tableFrom": "hosting", + "tableTo": "domains", + "columnsFrom": [ + "domain_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "hosting_hosting_provider_id_providers_id_fk": { + "name": "hosting_hosting_provider_id_providers_id_fk", + "tableFrom": "hosting", + "tableTo": "providers", + "columnsFrom": [ + "hosting_provider_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "hosting_email_provider_id_providers_id_fk": { + "name": "hosting_email_provider_id_providers_id_fk", + "tableFrom": "hosting", + "tableTo": "providers", + "columnsFrom": [ + "email_provider_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "hosting_dns_provider_id_providers_id_fk": { + "name": "hosting_dns_provider_id_providers_id_fk", + "tableFrom": "hosting", + "tableTo": "providers", + "columnsFrom": [ + "dns_provider_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.http_headers": { + "name": "http_headers", + "schema": "", + "columns": { + "domain_id": { + "name": "domain_id", + "type": "uuid", + "primaryKey": true, + "notNull": true + }, + "headers": { + "name": "headers", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'[]'::jsonb" + }, + "status": { + "name": "status", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 200 + }, + "fetched_at": { + "name": "fetched_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": { + "http_headers_domain_id_domains_id_fk": { + "name": "http_headers_domain_id_domains_id_fk", + "tableFrom": "http_headers", + "tableTo": "domains", + "columnsFrom": [ + "domain_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.providers": { + "name": "providers", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "category": { + "name": "category", + "type": "provider_category", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "domain": { + "name": "domain", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "source": { + "name": "source", + "type": "provider_source", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'discovered'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "i_providers_name_lower": { + "name": "i_providers_name_lower", + "columns": [ + { + "expression": "category", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "lower(\"name\")", + "asc": true, + "isExpression": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "u_providers_category_slug": { + "name": "u_providers_category_slug", + "nullsNotDistinct": false, + "columns": [ + "category", + "slug" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.registrations": { + "name": "registrations", + "schema": "", + "columns": { + "domain_id": { + "name": "domain_id", + "type": "uuid", + "primaryKey": true, + "notNull": true + }, + "is_registered": { + "name": "is_registered", + "type": "boolean", + "primaryKey": false, + "notNull": true + }, + "privacy_enabled": { + "name": "privacy_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "registry": { + "name": "registry", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "creation_date": { + "name": "creation_date", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "updated_date": { + "name": "updated_date", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "expiration_date": { + "name": "expiration_date", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "deletion_date": { + "name": "deletion_date", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "transfer_lock": { + "name": "transfer_lock", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "statuses": { + "name": "statuses", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'[]'::jsonb" + }, + "contacts": { + "name": "contacts", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'[]'::jsonb" + }, + "nameservers": { + "name": "nameservers", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'[]'::jsonb" + }, + "whois_server": { + "name": "whois_server", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "rdap_servers": { + "name": "rdap_servers", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'[]'::jsonb" + }, + "source": { + "name": "source", + "type": "registration_source", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "registrar_provider_id": { + "name": "registrar_provider_id", + "type": "uuid", + "primaryKey": false, + "notNull": false + }, + "reseller_provider_id": { + "name": "reseller_provider_id", + "type": "uuid", + "primaryKey": false, + "notNull": false + }, + "fetched_at": { + "name": "fetched_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "i_reg_registrar": { + "name": "i_reg_registrar", + "columns": [ + { + "expression": "registrar_provider_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "i_reg_expires": { + "name": "i_reg_expires", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "registrations_domain_id_domains_id_fk": { + "name": "registrations_domain_id_domains_id_fk", + "tableFrom": "registrations", + "tableTo": "domains", + "columnsFrom": [ + "domain_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "registrations_registrar_provider_id_providers_id_fk": { + "name": "registrations_registrar_provider_id_providers_id_fk", + "tableFrom": "registrations", + "tableTo": "providers", + "columnsFrom": [ + "registrar_provider_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "registrations_reseller_provider_id_providers_id_fk": { + "name": "registrations_reseller_provider_id_providers_id_fk", + "tableFrom": "registrations", + "tableTo": "providers", + "columnsFrom": [ + "reseller_provider_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.screenshots": { + "name": "screenshots", + "schema": "", + "columns": { + "domain_id": { + "name": "domain_id", + "type": "uuid", + "primaryKey": true, + "notNull": true + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "pathname": { + "name": "pathname", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "width": { + "name": "width", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "height": { + "name": "height", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "source": { + "name": "source", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "not_found": { + "name": "not_found", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "fetched_at": { + "name": "fetched_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "i_screenshots_expires": { + "name": "i_screenshots_expires", + "columns": [ + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "screenshots_domain_id_domains_id_fk": { + "name": "screenshots_domain_id_domains_id_fk", + "tableFrom": "screenshots", + "tableTo": "domains", + "columnsFrom": [ + "domain_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.seo": { + "name": "seo", + "schema": "", + "columns": { + "domain_id": { + "name": "domain_id", + "type": "uuid", + "primaryKey": true, + "notNull": true + }, + "source_final_url": { + "name": "source_final_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "source_status": { + "name": "source_status", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "meta_open_graph": { + "name": "meta_open_graph", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'{}'::jsonb" + }, + "meta_twitter": { + "name": "meta_twitter", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'{}'::jsonb" + }, + "meta_general": { + "name": "meta_general", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'{}'::jsonb" + }, + "preview_title": { + "name": "preview_title", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "preview_description": { + "name": "preview_description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "preview_image_url": { + "name": "preview_image_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "preview_image_uploaded_url": { + "name": "preview_image_uploaded_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "canonical_url": { + "name": "canonical_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "robots": { + "name": "robots", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'{}'::jsonb" + }, + "robots_sitemaps": { + "name": "robots_sitemaps", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'[]'::jsonb" + }, + "errors": { + "name": "errors", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'[]'::jsonb" + }, + "fetched_at": { + "name": "fetched_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "i_seo_src_final_url": { + "name": "i_seo_src_final_url", + "columns": [ + { + "expression": "source_final_url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "i_seo_canonical": { + "name": "i_seo_canonical", + "columns": [ + { + "expression": "canonical_url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "seo_domain_id_domains_id_fk": { + "name": "seo_domain_id_domains_id_fk", + "tableFrom": "seo", + "tableTo": "domains", + "columnsFrom": [ + "domain_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": { + "public.dns_record_type": { + "name": "dns_record_type", + "schema": "public", + "values": [ + "A", + "AAAA", + "MX", + "TXT", + "NS" + ] + }, + "public.provider_category": { + "name": "provider_category", + "schema": "public", + "values": [ + "hosting", + "email", + "dns", + "ca", + "registrar" + ] + }, + "public.provider_source": { + "name": "provider_source", + "schema": "public", + "values": [ + "catalog", + "discovered" + ] + }, + "public.registration_source": { + "name": "registration_source", + "schema": "public", + "values": [ + "rdap", + "whois" + ] + } + }, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} \ No newline at end of file diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json index 503a23f7..3fb0d4e2 100644 --- a/drizzle/meta/_journal.json +++ b/drizzle/meta/_journal.json @@ -8,6 +8,13 @@ "when": 1763215417831, "tag": "0000_cute_pandemic", "breakpoints": true + }, + { + "idx": 1, + "version": "7", + "when": 1763249162040, + "tag": "0001_perpetual_wallow", + "breakpoints": true } ] } \ No newline at end of file diff --git a/lib/cache.test.ts b/lib/cache.test.ts index f2819257..cd2eb799 100644 --- a/lib/cache.test.ts +++ b/lib/cache.test.ts @@ -162,4 +162,139 @@ describe("cached assets", () => { expect(stored?.url).toBe(null); expect(stored?.notFound).toBe(true); }); + + it("checks DB cache (L2) on Redis miss", async () => { + const indexKey = ns("test", "asset7"); + const dbUrl = "https://db/cached.webp"; + + let fetchFromDbCalled = false; + let produceCalled = false; + + const result = await getOrCreateCachedAsset<{ source: string }>({ + indexKey, + ttlSeconds: 60, + fetchFromDb: async () => { + fetchFromDbCalled = true; + return { url: dbUrl, key: "db-key" }; + }, + produceAndUpload: async () => { + produceCalled = true; + return { url: "https://cdn/produced.webp" }; + }, + }); + + expect(result).toEqual({ url: dbUrl }); + expect(fetchFromDbCalled).toBe(true); + expect(produceCalled).toBe(false); + + // DB result should be cached in Redis + await new Promise((resolve) => setTimeout(resolve, 50)); + const { redis } = await import("@/lib/redis"); + const stored = (await redis.get(indexKey)) as { url?: string } | null; + expect(stored?.url).toBe(dbUrl); + }); + + it("generates asset when both Redis and DB miss", async () => { + const indexKey = ns("test", "asset8"); + + let fetchFromDbCalled = false; + let produceCalled = false; + let persistToDbCalled = false; + + const result = await getOrCreateCachedAsset<{ source: string }>({ + indexKey, + ttlSeconds: 60, + fetchFromDb: async () => { + fetchFromDbCalled = true; + return null; // DB miss + }, + produceAndUpload: async () => { + produceCalled = true; + return { url: "https://cdn/fresh.webp", key: "fresh-key" }; + }, + persistToDb: async (res) => { + persistToDbCalled = true; + expect(res.url).toBe("https://cdn/fresh.webp"); + }, + }); + + expect(result).toEqual({ url: "https://cdn/fresh.webp" }); + expect(fetchFromDbCalled).toBe(true); + expect(produceCalled).toBe(true); + + // Wait for fire-and-forget to complete + await new Promise((resolve) => setTimeout(resolve, 50)); + expect(persistToDbCalled).toBe(true); + }); + + it("skips DB callbacks when not provided", async () => { + const indexKey = ns("test", "asset9"); + + const result = await getOrCreateCachedAsset<{ source: string }>({ + indexKey, + ttlSeconds: 60, + produceAndUpload: async () => ({ + url: "https://cdn/no-db.webp", + key: "no-db", + }), + // fetchFromDb and persistToDb omitted + }); + + expect(result).toEqual({ url: "https://cdn/no-db.webp" }); + }); + + it("handles DB fetch errors gracefully", async () => { + const indexKey = ns("test", "asset10"); + + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + + const result = await getOrCreateCachedAsset<{ source: string }>({ + indexKey, + ttlSeconds: 60, + fetchFromDb: async () => { + throw new Error("DB connection failed"); + }, + produceAndUpload: async () => ({ + url: "https://cdn/fallback.webp", + }), + }); + + expect(result).toEqual({ url: "https://cdn/fallback.webp" }); + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining("[cache] db read failed"), + expect.any(Error), + ); + + warnSpy.mockRestore(); + }); + + it("handles DB persist errors gracefully", async () => { + const indexKey = ns("test", "asset11"); + + const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + + const result = await getOrCreateCachedAsset<{ source: string }>({ + indexKey, + ttlSeconds: 60, + produceAndUpload: async () => ({ + url: "https://cdn/persist-fail.webp", + }), + persistToDb: async () => { + throw new Error("DB write failed"); + }, + }); + + expect(result).toEqual({ url: "https://cdn/persist-fail.webp" }); + + // Wait for fire-and-forget to complete + await new Promise((resolve) => setTimeout(resolve, 50)); + + expect(errorSpy).toHaveBeenCalledWith( + expect.stringContaining("[cache] db persist error"), + expect.any(Object), + expect.any(Error), + ); + + errorSpy.mockRestore(); + }); }); diff --git a/lib/cache.ts b/lib/cache.ts index a8fc0d56..cbb5d8ed 100644 --- a/lib/cache.ts +++ b/lib/cache.ts @@ -19,11 +19,34 @@ type CachedAssetOptions> = { notFound?: boolean; // true if asset permanently doesn't exist (don't retry) metrics?: TProduceMeta; }>; + /** + * Optional: Fetch cached asset from database (L2 cache) + */ + fetchFromDb?: () => Promise<{ + url: string | null; + key?: string; + notFound?: boolean; + } | null>; + /** + * Optional: Persist generated asset to database + */ + persistToDb?: (result: { + url: string | null; + key?: string; + notFound?: boolean; + }) => Promise; }; /** * Get or create a cached asset (favicon, screenshot, social preview). * + * Caching strategy with optional DB persistence: + * 1. Check Redis (L1 cache) - fastest + * 2. Check Postgres (L2 cache) if fetchFromDb provided - persistent + * 3. Generate asset if both miss + * 4. Persist to Postgres if persistToDb provided + * 5. Cache in Redis for next time + * * Uses simple fail-open caching without distributed locks. If multiple requests * race to generate the same asset, they will all generate it and cache it. * This is acceptable because: @@ -39,9 +62,11 @@ export async function getOrCreateCachedAsset>( indexKey, ttlSeconds = 604800, // 7 days default produceAndUpload, + fetchFromDb, + persistToDb, } = options; - // 1) Check cache first + // 1) Check Redis cache first (L1) try { const raw = (await redis.get(indexKey)) as { url?: unknown; @@ -67,12 +92,65 @@ export async function getOrCreateCachedAsset>( ); } - // 2) Generate asset + // 2) Check Postgres cache if available (L2) + if (fetchFromDb) { + try { + const dbResult = await fetchFromDb(); + if (dbResult) { + // Found in DB, cache it in Redis for next time (fire-and-forget) + const expiresAtMs = Date.now() + ttlSeconds * 1000; + redis + .set( + indexKey, + { + url: dbResult.url, + key: dbResult.key, + notFound: dbResult.notFound ?? undefined, + expiresAtMs, + }, + { ex: ttlSeconds }, + ) + .catch((err) => { + console.error( + "[cache] redis write from db failed", + { indexKey }, + err instanceof Error ? err : new Error(String(err)), + ); + }); + + console.debug(`[cache] db hit ${indexKey}`); + return { url: dbResult.url }; + } + } catch (err) { + // DB failures should not break the flow; log and fall through to generation + console.warn( + `[cache] db read failed ${indexKey}`, + err instanceof Error ? err : new Error(String(err)), + ); + } + } + + // 3) Generate asset (both caches missed or failed) try { const produced = await produceAndUpload(); const expiresAtMs = Date.now() + ttlSeconds * 1000; - // Cache for next time (fire-and-forget) + // 4) Persist to Postgres if callback provided (fire-and-forget) + if (persistToDb) { + persistToDb({ + url: produced.url, + key: produced.key, + notFound: produced.notFound, + }).catch((err) => { + console.error( + "[cache] db persist error", + { indexKey }, + err instanceof Error ? err : new Error(String(err)), + ); + }); + } + + // 5) Cache in Redis for next time (fire-and-forget) redis .set( indexKey, diff --git a/lib/db/pglite.ts b/lib/db/pglite.ts index 6672f6e4..69bd8906 100644 --- a/lib/db/pglite.ts +++ b/lib/db/pglite.ts @@ -56,6 +56,8 @@ export async function resetPGliteDb(): Promise { registrations, hosting, seo, + favicons, + screenshots, providers, domains, } = schema; @@ -65,6 +67,8 @@ export async function resetPGliteDb(): Promise { await db.delete(registrations); await db.delete(hosting); await db.delete(seo); + await db.delete(favicons); + await db.delete(screenshots); await db.delete(providers); await db.delete(domains); } diff --git a/lib/db/repos/domain-helpers.ts b/lib/db/repos/domain-helpers.ts new file mode 100644 index 00000000..62af1ba4 --- /dev/null +++ b/lib/db/repos/domain-helpers.ts @@ -0,0 +1,28 @@ +import "server-only"; +import { getDomainTld } from "rdapper"; +import { upsertDomain } from "@/lib/db/repos/domains"; + +/** + * Parse domain name and ensure a domain record exists in Postgres. + * This is used by services that need to persist data for a domain (favicon, screenshot, etc.) + * even when a full domain report hasn't been requested. + * + * @param domain - The domain name (should already be normalized/registrable) + * @returns The domain record with its ID + */ +export async function ensureDomainRecord(domain: string) { + const tld = getDomainTld(domain) ?? ""; + + // For unicode handling, we'd need to use toUnicode from node:url or a library, + // but for now we'll use the ASCII version as the unicode name if they match + // This is safe because rdapper already normalizes to ASCII/punycode when needed + const unicodeName = domain; + + const domainRecord = await upsertDomain({ + name: domain, + tld, + unicodeName, + }); + + return domainRecord; +} diff --git a/lib/db/repos/favicons.test.ts b/lib/db/repos/favicons.test.ts new file mode 100644 index 00000000..41f3baac --- /dev/null +++ b/lib/db/repos/favicons.test.ts @@ -0,0 +1,181 @@ +/* @vitest-environment node */ +import { + afterAll, + beforeAll, + beforeEach, + describe, + expect, + it, + vi, +} from "vitest"; + +// Mock the DB client before importing anything else +vi.mock("@/lib/db/client", async () => { + const { makePGliteDb } = await import("@/lib/db/pglite"); + const { db } = await makePGliteDb(); + return { db }; +}); + +import { db } from "@/lib/db/client"; +import { favicons } from "@/lib/db/schema"; +import { ttlForFavicon } from "@/lib/db/ttl"; +import { getFaviconByDomainId, upsertFavicon } from "./favicons"; + +let testDomainId: string; + +beforeAll(async () => { + // Create a test domain + const { upsertDomain } = await import("./domains"); + const domain = await upsertDomain({ + name: "test-favicon.com", + tld: "com", + unicodeName: "test-favicon.com", + }); + testDomainId = domain.id; +}); + +afterAll(async () => { + // PGlite cleanup handled automatically +}); + +beforeEach(async () => { + // Clear favicons table before each test + await db.delete(favicons); +}); + +describe("upsertFavicon", () => { + it("inserts a new favicon record", async () => { + const now = new Date(); + const expiresAt = ttlForFavicon(now); + + await upsertFavicon({ + domainId: testDomainId, + url: "https://example.com/favicon.webp", + pathname: "abc123/32x32.webp", + size: 32, + source: "duckduckgo", + notFound: false, + upstreamStatus: 200, + upstreamContentType: "image/x-icon", + fetchedAt: now, + expiresAt, + }); + + const rows = await db.select().from(favicons); + expect(rows).toHaveLength(1); + expect(rows[0]?.url).toBe("https://example.com/favicon.webp"); + expect(rows[0]?.source).toBe("duckduckgo"); + }); + + it("updates an existing favicon record", async () => { + const now = new Date(); + const expiresAt = ttlForFavicon(now); + + // Insert first + await upsertFavicon({ + domainId: testDomainId, + url: "https://example.com/favicon-old.webp", + pathname: "old123/32x32.webp", + size: 32, + source: "google", + notFound: false, + upstreamStatus: 200, + upstreamContentType: "image/x-icon", + fetchedAt: now, + expiresAt, + }); + + // Update with new data + const laterDate = new Date(now.getTime() + 1000); + await upsertFavicon({ + domainId: testDomainId, + url: "https://example.com/favicon-new.webp", + pathname: "new123/32x32.webp", + size: 32, + source: "duckduckgo", + notFound: false, + upstreamStatus: 200, + upstreamContentType: "image/webp", + fetchedAt: laterDate, + expiresAt, + }); + + const rows = await db.select().from(favicons); + expect(rows).toHaveLength(1); + expect(rows[0]?.url).toBe("https://example.com/favicon-new.webp"); + expect(rows[0]?.source).toBe("duckduckgo"); + }); + + it("handles notFound flag", async () => { + const now = new Date(); + const expiresAt = ttlForFavicon(now); + + await upsertFavicon({ + domainId: testDomainId, + url: null, + pathname: null, + size: 32, + source: null, + notFound: true, + upstreamStatus: null, + upstreamContentType: null, + fetchedAt: now, + expiresAt, + }); + + const rows = await db.select().from(favicons); + expect(rows).toHaveLength(1); + expect(rows[0]?.notFound).toBe(true); + expect(rows[0]?.url).toBeNull(); + }); +}); + +describe("getFaviconByDomainId", () => { + it("returns null when domain has no favicon", async () => { + const result = await getFaviconByDomainId(testDomainId); + expect(result).toBeNull(); + }); + + it("returns favicon when not expired", async () => { + const now = new Date(); + const expiresAt = new Date(now.getTime() + 3600 * 1000); // 1 hour from now + + await upsertFavicon({ + domainId: testDomainId, + url: "https://example.com/favicon.webp", + pathname: "abc123/32x32.webp", + size: 32, + source: "duckduckgo", + notFound: false, + upstreamStatus: 200, + upstreamContentType: "image/x-icon", + fetchedAt: now, + expiresAt, + }); + + const result = await getFaviconByDomainId(testDomainId); + expect(result).not.toBeNull(); + expect(result?.url).toBe("https://example.com/favicon.webp"); + }); + + it("returns null when favicon is expired", async () => { + const now = new Date(); + const expiresAt = new Date(now.getTime() - 1000); // 1 second ago + + await upsertFavicon({ + domainId: testDomainId, + url: "https://example.com/favicon.webp", + pathname: "abc123/32x32.webp", + size: 32, + source: "duckduckgo", + notFound: false, + upstreamStatus: 200, + upstreamContentType: "image/x-icon", + fetchedAt: now, + expiresAt, + }); + + const result = await getFaviconByDomainId(testDomainId); + expect(result).toBeNull(); + }); +}); diff --git a/lib/db/repos/favicons.ts b/lib/db/repos/favicons.ts new file mode 100644 index 00000000..2cc47d0d --- /dev/null +++ b/lib/db/repos/favicons.ts @@ -0,0 +1,29 @@ +import "server-only"; +import type { InferInsertModel } from "drizzle-orm"; +import { and, eq, gt } from "drizzle-orm"; +import { db } from "@/lib/db/client"; +import { favicons } from "@/lib/db/schema"; +import { FaviconInsert as FaviconInsertSchema } from "@/lib/db/zod"; + +type FaviconInsert = InferInsertModel; + +export async function upsertFavicon(params: FaviconInsert) { + const insertRow = FaviconInsertSchema.parse(params); + await db.insert(favicons).values(insertRow).onConflictDoUpdate({ + target: favicons.domainId, + set: insertRow, + }); +} + +/** + * Fetch favicon record for a domain, returning null if expired or not found. + */ +export async function getFaviconByDomainId(domainId: string) { + const now = new Date(); + const rows = await db + .select() + .from(favicons) + .where(and(eq(favicons.domainId, domainId), gt(favicons.expiresAt, now))) + .limit(1); + return rows[0] ?? null; +} diff --git a/lib/db/repos/screenshots.test.ts b/lib/db/repos/screenshots.test.ts new file mode 100644 index 00000000..2a62392c --- /dev/null +++ b/lib/db/repos/screenshots.test.ts @@ -0,0 +1,176 @@ +/* @vitest-environment node */ +import { + afterAll, + beforeAll, + beforeEach, + describe, + expect, + it, + vi, +} from "vitest"; + +// Mock the DB client before importing anything else +vi.mock("@/lib/db/client", async () => { + const { makePGliteDb } = await import("@/lib/db/pglite"); + const { db } = await makePGliteDb(); + return { db }; +}); + +import { db } from "@/lib/db/client"; +import { screenshots } from "@/lib/db/schema"; +import { ttlForScreenshot } from "@/lib/db/ttl"; +import { getScreenshotByDomainId, upsertScreenshot } from "./screenshots"; + +let testDomainId: string; + +beforeAll(async () => { + // Create a test domain + const { upsertDomain } = await import("./domains"); + const domain = await upsertDomain({ + name: "test-screenshot.com", + tld: "com", + unicodeName: "test-screenshot.com", + }); + testDomainId = domain.id; +}); + +afterAll(async () => { + // PGlite cleanup handled automatically +}); + +beforeEach(async () => { + // Clear screenshots table before each test + await db.delete(screenshots); +}); + +describe("upsertScreenshot", () => { + it("inserts a new screenshot record", async () => { + const now = new Date(); + const expiresAt = ttlForScreenshot(now); + + await upsertScreenshot({ + domainId: testDomainId, + url: "https://example.com/screenshot.webp", + pathname: "abc123/1200x630.webp", + width: 1200, + height: 630, + source: "direct_https", + notFound: false, + fetchedAt: now, + expiresAt, + }); + + const rows = await db.select().from(screenshots); + expect(rows).toHaveLength(1); + expect(rows[0]?.url).toBe("https://example.com/screenshot.webp"); + expect(rows[0]?.width).toBe(1200); + expect(rows[0]?.height).toBe(630); + }); + + it("updates an existing screenshot record", async () => { + const now = new Date(); + const expiresAt = ttlForScreenshot(now); + + // Insert first + await upsertScreenshot({ + domainId: testDomainId, + url: "https://example.com/screenshot-old.webp", + pathname: "old123/1200x630.webp", + width: 1200, + height: 630, + source: "direct_http", + notFound: false, + fetchedAt: now, + expiresAt, + }); + + // Update with new data + const laterDate = new Date(now.getTime() + 1000); + await upsertScreenshot({ + domainId: testDomainId, + url: "https://example.com/screenshot-new.webp", + pathname: "new123/1200x630.webp", + width: 1200, + height: 630, + source: "direct_https", + notFound: false, + fetchedAt: laterDate, + expiresAt, + }); + + const rows = await db.select().from(screenshots); + expect(rows).toHaveLength(1); + expect(rows[0]?.url).toBe("https://example.com/screenshot-new.webp"); + expect(rows[0]?.source).toBe("direct_https"); + }); + + it("handles notFound flag", async () => { + const now = new Date(); + const expiresAt = ttlForScreenshot(now); + + await upsertScreenshot({ + domainId: testDomainId, + url: null, + pathname: null, + width: 1200, + height: 630, + source: null, + notFound: true, + fetchedAt: now, + expiresAt, + }); + + const rows = await db.select().from(screenshots); + expect(rows).toHaveLength(1); + expect(rows[0]?.notFound).toBe(true); + expect(rows[0]?.url).toBeNull(); + }); +}); + +describe("getScreenshotByDomainId", () => { + it("returns null when domain has no screenshot", async () => { + const result = await getScreenshotByDomainId(testDomainId); + expect(result).toBeNull(); + }); + + it("returns screenshot when not expired", async () => { + const now = new Date(); + const expiresAt = new Date(now.getTime() + 3600 * 1000); // 1 hour from now + + await upsertScreenshot({ + domainId: testDomainId, + url: "https://example.com/screenshot.webp", + pathname: "abc123/1200x630.webp", + width: 1200, + height: 630, + source: "direct_https", + notFound: false, + fetchedAt: now, + expiresAt, + }); + + const result = await getScreenshotByDomainId(testDomainId); + expect(result).not.toBeNull(); + expect(result?.url).toBe("https://example.com/screenshot.webp"); + }); + + it("returns null when screenshot is expired", async () => { + const now = new Date(); + const expiresAt = new Date(now.getTime() - 1000); // 1 second ago + + await upsertScreenshot({ + domainId: testDomainId, + url: "https://example.com/screenshot.webp", + pathname: "abc123/1200x630.webp", + width: 1200, + height: 630, + source: "direct_https", + notFound: false, + fetchedAt: now, + expiresAt, + }); + + const result = await getScreenshotByDomainId(testDomainId); + expect(result).toBeNull(); + }); +}); diff --git a/lib/db/repos/screenshots.ts b/lib/db/repos/screenshots.ts new file mode 100644 index 00000000..c2c9ff88 --- /dev/null +++ b/lib/db/repos/screenshots.ts @@ -0,0 +1,31 @@ +import "server-only"; +import type { InferInsertModel } from "drizzle-orm"; +import { and, eq, gt } from "drizzle-orm"; +import { db } from "@/lib/db/client"; +import { screenshots } from "@/lib/db/schema"; +import { ScreenshotInsert as ScreenshotInsertSchema } from "@/lib/db/zod"; + +type ScreenshotInsert = InferInsertModel; + +export async function upsertScreenshot(params: ScreenshotInsert) { + const insertRow = ScreenshotInsertSchema.parse(params); + await db.insert(screenshots).values(insertRow).onConflictDoUpdate({ + target: screenshots.domainId, + set: insertRow, + }); +} + +/** + * Fetch screenshot record for a domain, returning null if expired or not found. + */ +export async function getScreenshotByDomainId(domainId: string) { + const now = new Date(); + const rows = await db + .select() + .from(screenshots) + .where( + and(eq(screenshots.domainId, domainId), gt(screenshots.expiresAt, now)), + ) + .limit(1); + return rows[0] ?? null; +} diff --git a/lib/db/schema.ts b/lib/db/schema.ts index 45b85709..bc71f9c9 100644 --- a/lib/db/schema.ts +++ b/lib/db/schema.ts @@ -291,3 +291,42 @@ export const seo = pgTable( index("i_seo_canonical").on(t.canonicalUrl), ], ); + +// Favicons +export const favicons = pgTable( + "favicons", + { + domainId: uuid("domain_id") + .primaryKey() + .references(() => domains.id, { onDelete: "cascade" }), + url: text("url"), + pathname: text("pathname"), + size: integer("size").notNull(), + source: text("source"), + notFound: boolean("not_found").notNull().default(false), + upstreamStatus: integer("upstream_status"), + upstreamContentType: text("upstream_content_type"), + fetchedAt: timestamp("fetched_at", { withTimezone: true }).notNull(), + expiresAt: timestamp("expires_at", { withTimezone: true }).notNull(), + }, + (t) => [index("i_favicons_expires").on(t.expiresAt)], +); + +// Screenshots +export const screenshots = pgTable( + "screenshots", + { + domainId: uuid("domain_id") + .primaryKey() + .references(() => domains.id, { onDelete: "cascade" }), + url: text("url"), + pathname: text("pathname"), + width: integer("width").notNull(), + height: integer("height").notNull(), + source: text("source"), + notFound: boolean("not_found").notNull().default(false), + fetchedAt: timestamp("fetched_at", { withTimezone: true }).notNull(), + expiresAt: timestamp("expires_at", { withTimezone: true }).notNull(), + }, + (t) => [index("i_screenshots_expires").on(t.expiresAt)], +); diff --git a/lib/db/ttl.ts b/lib/db/ttl.ts index 2224dc88..2b42c66c 100644 --- a/lib/db/ttl.ts +++ b/lib/db/ttl.ts @@ -4,11 +4,13 @@ import { TTL_CERTIFICATES_WINDOW, TTL_DNS_DEFAULT, TTL_DNS_MAX, + TTL_FAVICON, TTL_HEADERS, TTL_HOSTING, TTL_REGISTRATION_EXPIRY_THRESHOLD, TTL_REGISTRATION_NEAR_EXPIRY, TTL_REGISTRATION_REGISTERED, + TTL_SCREENSHOT, TTL_SEO, } from "@/lib/constants"; @@ -72,3 +74,11 @@ export function ttlForHosting(now: Date): Date { export function ttlForSeo(now: Date): Date { return addSeconds(now, TTL_SEO); } + +export function ttlForFavicon(now: Date): Date { + return addSeconds(now, TTL_FAVICON); +} + +export function ttlForScreenshot(now: Date): Date { + return addSeconds(now, TTL_SCREENSHOT); +} diff --git a/lib/db/zod.ts b/lib/db/zod.ts index b09240bf..18d33aa4 100644 --- a/lib/db/zod.ts +++ b/lib/db/zod.ts @@ -5,10 +5,12 @@ import { dnsRecords, dnsRecordType, domains, + favicons, hosting, httpHeaders, providers, registrations, + screenshots, seo, } from "@/lib/db/schema"; @@ -58,3 +60,13 @@ export const HostingRowUpdate = zWrite.createUpdateSchema(hosting); export const SeoRowSelect = zRead.createSelectSchema(seo); export const SeoRowInsert = zWrite.createInsertSchema(seo); export const SeoRowUpdate = zWrite.createUpdateSchema(seo); + +// Favicons +export const FaviconSelect = zRead.createSelectSchema(favicons); +export const FaviconInsert = zWrite.createInsertSchema(favicons); +export const FaviconUpdate = zWrite.createUpdateSchema(favicons); + +// Screenshots +export const ScreenshotSelect = zRead.createSelectSchema(screenshots); +export const ScreenshotInsert = zWrite.createInsertSchema(screenshots); +export const ScreenshotUpdate = zWrite.createUpdateSchema(screenshots); diff --git a/server/services/favicon.test.ts b/server/services/favicon.test.ts index 1d7bcd73..cbd2fd11 100644 --- a/server/services/favicon.test.ts +++ b/server/services/favicon.test.ts @@ -1,6 +1,24 @@ /* @vitest-environment node */ import { afterEach, beforeAll, describe, expect, it, vi } from "vitest"; +// Mock toRegistrableDomain to allow .invalid and .example domains for testing +vi.mock("@/lib/domain-server", async () => { + const actual = await vi.importActual( + "@/lib/domain-server", + ); + return { + ...actual, + toRegistrableDomain: (input: string) => { + // Allow .invalid and .example domains (reserved, never resolve) for safe testing + if (input.endsWith(".invalid") || input.endsWith(".example")) { + return input.toLowerCase(); + } + // Use real implementation for everything else + return actual.toRegistrableDomain(input); + }, + }; +}); + const storageMock = vi.hoisted(() => ({ storeImage: vi.fn(async () => ({ url: "https://test-store.public.blob.vercel-storage.com/abcdef0123456789abcdef0123456789/32x32.webp", @@ -24,6 +42,9 @@ vi.mock("sharp", () => ({ })); beforeAll(async () => { + const { makePGliteDb } = await import("@/lib/db/pglite"); + const { db } = await makePGliteDb(); + vi.doMock("@/lib/db/client", () => ({ db })); const { makeInMemoryRedis } = await import("@/lib/redis-mock"); const impl = makeInMemoryRedis(); vi.doMock("@/lib/redis", () => impl); @@ -38,6 +59,8 @@ beforeAll(async () => { afterEach(async () => { vi.restoreAllMocks(); storageMock.storeImage.mockReset(); + const { resetPGliteDb } = await import("@/lib/db/pglite"); + await resetPGliteDb(); const { resetInMemoryRedis } = await import("@/lib/redis-mock"); resetInMemoryRedis(); }); diff --git a/server/services/favicon.ts b/server/services/favicon.ts index 64c1b454..38c3de23 100644 --- a/server/services/favicon.ts +++ b/server/services/favicon.ts @@ -1,5 +1,10 @@ import { getOrCreateCachedAsset } from "@/lib/cache"; import { TTL_FAVICON, USER_AGENT } from "@/lib/constants"; +import { ensureDomainRecord } from "@/lib/db/repos/domain-helpers"; +import { findDomainByName } from "@/lib/db/repos/domains"; +import { getFaviconByDomainId, upsertFavicon } from "@/lib/db/repos/favicons"; +import { ttlForFavicon } from "@/lib/db/ttl"; +import { toRegistrableDomain } from "@/lib/domain-server"; import { fetchWithTimeout } from "@/lib/fetch"; import { convertBufferToImageCover } from "@/lib/image"; import { ns } from "@/lib/redis"; @@ -21,14 +26,34 @@ function buildSources(domain: string): string[] { export async function getOrCreateFaviconBlobUrl( domain: string, ): Promise<{ url: string | null }> { - const indexKey = ns("favicon", "url", domain, String(DEFAULT_SIZE)); + // Normalize to registrable domain + const registrable = toRegistrableDomain(domain); + if (!registrable) { + throw new Error(`Cannot extract registrable domain from ${domain}`); + } + + const indexKey = ns("favicon", "url", registrable, String(DEFAULT_SIZE)); const ttl = TTL_FAVICON; return await getOrCreateCachedAsset({ indexKey, ttlSeconds: ttl, + // Check Postgres for cached favicon + fetchFromDb: async () => { + const existingDomain = await findDomainByName(registrable); + if (!existingDomain) return null; + + const faviconRecord = await getFaviconByDomainId(existingDomain.id); + if (!faviconRecord) return null; + + return { + url: faviconRecord.url, + key: faviconRecord.pathname ?? undefined, + notFound: faviconRecord.notFound, + }; + }, produceAndUpload: async () => { - const sources = buildSources(domain); + const sources = buildSources(registrable); let allNotFound = true; // Track if all sources returned 404/not found for (const src of sources) { @@ -64,7 +89,7 @@ export async function getOrCreateFaviconBlobUrl( if (!webp) continue; const { url, pathname } = await storeImage({ kind: "favicon", - domain, + domain: registrable, buffer: webp, width: DEFAULT_SIZE, height: DEFAULT_SIZE, @@ -94,5 +119,24 @@ export async function getOrCreateFaviconBlobUrl( // Return null with notFound flag if ALL sources returned 404 return { url: null, notFound: allNotFound }; }, + // Persist to Postgres after generation + persistToDb: async (result) => { + const domainRecord = await ensureDomainRecord(registrable); + const now = new Date(); + const expiresAt = ttlForFavicon(now); + + await upsertFavicon({ + domainId: domainRecord.id, + url: result.url, + pathname: result.key ?? null, + size: DEFAULT_SIZE, + source: null, // Will be set from metrics if available + notFound: result.notFound ?? false, + upstreamStatus: null, + upstreamContentType: null, + fetchedAt: now, + expiresAt, + }); + }, }); } diff --git a/server/services/screenshot.test.ts b/server/services/screenshot.test.ts index 0c76f623..f4bd206e 100644 --- a/server/services/screenshot.test.ts +++ b/server/services/screenshot.test.ts @@ -51,6 +51,9 @@ vi.mock("@/lib/image", () => ({ let getOrCreateScreenshotBlobUrl: typeof import("./screenshot").getOrCreateScreenshotBlobUrl; beforeAll(async () => { + const { makePGliteDb } = await import("@/lib/db/pglite"); + const { db } = await makePGliteDb(); + vi.doMock("@/lib/db/client", () => ({ db })); const { makeInMemoryRedis } = await import("@/lib/redis-mock"); const impl = makeInMemoryRedis(); vi.doMock("@/lib/redis", () => impl); @@ -64,6 +67,8 @@ beforeEach(() => { afterEach(async () => { vi.restoreAllMocks(); storageMock.storeImage.mockReset(); + const { resetPGliteDb } = await import("@/lib/db/pglite"); + await resetPGliteDb(); const { resetInMemoryRedis } = await import("@/lib/redis-mock"); resetInMemoryRedis(); pageMock.goto.mockReset(); diff --git a/server/services/screenshot.ts b/server/services/screenshot.ts index 41c75fe3..994619c4 100644 --- a/server/services/screenshot.ts +++ b/server/services/screenshot.ts @@ -1,6 +1,14 @@ import type { Browser } from "puppeteer-core"; import { getOrCreateCachedAsset } from "@/lib/cache"; import { TTL_SCREENSHOT, USER_AGENT } from "@/lib/constants"; +import { ensureDomainRecord } from "@/lib/db/repos/domain-helpers"; +import { findDomainByName } from "@/lib/db/repos/domains"; +import { + getScreenshotByDomainId, + upsertScreenshot, +} from "@/lib/db/repos/screenshots"; +import { ttlForScreenshot } from "@/lib/db/ttl"; +import { toRegistrableDomain } from "@/lib/domain-server"; import { addWatermarkToScreenshot, optimizeImageCover } from "@/lib/image"; import { launchChromium } from "@/lib/puppeteer"; import { ns } from "@/lib/redis"; @@ -41,6 +49,12 @@ export async function getOrCreateScreenshotBlobUrl( backoffMaxMs?: number; }, ): Promise<{ url: string | null }> { + // Normalize to registrable domain + const registrable = toRegistrableDomain(domain); + if (!registrable) { + throw new Error(`Cannot extract registrable domain from ${domain}`); + } + const attempts = Math.max( 1, options?.attempts ?? CAPTURE_MAX_ATTEMPTS_DEFAULT, @@ -51,7 +65,7 @@ export async function getOrCreateScreenshotBlobUrl( const indexKey = ns( "screenshot", "url", - domain, + registrable, `${VIEWPORT_WIDTH}x${VIEWPORT_HEIGHT}`, ); const ttl = TTL_SCREENSHOT; @@ -59,11 +73,25 @@ export async function getOrCreateScreenshotBlobUrl( return await getOrCreateCachedAsset({ indexKey, ttlSeconds: ttl, + // Check Postgres for cached screenshot + fetchFromDb: async () => { + const existingDomain = await findDomainByName(registrable); + if (!existingDomain) return null; + + const screenshotRecord = await getScreenshotByDomainId(existingDomain.id); + if (!screenshotRecord) return null; + + return { + url: screenshotRecord.url, + key: screenshotRecord.pathname ?? undefined, + notFound: screenshotRecord.notFound, + }; + }, produceAndUpload: async () => { let browser: Browser | null = null; try { browser = await launchChromium(); - const tryUrls = buildHomepageUrls(domain); + const tryUrls = buildHomepageUrls(registrable); for (const url of tryUrls) { let lastError: unknown = null; for (let attemptIndex = 0; attemptIndex < attempts; attemptIndex++) { @@ -109,7 +137,7 @@ export async function getOrCreateScreenshotBlobUrl( ); const { url: storedUrl, pathname } = await storeImage({ kind: "screenshot", - domain, + domain: registrable, buffer: withWatermark, width: VIEWPORT_WIDTH, height: VIEWPORT_HEIGHT, @@ -148,5 +176,23 @@ export async function getOrCreateScreenshotBlobUrl( } } }, + // Persist to Postgres after generation + persistToDb: async (result) => { + const domainRecord = await ensureDomainRecord(registrable); + const now = new Date(); + const expiresAt = ttlForScreenshot(now); + + await upsertScreenshot({ + domainId: domainRecord.id, + url: result.url, + pathname: result.key ?? null, + width: VIEWPORT_WIDTH, + height: VIEWPORT_HEIGHT, + source: null, // Will be set from metrics if available + notFound: result.notFound ?? false, + fetchedAt: now, + expiresAt, + }); + }, }); } From ce9dd0e14107f679b5b496d31c847178047a3b0a Mon Sep 17 00:00:00 2001 From: Jake Jarvis Date: Sat, 15 Nov 2025 18:46:52 -0500 Subject: [PATCH 02/11] refactor: consolidate domain record management by moving ensureDomainRecord to domains.ts - Removed domain-helpers.ts and migrated the ensureDomainRecord function to domains.ts for better organization. - Updated service files to import ensureDomainRecord from the new location, maintaining functionality for favicon and screenshot services. --- lib/db/repos/domain-helpers.ts | 28 ---------------------------- lib/db/repos/domains.ts | 26 ++++++++++++++++++++++++++ server/services/favicon.ts | 3 +-- server/services/screenshot.ts | 3 +-- 4 files changed, 28 insertions(+), 32 deletions(-) delete mode 100644 lib/db/repos/domain-helpers.ts diff --git a/lib/db/repos/domain-helpers.ts b/lib/db/repos/domain-helpers.ts deleted file mode 100644 index 62af1ba4..00000000 --- a/lib/db/repos/domain-helpers.ts +++ /dev/null @@ -1,28 +0,0 @@ -import "server-only"; -import { getDomainTld } from "rdapper"; -import { upsertDomain } from "@/lib/db/repos/domains"; - -/** - * Parse domain name and ensure a domain record exists in Postgres. - * This is used by services that need to persist data for a domain (favicon, screenshot, etc.) - * even when a full domain report hasn't been requested. - * - * @param domain - The domain name (should already be normalized/registrable) - * @returns The domain record with its ID - */ -export async function ensureDomainRecord(domain: string) { - const tld = getDomainTld(domain) ?? ""; - - // For unicode handling, we'd need to use toUnicode from node:url or a library, - // but for now we'll use the ASCII version as the unicode name if they match - // This is safe because rdapper already normalizes to ASCII/punycode when needed - const unicodeName = domain; - - const domainRecord = await upsertDomain({ - name: domain, - tld, - unicodeName, - }); - - return domainRecord; -} diff --git a/lib/db/repos/domains.ts b/lib/db/repos/domains.ts index 518cb6dd..8d85f674 100644 --- a/lib/db/repos/domains.ts +++ b/lib/db/repos/domains.ts @@ -1,6 +1,7 @@ import "server-only"; import { eq, inArray, sql } from "drizzle-orm"; +import { getDomainTld } from "rdapper"; import { db } from "@/lib/db/client"; import { domains } from "@/lib/db/schema"; @@ -42,6 +43,31 @@ export async function findDomainByName(name: string) { return rows[0] ?? null; } +/** + * Parse domain name and ensure a domain record exists in Postgres. + * This is used by services that need to persist data for a domain (favicon, screenshot, etc.) + * even when a full domain report hasn't been requested. + * + * @param domain - The domain name (should already be normalized/registrable) + * @returns The domain record with its ID + */ +export async function ensureDomainRecord(domain: string) { + const tld = getDomainTld(domain) ?? ""; + + // For unicode handling, we'd need to use toUnicode from node:url or a library, + // but for now we'll use the ASCII version as the unicode name if they match + // This is safe because rdapper already normalizes to ASCII/punycode when needed + const unicodeName = domain; + + const domainRecord = await upsertDomain({ + name: domain, + tld, + unicodeName, + }); + + return domainRecord; +} + /** * Batch update lastAccessedAt timestamps for multiple domains. * Used by the access sync cron to flush Redis data to Postgres. diff --git a/server/services/favicon.ts b/server/services/favicon.ts index 38c3de23..a4144441 100644 --- a/server/services/favicon.ts +++ b/server/services/favicon.ts @@ -1,7 +1,6 @@ import { getOrCreateCachedAsset } from "@/lib/cache"; import { TTL_FAVICON, USER_AGENT } from "@/lib/constants"; -import { ensureDomainRecord } from "@/lib/db/repos/domain-helpers"; -import { findDomainByName } from "@/lib/db/repos/domains"; +import { ensureDomainRecord, findDomainByName } from "@/lib/db/repos/domains"; import { getFaviconByDomainId, upsertFavicon } from "@/lib/db/repos/favicons"; import { ttlForFavicon } from "@/lib/db/ttl"; import { toRegistrableDomain } from "@/lib/domain-server"; diff --git a/server/services/screenshot.ts b/server/services/screenshot.ts index 994619c4..f3ba3806 100644 --- a/server/services/screenshot.ts +++ b/server/services/screenshot.ts @@ -1,8 +1,7 @@ import type { Browser } from "puppeteer-core"; import { getOrCreateCachedAsset } from "@/lib/cache"; import { TTL_SCREENSHOT, USER_AGENT } from "@/lib/constants"; -import { ensureDomainRecord } from "@/lib/db/repos/domain-helpers"; -import { findDomainByName } from "@/lib/db/repos/domains"; +import { ensureDomainRecord, findDomainByName } from "@/lib/db/repos/domains"; import { getScreenshotByDomainId, upsertScreenshot, From 6ac2bb27dafc1eb37f29b2fe35b6dd9063e7e8d1 Mon Sep 17 00:00:00 2001 From: Jake Jarvis Date: Sat, 15 Nov 2025 18:49:04 -0500 Subject: [PATCH 03/11] feat: enhance caching logic to include metrics in asset retrieval - Added metrics parameter to CachedAssetOptions for better tracking of asset performance. - Updated getOrCreateCachedAsset to utilize metrics when persisting cached assets. - Modified favicon and screenshot services to extract relevant metrics from results, improving data handling and source tracking. --- lib/cache.ts | 2 ++ server/services/favicon.ts | 6 +++--- server/services/screenshot.ts | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/cache.ts b/lib/cache.ts index cbb5d8ed..683eeb04 100644 --- a/lib/cache.ts +++ b/lib/cache.ts @@ -34,6 +34,7 @@ type CachedAssetOptions> = { url: string | null; key?: string; notFound?: boolean; + metrics?: TProduceMeta; }) => Promise; }; @@ -141,6 +142,7 @@ export async function getOrCreateCachedAsset>( url: produced.url, key: produced.key, notFound: produced.notFound, + metrics: produced.metrics, }).catch((err) => { console.error( "[cache] db persist error", diff --git a/server/services/favicon.ts b/server/services/favicon.ts index a4144441..f4615860 100644 --- a/server/services/favicon.ts +++ b/server/services/favicon.ts @@ -129,10 +129,10 @@ export async function getOrCreateFaviconBlobUrl( url: result.url, pathname: result.key ?? null, size: DEFAULT_SIZE, - source: null, // Will be set from metrics if available + source: result.metrics?.source ?? null, notFound: result.notFound ?? false, - upstreamStatus: null, - upstreamContentType: null, + upstreamStatus: result.metrics?.upstream_status ?? null, + upstreamContentType: result.metrics?.upstream_content_type ?? null, fetchedAt: now, expiresAt, }); diff --git a/server/services/screenshot.ts b/server/services/screenshot.ts index f3ba3806..e9e7c7ac 100644 --- a/server/services/screenshot.ts +++ b/server/services/screenshot.ts @@ -187,7 +187,7 @@ export async function getOrCreateScreenshotBlobUrl( pathname: result.key ?? null, width: VIEWPORT_WIDTH, height: VIEWPORT_HEIGHT, - source: null, // Will be set from metrics if available + source: result.metrics?.source ?? null, notFound: result.notFound ?? false, fetchedAt: now, expiresAt, From 9ccfa3d7b3f5a89c37ad9f0ea177063a4d408d47 Mon Sep 17 00:00:00 2001 From: Jake Jarvis Date: Sat, 15 Nov 2025 18:50:33 -0500 Subject: [PATCH 04/11] refactor: enhance upsert functions for favicons and screenshots to return inserted rows - Updated upsertFavicon and upsertScreenshot functions to return the inserted row or null, improving data handling. - Added InferSelectModel type for favicons and screenshots to support the new return type. --- lib/db/repos/favicons.ts | 20 ++++++++++++++------ lib/db/repos/screenshots.ts | 20 ++++++++++++++------ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/lib/db/repos/favicons.ts b/lib/db/repos/favicons.ts index 2cc47d0d..42ef4f3b 100644 --- a/lib/db/repos/favicons.ts +++ b/lib/db/repos/favicons.ts @@ -1,18 +1,26 @@ import "server-only"; -import type { InferInsertModel } from "drizzle-orm"; +import type { InferInsertModel, InferSelectModel } from "drizzle-orm"; import { and, eq, gt } from "drizzle-orm"; import { db } from "@/lib/db/client"; import { favicons } from "@/lib/db/schema"; import { FaviconInsert as FaviconInsertSchema } from "@/lib/db/zod"; type FaviconInsert = InferInsertModel; +type Favicon = InferSelectModel; -export async function upsertFavicon(params: FaviconInsert) { +export async function upsertFavicon( + params: FaviconInsert, +): Promise { const insertRow = FaviconInsertSchema.parse(params); - await db.insert(favicons).values(insertRow).onConflictDoUpdate({ - target: favicons.domainId, - set: insertRow, - }); + const rows = await db + .insert(favicons) + .values(insertRow) + .onConflictDoUpdate({ + target: favicons.domainId, + set: insertRow, + }) + .returning(); + return rows[0] ?? null; } /** diff --git a/lib/db/repos/screenshots.ts b/lib/db/repos/screenshots.ts index c2c9ff88..69476de3 100644 --- a/lib/db/repos/screenshots.ts +++ b/lib/db/repos/screenshots.ts @@ -1,18 +1,26 @@ import "server-only"; -import type { InferInsertModel } from "drizzle-orm"; +import type { InferInsertModel, InferSelectModel } from "drizzle-orm"; import { and, eq, gt } from "drizzle-orm"; import { db } from "@/lib/db/client"; import { screenshots } from "@/lib/db/schema"; import { ScreenshotInsert as ScreenshotInsertSchema } from "@/lib/db/zod"; type ScreenshotInsert = InferInsertModel; +type Screenshot = InferSelectModel; -export async function upsertScreenshot(params: ScreenshotInsert) { +export async function upsertScreenshot( + params: ScreenshotInsert, +): Promise { const insertRow = ScreenshotInsertSchema.parse(params); - await db.insert(screenshots).values(insertRow).onConflictDoUpdate({ - target: screenshots.domainId, - set: insertRow, - }); + const rows = await db + .insert(screenshots) + .values(insertRow) + .onConflictDoUpdate({ + target: screenshots.domainId, + set: insertRow, + }) + .returning(); + return rows[0] ?? null; } /** From 9cdb3fefda1f072836b86b5b2a720a607b06b823 Mon Sep 17 00:00:00 2001 From: Jake Jarvis Date: Sat, 15 Nov 2025 19:08:14 -0500 Subject: [PATCH 05/11] refactor: remove access-sync cron job and update domain access handling - Deleted the access-sync cron job from vercel.json and removed the corresponding route implementation. - Updated the recordDomainAccess function to debounce database writes, ensuring updates occur only if more than 5 minutes have passed since the last attempt. - Introduced a new updateLastAccessed function to handle individual domain updates, reducing unnecessary writes to the database. --- app/api/cron/access-sync/route.ts | 135 ------------------------------ lib/access.test.ts | 6 ++ lib/access.ts | 45 ++++++---- lib/db/repos/domains.ts | 49 +++++------ vercel.json | 4 - 5 files changed, 62 insertions(+), 177 deletions(-) delete mode 100644 app/api/cron/access-sync/route.ts diff --git a/app/api/cron/access-sync/route.ts b/app/api/cron/access-sync/route.ts deleted file mode 100644 index f18dbef2..00000000 --- a/app/api/cron/access-sync/route.ts +++ /dev/null @@ -1,135 +0,0 @@ -import { NextResponse } from "next/server"; -import { batchUpdateLastAccessed } from "@/lib/db/repos/domains"; -import { ns, redis } from "@/lib/redis"; - -export async function GET(request: Request) { - // Verify Vercel cron secret - const authHeader = request.headers.get("authorization"); - const expectedAuth = process.env.CRON_SECRET - ? `Bearer ${process.env.CRON_SECRET}` - : null; - - if (!expectedAuth) { - return NextResponse.json( - { error: "CRON_SECRET not configured" }, - { status: 500 }, - ); - } - - if (authHeader !== expectedAuth) { - return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); - } - - try { - const startedAt = Date.now(); - - // Scan for all access keys using incremental SCAN (non-blocking) - const pattern = ns("access", "domain", "*"); - const keys: string[] = []; - let cursor = "0"; - - // Iterate with SCAN until cursor returns to "0" - do { - const result = await redis.scan(cursor, { - match: pattern, - count: 100, // Reasonable batch size for each scan iteration - }); - cursor = result[0]; - keys.push(...result[1]); - } while (cursor !== "0"); - - console.info(`[access-sync] found ${keys.length} access keys to process`); - - if (keys.length === 0) { - return NextResponse.json({ - success: true, - synced: 0, - message: "no access data to sync", - }); - } - - // Atomically read and delete each key to avoid race conditions - // Using GETDEL ensures that if a fresh write happens after we read, - // it won't be deleted (it will create a new key that survives this sync) - // Batch read-and-delete all keys using pipeline for efficiency - const pipeline = redis.pipeline(); - for (const key of keys) { - pipeline.getdel(key); - } - const results = await pipeline.exec>(); - - const updates: Array<{ name: string; accessedAt: Date }> = []; - const prefix = "access:domain:"; - - for (let i = 0; i < keys.length; i++) { - const key = keys[i]; - const rawTimestamp = results[i]; - - // Redis returns string values, so coerce to number - const timestamp = Number(rawTimestamp); - - if (!Number.isFinite(timestamp)) { - continue; - } - - // Extract domain name from key: "access:domain:{domain}" - // Use prefix detection to preserve domains with colons (e.g., example.com:8080) - if (!key.startsWith(prefix)) { - continue; - } - const domain = key.substring(prefix.length); - - updates.push({ - name: domain, - accessedAt: new Date(timestamp), - }); - } - - if (updates.length === 0) { - console.warn( - `[access-sync] found ${keys.length} keys but no valid data to sync`, - ); - return NextResponse.json({ - success: true, - synced: 0, - message: "no valid access data to sync", - }); - } - - console.info( - `[access-sync] prepared ${updates.length} updates from ${keys.length} keys`, - ); - - // Batch update to Postgres (100 at a time to avoid overwhelming DB) - const BATCH_SIZE = 100; - let synced = 0; - - for (let i = 0; i < updates.length; i += BATCH_SIZE) { - const chunk = updates.slice(i, i + BATCH_SIZE); - await batchUpdateLastAccessed(chunk); - synced += chunk.length; - } - - console.info( - `[access-sync] ok synced=${synced} ${Date.now() - startedAt}ms`, - ); - - return NextResponse.json({ - success: true, - synced, - durationMs: Date.now() - startedAt, - }); - } catch (err) { - console.error( - "[access-sync] cron failed", - err instanceof Error ? err : new Error(String(err)), - ); - return NextResponse.json( - { - error: "Internal error", - message: err instanceof Error ? err.message : "unknown", - }, - { status: 500 }, - ); - } -} diff --git a/lib/access.test.ts b/lib/access.test.ts index 82a28e9f..a544cd63 100644 --- a/lib/access.test.ts +++ b/lib/access.test.ts @@ -8,6 +8,12 @@ import { vi, } from "vitest"; +// Mock the domains repo to avoid DB dependency in these tests +// (only testing pure calculation functions, not recordDomainAccess) +vi.mock("@/lib/db/repos/domains", () => ({ + updateLastAccessed: vi.fn(), +})); + describe("access-decay", () => { const now = new Date("2024-01-15T00:00:00Z"); const msPerDay = 24 * 60 * 60 * 1000; diff --git a/lib/access.ts b/lib/access.ts index 29ef7a08..4b02435b 100644 --- a/lib/access.ts +++ b/lib/access.ts @@ -1,5 +1,6 @@ import "server-only"; +import { after } from "next/server"; import { FAST_CHANGING_TIERS, REVALIDATE_MIN_CERTIFICATES, @@ -10,15 +11,28 @@ import { REVALIDATE_MIN_SEO, SLOW_CHANGING_TIERS, } from "@/lib/constants"; -import { ns, redis } from "@/lib/redis"; +import { updateLastAccessed } from "@/lib/db/repos/domains"; import type { Section } from "@/lib/schemas"; +/** + * Module-level map to track last write attempt per domain. + * Prevents excessive database writes by debouncing updates. + * Key: domain name, Value: timestamp of last write attempt + */ +const lastWriteAttempts = new Map(); + +/** + * Debounce window in milliseconds (5 minutes). + * Only write to DB if more than 5 minutes have passed since last attempt. + */ +const DEBOUNCE_MS = 5 * 60 * 1000; + /** * Record that a domain was accessed by a user (for decay calculation). - * Fire-and-forget pattern: does not throw on errors. + * Schedules the write to happen after the response is sent using Next.js after(). * - * Keys expire after 90 minutes (5400s) as a safety net, since the - * access-sync cron runs hourly and uses GETDEL to atomically read and delete. + * Uses module-level debouncing to limit writes to once per 5 minutes per domain. + * Writes directly to Postgres without intermediate Redis buffering. * * IMPORTANT: Only call this for real user requests, NOT for background * revalidation jobs (Inngest). Background jobs should not reset decay timers. @@ -26,16 +40,19 @@ import type { Section } from "@/lib/schemas"; * @param domain - The domain name that was accessed */ export function recordDomainAccess(domain: string): void { - // Fire-and-forget: intentionally not awaited to avoid blocking - // Errors are logged but don't break the service - const key = ns("access", "domain", domain); - const timestamp = Date.now(); - redis.set(key, timestamp, { ex: 5400 }).catch((err) => { - console.warn( - `[access] failed to record ${domain}`, - err instanceof Error ? err.message : String(err), - ); - }); + const now = Date.now(); + const lastAttempt = lastWriteAttempts.get(domain); + + // Debounce: skip if we recently attempted a write + if (lastAttempt && now - lastAttempt < DEBOUNCE_MS) { + return; + } + + // Record this attempt to prevent duplicate writes + lastWriteAttempts.set(domain, now); + + // Schedule DB write to happen after the response is sent + after(() => updateLastAccessed(domain)); } /** diff --git a/lib/db/repos/domains.ts b/lib/db/repos/domains.ts index 8d85f674..621941d9 100644 --- a/lib/db/repos/domains.ts +++ b/lib/db/repos/domains.ts @@ -1,6 +1,6 @@ import "server-only"; -import { eq, inArray, sql } from "drizzle-orm"; +import { eq, sql } from "drizzle-orm"; import { getDomainTld } from "rdapper"; import { db } from "@/lib/db/client"; import { domains } from "@/lib/db/schema"; @@ -69,30 +69,31 @@ export async function ensureDomainRecord(domain: string) { } /** - * Batch update lastAccessedAt timestamps for multiple domains. - * Used by the access sync cron to flush Redis data to Postgres. + * Update lastAccessedAt timestamp for a domain. + * Only updates if the domain hasn't been accessed in the last 5 minutes + * to reduce unnecessary writes. * - * @param updates - Array of domain names and their access timestamps + * Fire-and-forget: catches errors and logs warnings without throwing. + * + * @param name - The domain name to update */ -export async function batchUpdateLastAccessed( - updates: Array<{ name: string; accessedAt: Date }>, -): Promise { - if (updates.length === 0) return; - - // Use Drizzle's update with case-when pattern for batch updates - // This is more efficient than individual updates - const cases = updates.map( - (u) => - sql`WHEN ${domains.name} = ${u.name} THEN ${u.accessedAt.toISOString()}::timestamptz`, - ); +export async function updateLastAccessed(name: string): Promise { + try { + const fiveMinutesAgo = new Date(Date.now() - 5 * 60 * 1000); - const names = updates.map((u) => u.name); - - await db - .update(domains) - .set({ - lastAccessedAt: sql`CASE ${sql.join(cases, sql.raw(" "))} END`, - updatedAt: new Date(), - }) - .where(inArray(domains.name, names)); + await db + .update(domains) + .set({ + lastAccessedAt: new Date(), + updatedAt: new Date(), + }) + .where( + sql`${domains.name} = ${name} AND (${domains.lastAccessedAt} IS NULL OR ${domains.lastAccessedAt} < ${fiveMinutesAgo})`, + ); + } catch (err) { + console.warn( + `[access] failed to update lastAccessedAt for ${name}`, + err instanceof Error ? err.message : String(err), + ); + } } diff --git a/vercel.json b/vercel.json index 5fbfc96f..c0f8fa4c 100644 --- a/vercel.json +++ b/vercel.json @@ -8,10 +8,6 @@ } }, "crons": [ - { - "path": "/api/cron/access-sync", - "schedule": "0 * * * *" - }, { "path": "/api/cron/pricing-refresh", "schedule": "0 4 * * *" From f626108818c03c37a16394924ff232bcf5a197e4 Mon Sep 17 00:00:00 2001 From: Jake Jarvis Date: Sat, 15 Nov 2025 19:14:31 -0500 Subject: [PATCH 06/11] refactor: update caching logic to use 'after' for asynchronous operations - Replaced fire-and-forget pattern with 'after' for caching operations in cache.ts, pricing.ts, and registration.ts, ensuring that Redis writes do not block the main execution flow. - Improved error handling for Redis operations by maintaining the existing logging structure. - Enhanced code readability and maintainability by centralizing asynchronous cache updates. --- lib/cache.ts | 109 +++++++++++++++++--------------- server/services/pricing.ts | 39 +++++++----- server/services/registration.ts | 41 ++++++------ 3 files changed, 102 insertions(+), 87 deletions(-) diff --git a/lib/cache.ts b/lib/cache.ts index 683eeb04..aab07d62 100644 --- a/lib/cache.ts +++ b/lib/cache.ts @@ -1,3 +1,4 @@ +import { after } from "next/server"; import { redis } from "@/lib/redis"; type CachedAssetOptions> = { @@ -98,26 +99,28 @@ export async function getOrCreateCachedAsset>( try { const dbResult = await fetchFromDb(); if (dbResult) { - // Found in DB, cache it in Redis for next time (fire-and-forget) - const expiresAtMs = Date.now() + ttlSeconds * 1000; - redis - .set( - indexKey, - { - url: dbResult.url, - key: dbResult.key, - notFound: dbResult.notFound ?? undefined, - expiresAtMs, - }, - { ex: ttlSeconds }, - ) - .catch((err) => { - console.error( - "[cache] redis write from db failed", - { indexKey }, - err instanceof Error ? err : new Error(String(err)), - ); - }); + // Found in DB, cache it in Redis for next time + after(() => { + const expiresAtMs = Date.now() + ttlSeconds * 1000; + redis + .set( + indexKey, + { + url: dbResult.url, + key: dbResult.key, + notFound: dbResult.notFound ?? undefined, + expiresAtMs, + }, + { ex: ttlSeconds }, + ) + .catch((err) => { + console.error( + "[cache] redis write from db failed", + { indexKey }, + err instanceof Error ? err : new Error(String(err)), + ); + }); + }); console.debug(`[cache] db hit ${indexKey}`); return { url: dbResult.url }; @@ -136,41 +139,45 @@ export async function getOrCreateCachedAsset>( const produced = await produceAndUpload(); const expiresAtMs = Date.now() + ttlSeconds * 1000; - // 4) Persist to Postgres if callback provided (fire-and-forget) + // 4) Persist to Postgres if callback provided if (persistToDb) { - persistToDb({ - url: produced.url, - key: produced.key, - notFound: produced.notFound, - metrics: produced.metrics, - }).catch((err) => { - console.error( - "[cache] db persist error", - { indexKey }, - err instanceof Error ? err : new Error(String(err)), - ); - }); - } - - // 5) Cache in Redis for next time (fire-and-forget) - redis - .set( - indexKey, - { + after(() => { + persistToDb({ url: produced.url, key: produced.key, - notFound: produced.notFound ?? undefined, - expiresAtMs, - }, - { ex: ttlSeconds }, - ) - .catch((err) => { - console.error( - "[cache] cache write error", - { indexKey }, - err instanceof Error ? err : new Error(String(err)), - ); + notFound: produced.notFound, + metrics: produced.metrics, + }).catch((err) => { + console.error( + "[cache] db persist error", + { indexKey }, + err instanceof Error ? err : new Error(String(err)), + ); + }); }); + } + + // 5) Cache in Redis for next time + after(() => { + redis + .set( + indexKey, + { + url: produced.url, + key: produced.key, + notFound: produced.notFound ?? undefined, + expiresAtMs, + }, + { ex: ttlSeconds }, + ) + .catch((err) => { + console.error( + "[cache] cache write error", + { indexKey }, + err instanceof Error ? err : new Error(String(err)), + ); + }); + }); return { url: produced.url }; } catch (produceErr) { diff --git a/server/services/pricing.ts b/server/services/pricing.ts index f9c178db..0f0b9747 100644 --- a/server/services/pricing.ts +++ b/server/services/pricing.ts @@ -1,3 +1,4 @@ +import { after } from "next/server"; import { getDomainTld } from "rdapper"; import { ns, redis } from "@/lib/redis"; import type { Pricing } from "@/lib/schemas"; @@ -53,16 +54,18 @@ async function fetchProviderPricing( // Fetch fresh pricing try { const payload = await provider.fetchPricing(); - // Cache for next time (fire-and-forget) - redis - .set(provider.cacheKey, payload, { ex: provider.cacheTtlSeconds }) - .catch((err) => { - console.error( - `[pricing] cache write error ${provider.name}`, - { cacheKey: provider.cacheKey }, - err instanceof Error ? err : new Error(String(err)), - ); - }); + // Cache for next time + after(() => { + redis + .set(provider.cacheKey, payload, { ex: provider.cacheTtlSeconds }) + .catch((err) => { + console.error( + `[pricing] cache write error ${provider.name}`, + { cacheKey: provider.cacheKey }, + err instanceof Error ? err : new Error(String(err)), + ); + }); + }); console.info(`[pricing] fetch ok ${provider.name} (not cached)`); return payload; } catch (err) { @@ -70,13 +73,15 @@ async function fetchProviderPricing( `[pricing] fetch error ${provider.name}`, err instanceof Error ? err : new Error(String(err)), ); - // Short TTL negative cache (fire-and-forget) - redis.set(provider.cacheKey, null, { ex: 60 }).catch((cacheErr) => { - console.error( - `[pricing] negative cache write error ${provider.name}`, - { cacheKey: provider.cacheKey }, - cacheErr instanceof Error ? cacheErr : new Error(String(cacheErr)), - ); + // Short TTL negative cache + after(() => { + redis.set(provider.cacheKey, null, { ex: 60 }).catch((cacheErr) => { + console.error( + `[pricing] negative cache write error ${provider.name}`, + { cacheKey: provider.cacheKey }, + cacheErr instanceof Error ? cacheErr : new Error(String(cacheErr)), + ); + }); }); return null; } diff --git a/server/services/registration.ts b/server/services/registration.ts index 068e0d13..86cefcc9 100644 --- a/server/services/registration.ts +++ b/server/services/registration.ts @@ -1,4 +1,5 @@ import { eq } from "drizzle-orm"; +import { after } from "next/server"; import { getDomainTld, lookup } from "rdapper"; import { REDIS_TTL_REGISTERED, REDIS_TTL_UNREGISTERED } from "@/lib/constants"; import { db } from "@/lib/db/client"; @@ -138,19 +139,20 @@ export async function getRegistration(domain: string): Promise { }; // Update Redis fast-path cache to keep it hot for subsequent requests - // Fire-and-forget to avoid blocking the response on Redis latency const ttl = row.registration.isRegistered ? REDIS_TTL_REGISTERED : REDIS_TTL_UNREGISTERED; - setRegistrationStatusInCache( - registrable, - row.registration.isRegistered, - ttl, - ).catch((err) => { - console.warn( - `[registration] failed to warm Redis cache for ${registrable}:`, - err instanceof Error ? err : new Error(String(err)), - ); + after(() => { + setRegistrationStatusInCache( + registrable, + row.registration.isRegistered, + ttl, + ).catch((err) => { + console.warn( + `[registration] failed to warm Redis cache for ${registrable}:`, + err instanceof Error ? err : new Error(String(err)), + ); + }); }); // Schedule background revalidation using actual last access time @@ -219,18 +221,19 @@ export async function getRegistration(domain: string): Promise { } // Cache the registration status (true/false) in Redis for fast lookups - // Fire-and-forget to avoid blocking the response on Redis latency const ttl = record.isRegistered ? REDIS_TTL_REGISTERED : REDIS_TTL_UNREGISTERED; - setRegistrationStatusInCache(registrable, record.isRegistered, ttl).catch( - (err) => { - console.warn( - `[registration] failed to cache status for ${registrable}:`, - err instanceof Error ? err : new Error(String(err)), - ); - }, - ); + after(() => { + setRegistrationStatusInCache(registrable, record.isRegistered, ttl).catch( + (err) => { + console.warn( + `[registration] failed to cache status for ${registrable}:`, + err instanceof Error ? err : new Error(String(err)), + ); + }, + ); + }); // If unregistered, return response without persisting to Postgres if (!record.isRegistered) { From 50ba4a8e392c2064f3aea6082984b4c32ca2529c Mon Sep 17 00:00:00 2001 From: Jake Jarvis Date: Sat, 15 Nov 2025 19:25:47 -0500 Subject: [PATCH 07/11] refactor: implement 'after' for scheduling revalidation in various services - Replaced try-catch blocks with 'after' for scheduling revalidation in certificates, dns, headers, hosting, registration, and seo services, allowing for immediate execution of callbacks without blocking. - Improved error handling by ensuring that errors are logged similarly to the previous implementation. - Enhanced code readability and maintainability by centralizing asynchronous scheduling logic. --- lib/schedule.test.ts | 5 +++++ server/services/certificates.ts | 19 +++++++++-------- server/services/dns.ts | 37 +++++++++++++++++---------------- server/services/headers.test.ts | 5 +++++ server/services/headers.ts | 19 +++++++++-------- server/services/hosting.ts | 19 +++++++++-------- server/services/registration.ts | 36 ++++++++++++++++---------------- server/services/seo.ts | 19 +++++++++-------- vitest.setup.ts | 16 ++++++++++++++ 9 files changed, 103 insertions(+), 72 deletions(-) diff --git a/lib/schedule.test.ts b/lib/schedule.test.ts index 503ffc70..5a6ef54c 100644 --- a/lib/schedule.test.ts +++ b/lib/schedule.test.ts @@ -9,6 +9,11 @@ import { vi, } from "vitest"; +// Mock the domains repo to avoid DB dependency in these tests +vi.mock("@/lib/db/repos/domains", () => ({ + updateLastAccessed: vi.fn(), +})); + let scheduleRevalidation: typeof import("@/lib/schedule").scheduleRevalidation; let allSections: typeof import("@/lib/schedule").allSections; diff --git a/server/services/certificates.ts b/server/services/certificates.ts index 345daa0f..b36fd064 100644 --- a/server/services/certificates.ts +++ b/server/services/certificates.ts @@ -1,5 +1,6 @@ import tls from "node:tls"; import { eq } from "drizzle-orm"; +import { after } from "next/server"; import { db } from "@/lib/db/client"; import { replaceCertificates } from "@/lib/db/repos/certificates"; import { findDomainByName } from "@/lib/db/repos/domains"; @@ -166,20 +167,20 @@ export async function getCertificates(domain: string): Promise { expiresAt: nextDue, }); - try { + after(() => { const dueAtMs = nextDue.getTime(); - await scheduleRevalidation( + scheduleRevalidation( registrable, "certificates", dueAtMs, existingDomain.lastAccessedAt ?? null, - ); - } catch (err) { - console.warn( - `[certificates] schedule failed for ${registrable}`, - err instanceof Error ? err : new Error(String(err)), - ); - } + ).catch((err) => { + console.warn( + `[certificates] schedule failed for ${registrable}`, + err instanceof Error ? err : new Error(String(err)), + ); + }); + }); } console.info(`[certificates] ok ${registrable} chainLength=${out.length}`); diff --git a/server/services/dns.ts b/server/services/dns.ts index c4d84a16..8e2d051b 100644 --- a/server/services/dns.ts +++ b/server/services/dns.ts @@ -1,4 +1,5 @@ import { eq } from "drizzle-orm"; +import { after } from "next/server"; import { isCloudflareIp } from "@/lib/cloudflare"; import { USER_AGENT } from "@/lib/constants"; import { db } from "@/lib/db/client"; @@ -422,7 +423,7 @@ async function resolveAllInternal(domain: string): Promise { fetchedAt: now, recordsByType: recordsByTypeToPersist, }); - try { + after(() => { const times = Object.values(recordsByTypeToPersist) .flat() .map((r) => r.expiresAt?.getTime?.()) @@ -431,18 +432,18 @@ async function resolveAllInternal(domain: string): Promise { ); // Always schedule: use the soonest expiry if available, otherwise schedule immediately const soonest = times.length > 0 ? Math.min(...times) : Date.now(); - await scheduleRevalidation( + scheduleRevalidation( registrable, "dns", soonest, existingDomain.lastAccessedAt ?? null, - ); - } catch (err) { - console.warn( - `[dns] schedule failed partial ${registrable}`, - err instanceof Error ? err : new Error(String(err)), - ); - } + ).catch((err) => { + console.warn( + `[dns] schedule failed partial ${registrable}`, + err instanceof Error ? err : new Error(String(err)), + ); + }); + }); } // Merge cached fresh + newly fetched stale @@ -551,7 +552,7 @@ async function resolveAllInternal(domain: string): Promise { recordsByType: recordsByTypeToPersist, }); - try { + after(() => { const times = Object.values(recordsByTypeToPersist) .flat() .map((r) => r.expiresAt?.getTime?.()) @@ -559,18 +560,18 @@ async function resolveAllInternal(domain: string): Promise { (t): t is number => typeof t === "number" && Number.isFinite(t), ); const soonest = times.length > 0 ? Math.min(...times) : now.getTime(); - await scheduleRevalidation( + scheduleRevalidation( registrable, "dns", soonest, existingDomain.lastAccessedAt ?? null, - ); - } catch (err) { - console.warn( - `[dns] schedule failed full ${registrable}`, - err instanceof Error ? err : new Error(String(err)), - ); - } + ).catch((err) => { + console.warn( + `[dns] schedule failed full ${registrable}`, + err instanceof Error ? err : new Error(String(err)), + ); + }); + }); } console.info( `[dns] ok ${registrable} counts=${JSON.stringify(counts)} resolver=${resolverUsed} durations=${JSON.stringify(durationByProvider)}`, diff --git a/server/services/headers.test.ts b/server/services/headers.test.ts index 3161c3b5..d1dd4f10 100644 --- a/server/services/headers.test.ts +++ b/server/services/headers.test.ts @@ -18,6 +18,11 @@ vi.mock("@/lib/domain-server", async () => { }; }); +// Mock scheduleRevalidation to avoid Inngest API calls in tests +vi.mock("@/lib/schedule", () => ({ + scheduleRevalidation: vi.fn().mockResolvedValue(true), +})); + import { afterEach, beforeAll, diff --git a/server/services/headers.ts b/server/services/headers.ts index 53be88e0..8546dc9e 100644 --- a/server/services/headers.ts +++ b/server/services/headers.ts @@ -1,5 +1,6 @@ import { getStatusCode } from "@readme/http-status-codes"; import { eq } from "drizzle-orm"; +import { after } from "next/server"; import { cache } from "react"; import { IMPORTANT_HEADERS } from "@/lib/constants/headers"; import { db } from "@/lib/db/client"; @@ -96,19 +97,19 @@ export const probeHeaders = cache(async function probeHeaders( expiresAt, }); - try { - await scheduleRevalidation( + after(() => { + scheduleRevalidation( registrable, "headers", dueAtMs, existingDomain.lastAccessedAt ?? null, - ); - } catch (err) { - console.warn( - `[headers] schedule failed for ${registrable}`, - err instanceof Error ? err : new Error(String(err)), - ); - } + ).catch((err) => { + console.warn( + `[headers] schedule failed for ${registrable}`, + err instanceof Error ? err : new Error(String(err)), + ); + }); + }); } console.info( `[headers] ok ${registrable} status=${final.status} count=${normalized.length}`, diff --git a/server/services/hosting.ts b/server/services/hosting.ts index d6c8c73c..6a623658 100644 --- a/server/services/hosting.ts +++ b/server/services/hosting.ts @@ -1,5 +1,6 @@ import { eq } from "drizzle-orm"; import { alias } from "drizzle-orm/pg-core"; +import { after } from "next/server"; import { db } from "@/lib/db/client"; import { findDomainByName } from "@/lib/db/repos/domains"; import { upsertHosting } from "@/lib/db/repos/hosting"; @@ -256,19 +257,19 @@ export async function detectHosting(domain: string): Promise { expiresAt, }); - try { - await scheduleRevalidation( + after(() => { + scheduleRevalidation( registrable, "hosting", dueAtMs, existingDomain.lastAccessedAt ?? null, - ); - } catch (err) { - console.warn( - `[hosting] schedule failed for ${registrable}`, - err instanceof Error ? err : new Error(String(err)), - ); - } + ).catch((err) => { + console.warn( + `[hosting] schedule failed for ${registrable}`, + err instanceof Error ? err : new Error(String(err)), + ); + }); + }); } console.info( `[hosting] ok ${registrable} hosting=${hostingName} email=${emailName} dns=${dnsName}`, diff --git a/server/services/registration.ts b/server/services/registration.ts index 86cefcc9..56012f91 100644 --- a/server/services/registration.ts +++ b/server/services/registration.ts @@ -156,19 +156,19 @@ export async function getRegistration(domain: string): Promise { }); // Schedule background revalidation using actual last access time - try { - await scheduleRevalidation( + after(() => { + scheduleRevalidation( registrable, "registration", row.registration.expiresAt.getTime(), row.domainLastAccessedAt ?? null, - ); - } catch (err) { - console.warn( - `[registration] schedule failed for ${registrable}`, - err instanceof Error ? err : new Error(String(err)), - ); - } + ).catch((err) => { + console.warn( + `[registration] schedule failed for ${registrable}`, + err instanceof Error ? err : new Error(String(err)), + ); + }); + }); console.info( `[registration] ok cached ${registrable} registered=${row.registration.isRegistered} registrar=${registrarProvider.name}`, @@ -349,19 +349,19 @@ export async function getRegistration(domain: string): Promise { }); // Schedule background revalidation - try { - await scheduleRevalidation( + after(() => { + scheduleRevalidation( registrable, "registration", expiresAt.getTime(), domainRecord.lastAccessedAt ?? null, - ); - } catch (err) { - console.warn( - `[registration] schedule failed for ${registrable}`, - err instanceof Error ? err : new Error(String(err)), - ); - } + ).catch((err) => { + console.warn( + `[registration] schedule failed for ${registrable}`, + err instanceof Error ? err : new Error(String(err)), + ); + }); + }); console.info( `[registration] ok ${registrable} registered=${record.isRegistered} registrar=${withProvider.registrarProvider.name}`, diff --git a/server/services/seo.ts b/server/services/seo.ts index 5e30e855..023f37ca 100644 --- a/server/services/seo.ts +++ b/server/services/seo.ts @@ -1,4 +1,5 @@ import { eq } from "drizzle-orm"; +import { after } from "next/server"; import { USER_AGENT } from "@/lib/constants"; import { db } from "@/lib/db/client"; import { findDomainByName } from "@/lib/db/repos/domains"; @@ -236,19 +237,19 @@ export async function getSeo(domain: string): Promise { expiresAt, }); - try { - await scheduleRevalidation( + after(() => { + scheduleRevalidation( registrable, "seo", dueAtMs, existingDomain.lastAccessedAt ?? null, - ); - } catch (err) { - console.warn( - `[seo] schedule failed for ${registrable}`, - err instanceof Error ? err : new Error(String(err)), - ); - } + ).catch((err) => { + console.warn( + `[seo] schedule failed for ${registrable}`, + err instanceof Error ? err : new Error(String(err)), + ); + }); + }); } console.info( diff --git a/vitest.setup.ts b/vitest.setup.ts index 711bcab9..70467292 100644 --- a/vitest.setup.ts +++ b/vitest.setup.ts @@ -22,6 +22,22 @@ vi.mock("@/lib/analytics/client", () => ({ // Make server-only a no-op so we can import server modules in tests vi.mock("server-only", () => ({})); +// Mock Next.js after() to execute callbacks immediately in tests +// In production, after() schedules work after the response is sent +vi.mock("next/server", async () => { + const actual = + await vi.importActual("next/server"); + return { + ...actual, + after: (callback: () => void | Promise) => { + // Execute immediately in tests (no request context needed) + Promise.resolve(callback()).catch(() => { + // Swallow errors like production after() does + }); + }, + }; +}); + // Mock ResizeObserver for jsdom environment global.ResizeObserver = class ResizeObserver { observe = vi.fn(); From 6f355d560e05ca84c0b43ebff7d0d7877461f392 Mon Sep 17 00:00:00 2001 From: Jake Jarvis Date: Sat, 15 Nov 2025 19:27:24 -0500 Subject: [PATCH 08/11] refactor: implement debounce logic for domain lastAccessedAt updates --- lib/db/repos/domains.ts | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/db/repos/domains.ts b/lib/db/repos/domains.ts index 621941d9..7db2b549 100644 --- a/lib/db/repos/domains.ts +++ b/lib/db/repos/domains.ts @@ -5,6 +5,13 @@ import { getDomainTld } from "rdapper"; import { db } from "@/lib/db/client"; import { domains } from "@/lib/db/schema"; +/** + * Debounce interval for updating domain lastAccessedAt timestamp. + * Prevents excessive writes by only updating if the last access was + * more than this many milliseconds ago. + */ +const DOMAIN_UPDATE_DEBOUNCE_MS = 5 * 60 * 1000; // 5 minutes + export type UpsertDomainParams = { name: string; // punycode lowercased tld: string; @@ -79,16 +86,15 @@ export async function ensureDomainRecord(domain: string) { */ export async function updateLastAccessed(name: string): Promise { try { - const fiveMinutesAgo = new Date(Date.now() - 5 * 60 * 1000); + const debounceThreshold = new Date(Date.now() - DOMAIN_UPDATE_DEBOUNCE_MS); await db .update(domains) .set({ lastAccessedAt: new Date(), - updatedAt: new Date(), }) .where( - sql`${domains.name} = ${name} AND (${domains.lastAccessedAt} IS NULL OR ${domains.lastAccessedAt} < ${fiveMinutesAgo})`, + sql`${domains.name} = ${name} AND (${domains.lastAccessedAt} IS NULL OR ${domains.lastAccessedAt} < ${debounceThreshold})`, ); } catch (err) { console.warn( From 326d4489420fa75b235a3fae5add6a6961635c36 Mon Sep 17 00:00:00 2001 From: Jake Jarvis Date: Sat, 15 Nov 2025 19:30:17 -0500 Subject: [PATCH 09/11] fix: make ensureDomainRecord throw an error if the domain lacks a valid TLD --- lib/db/repos/domains.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/db/repos/domains.ts b/lib/db/repos/domains.ts index 7db2b549..59e06dfd 100644 --- a/lib/db/repos/domains.ts +++ b/lib/db/repos/domains.ts @@ -57,9 +57,14 @@ export async function findDomainByName(name: string) { * * @param domain - The domain name (should already be normalized/registrable) * @returns The domain record with its ID + * @throws {Error} If the domain has no valid TLD */ export async function ensureDomainRecord(domain: string) { - const tld = getDomainTld(domain) ?? ""; + const tld = getDomainTld(domain); + + if (!tld) { + throw new Error(`Cannot persist domain "${domain}": unable to extract TLD`); + } // For unicode handling, we'd need to use toUnicode from node:url or a library, // but for now we'll use the ASCII version as the unicode name if they match From 07907f59450ddff096894a6392b26b79f2460e8c Mon Sep 17 00:00:00 2001 From: Jake Jarvis Date: Sat, 15 Nov 2025 19:32:00 -0500 Subject: [PATCH 10/11] refactor: update caching logic to handle definitive results and improve error handling - Made the produceAndUpload function optional in CachedAssetOptions, throwing an error if not provided during asset generation. - Enhanced cache hit logic to only treat results as definitive if the URL is present or marked as permanently not found, aligning with Redis L1 semantics. - Improved error handling for Redis write operations, ensuring consistent logging for failures. --- lib/cache.ts | 71 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/lib/cache.ts b/lib/cache.ts index aab07d62..38746c49 100644 --- a/lib/cache.ts +++ b/lib/cache.ts @@ -12,9 +12,10 @@ type CachedAssetOptions> = { */ ttlSeconds?: number; /** - * Produce and upload the asset, returning { url, key } and any metrics to attach + * Optional: Produce and upload the asset, returning { url, key } and any metrics to attach. + * If omitted and both caches miss, a clear error will be thrown. */ - produceAndUpload: () => Promise<{ + produceAndUpload?: () => Promise<{ url: string | null; key?: string; notFound?: boolean; // true if asset permanently doesn't exist (don't retry) @@ -99,31 +100,43 @@ export async function getOrCreateCachedAsset>( try { const dbResult = await fetchFromDb(); if (dbResult) { - // Found in DB, cache it in Redis for next time - after(() => { - const expiresAtMs = Date.now() + ttlSeconds * 1000; - redis - .set( - indexKey, - { - url: dbResult.url, - key: dbResult.key, - notFound: dbResult.notFound ?? undefined, - expiresAtMs, - }, - { ex: ttlSeconds }, - ) - .catch((err) => { - console.error( - "[cache] redis write from db failed", - { indexKey }, - err instanceof Error ? err : new Error(String(err)), - ); - }); - }); + // Only treat as cache hit if we have a definitive result: + // - url is present (string), OR + // - url is null but marked as permanently not found + // This mirrors Redis L1 semantics and prevents "not yet generated" + // rows from being treated as final results. + const isDefinitiveResult = + dbResult.url !== null || dbResult.notFound === true; + + if (isDefinitiveResult) { + // Found in DB, cache it in Redis for next time + after(() => { + const expiresAtMs = Date.now() + ttlSeconds * 1000; + redis + .set( + indexKey, + { + url: dbResult.url, + key: dbResult.key, + notFound: dbResult.notFound ?? undefined, + expiresAtMs, + }, + { ex: ttlSeconds }, + ) + .catch((err) => { + console.error( + "[cache] redis write from db failed", + { indexKey }, + err instanceof Error ? err : new Error(String(err)), + ); + }); + }); - console.debug(`[cache] db hit ${indexKey}`); - return { url: dbResult.url }; + console.debug(`[cache] db hit ${indexKey}`); + return { url: dbResult.url }; + } + // else: url is null but not marked as permanent notFound + // → treat as miss, fall through to generation } } catch (err) { // DB failures should not break the flow; log and fall through to generation @@ -135,6 +148,12 @@ export async function getOrCreateCachedAsset>( } // 3) Generate asset (both caches missed or failed) + if (!produceAndUpload) { + throw new Error( + `[cache] Cannot generate asset for ${indexKey}: produceAndUpload callback not provided`, + ); + } + try { const produced = await produceAndUpload(); const expiresAtMs = Date.now() + ttlSeconds * 1000; From b6244b969d830a2f97ad44f962419d8ed7bc7441 Mon Sep 17 00:00:00 2001 From: Jake Jarvis Date: Sat, 15 Nov 2025 19:37:47 -0500 Subject: [PATCH 11/11] refactor: improve query logic for updating lastAccessedAt in domains - Enhanced the updateLastAccessed function to utilize logical operators for better readability and maintainability. - Replaced raw SQL conditions with Drizzle ORM's query builder methods for improved clarity and type safety. --- lib/db/repos/domains.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/db/repos/domains.ts b/lib/db/repos/domains.ts index 59e06dfd..d52a9821 100644 --- a/lib/db/repos/domains.ts +++ b/lib/db/repos/domains.ts @@ -1,6 +1,6 @@ import "server-only"; -import { eq, sql } from "drizzle-orm"; +import { and, eq, isNull, lt, or } from "drizzle-orm"; import { getDomainTld } from "rdapper"; import { db } from "@/lib/db/client"; import { domains } from "@/lib/db/schema"; @@ -99,7 +99,13 @@ export async function updateLastAccessed(name: string): Promise { lastAccessedAt: new Date(), }) .where( - sql`${domains.name} = ${name} AND (${domains.lastAccessedAt} IS NULL OR ${domains.lastAccessedAt} < ${debounceThreshold})`, + and( + eq(domains.name, name), + or( + isNull(domains.lastAccessedAt), + lt(domains.lastAccessedAt, debounceThreshold), + ), + ), ); } catch (err) { console.warn(