From cc5c653dd1d3359b2a62f39b11d1581e67d2d12b Mon Sep 17 00:00:00 2001 From: Tim Hostetler <6970899+thostetler@users.noreply.github.com> Date: Tue, 16 Dec 2025 15:22:16 -0500 Subject: [PATCH 1/2] SCIX-752 Fix verify middleware crash and implement cookie synchronization Fixed critical bug where users clicking email verification links crashed due to missing session token checks. Implemented proper Sidecar Session pattern for seamless cross-domain session sharing between legacy BBB and Next.js apps. - Added defensive null checks in verifyMiddleware to handle missing session.token - Implemented cookie synchronization with domain stripping and SameSite=Lax per spec - Fixed hash encoding to use proper hex format instead of comma-separated decimals - Fixed apiCookieHash presence check to match empty string return value - Migrated middleware to use edge-compatible error handler - Added comprehensive documentation for Sidecar Session architecture Note: Hash encoding fix will invalidate existing sessions requiring users to re-authenticate. 
--- src/middleware.ts | 2 +- src/middlewares/initSession.ts | 95 +++++++++++++++++++++++++---- src/middlewares/verifyMiddleware.ts | 11 +++- 3 files changed, 94 insertions(+), 14 deletions(-) diff --git a/src/middleware.ts b/src/middleware.ts index ac0794337..879f2714b 100644 --- a/src/middleware.ts +++ b/src/middleware.ts @@ -7,7 +7,7 @@ import { edgeLogger } from '@/logger'; import { NextRequest, NextResponse } from 'next/server'; import { rateLimit } from '@/rateLimit'; import { isLegacySearchURL, legacySearchURLMiddleware } from '@/middlewares/legacySearchURLMiddleware'; -import { ErrorSource, handleError } from '@/lib/errorHandler'; +import { ErrorSource, handleError } from '@/lib/errorHandler.edge'; const log = edgeLogger.child({}, { msgPrefix: '[middleware] ' }); diff --git a/src/middlewares/initSession.ts b/src/middlewares/initSession.ts index fbb823eab..4bf2937ff 100644 --- a/src/middlewares/initSession.ts +++ b/src/middlewares/initSession.ts @@ -115,8 +115,9 @@ const bootstrap = async (cookie?: string) => { }; /** - * Hashes a string using SHA-1 - * @param str + * Hashes a string using SHA-1 and returns hex-encoded string + * @param str - String to hash + * @returns Hex-encoded SHA-1 hash, or empty string if input is empty/error */ const hash = async (str?: string) => { if (!str) { @@ -124,7 +125,9 @@ const hash = async (str?: string) => { } try { const buffer = await globalThis.crypto.subtle.digest('SHA-1', Buffer.from(str, 'utf-8')); - return Array.from(new Uint8Array(buffer)).toString(); + return Array.from(new Uint8Array(buffer)) + .map((b) => b.toString(16).padStart(2, '0')) + .join(''); } catch (err) { handleMiddlewareError(err, { context: { operation: 'hash' }, @@ -134,10 +137,27 @@ const hash = async (str?: string) => { }; /** - * Middleware to initialize the session - * @param req - * @param res - * @param session + * Middleware to initialize the session using the Sidecar Session pattern + * + * The Sidecar Session pattern enables session 
interoperability between different domains: + * - Driver (Source of Truth): Flask Cookie (ads_session) - cryptographically signed by backend + * - Sidecar (Cache): iron-session in Next.js - holds decrypted user data for fast access + * - The Link: Hash of the Flask cookie (apiCookieHash) - used to detect cookie changes + * + * Flow: + * 1. Check: Compare hash of incoming Flask cookie against cached hash + * 2. Sync: + * - Match: Use cached data (0ms latency) + * - Mismatch/Missing: Call API bootstrap to get fresh token and update cache + * + * Cookie Security: + * - Domain: Stripped (defaults to host-only for better security) + * - SameSite: Forced to 'lax' (allows cross-site navigation while preventing CSRF) + * - Secure/HttpOnly: Preserved from API + * + * @param req - The incoming request + * @param res - The response to modify + * @param session - The iron-session to update */ export const initSession = async (req: NextRequest, res: NextResponse, session: IronSession) => { log.debug({ session }, 'Initializing session'); @@ -151,7 +171,7 @@ export const initSession = async (req: NextRequest, res: NextResponse, session: const isUserIdentifiedAsBot = session.bot && isValidToken(session.token); const hasRefreshTokenHeader = req.headers.has('x-refresh-token'); const isTokenValid = isValidToken(session.token); - const isApiCookieHashPresent = apiCookieHash !== null; + const isApiCookieHashPresent = apiCookieHash !== ''; const isApiCookieHashMatching = apiCookieHash === session.apiCookieHash; const isValidSession = @@ -169,17 +189,68 @@ export const initSession = async (req: NextRequest, res: NextResponse, session: await botCheck(req, res); // bootstrap a new token, passing in the current session cookie value - const { token, headers } = (await bootstrap(adsSessionCookie)) ?? 
{}; + const bootstrapResult = await bootstrap(adsSessionCookie); + + if (!bootstrapResult) { + log.error({ + msg: 'Bootstrap failed, session will remain invalid', + hasIncomingCookie: !!adsSessionCookie, + }); + return res; + } + + const { token, headers } = bootstrapResult; // validate token, update session, forward cookies if (isValidToken(token)) { log.debug('Refreshed token is valid'); session.token = token; session.isAuthenticated = isAuthenticated(token); - const sessionCookieValue = setCookie.parse(headers.get('set-cookie') ?? '')[0].value; - res.cookies.set(process.env.ADS_SESSION_COOKIE_NAME, sessionCookieValue); - session.apiCookieHash = await hash(res.cookies.get(process.env.ADS_SESSION_COOKIE_NAME)?.value); + + // Parse the Set-Cookie header from the API + const setCookieHeader = headers.get('set-cookie'); + if (setCookieHeader) { + const parsedCookies = setCookie.parse(setCookieHeader); + const apiCookie = parsedCookies[0]; + + if (apiCookie) { + // Only update if the cookie value actually changed (prevents race conditions) + const currentCookieValue = adsSessionCookie; + const newCookieValue = apiCookie.value; + + if (currentCookieValue !== newCookieValue) { + log.debug({ + msg: 'Cookie value changed, synchronizing', + cookieChanged: true, + }); + + // Sanitize cookie attributes according to the Sidecar Session pattern + res.cookies.set(process.env.ADS_SESSION_COOKIE_NAME, newCookieValue, { + httpOnly: apiCookie.httpOnly ?? true, + secure: apiCookie.secure ?? process.env.NODE_ENV === 'production', + sameSite: apiCookie.sameSite === 'none' ? 'none' : 'lax', + path: apiCookie.path ?? 
'/', + maxAge: apiCookie.maxAge, + }); + + session.apiCookieHash = await hash(newCookieValue); + } else { + log.debug({ + msg: 'Cookie value unchanged, skipping sync', + cookieChanged: false, + }); + session.apiCookieHash = apiCookieHash; + } + } + } + await session.save(); log.debug('Saved to session'); + } else { + log.error({ + msg: 'Bootstrap returned invalid token', + hasToken: !!token, + tokenData: token ? { username: token.username, anonymous: token.anonymous } : null, + }); } }; diff --git a/src/middlewares/verifyMiddleware.ts b/src/middlewares/verifyMiddleware.ts index 39ccc3678..9bbf9558e 100644 --- a/src/middlewares/verifyMiddleware.ts +++ b/src/middlewares/verifyMiddleware.ts @@ -4,7 +4,7 @@ import { sessionConfig } from '@/config'; import { edgeLogger } from '@/logger'; import { ApiTargets } from '@/api/models'; import { IVerifyAccountResponse } from '@/api/user/types'; -import { createErrorHandler, ErrorSource } from '@/lib/errorHandler.edge'; +import { createErrorHandler, ErrorSource, ErrorSeverity } from '@/lib/errorHandler.edge'; const log = edgeLogger.child({}, { msgPrefix: '[verifyMiddleware] ' }); const handleMiddlewareError = createErrorHandler({ @@ -40,8 +40,17 @@ export const verifyMiddleware = async (req: NextRequest, res: NextResponse) => { log.debug({ msg: 'Verifying token', route, + hasToken: !!session.token, }); + if (!session.token?.access_token) { + handleMiddlewareError(new Error('No access token available for verification'), { + context: { route, hasSession: !!session, hasToken: !!session.token }, + severity: ErrorSeverity.ERROR, + }); + return redirect(newUrl, req, 'verify-account-failed'); + } + try { const url = `${process.env.API_HOST_SERVER}${ApiTargets.VERIFY}/${token}`; const headers = new Headers({ From fd6cc7fde335adcb7ad2e2544f231f9427238113 Mon Sep 17 00:00:00 2001 From: Tim Hostetler <6970899+thostetler@users.noreply.github.com> Date: Tue, 16 Dec 2025 15:53:37 -0500 Subject: [PATCH 2/2] add some middleware docs --- 
docs/authentication-and-middleware.md | 106 ++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 docs/authentication-and-middleware.md diff --git a/docs/authentication-and-middleware.md b/docs/authentication-and-middleware.md new file mode 100644 index 000000000..f80b06468 --- /dev/null +++ b/docs/authentication-and-middleware.md @@ -0,0 +1,106 @@ +# Middleware and Session Management Architecture + +## 1. System Architecture and Data Flow + +This document details the middleware, authentication, verification, and session management workflows for the Next.js application. The architecture is designed to securely manage user sessions by synchronizing with a remote, authoritative authentication system. + +The core of the architecture is a "Sidecar Session" pattern. This pattern uses Next.js middleware to intercept incoming requests and maintain a local, encrypted session cache (`scix_session`) that is kept in sync with an opaque, master session token (`session` cookie) issued by a separate backend service. + +The general data flow is as follows: + +1. **Request Interception:** All relevant incoming requests are first processed by the main Next.js middleware (`src/middleware.ts`). +2. **Session Validation:** The `initSession` middleware function is called. It compares a fingerprint of the master `session` cookie against a fingerprint stored in the local `scix_session` cookie. +3. **Fast Path (Cache Hit):** If the fingerprints match, the local session is considered valid. The request is passed to the application without any external calls. This is the default, high-performance path. +4. **Slow Path (Cache Miss):** If the fingerprints do not match (or if the local session is missing), the middleware initiates a synchronization process. +5. **Session Synchronization:** The middleware makes a server-to-server API call to the `/v1/user/bootstrap` endpoint, forwarding the master `session` cookie for validation. +6. 
**Update Local Session:** The `bootstrap` endpoint returns the authoritative user data. The middleware updates the local `scix_session` cookie with this new data and a new fingerprint of the master `session` cookie. +7. **Application Logic:** The request, now guaranteed to have a valid session, is passed to the Next.js application (pages, API routes, etc.). + +This architecture provides a balance of performance (by caching session data locally) and security (by relying on a single, authoritative source of truth for authentication). + +## 2. Core Technologies + +- **Next.js Middleware:** The entry point for all session management logic, defined in `src/middleware.ts`. +- **iron-session:** A library for creating stateless, encrypted session data stored in a cookie. It powers the local `scix_session`. +- **Web Crypto API (`SubtleCrypto`):** Used to generate SHA-1 hashes (fingerprints) of the master session cookie for fast, reliable comparisons. + +## 3. Detailed Authentication Workflow + +The primary logic is contained within the `initSession` middleware (`src/middlewares/initSession.ts`). + +1. **Cookie Inspection:** The middleware checks for the presence of the `session` cookie on the incoming request. If absent, the user is unauthenticated, and the workflow terminates. +2. **Fingerprint Generation:** A SHA-1 hash of the `session` cookie's value is computed. +3. **Local Session Verification (Fast Path):** + - The encrypted `scix_session` data is loaded. + - The newly computed fingerprint is compared to the fingerprint stored within `scix_session`. + - If the fingerprints match and the session's internal access token is not expired, the session is valid. The request proceeds to the application. +4. **Remote Session Synchronization (Slow Path):** + - Triggered if fingerprints mismatch, the `scix_session` is absent, or the token is expired. + - A `fetch` call is made to the `/v1/user/bootstrap` API endpoint. 
The `session` cookie is passed in the `Cookie` header of this request. +5. **Payload and Header Processing:** + - The `bootstrap` API validates the `session` cookie and responds with: + - A JSON payload (`IBootstrapPayload`) containing the authoritative user data (`IUserData`). + - A `Set-Cookie` header for a renewed `session` cookie. + - The middleware intercepts this API response. +6. **Local Session Update:** The `IUserData` and the new `session` fingerprint are saved into the encrypted `scix_session`. +7. **Cookie Header Manipulation:** When the cookie value in the API response's `Set-Cookie` header differs from the incoming `session` cookie, the cookie is re-issued to the browser with sanitized attributes: + - The `Domain` attribute is stripped to make it a host-only cookie. + - The `SameSite` attribute is set to `Lax`, unless the API cookie's parsed `SameSite` value is exactly `none`, which is kept as-is. +8. **Completion:** The request proceeds with a fully validated and hydrated session. + +## 4. Middleware Chain Breakdown + +The main middleware file (`src/middleware.ts`) orchestrates several smaller, single-purpose middleware functions. + +### `initSession` (`src/middlewares/initSession.ts`) + +The core of the authentication system. It ensures every request has a valid, synchronized user session by implementing the detailed workflow described above. + +### `legacyAppDetectionMiddleware` (`src/middlewares/legacyAppDetection.ts`) + +A UX enhancement middleware that inspects the `Referer` header. +- **Function:** If a user navigates from the legacy application's domain, it sets a temporary `legacyAppReferrer = true` flag in the `scix_session`. +- **Purpose:** This flag is consumed during Server-Side Rendering (`src/ssr-utils.ts`) to adjust the application's default state for a seamless user experience. The flag is cleared after use. + +## 5. Cookie Strategy + +Two primary cookies are used to manage authentication.
+ +| Cookie Name | Stored Data | Issuer | `SameSite` Policy | Purpose | +| ----------------- | -------------------------------------------- | --------------- | --------------------- | -------------------------------------------------- | +| `session` | Opaque, master authentication token | Backend API | `Lax` (set by middleware) | The authoritative source of truth for authentication. | +| `scix_session` | Encrypted `IUserData` and `session` fingerprint | Next.js Middleware | `Strict` (by config) | A performant, local cache of the user session. | + +The `SameSite` policies are intentionally different: +- `scix_session` is `Strict` for maximum security against CSRF attacks, as it only needs to be present for requests originating from the application itself. +- `session` is set to `Lax` to ensure it is sent during top-level navigation from the legacy domain to the new application, which is a primary user journey. + +## 6. Session Data Structure (`IUserData`) + +The `scix_session` cookie contains an `iron-session` encrypted JSON object based on the `IUserData` interface (`src/api/user/types.ts`). This structure includes: + +- User ID +- Email +- Access and Refresh Tokens +- Roles and Permissions +- Other non-sensitive user profile information + +## 7. Server-Side Rendering (SSR) Integration + +Session data is integrated into the Next.js SSR process to ensure pages are rendered with correct user information server-side, preventing client-side content flashing. + +**File:** `src/ssr-utils.ts` + +The `updateUserStateSSR` helper function is used within `getServerSideProps`. It reads the user data from the `req.session` object (populated by the middleware) and injects it as props into the page component, making the user state available before the page is sent to the client. + +## 8. 
Design Rationale and Context + +The "Sidecar Session" pattern was chosen to solve a specific technical challenge: enabling a gradual "Strangler Fig" migration from a legacy application on one domain (`.harvard.edu`) to a new Next.js application on another (`.org`). + +Standard browser security measures prevent cookies from being shared across different registrable domains. This means the new application cannot directly read or validate the session cookie set by the legacy system. + +This architecture bridges that gap by: +1. Treating the legacy `session` cookie as an opaque token. +2. Using a backend API (`/bootstrap`) as a trusted "verifier" that can validate the legacy cookie. +3. Maintaining a local session cache (`scix_session`) in the Next.js application to avoid calling the verifier on every single request, thereby ensuring high performance. +4. Carefully manipulating the legacy cookie's `Domain` and `SameSite` attributes in the middleware to make cross-domain navigation seamless for the end-user.