diff --git a/Directory.Packages.props b/Directory.Packages.props index 566dca579..6d43beffa 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -23,6 +23,7 @@ + diff --git a/src/api/Elastic.Documentation.Api.Core/TelemetryConstants.cs b/src/api/Elastic.Documentation.Api.Core/TelemetryConstants.cs index 6a8d2683c..e6d8d4bd8 100644 --- a/src/api/Elastic.Documentation.Api.Core/TelemetryConstants.cs +++ b/src/api/Elastic.Documentation.Api.Core/TelemetryConstants.cs @@ -31,4 +31,10 @@ public static class TelemetryConstants /// Used to trace frontend telemetry proxying. /// public const string OtlpProxySourceName = "Elastic.Documentation.Api.OtlpProxy"; + + /// + /// ActivitySource name for distributed cache operations. + /// Used to trace cache hits, misses, and performance. + /// + public const string CacheSourceName = "Elastic.Documentation.Api.Cache"; } diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Aws/IParameterProvider.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Aws/IParameterProvider.cs index c35dac454..b911b856d 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Aws/IParameterProvider.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Aws/IParameterProvider.cs @@ -4,6 +4,10 @@ namespace Elastic.Documentation.Api.Infrastructure.Aws; +/// +/// Abstraction for retrieving configuration parameters. +/// Infrastructure concern: Used by other Infrastructure adapters to get configuration. +/// public interface IParameterProvider { Task GetParam(string name, bool withDecryption = true, Cancel ctx = default); diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Caching/CacheKey.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Caching/CacheKey.cs new file mode 100644 index 000000000..67e565463 --- /dev/null +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Caching/CacheKey.cs @@ -0,0 +1,45 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Security.Cryptography; +using System.Text; + +namespace Elastic.Documentation.Api.Infrastructure.Caching; + +/// +/// Represents a cache key with automatic hashing of sensitive identifiers. +/// Prevents exposing sensitive data in cache keys (CodeQL security requirement). +/// +public sealed class CacheKey +{ + /// + /// Gets the hashed key string for use in cache operations. + /// + public string Value { get; } + + private CacheKey(string category, string identifier) + { + // Hash the identifier to prevent exposing sensitive data (CodeQL security requirement) + var bytes = Encoding.UTF8.GetBytes(identifier); + var hash = SHA256.HashData(bytes); + var hashBase64 = Convert.ToBase64String(hash); + // Use base64url encoding for cache key (URL-safe) + var hashBase64Url = hashBase64.Replace('+', '-').Replace('/', '_').TrimEnd('='); + Value = $"{category}:{hashBase64Url}"; + } + + /// + /// Creates a cache key from a category and identifier. + /// The identifier is automatically hashed to prevent exposing sensitive data. + /// + /// Cache category (e.g., "idtoken", "search") + /// Identifier that may contain sensitive data (will be hashed) + /// A CacheKey instance with the hashed key + public static CacheKey Create(string category, string identifier) => new(category, identifier); + + /// + /// Implicit conversion to string for convenience. + /// + public static implicit operator string(CacheKey key) => key.Value; +} diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Caching/DynamoDbDistributedCache.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Caching/DynamoDbDistributedCache.cs new file mode 100644 index 000000000..3b6f70791 --- /dev/null +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Caching/DynamoDbDistributedCache.cs @@ -0,0 +1,153 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Diagnostics; +using System.Globalization; +using Amazon.DynamoDBv2; +using Amazon.DynamoDBv2.Model; +using Elastic.Documentation.Api.Core; +using Microsoft.Extensions.Logging; + +namespace Elastic.Documentation.Api.Infrastructure.Caching; + +/// +/// DynamoDB implementation of for Lambda environments. +/// Provides distributed caching across all Lambda containers using DynamoDB as backing store. +/// Clean Code: Constructor injection (Dependency Inversion), small focused methods. +/// +public sealed class DynamoDbDistributedCache(IAmazonDynamoDB dynamoDb, string tableName, ILogger logger) : IDistributedCache +{ + private static readonly ActivitySource ActivitySource = new(TelemetryConstants.CacheSourceName); + + private readonly IAmazonDynamoDB _dynamoDb = dynamoDb; + private readonly string _tableName = tableName; + private readonly ILogger _logger = logger; + + // DynamoDB attribute names + private const string AttributeCacheKey = "CacheKey"; + private const string AttributeValue = "Value"; + private const string AttributeTtl = "TTL"; + + public async Task GetAsync(CacheKey key, Cancel ct = default) + { + var hashedKey = key.Value; + using var activity = ActivitySource.StartActivity("get cache", ActivityKind.Client); + _ = (activity?.SetTag("cache.key", hashedKey)); + _ = (activity?.SetTag("cache.table", _tableName)); + _ = (activity?.SetTag("cache.backend", "dynamodb")); + + try + { + var response = await _dynamoDb.GetItemAsync(new GetItemRequest + { + TableName = _tableName, + Key = new Dictionary + { + [AttributeCacheKey] = new AttributeValue { S = hashedKey } + } + }, ct); + + if (!response.IsItemSet) + { + _ = (activity?.SetTag("cache.hit", false)); + _logger.LogDebug("Cache miss for key: {CacheKey}", hashedKey); + return null; + } + + // DynamoDB TTL handles expiration automatically + // Items may still be returned briefly after expiration until DynamoDB deletes them + var value = response.Item.TryGetValue(AttributeValue, out var valueAttr) + ? valueAttr.S + : null; + + _ = (activity?.SetTag("cache.hit", value != null)); + if (value != null) + { + _logger.LogDebug("Cache hit for key: {CacheKey}", hashedKey); + } + + return value; + } + catch (ResourceNotFoundException ex) + { + // Table doesn't exist - return null gracefully + // Infrastructure should create table, but don't fail hard in dev + _ = (activity?.SetTag("cache.error", "table_not_found")); + _logger.LogWarning(ex, "DynamoDB table {TableName} not found. Cache operations will fail gracefully.", _tableName); + return null; + } + catch (ProvisionedThroughputExceededException ex) + { + _ = (activity?.SetTag("cache.error", "provisioned_throughput_exceeded")); + _logger.LogWarning(ex, "Provisioned throughput exceeded for DynamoDB cache table {TableName}.", _tableName); + return null; + } + catch (InternalServerErrorException ex) + { + _ = (activity?.SetTag("cache.error", "internal_server_error")); + _logger.LogError(ex, "Internal server error retrieving cache key {CacheKey} from DynamoDB", hashedKey); + return null; + } + catch (Exception ex) when (ex is not OperationCanceledException and not TaskCanceledException) + { + _ = (activity?.SetStatus(ActivityStatusCode.Error, ex.Message)); + _logger.LogError(ex, "Error retrieving cache key {CacheKey} from DynamoDB", hashedKey); + return null; // Fail gracefully + } + // Allow cancellation exceptions to propagate to respect request lifetimes + } + + public async Task SetAsync(CacheKey key, string value, TimeSpan ttl, Cancel ct = default) + { + var hashedKey = key.Value; + using var activity = ActivitySource.StartActivity("set cache", ActivityKind.Client); + _ = (activity?.SetTag("cache.key", hashedKey)); + _ = (activity?.SetTag("cache.table", _tableName)); + _ = (activity?.SetTag("cache.backend", "dynamodb")); + _ = (activity?.SetTag("cache.ttl", ttl.TotalSeconds)); + + try + { + var expiresAt = DateTimeOffset.UtcNow.Add(ttl); + var ttlTimestamp = expiresAt.ToUnixTimeSeconds(); + + _ = await _dynamoDb.PutItemAsync(new PutItemRequest + { + TableName = _tableName, + Item = new Dictionary + { + [AttributeCacheKey] = new AttributeValue { S = hashedKey }, + [AttributeValue] = new AttributeValue { S = value }, + [AttributeTtl] = new AttributeValue { N = ttlTimestamp.ToString(CultureInfo.InvariantCulture) } + } + }, ct); + + _logger.LogDebug("Cache set for key: {CacheKey}, TTL: {TTL}s", hashedKey, ttl.TotalSeconds); + } + catch (ResourceNotFoundException ex) + { + // Table doesn't exist - fail silently in dev, log in production + // Infrastructure should create table before deployment + _ = (activity?.SetTag("cache.error", "table_not_found")); + _logger.LogWarning(ex, "DynamoDB table {TableName} not found. Unable to cache key {CacheKey}.", _tableName, hashedKey); + } + catch (ProvisionedThroughputExceededException ex) + { + _ = (activity?.SetTag("cache.error", "provisioned_throughput_exceeded")); + _logger.LogWarning(ex, "Provisioned throughput exceeded for DynamoDB cache table {TableName}. Unable to cache key {CacheKey}.", _tableName, hashedKey); + } + catch (InternalServerErrorException ex) + { + _ = (activity?.SetTag("cache.error", "internal_server_error")); + _logger.LogError(ex, "Internal server error setting cache key {CacheKey} in DynamoDB", hashedKey); + } + catch (Exception ex) when (ex is not OperationCanceledException and not TaskCanceledException) + { + // Allow cancellation exceptions to propagate to respect request lifetimes + _ = activity?.SetStatus(ActivityStatusCode.Error, ex.Message); + _logger.LogError(ex, "Error setting cache key {CacheKey} in DynamoDB", hashedKey); + // Fail gracefully - don't throw + } + } +} diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Caching/IDistributedCache.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Caching/IDistributedCache.cs new file mode 100644 index 000000000..de875cb36 --- /dev/null +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Caching/IDistributedCache.cs @@ -0,0 +1,38 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +namespace Elastic.Documentation.Api.Infrastructure.Caching; + +/// +/// Abstraction for distributed caching across Lambda invocations. +/// Infrastructure concern: Used by other Infrastructure adapters for caching. +/// +/// +/// +/// Cache keys should be created using to automatically hash +/// sensitive identifiers and prevent exposing sensitive data in cache keys (CodeQL security requirement). +/// +/// +/// Key format: {category}:{hashed-identifier} (e.g., "idtoken:{hash}" where hash is SHA256 of the identifier) +/// +/// +public interface IDistributedCache +{ + /// + /// Retrieves a cached value by key. + /// + /// Cache key created using (format: {category}:{hashed-identifier}) + /// Cancellation token + /// Cached value as string, or null if not found or expired + Task GetAsync(CacheKey key, Cancel ct = default); + + /// + /// Stores a value in the cache with a time-to-live. + /// + /// Cache key created using (format: {category}:{hashed-identifier}) + /// Value to cache (typically JSON-serialized data) + /// Time-to-live duration + /// Cancellation token + Task SetAsync(CacheKey key, string value, TimeSpan ttl, Cancel ct = default); +} diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Caching/InMemoryDistributedCache.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Caching/InMemoryDistributedCache.cs new file mode 100644 index 000000000..9733ff574 --- /dev/null +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Caching/InMemoryDistributedCache.cs @@ -0,0 +1,57 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Collections.Concurrent; + +namespace Elastic.Documentation.Api.Infrastructure.Caching; + +/// +/// In-memory implementation of for local development. +/// Uses ConcurrentDictionary for thread-safe storage with TTL-based expiration. +/// Clean Code: Sealed class (not meant for inheritance), single responsibility. +/// +public sealed class InMemoryDistributedCache : IDistributedCache +{ + private readonly ConcurrentDictionary _cache = new(); + + /// + /// Immutable cache entry with value and expiration timestamp. + /// Clean Code: Record type ensures immutability. + /// + private sealed record CacheEntry(string Value, DateTimeOffset ExpiresAt); + + public Task GetAsync(CacheKey key, Cancel ct = default) + { + var hashedKey = key.Value; + if (_cache.TryGetValue(hashedKey, out var entry)) + { + if (IsExpired(entry)) + { + // Remove expired entry + _ = _cache.TryRemove(hashedKey, out _); + return Task.FromResult(null); + } + + return Task.FromResult(entry.Value); + } + + return Task.FromResult(null); + } + + public Task SetAsync(CacheKey key, string value, TimeSpan ttl, Cancel ct = default) + { + var hashedKey = key.Value; + var expiresAt = DateTimeOffset.UtcNow.Add(ttl); + var entry = new CacheEntry(value, expiresAt); + _ = _cache.AddOrUpdate(hashedKey, entry, (_, _) => entry); + return Task.CompletedTask; + } + + /// + /// Checks if a cache entry has expired. + /// Clean Code: Single-purpose helper method with intention-revealing name. + /// + private static bool IsExpired(CacheEntry entry) => + entry.ExpiresAt <= DateTimeOffset.UtcNow; +} diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Caching/MultiLayerCache.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Caching/MultiLayerCache.cs new file mode 100644 index 000000000..273dd1edb --- /dev/null +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Caching/MultiLayerCache.cs @@ -0,0 +1,131 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Collections.Concurrent; +using System.Diagnostics; +using Elastic.Documentation.Api.Core; +using Microsoft.Extensions.Logging; + +namespace Elastic.Documentation.Api.Infrastructure.Caching; + +/// +/// Multi-layer cache decorator implementing L1 (in-memory) + L2 (distributed) caching strategy. +/// Optimizes Lambda warm-start performance while maintaining cross-container cache sharing. +/// Design Pattern: Decorator - Adds L1 layer to existing cache implementation. +/// Caching Strategy: Cache-aside (read-through) and write-through patterns. +/// +public sealed class MultiLayerCache(IDistributedCache l2Cache, ILogger logger) : IDistributedCache +{ + private static readonly ActivitySource ActivitySource = new(TelemetryConstants.CacheSourceName); + + // L1 Cache: Static in-memory cache survives across Lambda warm starts + // Thread-safe for concurrent requests within same Lambda container + private static readonly ConcurrentDictionary L1Cache = new(); + + // L2 Cache: DynamoDB for cross-container sharing + private readonly IDistributedCache _l2Cache = l2Cache; + private readonly ILogger _logger = logger; + + /// + /// Immutable L1 cache entry with value and expiration timestamp. + /// Clean Code: Record type ensures immutability, intention-revealing name. + /// + private sealed record L1CacheEntry(string Value, DateTimeOffset ExpiresAt); + + public async Task GetAsync(CacheKey key, Cancel ct = default) + { + var hashedKey = key.Value; + using var activity = ActivitySource.StartActivity("get cache", ActivityKind.Internal); + _ = (activity?.SetTag("cache.key", hashedKey)); + _ = (activity?.SetTag("cache.backend", "multilayer")); + + // L1: Check in-memory cache first (fastest) + if (TryGetFromL1(hashedKey, out var value)) + { + _ = (activity?.SetTag("cache.l1.hit", true)); + _ = (activity?.SetTag("cache.l2.hit", false)); + _logger.LogDebug("L1 cache hit for key: {CacheKey}", hashedKey); + return value; + } + + _ = (activity?.SetTag("cache.l1.hit", false)); + + // L2: Check distributed cache (DynamoDB) + var l2Value = await _l2Cache.GetAsync(key, ct); + + if (l2Value != null) + { + _ = (activity?.SetTag("cache.l2.hit", true)); + _logger.LogDebug("L2 cache hit for key: {CacheKey}, populating L1", hashedKey); + + // Populate L1 cache for future requests in this container + // Use a reasonable TTL for L1 (1 hour) to match ID token lifetime + PopulateL1(hashedKey, l2Value, TimeSpan.FromHours(1)); + } + else + { + _ = (activity?.SetTag("cache.l2.hit", false)); + _logger.LogDebug("Cache miss (L1 and L2) for key: {CacheKey}", hashedKey); + } + + return l2Value; + } + + public async Task SetAsync(CacheKey key, string value, TimeSpan ttl, Cancel ct = default) + { + var hashedKey = key.Value; + using var activity = ActivitySource.StartActivity("set cache", ActivityKind.Internal); + _ = (activity?.SetTag("cache.key", hashedKey)); + _ = (activity?.SetTag("cache.backend", "multilayer")); + _ = (activity?.SetTag("cache.ttl", ttl.TotalSeconds)); + + // Write-through: Update both L1 and L2 + PopulateL1(hashedKey, value, ttl); + _logger.LogDebug("Writing to L1 and L2 cache for key: {CacheKey}, TTL: {TTL}s", hashedKey, ttl.TotalSeconds); + + await _l2Cache.SetAsync(key, value, ttl, ct); + } + + /// + /// Attempts to retrieve value from L1 cache. + /// Clean Code: Single Responsibility - only handles L1 retrieval logic. + /// + private static bool TryGetFromL1(string key, out string? value) + { + if (L1Cache.TryGetValue(key, out var entry)) + { + if (IsExpired(entry)) + { + // Remove expired entry from L1 + _ = L1Cache.TryRemove(key, out _); + value = null; + return false; + } + + value = entry.Value; + return true; + } + + value = null; + return false; + } + + /// + /// Populates L1 cache with value and expiration. + /// Clean Code: Single Responsibility - only handles L1 population logic. + /// + private static void PopulateL1(string key, string value, TimeSpan ttl) + { + var expiresAt = DateTimeOffset.UtcNow.Add(ttl); + var entry = new L1CacheEntry(value, expiresAt); + _ = L1Cache.AddOrUpdate(key, entry, (_, _) => entry); + } + + /// + /// Checks if L1 cache entry has expired. + /// Clean Code: Single-purpose helper with intention-revealing name. + /// + private static bool IsExpired(L1CacheEntry entry) => + entry.ExpiresAt <= DateTimeOffset.UtcNow; +} diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Elastic.Documentation.Api.Infrastructure.csproj b/src/api/Elastic.Documentation.Api.Infrastructure/Elastic.Documentation.Api.Infrastructure.csproj index 7724ea935..58e3c4880 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Elastic.Documentation.Api.Infrastructure.csproj +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Elastic.Documentation.Api.Infrastructure.csproj @@ -17,6 +17,7 @@ + diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Gcp/GcpIdTokenProvider.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Gcp/GcpIdTokenProvider.cs index e6df30897..bb731d8f8 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Gcp/GcpIdTokenProvider.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Gcp/GcpIdTokenProvider.cs @@ -2,29 +2,36 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information -using System.Collections.Concurrent; using System.Security.Cryptography; using System.Text; using System.Text.Json; using System.Text.Json.Serialization; +using Elastic.Documentation.Api.Infrastructure.Caching; namespace Elastic.Documentation.Api.Infrastructure.Gcp; // This is a custom implementation to create an ID token for GCP. // Because Google.Api.Auth.OAuth2 is not compatible with AOT -public class GcpIdTokenProvider(HttpClient httpClient) : IGcpIdTokenProvider +// Clean Architecture: Depends on IDistributedCache abstraction from Infrastructure layer +public class GcpIdTokenProvider(IHttpClientFactory httpClientFactory, IDistributedCache cache) : IGcpIdTokenProvider { - // Cache tokens by target audience to avoid regenerating them on every request - private static readonly ConcurrentDictionary TokenCache = new(); - - private sealed record CachedToken(string Token, DateTimeOffset ExpiresAt); + private readonly IHttpClientFactory _httpClientFactory = httpClientFactory; + private readonly IDistributedCache _cache = cache; public async Task GenerateIdTokenAsync(string serviceAccount, string targetAudience, Cancel cancellationToken = default) { - // Check if we have a valid cached token - if (TokenCache.TryGetValue(targetAudience, out var cachedToken) && - cachedToken.ExpiresAt > DateTimeOffset.UtcNow.AddMinutes(1)) // Refresh 1 minute before expiry - return cachedToken.Token; + // Check distributed cache first (works across all Lambda containers) + // CacheKey automatically hashes the identifier to prevent exposing sensitive data + // DynamoDB ExpiresAt attribute handles expiration checking + var cacheKey = CacheKey.Create("idtoken", targetAudience); + var cachedToken = await _cache.GetAsync(cacheKey, cancellationToken); + + if (cachedToken != null) + { + // Cache implementation (DynamoDbDistributedCache) already checked ExpiresAt + // If we get here, the token is still valid + return cachedToken; + } // Read and parse service account key file using System.Text.Json source generation (AOT compatible) var serviceAccountJson = JsonSerializer.Deserialize(serviceAccount, GcpJsonContext.Default.ServiceAccountKey); @@ -72,10 +79,12 @@ public async Task GenerateIdTokenAsync(string serviceAccount, string tar // Exchange JWT for ID token var idToken = await ExchangeJwtForIdToken(jwt, targetAudience, cancellationToken); - var expiresAt = expirationTime.Subtract(TimeSpan.FromMinutes(1)); - _ = TokenCache.AddOrUpdate(targetAudience, - new CachedToken(idToken, expiresAt), - (_, _) => new CachedToken(idToken, expiresAt)); + // Cache the token in distributed cache (shared across all Lambda containers) + // Use 15-minute buffer for maximum safety against clock skew and edge cases + // DynamoDB will use ExpiresAt attribute for expiration checking and TTL for cleanup + var cacheExpiry = expirationTime.Subtract(TimeSpan.FromMinutes(15)); + var cacheTtl = cacheExpiry - now; + await _cache.SetAsync(cacheKey, idToken, cacheTtl, cancellationToken); return idToken; } @@ -89,6 +98,7 @@ private async Task ExchangeJwtForIdToken(string jwt, string targetAudien new KeyValuePair("target_audience", targetAudience) ]); + var httpClient = _httpClientFactory.CreateClient(); var response = await httpClient.PostAsync("https://oauth2.googleapis.com/token", requestContent, cancellationToken); _ = response.EnsureSuccessStatusCode(); @@ -107,6 +117,7 @@ private static string Base64UrlEncode(byte[] input) // Convert base64 to base64url encoding return base64.Replace('+', '-').Replace('/', '_').TrimEnd('='); } + } internal readonly record struct ServiceAccountKey( diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/OpenTelemetry/OpenTelemetryExtensions.cs b/src/api/Elastic.Documentation.Api.Infrastructure/OpenTelemetry/OpenTelemetryExtensions.cs index d6572a751..26af4ee00 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/OpenTelemetry/OpenTelemetryExtensions.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/OpenTelemetry/OpenTelemetryExtensions.cs @@ -36,6 +36,7 @@ public static TracerProviderBuilder AddDocsApiTracing(this TracerProviderBuilder .AddSource(TelemetryConstants.AskAiSourceName) .AddSource(TelemetryConstants.StreamTransformerSourceName) .AddSource(TelemetryConstants.OtlpProxySourceName) + .AddSource(TelemetryConstants.CacheSourceName) .AddAspNetCoreInstrumentation(aspNetCoreOptions => { // Don't trace root API endpoint (health check) diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/ServicesExtension.cs b/src/api/Elastic.Documentation.Api.Infrastructure/ServicesExtension.cs index dea5311e0..88ebaba3d 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/ServicesExtension.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/ServicesExtension.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information using System.ComponentModel.DataAnnotations; +using Amazon.DynamoDBv2; using Elastic.Documentation.Api.Core; using Elastic.Documentation.Api.Core.AskAi; using Elastic.Documentation.Api.Core.Search; @@ -11,6 +12,7 @@ using Elastic.Documentation.Api.Infrastructure.Adapters.Search; using Elastic.Documentation.Api.Infrastructure.Adapters.Telemetry; using Elastic.Documentation.Api.Infrastructure.Aws; +using Elastic.Documentation.Api.Infrastructure.Caching; using Elastic.Documentation.Api.Infrastructure.Gcp; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; @@ -71,6 +73,7 @@ private static void AddElasticDocsApiUsecases(this IServiceCollection services, }); // Register AppEnvironment as a singleton for dependency injection _ = services.AddSingleton(new AppEnvironment { Current = appEnv }); + AddDistributedCache(services, appEnv); AddParameterProvider(services, appEnv); AddAskAiUsecase(services, appEnv); AddSearchUsecase(services, appEnv); @@ -121,6 +124,58 @@ private static void AddParameterProvider(IServiceCollection services, AppEnv app } } + private static void AddDistributedCache(IServiceCollection services, AppEnv appEnv) + { + var logger = GetLogger(services); + + switch (appEnv) + { + case AppEnv.Dev: + { + logger?.LogInformation("Configuring InMemoryDistributedCache for environment {AppEnvironment}", appEnv); + _ = services.AddSingleton(); + logger?.LogInformation("InMemoryDistributedCache registered for local development"); + break; + } + case AppEnv.Prod: + case AppEnv.Staging: + case AppEnv.Edge: + { + logger?.LogInformation("Configuring DynamoDB distributed cache for environment {AppEnvironment}", appEnv); + try + { + // Register AWS DynamoDB client + _ = services.AddSingleton(); + logger?.LogInformation("AmazonDynamoDB client registered"); + + // Register multi-layer cache (L1: in-memory + L2: DynamoDB) + _ = services.AddSingleton(sp => + { + var dynamoDb = sp.GetRequiredService(); + var tableName = $"docs-api-cache-{appEnv.ToStringFast()}"; + var dynamoLogger = sp.GetRequiredService>(); + var multiLogger = sp.GetRequiredService>(); + + var dynamoCache = new DynamoDbDistributedCache(dynamoDb, tableName, dynamoLogger); + var multiLayerCache = new MultiLayerCache(dynamoCache, multiLogger); + logger?.LogInformation("Multi-layer cache registered with DynamoDB table: {TableName}", tableName); + return multiLayerCache; + }); + } + catch (Exception ex) + { + logger?.LogError(ex, "Failed to configure distributed cache for environment {AppEnvironment}", appEnv); + throw; + } + break; + } + default: + { + throw new ArgumentOutOfRangeException(nameof(appEnv), appEnv, "Unsupported environment for distributed cache."); + } + } + } + private static void AddAskAiUsecase(IServiceCollection services, AppEnv appEnv) { var logger = GetLogger(services); @@ -128,8 +183,10 @@ private static void AddAskAiUsecase(IServiceCollection services, AppEnv appEnv) try { + // Register GcpIdTokenProvider with distributed cache dependency + // Clean: Let DI handle everything automatically _ = services.AddSingleton(); - logger?.LogInformation("GcpIdTokenProvider registered successfully"); + logger?.LogInformation("GcpIdTokenProvider registered with distributed cache support"); _ = services.AddScoped(); logger?.LogInformation("LlmGatewayOptions registered successfully"); diff --git a/tests/Elastic.Documentation.Api.Infrastructure.Tests/Caching/DistributedCacheTests.cs b/tests/Elastic.Documentation.Api.Infrastructure.Tests/Caching/DistributedCacheTests.cs new file mode 100644 index 000000000..a980e1b63 --- /dev/null +++ b/tests/Elastic.Documentation.Api.Infrastructure.Tests/Caching/DistributedCacheTests.cs @@ -0,0 +1,323 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Globalization; +using System.Text.Json; +using Amazon.DynamoDBv2; +using Amazon.DynamoDBv2.Model; +using Elastic.Documentation.Api.Infrastructure.Caching; +using Elastic.Documentation.Api.Infrastructure.Gcp; +using FakeItEasy; +using FluentAssertions; +using Microsoft.Extensions.Logging.Abstractions; + +namespace Elastic.Documentation.Api.Infrastructure.Tests.Caching; + +public class InMemoryDistributedCacheTests +{ + private readonly InMemoryDistributedCache _cache = new(); + + [Fact] + public async Task GetAsyncWhenKeyDoesNotExistReturnsNull() + { + // Arrange + var key = CacheKey.Create("test", "nonexistent-key"); + + // Act + var result = await _cache.GetAsync(key, TestContext.Current.CancellationToken); + + // Assert + result.Should().BeNull(); + } + + [Fact] + public async Task SetAndGetWhenKeyIsSetReturnsValue() + { + // Arrange + var key = CacheKey.Create("test", "test-key"); + const string value = "test-value"; + + // Act + await _cache.SetAsync(key, value, TimeSpan.FromMinutes(1), TestContext.Current.CancellationToken); + var result = await _cache.GetAsync(key, TestContext.Current.CancellationToken); + + // Assert + result.Should().Be(value); + } + + [Fact] + public async Task GetAsyncWhenEntryExpiredReturnsNull() + { + // Arrange + var key = CacheKey.Create("test", "expiring-key"); + + // Act + await _cache.SetAsync(key, "value", TimeSpan.FromMilliseconds(10), TestContext.Current.CancellationToken); + await Task.Delay(50, TestContext.Current.CancellationToken); + var result = await _cache.GetAsync(key, TestContext.Current.CancellationToken); + + // Assert + result.Should().BeNull("expired entries should be removed"); + } + + [Fact] + public async Task SetAsyncOverwritesExistingValue() + { + // Arrange + var key = CacheKey.Create("test", "key"); + + // Act + await _cache.SetAsync(key, "first", TimeSpan.FromMinutes(1), TestContext.Current.CancellationToken); + await _cache.SetAsync(key, "second", TimeSpan.FromMinutes(1), TestContext.Current.CancellationToken); + var result = await _cache.GetAsync(key, TestContext.Current.CancellationToken); + + // Assert + result.Should().Be("second"); + } +} + +public class MultiLayerCacheTests +{ + [Fact] + public async Task GetAsyncWhenL1HitDoesNotCallL2Again() + { + // Arrange + var fakeL2 = A.Fake(); + var cache = new MultiLayerCache(fakeL2, NullLogger.Instance); + var uniqueKey = CacheKey.Create("test", $"test-key-{Guid.NewGuid()}"); // Use unique key to avoid L1 cache pollution from other tests + + // Pre-populate L1 by setting a value + await cache.SetAsync(uniqueKey, "value", TimeSpan.FromMinutes(1), TestContext.Current.CancellationToken); + + // Act - Second get should hit L1 + var result1 = await cache.GetAsync(uniqueKey, TestContext.Current.CancellationToken); + var result2 = await cache.GetAsync(uniqueKey, TestContext.Current.CancellationToken); + + // Assert + result1.Should().Be("value"); + result2.Should().Be("value"); + // L2 should only be called once (during SetAsync), not on subsequent Gets + A.CallTo(() => fakeL2.GetAsync(A._, A._)) + .MustNotHaveHappened(); + } + + [Fact] + public async Task GetAsyncWhenL1MissCallsL2AndPopulatesL1() + { + // Arrange + var fakeL2 = A.Fake(); + var uniqueKey = CacheKey.Create("test", $"test-key-{Guid.NewGuid()}"); // Use unique key to avoid L1 cache pollution from other tests + A.CallTo(() => fakeL2.GetAsync(uniqueKey, A._)) + .Returns("l2-value"); + + var cache = new MultiLayerCache(fakeL2, NullLogger.Instance); + + // Act - First call misses L1, hits L2 + var result1 = await cache.GetAsync(uniqueKey, TestContext.Current.CancellationToken); + // Second call should hit L1 (populated from previous call) + var result2 = await cache.GetAsync(uniqueKey, TestContext.Current.CancellationToken); + + // Assert + result1.Should().Be("l2-value"); + result2.Should().Be("l2-value"); + A.CallTo(() => fakeL2.GetAsync(uniqueKey, A._)) + .MustHaveHappenedOnceExactly(); + } + + [Fact] + public async Task SetAsyncWritesToBothL1AndL2() + { + // Arrange + var fakeL2 = A.Fake(); + var cache = new MultiLayerCache(fakeL2, NullLogger.Instance); + var uniqueKey = CacheKey.Create("test", $"test-key-{Guid.NewGuid()}"); // Use unique key to avoid L1 cache pollution from other tests + + // Act + await cache.SetAsync(uniqueKey, "value", TimeSpan.FromMinutes(1), TestContext.Current.CancellationToken); + + // Get from cache (should hit L1) + var result = await cache.GetAsync(uniqueKey, TestContext.Current.CancellationToken); + + // Assert + result.Should().Be("value", "L1 should have the value"); + A.CallTo(() => fakeL2.SetAsync(uniqueKey, "value", TimeSpan.FromMinutes(1), A._)) + .MustHaveHappenedOnceExactly(); + } + + [Fact] + public async Task GetAsyncWhenBothCachesMissReturnsNull() + { + // Arrange + var fakeL2 = A.Fake(); + A.CallTo(() => fakeL2.GetAsync(A._, A._)) + .Returns((string?)null); + + var cache = new MultiLayerCache(fakeL2, NullLogger.Instance); + var key = CacheKey.Create("test", "missing-key"); + + // Act + var result = await cache.GetAsync(key, TestContext.Current.CancellationToken); + + // Assert + result.Should().BeNull(); + } +} + +public class DynamoDbDistributedCacheTests +{ + [Fact] + public async Task GetAsyncWhenItemExistsReturnsValue() + { + // Arrange + var fakeDynamoDb = A.Fake(); + var key = CacheKey.Create("test", "test-key"); + // Note: We no longer use ExpiresAt - DynamoDB TTL handles expiration automatically + + var response = new GetItemResponse + { + Item = new Dictionary + { + ["CacheKey"] = new AttributeValue { S = key.Value }, + ["Value"] = new AttributeValue { S = "test-value" }, + ["TTL"] = new AttributeValue { N = DateTimeOffset.UtcNow.AddMinutes(30).ToUnixTimeSeconds().ToString(CultureInfo.InvariantCulture) } + }, + IsItemSet = true + }; + + A.CallTo(() => fakeDynamoDb.GetItemAsync(A._, A._)) + .Returns(response); + + var cache = new DynamoDbDistributedCache(fakeDynamoDb, "test-table", NullLogger.Instance); + + // Act + var result = await cache.GetAsync(key, TestContext.Current.CancellationToken); + + // Assert + result.Should().Be("test-value"); + } + + [Fact] + public async Task GetAsyncWhenItemDoesNotExistReturnsNull() + { + // Arrange + var fakeDynamoDb = A.Fake(); + var key = CacheKey.Create("test", "missing-key"); + var response = new GetItemResponse { IsItemSet = false }; + + A.CallTo(() => fakeDynamoDb.GetItemAsync(A._, A._)) + .Returns(response); + + var cache = new DynamoDbDistributedCache(fakeDynamoDb, "test-table", NullLogger.Instance); + + // Act + var result = await cache.GetAsync(key, TestContext.Current.CancellationToken); + + // Assert + result.Should().BeNull(); + } + + [Fact] + public async Task SetAsyncCallsDynamoDbPutItem() + { + // Arrange + var fakeDynamoDb = A.Fake(); + var cache = new DynamoDbDistributedCache(fakeDynamoDb, "test-table", NullLogger.Instance); + var key = CacheKey.Create("test", "key"); + + // Act + await cache.SetAsync(key, "value", TimeSpan.FromMinutes(30), TestContext.Current.CancellationToken); + + // Assert + A.CallTo(() => fakeDynamoDb.PutItemAsync( + A.That.Matches(r => + r.TableName == "test-table" && + r.Item["CacheKey"].S == key.Value && + r.Item["Value"].S == "value" + ), + A._ + )).MustHaveHappenedOnceExactly(); + } + + [Fact] + public async Task GetAsyncWhenTableNotFoundReturnsNullGracefully() + { + // Arrange + var fakeDynamoDb = A.Fake(); + var key = CacheKey.Create("test", "key"); + var exception = new ResourceNotFoundException("Table not found"); + A.CallTo(() => fakeDynamoDb.GetItemAsync(A._, A._)) + .Throws(exception); + + var cache = new DynamoDbDistributedCache(fakeDynamoDb, "missing-table", NullLogger.Instance); + + // Act + var result = await cache.GetAsync(key, TestContext.Current.CancellationToken); + + // Assert + result.Should().BeNull("should handle missing table gracefully"); + } +} + +public class GcpIdTokenProviderCachingIntegrationTests +{ + [Fact] + public async Task GenerateIdTokenAsyncUsesCachedTokenWhenValid() + { + // Arrange + // Use HttpMessageHandler directly with method name matching to verify ExchangeJwtForIdToken was not called + // See: https://fakeiteasy.github.io/docs/8.1.0/Recipes/faking-http-client/ + var fakeHandler = A.Fake(); + using var httpClient = new HttpClient(fakeHandler); + var fakeHttpClientFactory = A.Fake(); + A.CallTo(() => fakeHttpClientFactory.CreateClient(A._)) + .Returns(httpClient); + + var cache = new InMemoryDistributedCache(); + + var provider = new GcpIdTokenProvider(fakeHttpClientFactory, cache); + const string targetAudience = "https://test-audience.googleapis.com"; + + // Pre-populate cache with valid token (TTL of 45 minutes - matches cache expiry logic) + const string cachedToken = "fake-cached-token"; + var cacheKey = CacheKey.Create("idtoken", targetAudience); + await cache.SetAsync(cacheKey, cachedToken, TimeSpan.FromMinutes(45), TestContext.Current.CancellationToken); + + // Act + var result = await provider.GenerateIdTokenAsync("{}", targetAudience, TestContext.Current.CancellationToken); + + // Assert + result.Should().Be("fake-cached-token", "should return cached token without calling Google OAuth"); + // Verify that ExchangeJwtForIdToken was not called (PostAsync -> SendAsync) + // Using method name matching since SendAsync is protected + A.CallTo(fakeHandler) + .WithReturnType>() + .Where(call => call.Method.Name == "SendAsync") + .MustNotHaveHappened(); + } + + [Fact] + public async Task GenerateIdTokenAsyncIgnoresExpiredCachedToken() + { + // Arrange + var cache = new InMemoryDistributedCache(); + const string targetAudience = "https://test.com"; + + // Pre-populate cache with token that has very short TTL (will expire quickly) + // InMemoryDistributedCache will remove it when expired + const string expiredToken = "expired-token"; + var cacheKey = CacheKey.Create("idtoken", targetAudience); + await cache.SetAsync(cacheKey, expiredToken, TimeSpan.FromMilliseconds(10), TestContext.Current.CancellationToken); + + // Wait for expiration + await Task.Delay(50, TestContext.Current.CancellationToken); + + // Act - Try to get the expired token + var cachedValue = await cache.GetAsync(cacheKey, TestContext.Current.CancellationToken); + + // Assert - Expired cache entry should return null + // GcpIdTokenProvider checks `if (cachedToken != null)` - when cache returns null, + // it will generate a new token, effectively ignoring the expired cached token + cachedValue.Should().BeNull("expired cache entries should return null, allowing provider to generate new token"); + } +} diff --git a/tests/Elastic.Documentation.Api.Infrastructure.Tests/Elastic.Documentation.Api.Infrastructure.Tests.csproj b/tests/Elastic.Documentation.Api.Infrastructure.Tests/Elastic.Documentation.Api.Infrastructure.Tests.csproj index 6513b2c7d..3e8bc29f7 100644 --- a/tests/Elastic.Documentation.Api.Infrastructure.Tests/Elastic.Documentation.Api.Infrastructure.Tests.csproj +++ b/tests/Elastic.Documentation.Api.Infrastructure.Tests/Elastic.Documentation.Api.Infrastructure.Tests.csproj @@ -10,6 +10,7 @@ +