Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement MsQuicConfiguration cache #99371

Merged
merged 12 commits into from
Mar 21, 2024
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Collections.Concurrent;
using System.Collections.ObjectModel;
using System.Security.Authentication;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using System.Threading;
using Microsoft.Quic;

namespace System.Net.Quic;

internal static partial class MsQuicConfiguration
{
// A cleanup pass over the cache is attempted whenever the number of cached
// entries is a multiple of this value.
private const int CheckExpiredModulo = 32;

// Cached MsQuic configuration handles, keyed by the parameters captured in CacheKey.
private static readonly ConcurrentDictionary<CacheKey, MsQuicSafeHandle> s_configurationCache = new();

/// <summary>
/// Key identifying a cached configuration handle. Two keys are equal when their
/// certificate thumbprints, credential flags, settings, application protocols and
/// allowed cipher suites are all equal (lists are compared element-wise, in order).
/// </summary>
private readonly struct CacheKey : IEquatable<CacheKey>
{
    public readonly List<byte[]> CertificateThumbprints;
    public readonly QUIC_CREDENTIAL_FLAGS Flags;
    public readonly QUIC_SETTINGS Settings;
    public readonly List<SslApplicationProtocol> ApplicationProtocols;
    public readonly QUIC_ALLOWED_CIPHER_SUITE_FLAGS AllowedCipherSuites;

    /// <summary>Stores the supplied values as-is; the lists are not copied here.</summary>
    public CacheKey(List<byte[]> certificateThumbprints, QUIC_CREDENTIAL_FLAGS flags, QUIC_SETTINGS settings, List<SslApplicationProtocol> applicationProtocols, QUIC_ALLOWED_CIPHER_SUITE_FLAGS allowedCipherSuites)
    {
        CertificateThumbprints = certificateThumbprints;
        ApplicationProtocols = applicationProtocols;
        Flags = flags;
        Settings = settings;
        AllowedCipherSuites = allowedCipherSuites;
    }

    public override bool Equals(object? obj)
    {
        return obj is CacheKey other && Equals(other);
    }

    /// <summary>
    /// Compares the thumbprint lists first, then the protocol lists, and finally
    /// the scalar members.
    /// </summary>
    public bool Equals(CacheKey other)
    {
        return
            ThumbprintsEqual(CertificateThumbprints, other.CertificateThumbprints) &&
            ProtocolsEqual(ApplicationProtocols, other.ApplicationProtocols) &&
            Flags == other.Flags &&
            Settings.Equals(other.Settings) &&
            AllowedCipherSuites == other.AllowedCipherSuites;
    }

    /// <summary>
    /// Combines the bytes of every thumbprint, the flags, the settings, the bytes of
    /// every application protocol and the allowed cipher suites, in that order.
    /// </summary>
    public override int GetHashCode()
    {
        HashCode combined = default;

        foreach (byte[] certHash in CertificateThumbprints)
        {
            combined.AddBytes(certHash);
        }

        combined.Add(Flags);
        combined.Add(Settings);

        foreach (SslApplicationProtocol alpn in ApplicationProtocols)
        {
            combined.AddBytes(alpn.Protocol.Span);
        }

        combined.Add(AllowedCipherSuites);

        return combined.ToHashCode();
    }

    // Same length and byte-wise identical thumbprints at every position.
    private static bool ThumbprintsEqual(List<byte[]> left, List<byte[]> right)
    {
        if (left.Count != right.Count)
        {
            return false;
        }

        for (int index = 0; index < left.Count; index++)
        {
            if (!left[index].AsSpan().SequenceEqual(right[index]))
            {
                return false;
            }
        }

        return true;
    }

    // Same length and equal protocols at every position.
    private static bool ProtocolsEqual(List<SslApplicationProtocol> left, List<SslApplicationProtocol> right)
    {
        if (left.Count != right.Count)
        {
            return false;
        }

        for (int index = 0; index < left.Count; index++)
        {
            if (left[index] != right[index])
            {
                return false;
            }
        }

        return true;
    }
}

/// <summary>
/// Looks up <paramref name="key"/> in the configuration cache and, on a hit, takes an
/// additional reference on the cached handle for the caller.
/// </summary>
/// <returns>
/// The cached handle with its reference count incremented, or <c>null</c> when there
/// is no entry for the key or the cached handle has already been closed.
/// </returns>
private static MsQuicSafeHandle? TryGetCachedConfigurationHandle(CacheKey key)
{
    if (!s_configurationCache.TryGetValue(key, out MsQuicSafeHandle? cachedHandle))
    {
        return null;
    }

    try
    {
        // A concurrent cache cleanup may close the handle before we manage to claim
        // it; in that case DangerousAddRef throws ObjectDisposedException.
        bool refAdded = false;
        cachedHandle.DangerousAddRef(ref refAdded);
        if (NetEventSource.Log.IsEnabled()) NetEventSource.Info(null, $"Using cached MsQuicConfiguration {cachedHandle}.");
        return cachedHandle;
    }
    catch (ObjectDisposedException)
    {
        // Lost the race with cleanup: report a cache miss.
        return null;
    }
}

/// <summary>
/// Publishes a freshly created configuration handle to the cache, or, if another
/// thread already cached an equivalent live handle, discards the new one and hands
/// the cached one back to the caller.
/// </summary>
/// <param name="key">Cache key describing the configuration.</param>
/// <param name="handle">
/// On entry, the newly created handle. On return, the handle the caller should use:
/// either the same instance (now also referenced by the cache) or the previously
/// cached instance with an extra reference taken for the caller, in which case the
/// new handle has been disposed.
/// </param>
private static void CacheConfigurationHandle(CacheKey key, ref MsQuicSafeHandle handle)
{
    bool ignore = false;

    //
    // The cache holds its own reference on every handle it stores. Take that reference
    // exactly once, before the handle becomes visible to other threads.
    //
    // This deliberately does not use ConcurrentDictionary.AddOrUpdate with ref-counting
    // factories: the factories can be invoked more than once under contention, which
    // would leave stray references behind and prevent a discarded handle from closing.
    //
    handle.DangerousAddRef(ref ignore);

    while (true)
    {
        if (s_configurationCache.TryAdd(key, handle))
        {
            if (NetEventSource.Log.IsEnabled()) NetEventSource.Info(null, $"Caching MsQuicConfiguration {handle}.");
            break;
        }

        if (!s_configurationCache.TryGetValue(key, out MsQuicSafeHandle? existingHandle))
        {
            // The entry disappeared between TryAdd and TryGetValue, try adding again.
            continue;
        }

        try
        {
            //
            // Another thread was faster in creating the configuration, check if we can
            // use the cached one. This races with the cache cleanup but should be rare
            // since the configuration was just added and is likely still being used.
            //
            existingHandle.DangerousAddRef(ref ignore);
        }
        catch (ObjectDisposedException)
        {
            // We lost the race with cleanup, the existing handle is closed. Replace it
            // with ours, but only if the entry still refers to that closed handle.
            if (s_configurationCache.TryUpdate(key, handle, existingHandle))
            {
                if (NetEventSource.Log.IsEnabled()) NetEventSource.Info(null, $"Caching MsQuicConfiguration {handle}.");
                break;
            }

            continue;
        }

        if (NetEventSource.Log.IsEnabled()) NetEventSource.Info(null, $"Found existing MsQuicConfiguration {existingHandle} in cache.");
        if (NetEventSource.Log.IsEnabled()) NetEventSource.Info(null, $"Discarding MsQuicConfiguration {handle} (preferring cached {existingHandle}).");

        // Our handle never made it into the cache: drop the reference taken on the
        // cache's behalf and then the creator's own reference, which closes it.
        handle.DangerousRelease();
        handle.Dispose();
        handle = existingHandle;
        return;
    }

    //
    // Cleanup is triggered by entry count; per the original review discussion the
    // strategy was copied from the one used by SslStream on Windows. Known trade-off:
    // with more than CheckExpiredModulo configurations that are mostly idle, a pass
    // may throw most of them away so they have to be recreated. This was judged
    // sufficient for now; time-based throttling or a configurable threshold are
    // possible refinements should it prove problematic.
    //
    if (s_configurationCache.Count % CheckExpiredModulo == 0)
    {
        // let only one thread perform cleanup at a time
        lock (s_configurationCache)
        {
            // re-check under the lock, another thread may have cleaned up already
            if (s_configurationCache.Count % CheckExpiredModulo == 0)
            {
                CleanupCachedCredentials();
            }
        }
    }
}

/// <summary>
/// Walks a snapshot of the cache and evicts every handle that is referenced only by
/// the cache itself. Callers serialize invocations by holding the lock on
/// <c>s_configurationCache</c>.
/// </summary>
private static void CleanupCachedCredentials()
{
    KeyValuePair<CacheKey, MsQuicSafeHandle>[] toRemoveAttempt = s_configurationCache.ToArray();

    if (NetEventSource.Log.IsEnabled()) NetEventSource.Info(null, $"Cleaning up MsQuicConfiguration cache, current size: {toRemoveAttempt.Length}.");

    foreach (KeyValuePair<CacheKey, MsQuicSafeHandle> kvp in toRemoveAttempt)
    {
        MsQuicSafeHandle handle = kvp.Value;

        //
        // We can't directly get the current refcount of the handle, we know it's at least 1,
        // so we decrement it and if it does not close, then it must be in use, so we increment
        // it back.
        //
        handle.DangerousRelease();
        bool inUse = false;
        try
        {
            if (!handle.IsClosed)
            {
                // handle is in use, add the ref back.
                // This add-ref races with QuicConnection.Dispose();
                handle.DangerousAddRef(ref inUse);
            }
        }
        catch (ObjectDisposedException)
        {
            // we lost the race, the handle is closed, we can proceed to remove it from
            // the cache.
        }

        if (!inUse)
        {
            if (NetEventSource.Log.IsEnabled()) NetEventSource.Info(null, $"Removing cached MsQuicConfiguration {handle}.");

            // Remove the entry only if it still maps to the handle we just closed. A
            // concurrent CacheConfigurationHandle call may already have replaced the
            // closed handle with a fresh one under the same key; removing by key alone
            // would evict that live handle without releasing the cache's reference.
            s_configurationCache.TryRemove(kvp);

            // The handle is closed, but we did not call Dispose on it. Doing so would throw ODE,
            // suppress finalization to prevent Dispose from being called in a Finalizer thread.
            GC.SuppressFinalize(handle);
        }
    }

    if (NetEventSource.Log.IsEnabled()) NetEventSource.Info(null, $"Cleaning up MsQuicConfiguration cache, new size: {s_configurationCache.Count}.");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

namespace System.Net.Quic;

internal static class MsQuicConfiguration
internal static partial class MsQuicConfiguration
{
private static bool HasPrivateKey(this X509Certificate certificate)
=> certificate is X509Certificate2 certificate2 && certificate2.Handle != IntPtr.Zero && certificate2.HasPrivateKey;
Expand Down Expand Up @@ -176,20 +176,49 @@ private static unsafe MsQuicSafeHandle Create(QuicConnectionOptions options, QUI
: 0; // 0 disables the timeout
}

QUIC_ALLOWED_CIPHER_SUITE_FLAGS allowedCipherSuites = QUIC_ALLOWED_CIPHER_SUITE_FLAGS.NONE;

if (cipherSuitesPolicy != null)
{
flags |= QUIC_CREDENTIAL_FLAGS.SET_ALLOWED_CIPHER_SUITES;
allowedCipherSuites = CipherSuitePolicyToFlags(cipherSuitesPolicy);
}

CacheKey cacheKey = new CacheKey(
certificate == null ? new List<byte[]>() : new List<byte[]> { certificate.GetCertHash() },
flags,
settings,
new List<SslApplicationProtocol>(alpnProtocols), // make defensive copy to prevent modification
allowedCipherSuites);

if (intermediates != null)
{
foreach (X509Certificate2 intermediate in intermediates)
{
cacheKey.CertificateThumbprints.Add(intermediate.GetCertHash());
}
}

MsQuicSafeHandle? configurationHandle = TryGetCachedConfigurationHandle(cacheKey);
if (configurationHandle != null)
{
return configurationHandle;
}

QUIC_HANDLE* handle;

using MsQuicBuffers msquicBuffers = new MsQuicBuffers();
msquicBuffers.Initialize(alpnProtocols, alpnProtocol => alpnProtocol.Protocol);
msquicBuffers.Initialize(cacheKey.ApplicationProtocols, alpnProtocol => alpnProtocol.Protocol);
ThrowHelper.ThrowIfMsQuicError(MsQuicApi.Api.ConfigurationOpen(
MsQuicApi.Api.Registration,
msquicBuffers.Buffers,
(uint)alpnProtocols.Count,
(uint)msquicBuffers.Count,
&settings,
(uint)sizeof(QUIC_SETTINGS),
(void*)IntPtr.Zero,
&handle),
"ConfigurationOpen failed");
MsQuicSafeHandle configurationHandle = new MsQuicSafeHandle(handle, SafeHandleType.Configuration);
configurationHandle = new MsQuicSafeHandle(handle, SafeHandleType.Configuration);

try
{
Expand All @@ -198,8 +227,7 @@ private static unsafe MsQuicSafeHandle Create(QuicConnectionOptions options, QUI

if (cipherSuitesPolicy != null)
{
config.Flags |= QUIC_CREDENTIAL_FLAGS.SET_ALLOWED_CIPHER_SUITES;
config.AllowedCipherSuites = CipherSuitePolicyToFlags(cipherSuitesPolicy);
config.AllowedCipherSuites = allowedCipherSuites;
}

int status;
Expand Down Expand Up @@ -273,6 +301,8 @@ private static unsafe MsQuicSafeHandle Create(QuicConnectionOptions options, QUI
throw;
}

CacheConfigurationHandle(cacheKey, ref configurationHandle);

return configurationHandle;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,8 @@ public async ValueTask DisposeAsync()
_handle.Dispose();
_shutdownTokenSource.Dispose();

_configuration?.Dispose();
// don't dispose the handle, just release refcount because it may be cached
_configuration?.DangerousRelease();

// Dispose remote certificate only if it hasn't been accessed via getter, in which case the accessing code becomes the owner of the certificate lifetime.
if (!_remoteCertificateExposed)
Expand Down