Skip to content

Kestrel stops serving https (http2) requests after reboot under load #21183

@dv00d00

Description

@dv00d00

Describe the bug

I am running Kestrel as an edge server on DigitalOcean (Docker 5:19.03.1~3 on Ubuntu 18.04), started via docker-compose:

# Compose stack with two services: a database ("db") and the ASP.NET Core
# application ("dotnet"), which is published directly on host ports 80 and 443.
version: '3.5'
services:

  # Database container, built from ./docker/db.
  db:
    build: ./docker/db
    # Flags passed to the database server: native-password auth plugin and a utf8 server character set.
    command: ["--default-authentication-plugin=mysql_native_password", "--character-set-server=utf8"]
    volumes:
      # Database files are kept in the named volume declared at the bottom of this file.
      - dotnet_db_data:/var/lib/mysql
    networks:
      - default
    restart: always
        
  # Application container, built from the directory containing this compose file.
  dotnet:
    depends_on:
      - db
    build: .
    environment:
      # The app listens on plain HTTP (80) and HTTPS (443) inside the container.
      ASPNETCORE_URLS: "http://+:80;https://+:443"
      ASPNETCORE_ENVIRONMENT: Production
      # Substituted by docker-compose from the invoking environment.
      APPLICATION_NAME: "${APPLICATION_NAME}"
    volumes:
      # Host directory mounted at /etc/dotnet_certs inside the container.
      - ./docker/dotnet_certs:/etc/dotnet_certs
    networks:
      - default
    ports:
      # Container ports are mapped 1:1 onto the host; there is no reverse proxy in this file.
      - "80:80"
      - "443:443"
    restart: always

volumes:
  dotnet_db_data: {}

networks:
  default:
    driver: bridge

The container is built with mcr.microsoft.com/dotnet/core/sdk:3.1 and mcr.microsoft.com/dotnet/core/aspnet:3.1

I am using Compression and ResponseCaching middlewares in the request pipeline.

        /// <summary>
        /// Registers the application's services: the resolved <c>Deployment</c>, MVC with a default
        /// cache profile, the MySQL-backed <c>TrackDbContext</c>, response caching and compression,
        /// HttpClient support and, outside Development, the FluffySpoon Let's Encrypt services.
        /// </summary>
        /// <param name="services">The service collection to populate.</param>
        public void ConfigureServices(IServiceCollection services)
        {
            var deployment = DeploymentResolver.Resolve(Environment, Configuration);
            services.AddSingleton<Deployment>(deployment);

            services.AddHttpContextAccessor();
            services.TryAddScoped<IUserAgentService, UserAgentService>();
            services.TryAddScoped<IDeviceService, DeviceService>();

            var mvcBuilder = services
                .AddMvc(mvcOptions =>
                {
                    // Default cache profile: five minutes, varied by the platform-detect header.
                    var defaultProfile = new CacheProfile
                    {
                        VaryByHeader = DefinedCacheProfiles.PlatformDetectHeader,
                        Duration = 60 * 5
                    };
                    mvcOptions.CacheProfiles.Add(DefinedCacheProfiles.Default, defaultProfile);

                    // Staging only: a BasicAuth filter is added to every MVC action.
                    if (Environment.IsStaging())
                    {
                        mvcOptions.Filters.Add(new BasicAuth("test"));
                    }
                })
                .AddJsonOptions(json => json.JsonSerializerOptions.PropertyNamingPolicy = null);

            if (Environment.IsDevelopment())
            {
                mvcBuilder.AddRazorRuntimeCompilation();
            }

            services.AddDbContext<TrackDbContext>(dbOptions =>
            {
                var connectionString = Configuration.GetConnectionString("DefaultConnection");
                dbOptions.UseMySql(connectionString, mySqlOptions =>
                {
                    mySqlOptions.ServerVersion(new Version(8, 0, 16), ServerType.MySql);
                    mySqlOptions.MigrationsHistoryTable("migrations_history");
                });
            });

            services.AddResponseCaching(cachingOptions => cachingOptions.UseCaseSensitivePaths = false);
            services.AddMemoryCache(_ => { });

            services.AddHttpClient();
            // Drops every IHttpMessageHandlerBuilderFilter registered so far.
            services.RemoveAll<IHttpMessageHandlerBuilderFilter>();

            services.Configure<GzipCompressionProviderOptions>(gzipOptions => gzipOptions.Level = CompressionLevel.Optimal);

            services.AddResponseCompression(compressionOptions =>
            {
                compressionOptions.EnableForHttps = true;
                compressionOptions.MimeTypes = new[]
                {
                    "application/atom+xml",
                    "application/javascript",
                    "application/json",
                    "application/ld+json",
                    "application/manifest+json",
                    "application/rss+xml",
                    "application/vnd.geo+json",
                    "application/vnd.ms-fontobject",
                    "application/x-font-ttf",
                    "application/x-web-app-manifest+json",
                    "application/xhtml+xml",
                    "application/xml",
                    "font/opentype",
                    "font/woff2",
                    "image/bmp",
                    "image/svg+xml",
                    "image/x-icon",
                    "text/html",
                    "text/cache-manifest",
                    "text/css",
                    "text/plain",
                    "text/vcard",
                    "text/vnd.rim.location.xloc",
                    "text/vtt",
                    "text/x-component",
                    "text/x-cross-domain-policy",
                };

                // Brotli is registered before gzip.
                compressionOptions.Providers.Add<BrotliCompressionProvider>();
                compressionOptions.Providers.Add<GzipCompressionProvider>();
            });

            // Certificate handling is not registered in Development.
            if (Environment.IsDevelopment())
            {
                return;
            }

            var letsEncryptOptions = new LetsEncryptOptions
            {
                Email = "email@emails.com",
                UseStaging = false,
                Domains = new[] { deployment.DNS },
                TimeUntilExpiryBeforeRenewal = TimeSpan.FromDays(30),
                TimeAfterIssueDateBeforeRenewal = TimeSpan.FromDays(7),
                CertificateSigningRequest = new CsrInfo(),
                KeyAlgorithm = KeyAlgorithm.ES256,
            };
            services.AddFluffySpoonLetsEncrypt(letsEncryptOptions);

            services.AddFluffySpoonLetsEncryptFileCertificatePersistence("//etc/dotnet_certs/main");
            services.AddFluffySpoonLetsEncryptMemoryChallengePersistence();
        }

        /// <summary>
        /// Builds the HTTP request pipeline: error pages outside Production, Let's Encrypt challenge
        /// handling, HSTS and HTTPS redirection, a Basic-auth protected <c>/metrics</c> branch,
        /// response compression, long-lived static files, routing, HTTP metrics, device detection,
        /// response caching and finally the MVC controller endpoints.
        /// </summary>
        /// <param name="app">The application builder the middleware is added to.</param>
        /// <param name="env">The hosting environment, used to decide whether error pages are enabled.</param>
        public void Configure(IApplicationBuilder app, IWebHostEnvironment env)
        {
            if (!env.IsProduction())
            {
                app.UseDeveloperExceptionPage();
                app.UseDatabaseErrorPage();
            }

            // NOTE(review): ConfigureServices registers the LetsEncrypt services only outside
            // Development, while this middleware is added in every environment - confirm intended.
            app.UseFluffySpoonLetsEncrypt();

            app.UseHsts();
            app.UseHttpsRedirection();
            app.UseStatusCodePagesWithReExecute("/status/{0}");

            // Clears any endpoint already set on the context before the rest of the pipeline runs.
            app.Use((context, next) =>
            {
                context.SetEndpoint(null);
                return next();
            });

            app.Map("/metrics", metricsApp =>
            {
                // NOTE(review): credentials are hard-coded in source; they should come from
                // configuration or a secret store.
                const string userName = "monitoring";
                const string password = "long-and-secure-password-for-monitoring-2";
                var expectedHeader = Encoding.UTF8.GetBytes(
                    "Basic " + Convert.ToBase64String(Encoding.UTF8.GetBytes(userName + ":" + password)));

                metricsApp.Use((context, next) =>
                {
                    // Compare in constant time so response timing does not reveal how much of the
                    // credential matched. A missing header becomes an empty string and never matches.
                    var suppliedHeader = Encoding.UTF8.GetBytes(
                        context.Request.Headers["Authorization"].ToString());

                    if (System.Security.Cryptography.CryptographicOperations.FixedTimeEquals(suppliedHeader, expectedHeader))
                    {
                        return next();
                    }

                    // Not authorized: challenge the client and end the request here.
                    context.Response.Headers["WWW-Authenticate"] = "Basic";
                    context.Response.StatusCode = (int) HttpStatusCode.Unauthorized;
                    return Task.CompletedTask;
                });

                metricsApp.UseMetricServer("");
            });

            app.UseResponseCompression();

            // Static files are served with a 30-day public cache lifetime.
            var cacheValue = $"public, max-age={30 * 24 * 60 * 60}";
            app.UseStaticFiles(new StaticFileOptions
            {
                OnPrepareResponse = ctx =>
                {
                    ctx.Context.Response.Headers.Append("Cache-Control", cacheValue);
                }
            });

            app.UseRouting();

            app.UseHttpMetrics(options =>
            {
                options.InProgress.Enabled = false;
            });

            app.Use(DetectDevice);

            app.UseResponseCaching();

            app.UseEndpoints(endpoints => { endpoints.MapControllers(); });
        }

The issue did not appear until we started receiving an increased volume of traffic (e.g. it did not occur at about 1 rps, but does occur at about 8 rps).

The deployment process pulls the latest commit from the repo, builds the container on the host and launches a new instance with: docker-compose -f prod.yml up -d --build

This process restarts the running Kestrel container, and after the restart the newly started instance does not handle any requests.

CPU usage is low during this period (normal avg 10%, broken avg 10%).

After a series of reboots the server starts to handle requests again.

To Reproduce

I am able to consistently reproduce the issue with synthetic traffic on our staging env:

    /// <summary>
    /// Synthetic load generator: starts 50 concurrent workers that request random URLs from
    /// <see cref="Urls"/> in a tight loop until the user presses Enter.
    /// </summary>
    class Program
    {
        private static readonly string[] Urls = new[]
        {
            "https://server.com/homepage",
            "https://server.com/homepage2",
            "https://server.com/homepage3",
        };

        /// <summary>
        /// Starts the workers and blocks until a line is read from the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static Task Main(string[] args)
        {
            for (int i = 0; i < 50; i++)
            {
                // Workers loop forever, so they are deliberately started without being awaited.
                _ = Go(i + 1);
            }

            Console.ReadLine();
            return Task.CompletedTask;
        }

        /// <summary>
        /// Worker loop: repeatedly sends an authenticated GET to a randomly chosen URL and reads
        /// the whole response body. Failures are printed and the loop continues.
        /// </summary>
        /// <param name="id">Worker number, only used for the start-up message.</param>
        static async Task Go(int id)
        {
            Console.WriteLine("Running: " + id);
            var random = new Random();

            while (true)
            {
                try
                {
                    // A new client per request is kept on purpose so every request opens its own
                    // connection (and TLS handshake); disposing it releases the handler and socket
                    // instead of leaking them on each iteration.
                    using (var httpClient = new HttpClient())
                    using (var request = new HttpRequestMessage(HttpMethod.Get, Urls[random.Next(Urls.Length)]))
                    {
                        request.Headers.Authorization = new AuthenticationHeaderValue("Basic", "YWRtaW46bG9uZy1hbmQtc2VjdXJlLXBhc3N3b3JkLWZvci1hZG1pbjI=");

                        using (var response = await httpClient.SendAsync(request, HttpCompletionOption.ResponseContentRead))
                        {
                            // The body is read to completion; its content is not used.
                            await response.Content.ReadAsStringAsync();
                        }
                    }
                }
                catch (Exception e)
                {
                    Console.WriteLine(e.Message);

                    // Print the full exception for TLS-related failures.
                    if (e.Message.Contains("SSL"))
                    {
                        Console.WriteLine(e.ToString());
                    }
                }
            }
        }
    }

While the fake load is running, I shut the stack down and bring it up again. The repro rate is around 90%.

Further technical details

  • ASP.NET Core version: 3.1
  • Include the output of dotnet --info:
Host (useful for support):
  Version: 3.1.1
  Commit:  a1388f194c

.NET Core SDKs installed:
  No SDKs were found.

.NET Core runtimes installed:
  Microsoft.AspNetCore.App 3.1.1 [/usr/share/dotnet/shared/Microsoft.AspNetCore.App]
  Microsoft.NETCore.App 3.1.1 [/usr/share/dotnet/shared/Microsoft.NETCore.App]

Metadata

Metadata

Assignees

Labels

affected-few: This issue impacts only small number of customers · area-networking: Includes servers, yarp, json patch, bedrock, websockets, http client factory, and http abstractions · bug: This issue describes a behavior which is not expected - a bug. · severity-major: This label is used by an internal tool

Type

No type

Projects

No projects

Relationships

None yet

Development

No branches or pull requests

Issue actions