Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Activation Repartitioning #8877

Merged
merged 4 commits into from
Jul 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@
<PackageVersion Include="Azure.Messaging.EventHubs" Version="5.9.3" />
<PackageVersion Include="Azure.Storage.Blobs" Version="12.18.0" />
<PackageVersion Include="Azure.Storage.Queues" Version="12.16.0" />
<!-- Aspire -->
<PackageVersion Include="Aspire.Hosting.AppHost" Version="8.0.1" />
<PackageVersion Include="Aspire.Hosting.Orleans" Version="8.0.1" />
<PackageVersion Include="Aspire.Hosting.Redis" Version="8.0.1" />
<PackageVersion Include="Aspire.StackExchange.Redis" Version="8.0.1" />
<!-- 3rd party packages -->
<PackageVersion Include="Google.Cloud.PubSub.V1" Version="1.0.0-beta13" />
<PackageVersion Include="AWSSDK.DynamoDBv2" Version="3.7.300.6" />
Expand Down
19 changes: 19 additions & 0 deletions Orleans.sln
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,14 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Orleans.Streaming.AdoNet",
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Benchmarks.AdoNet", "test\Benchmarks.AdoNet\Benchmarks.AdoNet.csproj", "{B8F43537-2D2E-42A0-BE67-5E07E4313AEA}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "playground", "playground", "{A41DE3D1-F8AA-4234-BE6F-3C9646A1507A}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "DashboardToy", "DashboardToy", "{316CDCC7-323F-4264-9FC9-667662BB1F80}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DashboardToy.Frontend", "playground\DashboardToy\DashboardToy.Frontend\DashboardToy.Frontend.csproj", "{C4DD4F96-3EC6-47C6-97AA-9B14F0F2099B}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DashboardToy.AppHost", "playground\DashboardToy\DashboardToy.AppHost\DashboardToy.AppHost.csproj", "{84B44F1D-B7FE-40E3-82F0-730A55AC8613}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -587,6 +595,14 @@ Global
{B8F43537-2D2E-42A0-BE67-5E07E4313AEA}.Debug|Any CPU.Build.0 = Debug|Any CPU
{B8F43537-2D2E-42A0-BE67-5E07E4313AEA}.Release|Any CPU.ActiveCfg = Release|Any CPU
{B8F43537-2D2E-42A0-BE67-5E07E4313AEA}.Release|Any CPU.Build.0 = Release|Any CPU
{C4DD4F96-3EC6-47C6-97AA-9B14F0F2099B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{C4DD4F96-3EC6-47C6-97AA-9B14F0F2099B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C4DD4F96-3EC6-47C6-97AA-9B14F0F2099B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C4DD4F96-3EC6-47C6-97AA-9B14F0F2099B}.Release|Any CPU.Build.0 = Release|Any CPU
{84B44F1D-B7FE-40E3-82F0-730A55AC8613}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{84B44F1D-B7FE-40E3-82F0-730A55AC8613}.Debug|Any CPU.Build.0 = Debug|Any CPU
{84B44F1D-B7FE-40E3-82F0-730A55AC8613}.Release|Any CPU.ActiveCfg = Release|Any CPU
{84B44F1D-B7FE-40E3-82F0-730A55AC8613}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -695,6 +711,9 @@ Global
{A073C0EE-8732-42F9-A22E-D47034E25076} = {4CD3AA9E-D937-48CA-BB6C-158E12257D23}
{2B994F33-16CF-4679-936A-5AEABC529D2C} = {EB2EDE59-5021-42EE-A97A-D59939B39C66}
{B8F43537-2D2E-42A0-BE67-5E07E4313AEA} = {2CAB7894-777C-42B1-8B1E-322868CE92C7}
{316CDCC7-323F-4264-9FC9-667662BB1F80} = {A41DE3D1-F8AA-4234-BE6F-3C9646A1507A}
{C4DD4F96-3EC6-47C6-97AA-9B14F0F2099B} = {316CDCC7-323F-4264-9FC9-667662BB1F80}
{84B44F1D-B7FE-40E3-82F0-730A55AC8613} = {316CDCC7-323F-4264-9FC9-667662BB1F80}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {7BFB3429-B5BB-4DB1-95B4-67D77A864952}
Expand Down
51 changes: 48 additions & 3 deletions distributed-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@ variables:
clusterId: '{{ "now" | date: "%s" }}'
serviceId: '{{ "now" | date: "%s" }}'
secretSource: KeyVault
framework: net7.0
framework: net8.0

jobs:
server:
source:
localFolder: Artifacts/DistributedTests/DistributedTests.Server/{{framework}}
executable: DistributedTests.Server.exe
readyStateText: Orleans Silo started.
framework: net7.0
framework: net8.0
arguments: "{{configurator}} --clusterId {{clusterId}} --serviceId {{serviceId}} --secretSource {{secretSource}} {{configuratorOptions}}"
onConfigure:
- if (job.endpoints.Count > 0) {
Expand All @@ -21,7 +21,7 @@ jobs:
localFolder: Artifacts/DistributedTests/DistributedTests.Client/{{framework}}
executable: DistributedTests.Client.exe
waitForExit: true
framework: net7.0
framework: net8.0
arguments: "{{command}} --clusterId {{clusterId}} --serviceId {{serviceId}} --secretSource {{secretSource}} {{commandOptions}}"
onConfigure:
- if (job.endpoints.Count > 0) {
Expand All @@ -46,6 +46,22 @@ scenarios:
requestsPerBlock: 500
duration: 120
commandOptions: "--numWorkers {{numWorkers}} --blocksPerWorker {{blocksPerWorker}} --requestsPerBlock {{requestsPerBlock}} --duration {{duration}}"
fanout:
server:
job: server
variables:
instances: 10
configurator: SimpleSilo
client:
job: client
variables:
command: fan-out
instances: 1
numWorkers: 1
blocksPerWorker: 0
requestsPerBlock: 50
duration: 240
commandOptions: "--numWorkers {{numWorkers}} --blocksPerWorker {{blocksPerWorker}} --requestsPerBlock {{requestsPerBlock}} --duration {{duration}}"
streaming:
server:
job: server
Expand Down Expand Up @@ -125,6 +141,35 @@ scenarios:
duration: 180
commandOptions: "--numWorkers {{numWorkers}} --blocksPerWorker {{blocksPerWorker}} --requestsPerBlock {{requestsPerBlock}} --duration {{duration}}"

counters:
- provider: Microsoft.Orleans
values:
- name: app-requests
measurement: orleans-counter/requests-per-second
description: Request rate

- name: activation-count
measurement: orleans-counter/grain-activation-count
description: Total number of grains

results:
# Microsoft.Orleans counters
- name: orleans-counter/requests-per-second
measurement: orleans-counter/requests-per-second
description: Request rate
format: "n0"
aggregate: max
reduce: max
- name: orleans-counter/requests-per-second/95
measurement: orleans-counter/requests-per-second
description: Request rate
format: "n0"
aggregate: percentile95
reduce: max
- name: activation-count
measurement: orleans-counter/grain-activation-count
description: Active grains

profiles:
local:
variables:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>

<!-- Uncomment this once Aspire no longer requires a 'dotnet workload install aspire' to build -->
<!--<IsAspireHost>true</IsAspireHost>-->
<UserSecretsId>6a521b87-2bf9-4af8-b7c7-4947536e1d50</UserSecretsId>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Aspire.Hosting.AppHost" />
<PackageReference Include="Aspire.Hosting.Orleans" />
<PackageReference Include="Aspire.Hosting.Redis" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\DashboardToy.Frontend\DashboardToy.Frontend.csproj" />
</ItemGroup>

</Project>
16 changes: 16 additions & 0 deletions playground/DashboardToy/DashboardToy.AppHost/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
using Projects;

// Entry point of the DashboardToy app host: declares the resources that make up the playground
// application and then runs it.
var builder = DistributedApplication.CreateBuilder(args);

// Redis resource named "orleans-redis"; handed to the Orleans resource below for clustering.
var redis = builder.AddRedis("orleans-redis");

// Orleans resource named "cluster", configured to use the Redis resource for clustering.
// NOTE: 'orleans' is currently only referenced by the commented-out frontend registration below.
var orleans = builder.AddOrleans("cluster")
.WithClustering(redis);

/*
// Uncomment this once Aspire no longer requires a 'workload' to build.
// Registers the frontend project with a reference to the Orleans resource and five replicas.
builder.AddProject<DashboardToy_Frontend>("frontend")
.WithReference(orleans)
.WithReplicas(5);
*/

// Build the application from the declarations above and run it.
builder.Build().Run();
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"$schema": "https://json.schemastore.org/launchsettings.json",
"profiles": {
"https": {
"commandName": "Project",
"dotnetRunMessages": true,
"launchBrowser": true,
"applicationUrl": "https://localhost:17234;http://localhost:15087",
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development",
"DOTNET_ENVIRONMENT": "Development",
"DOTNET_DASHBOARD_OTLP_ENDPOINT_URL": "https://localhost:21284",
"DOTNET_RESOURCE_SERVICE_ENDPOINT_URL": "https://localhost:22143"
}
},
"http": {
"commandName": "Project",
"dotnetRunMessages": true,
"launchBrowser": true,
"applicationUrl": "http://localhost:15087",
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development",
"DOTNET_ENVIRONMENT": "Development",
"DOTNET_DASHBOARD_OTLP_ENDPOINT_URL": "http://localhost:19030",
"DOTNET_RESOURCE_SERVICE_ENDPOINT_URL": "http://localhost:20232"
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft.AspNetCore": "Warning"
}
}
}
9 changes: 9 additions & 0 deletions playground/DashboardToy/DashboardToy.AppHost/appsettings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft.AspNetCore": "Warning",
"Aspire.Hosting.Dcp": "Warning"
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk.Web">

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<OrleansBuildTimeCodeGen>true</OrleansBuildTimeCodeGen>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Aspire.StackExchange.Redis" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\..\src\Orleans.Runtime\Orleans.Runtime.csproj" />
<ProjectReference Include="..\..\..\src\Redis\Orleans.Clustering.Redis\Orleans.Clustering.Redis.csproj" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
using System.Runtime.InteropServices;
using Orleans.Core.Internal;

namespace DashboardToy.Frontend.Data;

/// <summary>
/// Collects grain placement and call-frequency data from the cluster's management grain and turns it
/// into a <see cref="CallGraph"/> whose grains and hosts are addressed by dense integer indices.
/// </summary>
/// <remarks>
/// Index assignments are cached in mutable dictionaries between calls, so a grain or host keeps the
/// same index until <see cref="Reset"/> runs. Nothing in this class synchronizes access to that state.
/// </remarks>
/// <param name="grainFactory">Factory used to obtain the management, loader and fan-out grain references.</param>
public class ClusterDiagnosticsService(IGrainFactory grainFactory)
{
    // Index assigned to each silo the first time it is seen; doubles as its position in CallGraph.HostIds.
    private readonly Dictionary<SiloAddress, int> _hostKeys = [];

    // Latest activation count recorded for each silo returned by the active-hosts query.
    private readonly Dictionary<SiloAddress, HostDetails> _hostDetails = [];

    // Grain id -> (index in CallGraph.GrainIds, index of the host it was last seen on).
    private readonly Dictionary<GrainId, GrainDetails> _grainDetails = [];

    // Accumulated call counts per (source grain index, target grain index); rebuilt on every query.
    private readonly Dictionary<Key, ulong> _edges = [];

    private readonly IManagementGrain _managementGrain = grainFactory.GetGrain<IManagementGrain>(0);

    private readonly record struct GrainDetails(int GrainKey, int HostKey);

    private readonly record struct HostDetails(int HostKey, int ActivationCount);

    // Last reset count observed from the loader grain.
    private int _version;

    /// <summary>
    /// Queries the cluster and builds the current call graph.
    /// </summary>
    /// <remarks>
    /// Activations and edges that involve the loader grain type or a system target are excluded.
    /// If the loader grain reports a reset count higher than the last one seen, cached state is
    /// discarded through <see cref="ResetAsync"/> before the graph is rebuilt.
    /// </remarks>
    /// <returns>
    /// The grains, hosts and accumulated edges, together with the largest accumulated edge weight
    /// (clamped to <see cref="int.MaxValue"/>) and the largest per-host activation count.
    /// </returns>
    public async ValueTask<CallGraph> GetGrainCallFrequencies()
    {
        var loaderGrain = grainFactory.GetGrain<ILoaderGrain>("root");
        var loaderGrainType = loaderGrain.GetGrainId().Type;

        // A reset count above the last observed one invalidates every cached index.
        var resetCount = await loaderGrain.GetResetCount();
        if (resetCount > _version)
        {
            _version = resetCount;
            await ResetAsync();
        }

        _edges.Clear();
        var maxActivationCount = 0;

        var silos = (await _managementGrain.GetHosts(onlyActive: true)).Keys.Order();
        foreach (var silo in silos)
        {
            var hostKey = GetHostVertex(silo);
            var activationCount = 0;
            foreach (var activation in await _managementGrain.GetDetailedGrainStatistics(hostsIds: [silo]))
            {
                if (activation.GrainId.Type.Equals(loaderGrainType))
                {
                    continue;
                }

                if (activation.GrainId.IsSystemTarget())
                {
                    continue;
                }

                // Keep the grain's existing index but record the host it is on right now.
                var details = GetGrainVertex(activation.GrainId, hostKey);
                _grainDetails[activation.GrainId] = new(details.GrainKey, hostKey);
                ++activationCount;
            }

            maxActivationCount = Math.Max(maxActivationCount, activationCount);
            _hostDetails[silo] = new(hostKey, activationCount);
        }

        foreach (var edge in await _managementGrain.GetGrainCallFrequencies())
        {
            if (edge.TargetGrain.Type.Equals(loaderGrainType) || edge.SourceGrain.Type.Equals(loaderGrainType))
            {
                continue;
            }

            if (edge.TargetGrain.IsSystemTarget() || edge.SourceGrain.IsSystemTarget())
            {
                continue;
            }

            var sourceHostId = GetHostVertex(edge.SourceHost);
            var targetHostId = GetHostVertex(edge.TargetHost);
            var sourceVertex = GetGrainVertex(edge.SourceGrain, sourceHostId);
            var targetVertex = GetGrainVertex(edge.TargetGrain, targetHostId);
            UpdateEdge(new(sourceVertex.GrainKey, targetVertex.GrainKey), edge.CallCount);
        }

        // Grain indices are dense (0..Count-1), so every slot of the pre-sized list is written below.
        var grainIds = new List<GraphNode>(_grainDetails.Count);
        CollectionsMarshal.SetCount(grainIds, _grainDetails.Count);
        foreach ((var grainId, var (grainKey, hostKey)) in _grainDetails)
        {
            grainIds[grainKey] = new(grainId.ToString(), grainId.Key.ToString()!, hostKey, 1.0);
        }

        var hostIds = new List<HostNode>(_hostKeys.Count);
        CollectionsMarshal.SetCount(hostIds, _hostKeys.Count);
        foreach ((var hostId, var key) in _hostKeys)
        {
            // A host may have been indexed only because a call-frequency sample mentioned it; such a
            // host has no entry in _hostDetails, so report zero activations instead of throwing.
            var hostActivations = _hostDetails.TryGetValue(hostId, out var details) ? details.ActivationCount : 0;
            hostIds[key] = new(hostId.ToString(), hostActivations);
        }

        // Take the maximum over the accumulated weights so that it bounds every edge that is returned,
        // even when several samples were summed into the same (source, target) pair.
        var edges = new List<GraphEdge>(_edges.Count);
        ulong maxEdgeWeight = 0;
        foreach (var (edgeKey, callCount) in _edges)
        {
            edges.Add(new(edgeKey.Source, edgeKey.Target, callCount));
            maxEdgeWeight = Math.Max(maxEdgeWeight, callCount);
        }

        // Clamp instead of casting directly: an unchecked ulong -> int cast could wrap to a negative value.
        var maxEdgeValue = (int)Math.Min(maxEdgeWeight, (ulong)int.MaxValue);

        return new(grainIds, hostIds, edges, maxEdgeValue, maxActivationCount);
    }

    /// <summary>
    /// Requests deactivation of every activation whose grain type matches the fan-out grain type,
    /// then clears all cached graph state.
    /// </summary>
    internal async ValueTask ResetAsync()
    {
        // The key passed here is only used to obtain a reference from which the grain type is read.
        var fanoutType = grainFactory.GetGrain<IFanOutGrain>(0, "0").GetGrainId().Type;
        foreach (var activation in await _managementGrain.GetDetailedGrainStatistics())
        {
            if (!activation.GrainId.Type.Equals(fanoutType))
            {
                continue;
            }

            await grainFactory.GetGrain<IGrainManagementExtension>(activation.GrainId).DeactivateOnIdle();
        }

        Reset();
    }

    /// <summary>
    /// Clears the cached host indices, host details, grain details and edges.
    /// </summary>
    internal void Reset()
    {
        _hostKeys.Clear();
        _hostDetails.Clear();
        _grainDetails.Clear();
        _edges.Clear();
    }

    /// <summary>
    /// Returns the details stored for <paramref name="grainId"/>, registering the grain with the next
    /// free index and <paramref name="hostKey"/> when it has not been seen before.
    /// </summary>
    private GrainDetails GetGrainVertex(GrainId grainId, int hostKey)
    {
        ref var key = ref CollectionsMarshal.GetValueRefOrAddDefault(_grainDetails, grainId, out var exists);
        if (!exists)
        {
            // The entry has already been added, so Count - 1 is the next zero-based index.
            key = new(_grainDetails.Count - 1, hostKey);
        }

        return key;
    }

    /// <summary>
    /// Returns the index assigned to <paramref name="silo"/>, assigning the next free index when the
    /// silo has not been seen before.
    /// </summary>
    private int GetHostVertex(SiloAddress silo)
    {
        ref var key = ref CollectionsMarshal.GetValueRefOrAddDefault(_hostKeys, silo, out var exists);
        if (!exists)
        {
            // The entry has already been added, so Count - 1 is the next zero-based index.
            key = _hostKeys.Count - 1;
        }

        return key;
    }

    /// <summary>
    /// Adds <paramref name="increment"/> to the accumulated count for <paramref name="key"/>,
    /// starting from zero when the edge is new.
    /// </summary>
    private void UpdateEdge(Key key, ulong increment)
    {
        ref var count = ref CollectionsMarshal.GetValueRefOrAddDefault(_edges, key, out _);
        count += increment;
    }
}

/// <summary>
/// Snapshot of the cluster call graph.
/// </summary>
/// <param name="GrainIds">Grain nodes; a grain's position in this list is the index used by <see cref="GraphEdge"/>.</param>
/// <param name="HostIds">Host nodes; a host's position in this list is the index stored in <see cref="GraphNode.Host"/>.</param>
/// <param name="Edges">Edges between grain nodes.</param>
/// <param name="MaxEdgeValue">Largest edge value found while building the graph.</param>
/// <param name="MaxActivationCount">Largest activation count found on a single host.</param>
public record CallGraph(
    List<GraphNode> GrainIds,
    List<HostNode> HostIds,
    List<GraphEdge> Edges,
    int MaxEdgeValue,
    int MaxActivationCount);

/// <summary>
/// A host in the call graph.
/// </summary>
/// <param name="Name">Display name of the host.</param>
/// <param name="ActivationCount">Number of activations counted on the host.</param>
public record struct HostNode(
    string Name,
    int ActivationCount);

/// <summary>
/// A grain in the call graph.
/// </summary>
/// <param name="Name">Display name of the grain.</param>
/// <param name="Key">Key portion of the grain's identity, as text.</param>
/// <param name="Host">Index of the host the grain is associated with.</param>
/// <param name="Weight">Weight of the node.</param>
public record struct GraphNode(
    string Name,
    string Key,
    int Host,
    double Weight);

/// <summary>
/// Identifies a directed edge by the indices of its source and target nodes.
/// </summary>
/// <param name="Source">Index of the source node.</param>
/// <param name="Target">Index of the target node.</param>
public record struct Key(
    int Source,
    int Target);

/// <summary>
/// A weighted, directed edge between two nodes.
/// </summary>
/// <param name="Source">Index of the source node.</param>
/// <param name="Target">Index of the target node.</param>
/// <param name="Weight">Weight of the edge.</param>
public record struct GraphEdge(
    int Source,
    int Target,
    double Weight);
Loading
Loading