Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b2914ee
init aws compute scanner
zacharyblasczyk Nov 13, 2024
6bf07aa
fix
zacharyblasczyk Nov 16, 2024
2d826d3
inital working version
zacharyblasczyk Nov 19, 2024
af49150
merge fix
zacharyblasczyk Nov 21, 2024
92f77cc
Merge remote-tracking branch 'origin' into zacharyb/aws-compute-scanner
zacharyblasczyk Nov 21, 2024
20ed6fd
Merge remote-tracking branch 'origin' into zacharyb/aws-compute-scanner
zacharyblasczyk Nov 21, 2024
3571fa5
Merge remote-tracking branch 'origin' into zacharyb/aws-compute-scanner
zacharyblasczyk Nov 22, 2024
358d68a
Merge remote-tracking branch 'origin' into zacharyb/aws-compute-scanner
zacharyblasczyk Nov 26, 2024
bb28c24
Namespace checkpoint
zacharyblasczyk Nov 26, 2024
6c3932f
fix merge conflict
zacharyblasczyk Nov 27, 2024
34be313
merge fix
zacharyblasczyk Nov 27, 2024
0b3ea0d
working scanner that needs a bit of cleanup
zacharyblasczyk Nov 27, 2024
91f56af
clean up
jsbroks Nov 27, 2024
810cc1e
clean up and fix
zacharyblasczyk Nov 27, 2024
8200455
clean up
zacharyblasczyk Nov 27, 2024
6b7c558
fix build
zacharyblasczyk Nov 28, 2024
9ebdb86
merge fix
zacharyblasczyk Dec 2, 2024
b6cfbe6
cleanup
zacharyblasczyk Dec 2, 2024
50ffdba
least access
zacharyblasczyk Dec 2, 2024
4302b5e
Update apps/event-worker/src/target-scan/index.ts
zacharyblasczyk Dec 2, 2024
46dadc3
Update apps/event-worker/src/target-scan/index.ts
zacharyblasczyk Dec 2, 2024
257e502
mergeing
zacharyblasczyk Dec 3, 2024
428480b
simplify the logic
zacharyblasczyk Dec 3, 2024
f88e980
fix lint
zacharyblasczyk Dec 3, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions apps/event-worker/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
"with-env": "dotenv -e ../../.env --"
},
"dependencies": {
"@aws-sdk/client-ec2": "^3.701.0",
"@aws-sdk/client-eks": "^3.699.0",
"@aws-sdk/client-sts": "^3.699.0",
"@ctrlplane/db": "workspace:*",
"@ctrlplane/job-dispatch": "workspace:*",
"@ctrlplane/logger": "workspace:*",
Expand All @@ -20,6 +23,7 @@
"@kubernetes/client-node": "^0.22.0",
"@octokit/auth-app": "^7.1.0",
"@octokit/rest": "catalog:",
"@smithy/types": "^3.7.1",
"@t3-oss/env-core": "catalog:",
"bullmq": "catalog:",
"cron": "^3.1.7",
Expand All @@ -29,6 +33,7 @@
"lodash": "catalog:",
"ms": "^2.1.3",
"semver": "^7.6.2",
"ts-is-present": "^1.2.2",
"uuid": "^10.0.0",
"zod": "catalog:"
},
Expand Down
12 changes: 8 additions & 4 deletions apps/event-worker/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@ import { logger } from "@ctrlplane/logger";

import { createDispatchExecutionJobWorker } from "./job-dispatch/index.js";
import { redis } from "./redis.js";
import { createResourceScanWorker } from "./target-scan/index.js";
import {
createAwsResourceScanWorker,
createGoogleResourceScanWorker,
} from "./target-scan/index.js";

const resourceScanWorker = createResourceScanWorker();
const resourceGoogleScanWorker = createGoogleResourceScanWorker();
const resourceAwsScanWorker = createAwsResourceScanWorker();
const dispatchExecutionJobWorker = createDispatchExecutionJobWorker();

const shutdown = () => {
logger.warn("Exiting...");

resourceScanWorker.close();
resourceAwsScanWorker.close();
resourceGoogleScanWorker.close();
dispatchExecutionJobWorker.close();

redis.quit();
Expand Down
59 changes: 59 additions & 0 deletions apps/event-worker/src/target-scan/aws.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import type { Credentials } from "@aws-sdk/client-sts";
import type { AwsCredentialIdentity } from "@smithy/types";
import { EC2Client } from "@aws-sdk/client-ec2";
import { EKSClient } from "@aws-sdk/client-eks";
import { AssumeRoleCommand, STSClient } from "@aws-sdk/client-sts";

const sourceClient = new STSClient({ region: "us-east-1" });

export class AwsCredentials {
static from(credentials: Credentials) {
return new AwsCredentials(credentials);
}

private constructor(private readonly credentials: Credentials) {}

toIdentity(): AwsCredentialIdentity {
if (
this.credentials.AccessKeyId == null ||
this.credentials.SecretAccessKey == null
)
throw new Error("Missing required AWS credentials");

return {
accessKeyId: this.credentials.AccessKeyId,
secretAccessKey: this.credentials.SecretAccessKey,
sessionToken: this.credentials.SessionToken ?? undefined,
};
}

ec2(region?: string) {
return new EC2Client({ region, credentials: this.toIdentity() });
}

eks(region?: string) {
return new EKSClient({ region, credentials: this.toIdentity() });
}

sts(region?: string) {
return new STSClient({ region, credentials: this.toIdentity() });
}
}

export const assumeWorkspaceRole = async (roleArn: string) =>
assumeRole(sourceClient, roleArn);

export const assumeRole = async (
client: STSClient,
roleArn: string,
): Promise<AwsCredentials> => {
const { Credentials: CustomerCredentials } = await client.send(
new AssumeRoleCommand({
RoleArn: roleArn,
RoleSessionName: "CtrlplaneScanner",
}),
);
if (CustomerCredentials == null)
throw new Error(`Failed to assume AWS role ${roleArn}`);
return AwsCredentials.from(CustomerCredentials);
};
186 changes: 186 additions & 0 deletions apps/event-worker/src/target-scan/eks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
import type { Cluster, EKSClient } from "@aws-sdk/client-eks";
import type { STSClient } from "@aws-sdk/client-sts";
import type { ResourceProviderAws, Workspace } from "@ctrlplane/db/schema";
import type { KubernetesClusterAPIV1 } from "@ctrlplane/validators/resources";
import { DescribeRegionsCommand } from "@aws-sdk/client-ec2";
import {
DescribeClusterCommand,
ListClustersCommand,
} from "@aws-sdk/client-eks";
import _ from "lodash";
import { isPresent } from "ts-is-present";

import { logger } from "@ctrlplane/logger";
import { ReservedMetadataKey } from "@ctrlplane/validators/conditions";

import type { AwsCredentials } from "./aws.js";
import { omitNullUndefined } from "../utils.js";
import { assumeRole, assumeWorkspaceRole } from "./aws.js";

const log = logger.child({ label: "resource-scan/eks" });

const convertEksClusterToKubernetesResource = (
accountId: string,
cluster: Cluster,
): KubernetesClusterAPIV1 => {
const region = cluster.endpoint?.split(".")[2];

const partition =
cluster.arn?.split(":")[1] ??
(region?.startsWith("us-gov-") ? "aws-us-gov" : "aws");

const appUrl = `https://${
partition === "aws-us-gov"
? `console.${region}.${partition}`
: "console.aws.amazon"
}.com/eks/home?region=${region}#/clusters/${cluster.name}`;

const version = cluster.version!;
const [major, minor] = version.split(".");

return {
name: cluster.name ?? "",
identifier: `aws/${accountId}/eks/${cluster.name}`,
version: "kubernetes/v1" as const,
kind: "ClusterAPI" as const,
config: {
name: cluster.name!,
auth: {
method: "aws/eks" as const,
region: region!,
clusterName: cluster.name!,
},
status: cluster.status ?? "UNKNOWN",
server: {
certificateAuthorityData: cluster.certificateAuthority?.data,
endpoint: cluster.endpoint!,
},
},
metadata: omitNullUndefined({
[ReservedMetadataKey.Links]: JSON.stringify({ "AWS Console": appUrl }),
[ReservedMetadataKey.ExternalId]: cluster.arn ?? "",
[ReservedMetadataKey.KubernetesFlavor]: "eks",
[ReservedMetadataKey.KubernetesVersion]: cluster.version,

"aws/arn": cluster.arn,
"aws/region": region,
"aws/platform-version": cluster.platformVersion,

"kubernetes/status": cluster.status,
"kubernetes/version-major": major,
"kubernetes/version-minor": minor,

...(cluster.tags ?? {}),
}),
};
};

const getAwsRegions = async (credentials: AwsCredentials) =>
credentials
.ec2()
.send(new DescribeRegionsCommand({}))
.then(({ Regions = [] }) => Regions.map((region) => region.RegionName));

const getClusters = async (client: EKSClient) =>
client
.send(new ListClustersCommand({}))
.then((response) => response.clusters ?? []);

const createEksClusterScannerForRegion = (
client: AwsCredentials,
customerRoleArn: string,
) => {
const accountId = /arn:aws:iam::(\d+):/.exec(customerRoleArn)?.[1];
if (accountId == null) throw new Error("Missing account ID");

return async (region: string) => {
const eksClient = client.eks(region);
const clusters = await getClusters(eksClient);
log.info(
`Found ${clusters.length} clusters for ${customerRoleArn} in region ${region}`,
);

return _.chain(clusters)
.map((name) =>
eksClient
.send(new DescribeClusterCommand({ name }))
.then(({ cluster }) => cluster),
)
.thru((promises) => Promise.all(promises))
.value()
.then((clusterDetails) =>
clusterDetails
.filter(isPresent)
.map((cluster) =>
convertEksClusterToKubernetesResource(accountId, cluster),
),
);
Comment on lines +103 to +117
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Handle API call failures individually to prevent entire region scan from failing.

If any DescribeClusterCommand fails, it will reject the entire Promise.all, causing the scan for the region to fail. Handle each API call failure to ensure the scan continues even if some cluster descriptions fail.

Apply this diff to handle errors in cluster descriptions:

 return _.chain(clusters)
   .map((name) =>
     eksClient
       .send(new DescribeClusterCommand({ name }))
       .then(({ cluster }) => cluster)
+      .catch((error) => {
+        log.error(`Failed to describe cluster ${name} in region ${region}: ${error}`);
+        return null; // Skip this cluster and continue
+      }),
   )
-  .thru((promises) => Promise.all(promises))
+  .thru((promises) => Promise.all(promises))
   .value()
   .then((clusterDetails) =>
     clusterDetails
       .filter(isPresent)
       .map((cluster) =>
         convertEksClusterToKubernetesResource(accountId, cluster),
       ),
   );

Committable suggestion skipped: line range outside the PR's diff.

};
};

const scanEksClustersByAssumedRole = async (
workspaceClient: STSClient,
customerRoleArn: string,
) => {
const client = await assumeRole(workspaceClient, customerRoleArn);
const regions = await getAwsRegions(client);

log.info(
`Scanning ${regions.length} AWS regions for EKS clusters in account ${customerRoleArn}`,
);

const regionalClusterScanner = createEksClusterScannerForRegion(
client,
customerRoleArn,
);

return _.chain(regions)
.filter(isPresent)
.map(regionalClusterScanner)
.thru((promises) => Promise.all(promises))
.value()
.then((results) => results.flat());
};

export const getEksResources = async (
workspace: Workspace,
config: ResourceProviderAws,
) => {
const { awsRoleArn: workspaceRoleArn } = workspace;
if (workspaceRoleArn == null) return [];

log.info(
`Scanning for EKS cluters with assumed role arns ${config.awsRoleArns.join(", ")} using role ${workspaceRoleArn}`,
{
workspaceId: workspace.id,
config,
workspaceRoleArn,
},
);

const credentials = await assumeWorkspaceRole(workspaceRoleArn);
const workspaceStsClient = credentials.sts();

const resources = await _.chain(config.awsRoleArns)
.map((customerRoleArn) =>
scanEksClustersByAssumedRole(workspaceStsClient, customerRoleArn),
)
.thru((promises) => Promise.all(promises))
.value()
.then((results) => results.flat())
.then((resources) =>
resources.map((resource) => ({
Comment on lines +164 to +172
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Use Promise.allSettled to handle failures across multiple AWS roles gracefully.

Using Promise.all will cause the entire scan to fail if any role scanning fails. By using Promise.allSettled, you can collect successful results while logging or handling failures individually.

Apply this diff to use Promise.allSettled:

 const resources = await _.chain(config.awsRoleArns)
   .map((customerRoleArn) =>
     scanEksClustersByAssumedRole(workspaceStsClient, customerRoleArn)
+      .then((result) => ({ status: 'fulfilled', value: result }))
+      .catch((error) => {
+        log.error(`Failed to scan EKS clusters for role ${customerRoleArn}: ${error}`);
+        return { status: 'rejected', reason: error };
+      }),
   )
-  .thru((promises) => Promise.all(promises))
+  .thru((promises) => Promise.allSettled(promises))
   .value()
-  .then((results) => results.flat())
-  .then((resources) =>
+  .then((results) =>
+    results
+      .filter((result) => result.status === 'fulfilled')
+      .flatMap((result) => result.value),
+  )
+  .then((resources) =>
     resources.map((resource) => ({
       ...resource,
       workspaceId: workspace.id,
       providerId: config.resourceProviderId,
     })),
   );
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
const resources = await _.chain(config.awsRoleArns)
.map((customerRoleArn) =>
scanEksClustersByAssumedRole(workspaceStsClient, customerRoleArn),
)
.thru((promises) => Promise.all(promises))
.value()
.then((results) => results.flat())
.then((resources) =>
resources.map((resource) => ({
const resources = await _.chain(config.awsRoleArns)
.map((customerRoleArn) =>
scanEksClustersByAssumedRole(workspaceStsClient, customerRoleArn)
.then((result) => ({ status: 'fulfilled', value: result }))
.catch((error) => {
log.error(`Failed to scan EKS clusters for role ${customerRoleArn}: ${error}`);
return { status: 'rejected', reason: error };
}),
)
.thru((promises) => Promise.allSettled(promises))
.value()
.then((results) =>
results
.filter((result) => result.status === 'fulfilled')
.flatMap((result) => result.value),
)
.then((resources) =>
resources.map((resource) => ({
...resource,
workspaceId: workspace.id,
providerId: config.resourceProviderId,
})),
);

...resource,
workspaceId: workspace.id,
providerId: config.resourceProviderId,
})),
);

const resourceTypes = _.countBy(resources, (resource) =>
[resource.kind, resource.version].join("/"),
);

log.info(`Found ${resources.length} resources`, { resourceTypes });

return resources;
};
Loading
Loading