From d5955051f1c7c4567d2ff44bb31c06e3af48b4cc Mon Sep 17 00:00:00 2001 From: tevko Date: Tue, 6 May 2025 14:45:19 -0500 Subject: [PATCH 01/42] make arch ready --- README.md | 30 +- cdk/lib/cdk-stack.ts | 618 ++++++++++++++++++++++-------------- docker-compose.yml | 9 +- scripts/after_install.sh | 19 +- scripts/application_stop.sh | 89 +++++- 5 files changed, 502 insertions(+), 263 deletions(-) diff --git a/README.md b/README.md index 8e9e4b0ec7..781221325f 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ cp example.env .env ```sh -docker compose --profile postgres up --build +docker compose --profile postgres --profile local-services up --build ``` If you get a permission error, try running this command with `sudo`. @@ -89,7 +89,7 @@ To avoid having to use `sudo` in the future (on a Linux or Windows machine with Once you've built the docker images, you can run without `--build`, which may be faster. Run ```sh -docker compose --profile postgres up +docker compose --profile postgres --profile local-services up ``` or simply @@ -105,14 +105,14 @@ If you have only changed configuration values in .env, you can recreate your con fully rebuilding them with `--force-recreate`. 
For example: ```sh -docker compose --profile postgres down -docker compose --profile postgres up --force-recreate +docker compose --profile postgres --profile local-services down +docker compose --profile postgres --profile local-services up --force-recreate ``` To see what the environment of your containers is going to look like, run: ```sh -docker compose --profile postgres convert +docker compose --profile postgres --profile local-services convert ``` #### Using a local or remote (non-docker) database @@ -139,6 +139,22 @@ make PROD start make PROD start-rebuild ``` +### Running without Local Cloud Service Emulators +If you want to run the stack without the local MinIO and DynamoDB services (e.g., to test connecting to real AWS services configured in your .env file), simply omit the --profile local-services flag. + +Example: Run with the containerized DB but connect to external/real cloud services: + +```sh +docker compose --profile postgres up +``` + +Example: Run with an external DB and external/real cloud services (closest to production): + +```sh +docker compose up +``` + + ### Testing out your instance You can now test your setup by visiting `http://localhost:80/home`. @@ -146,7 +162,7 @@ You can now test your setup by visiting `http://localhost:80/home`. Once the index page loads, you can create an account using the `/createuser` path. You'll be logged in right away; email validation is not required. -When you're done working, you can end the process using `Ctrl+C`, or typing `docker compose --profile postgres down` +When you're done working, you can end the process using `Ctrl+C`, or typing `docker compose --profile postgres --profile local-services down` if you are running in "detached mode". 
### Updating the system @@ -227,7 +243,7 @@ git config --local include.path ../.gitconfig #### Running as a background process -If you would like to run docker compose as a background process, run the `up` commands with the `--detach` flag, and use `docker compose --profile postgres down` to stop. +If you would like to run docker compose as a background process, run the `up` commands with the `--detach` flag, and use `docker compose --profile postgres --profile local-services down` to stop. #### Using Docker Machine as your development environment diff --git a/cdk/lib/cdk-stack.ts b/cdk/lib/cdk-stack.ts index c444da9cb6..59614df540 100644 --- a/cdk/lib/cdk-stack.ts +++ b/cdk/lib/cdk-stack.ts @@ -11,9 +11,11 @@ import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; import * as sns from 'aws-cdk-lib/aws-sns'; import * as subscriptions from 'aws-cdk-lib/aws-sns-subscriptions'; import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; +import * as cloudwatch_actions from 'aws-cdk-lib/aws-cloudwatch-actions'; // Import actions submodule import * as acm from 'aws-cdk-lib/aws-certificatemanager'; import * as ecr from 'aws-cdk-lib/aws-ecr'; import * as ssm from 'aws-cdk-lib/aws-ssm'; +import * as efs from 'aws-cdk-lib/aws-efs'; // Import EFS module import { Construct } from 'constructs'; interface PolisStackProps extends cdk.StackProps { @@ -25,6 +27,7 @@ interface PolisStackProps extends cdk.StackProps { mathWorkerKeyPairName?: string; // Key pair for math worker delphiSmallKeyPairName?: string; // Key pair for small Delphi instances delphiLargeKeyPairName?: string; // Key pair for large Delphi instance + ollamaKeyPairName?: string; // Key pair for Ollama instance - NEW } export class CdkStack extends cdk.Stack { @@ -32,10 +35,14 @@ export class CdkStack extends cdk.Stack { super(scope, id, props); const defaultSSHRange = '0.0.0.0/0'; + const ollamaPort = 11434; + const ollamaModelDirectory = '/efs/ollama-models'; + const ollamaNamespace = 'OllamaMetrics'; // 
Custom namespace for GPU metrics + // --- VPC Configuration const vpc = new ec2.Vpc(this, 'Vpc', { maxAzs: 2, - natGateways: 0, + natGateways: 1, // Use 1 for non-prod/cost saving, 2+ for prod HA subnetConfiguration: [ { cidrMask: 24, @@ -45,7 +52,12 @@ export class CdkStack extends cdk.Stack { { cidrMask: 24, name: 'Private', - subnetType: ec2.SubnetType.PRIVATE_ISOLATED, // Use PRIVATE_ISOLATED + subnetType: ec2.SubnetType.PRIVATE_ISOLATED, + }, + { + cidrMask: 24, + name: 'PrivateWithEgress', + subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS, }, ] }); @@ -53,11 +65,13 @@ export class CdkStack extends cdk.Stack { const alarmTopic = new sns.Topic(this, 'AlarmTopic', { displayName: 'Polis Application Alarms', }); - alarmTopic.addSubscription(new subscriptions.EmailSubscription('tim@compdemocracy.org')); + const logGroup = new logs.LogGroup(this, 'LogGroup', { + retention: logs.RetentionDays.ONE_MONTH, + removalPolicy: cdk.RemovalPolicy.DESTROY, + }); - const logGroup = new logs.LogGroup(this, 'LogGroup'); - + // --- Instance Types & AMIs const instanceTypeWeb = ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.MEDIUM); const machineImageWeb = new ec2.AmazonLinuxImage({ generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023 }); const instanceTypeMathWorker = ec2.InstanceType.of(ec2.InstanceClass.R8G, ec2.InstanceSize.XLARGE4); @@ -65,84 +79,106 @@ export class CdkStack extends cdk.Stack { generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023, cpuType: ec2.AmazonLinuxCpuType.ARM_64, }); - - // Delphi small instance (cost efficient) + // Delphi small instance const instanceTypeDelphiSmall = ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.LARGE); - const machineImageDelphiSmall = new ec2.AmazonLinuxImage({ - generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023 + const machineImageDelphiSmall = new ec2.AmazonLinuxImage({ + generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023 }); - - // Delphi large instance (performance optimized) + // Delphi 
large instance const instanceTypeDelphiLarge = ec2.InstanceType.of(ec2.InstanceClass.C6G, ec2.InstanceSize.XLARGE4); const machineImageDelphiLarge = new ec2.AmazonLinuxImage({ generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023, cpuType: ec2.AmazonLinuxCpuType.ARM_64 }); + // Ollama Instance + const instanceTypeOllama = ec2.InstanceType.of(ec2.InstanceClass.G4DN, ec2.InstanceSize.XLARGE); // x86_64 GPU instance + const machineImageOllama = ec2.MachineImage.genericLinux({ + 'us-east-1': 'ami-08e0cf6df13ae3ddb', + }); + // --- Security Groups const webSecurityGroup = new ec2.SecurityGroup(this, 'WebSecurityGroup', { vpc, description: 'Allow HTTP and SSH access to web instances', - allowAllOutbound: true, + allowAllOutbound: true }); - const mathWorkerSecurityGroup = new ec2.SecurityGroup(this, 'MathWorkerSG', { vpc, description: 'Security group for Polis math worker', - allowAllOutbound: true, + allowAllOutbound: true }); - + // Delphi Security Group const delphiSecurityGroup = new ec2.SecurityGroup(this, 'DelphiSecurityGroup', { vpc, - description: 'Security group for Delphi worker instances', - allowAllOutbound: true, + description: 'SG for Delphi instances', + allowAllOutbound: true }); - + // Ollama Security Group + const ollamaSecurityGroup = new ec2.SecurityGroup(this, 'OllamaSecurityGroup', { + vpc, + description: 'SG for Ollama instance', + allowAllOutbound: true + }); + // EFS Security Group + const efsSecurityGroup = new ec2.SecurityGroup(this, 'EfsSecurityGroup', { + vpc, + description: 'SG for EFS mount targets', + allowAllOutbound: false + }); + + // Allow Delphi -> Ollama + ollamaSecurityGroup.addIngressRule( + delphiSecurityGroup, + ec2.Port.tcp(ollamaPort), + `Allow Delphi access on ${ollamaPort}` + ); + // Allow Ollama -> EFS + efsSecurityGroup.addIngressRule( + ollamaSecurityGroup, + ec2.Port.tcp(2049), // NFS port + 'Allow NFS from Ollama instances' + ); + + // Conditional SSH Access if (props.enableSSHAccess) { - 
webSecurityGroup.addIngressRule(ec2.Peer.ipv4(props.sshAllowedIpRange || defaultSSHRange), ec2.Port.tcp(22), 'Allow SSH access'); - mathWorkerSecurityGroup.addIngressRule(ec2.Peer.ipv4(props.sshAllowedIpRange || defaultSSHRange), ec2.Port.tcp(22), 'Allow SSH access'); - delphiSecurityGroup.addIngressRule(ec2.Peer.ipv4(props.sshAllowedIpRange || defaultSSHRange), ec2.Port.tcp(22), 'Allow SSH access'); + const sshPeer = ec2.Peer.ipv4(props.sshAllowedIpRange || defaultSSHRange); + webSecurityGroup.addIngressRule(sshPeer, ec2.Port.tcp(22), 'Allow SSH access'); + mathWorkerSecurityGroup.addIngressRule(sshPeer, ec2.Port.tcp(22), 'Allow SSH access'); + delphiSecurityGroup.addIngressRule(sshPeer, ec2.Port.tcp(22), 'Allow SSH access'); // NEW + ollamaSecurityGroup.addIngressRule(sshPeer, ec2.Port.tcp(22), 'Allow SSH access'); // NEW } - // Key Pair Creation - let webKeyPair: ec2.IKeyPair | undefined; - if (props.enableSSHAccess) { - webKeyPair = props.webKeyPairName - ? ec2.KeyPair.fromKeyPairName(this, 'WebKeyPair', props.webKeyPairName) - : new ec2.KeyPair(this, 'WebKeyPair'); - } + webSecurityGroup.addIngressRule(ec2.Peer.ipv4(props.sshAllowedIpRange || defaultSSHRange), ec2.Port.tcp(22), 'Allow SSH'); // Control SSH separately + webSecurityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(80), 'Allow HTTP from anywhere'); + webSecurityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(443), 'Allow HTTPS from anywhere'); - let mathWorkerKeyPair: ec2.IKeyPair | undefined; - if (props.enableSSHAccess) { - mathWorkerKeyPair = props.mathWorkerKeyPairName - ? ec2.KeyPair.fromKeyPairName(this, 'MathWorkerKeyPair', props.mathWorkerKeyPairName) - : new ec2.KeyPair(this, 'MathWorkerKeyPair'); - } - let delphiSmallKeyPair: ec2.IKeyPair | undefined; - if (props.enableSSHAccess) { - delphiSmallKeyPair = props.delphiSmallKeyPairName - ? 
ec2.KeyPair.fromKeyPairName(this, 'DelphiSmallKeyPair', props.delphiSmallKeyPairName) - : new ec2.KeyPair(this, 'DelphiSmallKeyPair'); - } + // --- Key Pairs + const getKeyPair = (name: string, requestedName?: string): ec2.IKeyPair | undefined => { + if (!props.enableSSHAccess) return undefined; + return requestedName + ? ec2.KeyPair.fromKeyPairName(this, name, requestedName) + : new ec2.KeyPair(this, name); + }; + const webKeyPair = getKeyPair('WebKeyPair', props.webKeyPairName); + const mathWorkerKeyPair = getKeyPair('MathWorkerKeyPair', props.mathWorkerKeyPairName); + const delphiSmallKeyPair = getKeyPair('DelphiSmallKeyPair', props.delphiSmallKeyPairName); + const delphiLargeKeyPair = getKeyPair('DelphiLargeKeyPair', props.delphiLargeKeyPairName); + const ollamaKeyPair = getKeyPair('OllamaKeyPair', props.ollamaKeyPairName); - let delphiLargeKeyPair: ec2.IKeyPair | undefined; - if (props.enableSSHAccess) { - delphiLargeKeyPair = props.delphiLargeKeyPairName - ? ec2.KeyPair.fromKeyPairName(this, 'DelphiLargeKeyPair', props.delphiLargeKeyPairName) - : new ec2.KeyPair(this, 'DelphiLargeKeyPair'); - } + // --- IAM Role const instanceRole = new iam.Role(this, 'InstanceRole', { assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com'), managedPolicies: [ - iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore'), - iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonEC2RoleforAWSCodeDeploy'), - iam.ManagedPolicy.fromAwsManagedPolicyName('SecretsManagerReadWrite'), - iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonEC2ContainerRegistryReadOnly'), - iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchLogsFullAccess') + iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore'), + iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonEC2RoleforAWSCodeDeploy'), + iam.ManagedPolicy.fromAwsManagedPolicyName('SecretsManagerReadWrite'), + iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonEC2ContainerRegistryReadOnly'), + 
iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchLogsFullAccess'), + iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchAgentServerPolicy'), ], }); - instanceRole.addToPolicy(new iam.PolicyStatement({ actions: ['s3:PutObject', 's3:PutObjectAcl', 's3:AbortMultipartUpload'], resources: ['arn:aws:s3:::*', 'arn:aws:s3:::*/*'], @@ -152,11 +188,11 @@ export class CdkStack extends cdk.Stack { const codeDeployRole = new iam.Role(this, 'CodeDeployRole', { assumedBy: new iam.ServicePrincipal('codedeploy.amazonaws.com'), managedPolicies: [ - iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSCodeDeployRole'), + iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSCodeDeployRole'), ], }); - // ALB Security Group - Allow HTTP/HTTPS from anywhere + // ALB Security Group const lbSecurityGroup = new ec2.SecurityGroup(this, 'LBSecurityGroup', { vpc, description: 'Security group for the load balancer', @@ -165,83 +201,44 @@ export class CdkStack extends cdk.Stack { lbSecurityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(80), 'Allow HTTP from anywhere'); lbSecurityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(443), 'Allow HTTPS from anywhere'); - // things are dockerized so we need ECR - const ecrWebRepository = new ecr.Repository(this, 'PolisRepositoryServer', { - repositoryName: 'polis/server', - removalPolicy: cdk.RemovalPolicy.RETAIN, - imageScanOnPush: true, - }); - - ecrWebRepository.addToResourcePolicy(new iam.PolicyStatement({ // allow docker pull from anywhere - sid: 'AllowPublicPull', - effect: iam.Effect.ALLOW, - principals: [new iam.AnyPrincipal()], - actions: [ - "ecr:BatchCheckLayerAvailability", - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - ], - })); - - const ecrMathRepository = new ecr.Repository(this, 'PolisRepositoryMath', { - repositoryName: 'polis/math', - removalPolicy: cdk.RemovalPolicy.RETAIN, - imageScanOnPush: true, - }); - - ecrMathRepository.addToResourcePolicy(new iam.PolicyStatement({ - sid: 
'AllowPublicPull', - effect: iam.Effect.ALLOW, - principals: [new iam.AnyPrincipal()], - actions: [ - "ecr:BatchCheckLayerAvailability", - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - ], - })); - - const ecrDelphiRepository = new ecr.Repository(this, 'PolisRepositoryDelphi', { - repositoryName: 'polis/delphi', - removalPolicy: cdk.RemovalPolicy.RETAIN, - imageScanOnPush: true, - }); - - ecrDelphiRepository.addToResourcePolicy(new iam.PolicyStatement({ - sid: 'AllowPublicPull', - effect: iam.Effect.ALLOW, - principals: [new iam.AnyPrincipal()], - actions: [ - "ecr:BatchCheckLayerAvailability", - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - ], - })); - - ecrWebRepository.grantPull(instanceRole); - ecrMathRepository.grantPull(instanceRole); - ecrDelphiRepository.grantPull(instanceRole); + // --- ECR Repositories + const createEcrRepo = (name: string): ecr.Repository => { + const repo = new ecr.Repository(this, `PolisRepository${name}`, { + repositoryName: `polis/${name.toLowerCase()}`, + removalPolicy: cdk.RemovalPolicy.RETAIN, + imageScanOnPush: true, + }); + + repo.addToResourcePolicy(new iam.PolicyStatement({ + sid: 'AllowPublicPull', + effect: iam.Effect.ALLOW, + principals: [new iam.AnyPrincipal()], + actions: [ + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + ], + })); + repo.grantPull(instanceRole); // Grant pull to the shared instance role + return repo; + }; + const ecrWebRepository = createEcrRepo('Server'); + const ecrMathRepository = createEcrRepo('Math'); + const ecrDelphiRepository = createEcrRepo('Delphi'); + // --- SSM Parameter for Image Tag const imageTagParameter = new ssm.StringParameter(this, 'ImageTagParameter', { parameterName: '/polis/image-tag', stringValue: 'initial-tag', //CI/CD will update this }); - - // --- Web ASG --- - webSecurityGroup.addIngressRule(ec2.Peer.ipv4(props.sshAllowedIpRange || defaultSSHRange), ec2.Port.tcp(22), 'Allow SSH'); // Control SSH separately - 
webSecurityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(80), 'Allow HTTP from anywhere'); - webSecurityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(443), 'Allow HTTPS from anywhere'); - - // --- Postgres --- - + // --- Postgres (PG17, GP2 in 'Private' ISOLATED subnet) --- const dbSubnetGroup = new rds.SubnetGroup(this, 'DatabaseSubnetGroup', { vpc, subnetGroupName: 'PolisDatabaseSubnetGroup', description: 'Subnet group for the postgres database', - vpcSubnets: { - subnetType: ec2.SubnetType.PRIVATE_ISOLATED, - }, - removalPolicy: cdk.RemovalPolicy.DESTROY, + vpcSubnets: { subnetGroupName: 'Private' }, + removalPolicy: cdk.RemovalPolicy.RETAIN, }); const db = new rds.DatabaseInstance(this, 'Database', { @@ -258,26 +255,41 @@ export class CdkStack extends cdk.Stack { subnetGroup: dbSubnetGroup, }); + // SSM Parameters for DB connection const dbSecretArnParam = new ssm.StringParameter(this, 'DBSecretArnParameter', { parameterName: '/polis/db-secret-arn', stringValue: db.secret!.secretArn, description: 'SSM Parameter storing the ARN of the Polis Database Secret', }); - const dbHostParam = new ssm.StringParameter(this, 'DBHostParameter', { parameterName: '/polis/db-host', stringValue: db.dbInstanceEndpointAddress, description: 'SSM Parameter storing the Polis Database Host', }); - const dbPortParam = new ssm.StringParameter(this, 'DBPortParameter', { parameterName: '/polis/db-port', stringValue: db.dbInstanceEndpointPort, description: 'SSM Parameter storing the Polis Database Port', }); + + // --- EFS for Ollama Models + const fileSystem = new efs.FileSystem(this, 'OllamaModelFileSystem', { + vpc, + encrypted: true, + lifecyclePolicy: efs.LifecyclePolicy.AFTER_14_DAYS, + performanceMode: efs.PerformanceMode.GENERAL_PURPOSE, + throughputMode: efs.ThroughputMode.ELASTIC, + removalPolicy: cdk.RemovalPolicy.RETAIN, + securityGroup: efsSecurityGroup, + vpcSubnets: { subnetGroupName: 'PrivateWithEgress' }, + }); + + + // --- User Data Scripts (Optimized 
function used by all) --- + // Generic User Data function (Works with NAT Gateway for internet) const usrdata = (CLOUDWATCH_LOG_GROUP_NAME: string, service: string, instanceSize?: string) => { - let ld; + let ld: ec2.UserData; ld = ec2.UserData.forLinux(); ld.addCommands( '#!/bin/bash', @@ -321,43 +333,127 @@ EOF`, return ld; }; - // --- Launch Templates --- + const ollamaUsrData = ec2.UserData.forLinux(); + const cwAgentConfigPath = '/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json'; + ollamaUsrData.addCommands( + ...usrdata(logGroup.logGroupName, "ollama").render().split('\n').filter(line => line.trim() !== ''), + 'echo "Installing EFS utilities for Ollama..."', + 'sudo dnf install -y amazon-efs-utils nfs-utils', + 'echo "Starting Ollama specific setup..."', + 'echo "Configuring CloudWatch Agent for GPU metrics..."', + `sudo tee ${cwAgentConfigPath} << EOF +{ + "agent": { "metrics_collection_interval": 60, "run_as_user": "root" }, + "metrics": { + "append_dimensions": { "AutoScalingGroupName": "\${aws:AutoScalingGroupName}", "ImageId": "\${aws:ImageId}", "InstanceId": "\${aws:InstanceId}", "InstanceType": "\${aws:InstanceType}" }, + "metrics_collected": { + "nvidia_gpu": { "measurement": [ {"name": "utilization_gpu", "unit": "Percent"}, {"name": "utilization_memory", "unit": "Percent"}, {"name": "memory_total", "unit": "Megabytes"}, {"name": "memory_used", "unit": "Megabytes"}, {"name": "memory_free", "unit": "Megabytes"}, {"name": "power_draw", "unit": "Watts"}, {"name": "temperature_gpu", "unit": "Count"} ], "metrics_collection_interval": 60, "nvidia_smi_path": "/usr/bin/nvidia-smi", "metrics_aggregation_interval": 60, "namespace": "${ollamaNamespace}" }, + "disk": { "measurement": [ "used_percent" ], "metrics_collection_interval": 60, "resources": [ "/" ] }, + "mem": { "measurement": [ "mem_used_percent" ], "metrics_collection_interval": 60 } + } + } +} +EOF`, + 'sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a 
fetch-config -m ec2 -c file:' + cwAgentConfigPath + ' -s', + 'sudo systemctl enable amazon-cloudwatch-agent', + 'echo "CloudWatch Agent configured and started."', + `echo "Mounting EFS filesystem ${fileSystem.fileSystemId}..."`, + `sudo mkdir -p ${ollamaModelDirectory}`, + `sudo mount -t efs -o tls ${fileSystem.fileSystemId}:/ ${ollamaModelDirectory}`, + `echo "${fileSystem.fileSystemId}:/ ${ollamaModelDirectory} efs _netdev,tls 0 0" | sudo tee -a /etc/fstab`, + `sudo chown ec2-user:ec2-user ${ollamaModelDirectory}`, + 'echo "EFS mounted successfully."', + 'echo "Starting Ollama container..."', + 'sudo docker run -d --name ollama \\', + ' --gpus all \\', + ' -p 0.0.0.0:11434:11434 \\', + ` -v ${ollamaModelDirectory}:/root/.ollama \\`, + ' --restart unless-stopped \\', + ' ollama/ollama serve', + '(', + ' echo "Waiting for Ollama service (background task)..."', + ' sleep 60', + ' echo "Pulling default Ollama model (llama3.1:8b) in background..."', + ' sudo docker exec ollama ollama pull llama3.1:8b || echo "Failed to pull default model initially, may need manual pull later."', + ' echo "Background model pull task finished."', + ') &', + 'disown', + 'echo "Ollama setup script finished."' + ); + + + // --- Launch Templates const webLaunchTemplate = new ec2.LaunchTemplate(this, 'WebLaunchTemplate', { machineImage: machineImageWeb, userData: usrdata(logGroup.logGroupName, "server"), instanceType: instanceTypeWeb, securityGroup: webSecurityGroup, - keyPair: props.enableSSHAccess ? webKeyPair : undefined, // Conditionally add key pair + keyPair: webKeyPair, role: instanceRole, }); - const mathWorkerLaunchTemplate = new ec2.LaunchTemplate(this, 'MathWorkerLaunchTemplate', { machineImage: machineImageMathWorker, userData: usrdata(logGroup.logGroupName, "math"), instanceType: instanceTypeMathWorker, securityGroup: mathWorkerSecurityGroup, - keyPair: props.enableSSHAccess ? 
mathWorkerKeyPair : undefined, + keyPair: mathWorkerKeyPair, role: instanceRole, }); - + // Delphi Small Launch Template const delphiSmallLaunchTemplate = new ec2.LaunchTemplate(this, 'DelphiSmallLaunchTemplate', { machineImage: machineImageDelphiSmall, userData: usrdata(logGroup.logGroupName, "delphi", "small"), instanceType: instanceTypeDelphiSmall, securityGroup: delphiSecurityGroup, - keyPair: props.enableSSHAccess ? delphiSmallKeyPair : undefined, + keyPair: delphiSmallKeyPair, role: instanceRole, }); - + // Delphi Large Launch Template const delphiLargeLaunchTemplate = new ec2.LaunchTemplate(this, 'DelphiLargeLaunchTemplate', { machineImage: machineImageDelphiLarge, userData: usrdata(logGroup.logGroupName, "delphi", "large"), instanceType: instanceTypeDelphiLarge, securityGroup: delphiSecurityGroup, - keyPair: props.enableSSHAccess ? delphiLargeKeyPair : undefined, + keyPair: delphiLargeKeyPair, role: instanceRole, }); + // Ollama Launch Template + const ollamaLaunchTemplate = new ec2.LaunchTemplate(this, 'OllamaLaunchTemplate', { + machineImage: machineImageOllama, + userData: ollamaUsrData, + instanceType: instanceTypeOllama, + securityGroup: ollamaSecurityGroup, + keyPair: ollamaKeyPair, + role: instanceRole, + blockDevices: [ + { + deviceName: '/dev/xvda', // Adjust if needed for DLAMI + volume: ec2.BlockDeviceVolume.ebs(100, { + volumeType: ec2.EbsDeviceVolumeType.GP3, + deleteOnTermination: true, + }), + }, + ], + }); + + + // --- Auto Scaling Groups + const commonAsgProps = { vpc, role: instanceRole }; + + // Ollama ASG + const asgOllama = new autoscaling.AutoScalingGroup(this, 'AsgOllama', { + vpc, + launchTemplate: ollamaLaunchTemplate, + minCapacity: 1, + maxCapacity: 3, + desiredCapacity: 1, + vpcSubnets: { subnetGroupName: 'PrivateWithEgress' }, + healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(10) }), + }); + asgOllama.node.addDependency(logGroup); + asgOllama.node.addDependency(fileSystem); // Ensure EFS is ready before 
instances start + // Web ASG const asgWeb = new autoscaling.AutoScalingGroup(this, 'Asg', { vpc, launchTemplate: webLaunchTemplate, @@ -368,123 +464,140 @@ EOF`, healthCheck: autoscaling.HealthCheck.elb({grace: cdk.Duration.minutes(5)}) }); + // Math Worker ASG const asgMathWorker = new autoscaling.AutoScalingGroup(this, 'AsgMathWorker', { vpc, launchTemplate: mathWorkerLaunchTemplate, minCapacity: 1, desiredCapacity: 1, maxCapacity: 5, - vpcSubnets: { - subnetType: ec2.SubnetType.PUBLIC, - }, + vpcSubnets: { subnetType: ec2.SubnetType.PUBLIC }, healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(2) }), }); - const mathWorkerCpuMetric = new cloudwatch.Metric({ - namespace: 'AWS/EC2', - metricName: 'CPUUtilization', - dimensionsMap: { - AutoScalingGroupName: asgMathWorker.autoScalingGroupName, - }, - statistic: 'Average', // default, config if necessary - period: cdk.Duration.minutes(10), - }); - - asgMathWorker.scaleToTrackMetric('CpuTracking', { - metric: mathWorkerCpuMetric, - targetValue: 50, // Target 50% CPU utilization - disableScaleIn: true, // unneeded hosts will be disabled manualy - }); - - // Delphi Small Instance Auto Scaling Group + // Delphi Small ASG const asgDelphiSmall = new autoscaling.AutoScalingGroup(this, 'AsgDelphiSmall', { vpc, launchTemplate: delphiSmallLaunchTemplate, minCapacity: 1, - desiredCapacity: 2, + desiredCapacity: 1, maxCapacity: 5, - vpcSubnets: { - subnetType: ec2.SubnetType.PUBLIC, - }, - healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(2) }), + vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS }, + healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(5) }), }); - - // Delphi Large Instance Auto Scaling Group + + // Delphi Large ASG const asgDelphiLarge = new autoscaling.AutoScalingGroup(this, 'AsgDelphiLarge', { vpc, launchTemplate: delphiLargeLaunchTemplate, - minCapacity: 0, + minCapacity: 1, desiredCapacity: 1, maxCapacity: 3, - vpcSubnets: { - 
subnetType: ec2.SubnetType.PUBLIC, - }, - healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(2) }), + vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS }, + healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(5) }), }); - - // CPU metrics for Delphi small instances - const delphiSmallCpuMetric = new cloudwatch.Metric({ + + + // --- Scaling Policies & Alarms + const mathWorkerCpuMetric = new cloudwatch.Metric({ namespace: 'AWS/EC2', metricName: 'CPUUtilization', dimensionsMap: { - AutoScalingGroupName: asgDelphiSmall.autoScalingGroupName, + AutoScalingGroupName: asgMathWorker.autoScalingGroupName }, statistic: 'Average', - period: cdk.Duration.minutes(5), + period: cdk.Duration.minutes(10), }); - - // CPU metrics for Delphi large instances - const delphiLargeCpuMetric = new cloudwatch.Metric({ - namespace: 'AWS/EC2', - metricName: 'CPUUtilization', - dimensionsMap: { - AutoScalingGroupName: asgDelphiLarge.autoScalingGroupName, - }, + asgMathWorker.scaleToTrackMetric('CpuTracking', { + metric: mathWorkerCpuMetric, + targetValue: 50, + }); + + // Add Delphi CPU Scaling Policies & Alarms + const createDelphiCpuScaling = (asg: autoscaling.AutoScalingGroup, name: string, target: number): cloudwatch.Metric => { + const cpuMetric = new cloudwatch.Metric({ + namespace: 'AWS/EC2', + metricName: 'CPUUtilization', + dimensionsMap: { AutoScalingGroupName: asg.autoScalingGroupName }, + statistic: 'Average', + period: cdk.Duration.minutes(5), + }); + asg.scaleToTrackMetric(`${name}CpuTracking`, { + metric: cpuMetric, + targetValue: target + }); + + // High CPU Alarm + const alarm = new cloudwatch.Alarm(this, `${name}HighCpuAlarm`, { + metric: cpuMetric, + threshold: 80, // Alert if CPU > 80% + evaluationPeriods: 2, // for 2 consecutive periods (10 minutes total) + datapointsToAlarm: 2, // Ensure 2 datapoints are breaching + comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD, + alarmDescription: `Alert when 
${name} instances CPU exceeds 80% for 10 minutes`, + treatMissingData: cloudwatch.TreatMissingData.IGNORE, // Or BREACHING/NOT_BREACHING as appropriate + }); + // Add SNS action to the alarm + alarm.addAlarmAction(new cloudwatch_actions.SnsAction(alarmTopic)); + return cpuMetric; + }; + const delphiSmallCpuMetric = createDelphiCpuScaling(asgDelphiSmall, 'DelphiSmall', 60); // Target 60% CPU + const delphiLargeCpuMetric = createDelphiCpuScaling(asgDelphiLarge, 'DelphiLarge', 60); // Target 60% CPU + + // Add Ollama GPU Scaling Policy + const ollamaGpuMetric = new cloudwatch.Metric({ + namespace: ollamaNamespace, // Custom namespace from CW Agent config + metricName: 'utilization_gpu', // GPU utilization metric name from CW Agent config + dimensionsMap: { AutoScalingGroupName: asgOllama.autoScalingGroupName }, statistic: 'Average', - period: cdk.Duration.minutes(5), - }); - - // Scale small Delphi instances based on CPU usage - asgDelphiSmall.scaleToTrackMetric('DelphiSmallCpuTracking', { - metric: delphiSmallCpuMetric, - targetValue: 60, // Target 60% CPU utilization - scaleInCooldown: cdk.Duration.minutes(5), - scaleOutCooldown: cdk.Duration.minutes(2), - }); - - // Scale large Delphi instances based on CPU usage - asgDelphiLarge.scaleToTrackMetric('DelphiLargeCpuTracking', { - metric: delphiLargeCpuMetric, - targetValue: 60, // Target 60% CPU utilization - scaleInCooldown: cdk.Duration.minutes(5), - scaleOutCooldown: cdk.Duration.minutes(2), - }); - - // CloudWatch alarms for Delphi small instances - const delphiSmallHighCpuAlarm = new cloudwatch.Alarm(this, 'DelphiSmallHighCpuAlarm', { - metric: delphiSmallCpuMetric, - threshold: 80, - evaluationPeriods: 2, - datapointsToAlarm: 2, - comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD, - alarmDescription: 'Alert when Delphi small instances CPU exceeds 80% for 10 minutes', - }); - - delphiSmallHighCpuAlarm.addAlarmAction(new cdk.aws_cloudwatch_actions.SnsAction(alarmTopic)); - - // CloudWatch 
alarms for Delphi large instances - const delphiLargeHighCpuAlarm = new cloudwatch.Alarm(this, 'DelphiLargeHighCpuAlarm', { - metric: delphiLargeCpuMetric, - threshold: 80, - evaluationPeriods: 2, - datapointsToAlarm: 2, - comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD, - alarmDescription: 'Alert when Delphi large instances CPU exceeds 80% for 10 minutes', - }); - - delphiLargeHighCpuAlarm.addAlarmAction(new cdk.aws_cloudwatch_actions.SnsAction(alarmTopic)); - - // DEPLOY STUFF + period: cdk.Duration.minutes(1), + }); + asgOllama.scaleToTrackMetric('OllamaGpuScaling', { + metric: ollamaGpuMetric, + targetValue: 75, + cooldown: cdk.Duration.minutes(5), // Prevent flapping + disableScaleIn: false, // Allow scaling down + estimatedInstanceWarmup: cdk.Duration.minutes(5), // Time until instance contributes metrics meaningfully + }); + + // --- Ollama Network Load Balancer (Internal, in Private+Egress) + const ollamaNlb = new elbv2.NetworkLoadBalancer(this, 'OllamaNlb', { + vpc, + internetFacing: false, // Internal only + crossZoneEnabled: true, + // Place NLB interfaces in PRIVATE_WITH_EGRESS subnets alongside Ollama instances + vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS }, + }); + const ollamaListener = ollamaNlb.addListener('OllamaListener', { + port: ollamaPort, + protocol: elbv2.Protocol.TCP, + }); + const ollamaTargetGroup = new elbv2.NetworkTargetGroup(this, 'OllamaTargetGroup', { + vpc, + port: ollamaPort, + protocol: elbv2.Protocol.TCP, + targetType: elbv2.TargetType.INSTANCE, + targets: [asgOllama], + healthCheck: { + protocol: elbv2.Protocol.TCP, + interval: cdk.Duration.seconds(30), + healthyThresholdCount: 2, + unhealthyThresholdCount: 2, + }, + deregistrationDelay: cdk.Duration.seconds(60), + }); + ollamaListener.addTargetGroups('OllamaTg', ollamaTargetGroup); + + // Secret for Ollama NLB endpoint + const ollamaServiceSecret = new secretsmanager.Secret(this, 'OllamaServiceSecret', { + secretName: 
'/polis/ollama-service-url', + description: 'URL for the internal Ollama service endpoint (NLB)', + // Store the NLB DNS name and port + secretStringValue: cdk.SecretValue.unsafePlainText(`http://${ollamaNlb.loadBalancerDnsName}:${ollamaPort}`), + }); + ollamaServiceSecret.grantRead(instanceRole); + + // --- DEPLOY STUFF const application = new codedeploy.ServerApplication(this, 'CodeDeployApplication', { applicationName: 'PolisApplication', }); @@ -493,13 +606,13 @@ EOF`, bucketName: `polis-deployment-packages-${cdk.Stack.of(this).account}-${cdk.Stack.of(this).region}`, removalPolicy: cdk.RemovalPolicy.DESTROY, autoDeleteObjects: true, - versioned: true, + versioned: true, publicReadAccess: false, blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, }); - deploymentBucket.grantRead(instanceRole); - + + // Deployment Group const deploymentGroup = new codedeploy.ServerDeploymentGroup(this, 'DeploymentGroup', { application, deploymentGroupName: 'PolisDeploymentGroup', @@ -507,15 +620,17 @@ EOF`, deploymentConfig: codedeploy.ServerDeploymentConfig.ONE_AT_A_TIME, role: codeDeployRole, installAgent: true, + // Consider load balancer integration for blue/green (more complex) }); - // Allow traffic from the web ASG to the database + + // --- DB Access Rules db.connections.allowFrom(asgWeb, ec2.Port.tcp(5432), 'Allow database access from web ASG'); db.connections.allowFrom(asgMathWorker, ec2.Port.tcp(5432), 'Allow database access from math ASG'); db.connections.allowFrom(asgDelphiSmall, ec2.Port.tcp(5432), 'Allow database access from Delphi small ASG'); db.connections.allowFrom(asgDelphiLarge, ec2.Port.tcp(5432), 'Allow database access from Delphi large ASG'); - // ELB + // --- Application Load Balancer const lb = new elbv2.ApplicationLoadBalancer(this, 'Lb', { vpc, internetFacing: true, @@ -540,7 +655,6 @@ EOF`, defaultTargetGroups: [webTargetGroup], }); - // ACM Certificate Request const certificate = new acm.Certificate(this, 'WebAppCertificate', { domainName: 'pol.is', 
validation: acm.CertificateValidation.fromDns(), @@ -553,28 +667,48 @@ EOF`, defaultTargetGroups: [webTargetGroup], }); - // Web Server - Target Tracking Scaling based on ALB Request Count const webScalingPolicy = asgWeb.scaleOnRequestCount('WebScalingPolicy', { targetRequestsPerMinute: 600, - disableScaleIn: true, // unneeded hosts will be disabled manualy }); + // --- Secrets & Dependencies --- const webAppEnvVarsSecret = new secretsmanager.Secret(this, 'WebAppEnvVarsSecret', { secretName: 'polis-web-app-env-vars', description: 'Environment variables for the Polis web application', }); + const clientAdminEnvVarsSecret = new secretsmanager.Secret(this, 'ClientAdminEnvVarsSecret', { + secretName: 'polis-client-admin-env-vars', + description: 'Environment variables for the Polis client-admin web application', + }); + + const clientReportEnvVarsSecret = new secretsmanager.Secret(this, 'ClientReportEnvVarsSecret', { + secretName: 'polis-client-report-env-vars', + description: 'Environment variables for the Polis client-report web application', + }); + webAppEnvVarsSecret.grantRead(instanceRole); + clientAdminEnvVarsSecret.grantRead(instanceRole); + clientReportEnvVarsSecret.grantRead(instanceRole); + + // Dependencies (Add ASGs to loops/lists) + const addDbDependency = (asg: autoscaling.IAutoScalingGroup) => asg.node.addDependency(db); + const addLogDependency = (asg: autoscaling.IAutoScalingGroup) => asg.node.addDependency(logGroup); + const addSecretDependency = (asg: autoscaling.IAutoScalingGroup) => asg.node.addDependency(webAppEnvVarsSecret); - asgWeb.node.addDependency(logGroup); - asgWeb.node.addDependency(webAppEnvVarsSecret); - asgMathWorker.node.addDependency(logGroup); - asgMathWorker.node.addDependency(webAppEnvVarsSecret); - asgDelphiSmall.node.addDependency(logGroup); - asgDelphiSmall.node.addDependency(webAppEnvVarsSecret); - asgDelphiLarge.node.addDependency(logGroup); - asgDelphiLarge.node.addDependency(webAppEnvVarsSecret); - 
asgWeb.node.addDependency(db); - asgMathWorker.node.addDependency(db); - asgDelphiSmall.node.addDependency(db); - asgDelphiLarge.node.addDependency(db); + // Apply common dependencies to all ASGs + [asgWeb, asgMathWorker, asgDelphiSmall, asgDelphiLarge, asgOllama].forEach(asg => { + addLogDependency(asg); + addSecretDependency(asg); + // Only add DB dependency if the service needs it + if (asg !== asgOllama) { // Assuming Ollama doesn't directly need DB creds + addDbDependency(asg); + } + }); + asgOllama.node.addDependency(fileSystem); + + // --- Outputs + new cdk.CfnOutput(this, 'LoadBalancerDNS', { value: lb.loadBalancerDnsName, description: 'Public DNS name of the Application Load Balancer' }); + new cdk.CfnOutput(this, 'OllamaNlbDnsName', { value: ollamaNlb.loadBalancerDnsName, description: 'Internal DNS Name for the Ollama Network Load Balancer'}); + new cdk.CfnOutput(this, 'OllamaServiceSecretArn', { value: ollamaServiceSecret.secretArn, description: 'ARN of the Secret containing the Ollama service URL' }); + new cdk.CfnOutput(this, 'EfsFileSystemId', { value: fileSystem.fileSystemId, description: 'ID of the EFS File System for Ollama models' }); } -} +} \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 68fadbc645..cbd587eee5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -49,6 +49,7 @@ services: volumes: # Persist logs to a volume, so they can be accessed after the container is stopped. 
- server-logs:/app/logs + restart: unless-stopped math: image: 050917022930.dkr.ecr.us-east-1.amazonaws.com/polis/math:latest @@ -64,6 +65,7 @@ services: - WEBSERVER_PASS=${WEBSERVER_PASS} networks: - "polis-net" + restart: unless-stopped delphi: image: 050917022930.dkr.ecr.us-east-1.amazonaws.com/polis/delphi:latest @@ -79,7 +81,7 @@ services: - DYNAMODB_ENDPOINT=${DYNAMODB_ENDPOINT:-http://dynamodb:8000} - POLL_INTERVAL=${POLL_INTERVAL:-2} # Ollama connection - - OLLAMA_HOST=http://ollama:11434 + - OLLAMA_HOST=${OLLAMA_HOST:-http://ollama:11434} - OLLAMA_MODEL=${OLLAMA_MODEL:-llama3.1:8b} # AWS environment variables (will be provided in prod) - AWS_REGION=${AWS_REGION:-us-east-1} @@ -111,6 +113,7 @@ services: limits: memory: ${DELPHI_CONTAINER_MEMORY:-4g} cpus: ${DELPHI_CONTAINER_CPUS:-2} + restart: unless-stopped postgres: restart: always @@ -179,6 +182,8 @@ services: user: root labels: polis_tag: ${TAG:-dev} + profiles: + - local-services # Ollama for LLM processing ollama: @@ -213,6 +218,8 @@ services: command: server /data --console-address ":9001" networks: - "polis-net" + profiles: + - local-services networks: polis-net: diff --git a/scripts/after_install.sh b/scripts/after_install.sh index 30e76a63c2..cd11404499 100644 --- a/scripts/after_install.sh +++ b/scripts/after_install.sh @@ -9,7 +9,7 @@ GIT_BRANCH="stable" if [ ! 
-d "polis" ]; then echo "Cloning public repository from $GIT_REPO_URL, branch: $GIT_BRANCH (HTTPS - Public Repo)" - git clone -b "$GIT_BRANCH" "$GIT_REPO_URL" polis + git clone --depth 1 -b "$GIT_BRANCH" "$GIT_REPO_URL" polis else echo "Polis directory already exists, skipping cloning, pulling instead" fi @@ -73,9 +73,6 @@ echo "Constructed DATABASE_URL: $DATABASE_URL" echo "Appending DATABASE_URL to .env" echo "DATABASE_URL=$DATABASE_URL" >> .env -echo "--- Final .env file content (Appended DATABASE_URL) ---" -cat .env - SERVICE_FROM_FILE=$(cat /tmp/service_type.txt) echo "DEBUG: Service type read from /tmp/service_type.txt: [$SERVICE_FROM_FILE]" @@ -97,6 +94,20 @@ elif [ "$SERVICE_FROM_FILE" == "math" ]; then /usr/local/bin/docker-compose up -d math --build --force-recreate elif [ "$SERVICE_FROM_FILE" == "delphi" ]; then echo "Starting docker-compose up for 'delphi' service" + echo "Fetching Ollama Service URL for Delphi..." + OLLAMA_URL=$(aws secretsmanager get-secret-value --secret-id /polis/ollama-service-url --query SecretString --output text --region us-east-1) + + if [ -z "$OLLAMA_URL" ]; then + echo "Error: Could not retrieve Ollama Service URL from Secrets Manager: /polis/ollama-service-url" + exit 1 + fi + echo "Retrieved Ollama Service URL." + + # Append OLLAMA_HOST to .env + echo "Appending OLLAMA_HOST to .env for Delphi" + # Using printf for slightly safer appending in case of special characters in URL + printf "\nOLLAMA_HOST=%s\n" "$OLLAMA_URL" >> .env + echo "OLLAMA_HOST appended." 
# Check if instance size file exists if [ -f "/tmp/instance_size.txt" ]; then diff --git a/scripts/application_stop.sh b/scripts/application_stop.sh index bbfacddf64..847502cc3a 100644 --- a/scripts/application_stop.sh +++ b/scripts/application_stop.sh @@ -1,10 +1,81 @@ #!/bin/bash -set -e -set -x - - cd /opt/polis -if [ "$SERVICE" = "server" ]; then - /usr/local/bin/docker-compose stop server -elif [ "$SERVICE" = "math" ]; then - /usr/local/bin/docker-compose stop math -fi \ No newline at end of file +# This script runs during the ApplicationStop lifecycle event in CodeDeploy. +# It stops the relevant Docker containers based on the instance's role. + +set -e # Exit immediately if a command exits with a non-zero status. +set -x # Print commands and their arguments as they are executed. + +echo "Executing ApplicationStop hook..." + +# --- Configuration --- +# Directory where the docker-compose.yml file for the *current* deployment resides +# Adjust this path if your deployment process places files elsewhere +DEPLOY_DIR="/opt/polis/polis" +# File indicating the role of this instance (created by UserData/AfterInstall) +SERVICE_TYPE_FILE="/tmp/service_type.txt" + +# --- Determine Service Type --- +if [ -f "$SERVICE_TYPE_FILE" ]; then + SERVICE_TYPE=$(cat "$SERVICE_TYPE_FILE") + echo "Detected service type: $SERVICE_TYPE" +else + echo "Warning: Service type file not found at $SERVICE_TYPE_FILE. Assuming nothing specific needs to be stopped by this script." + # Exit cleanly as we don't know what to stop, or maybe the instance role changed. + # CodeDeploy will likely proceed, and the AfterInstall script handles cleanup anyway. + exit 0 +fi + +# --- Stop Services based on Type --- + +# Check if the deployment directory exists (where docker-compose.yml should be) +if [ -d "$DEPLOY_DIR" ]; then + cd "$DEPLOY_DIR" + echo "Changed directory to $DEPLOY_DIR" + + # Check if docker-compose command exists + if ! 
command -v /usr/local/bin/docker-compose &> /dev/null; then + echo "Error: docker-compose command not found at /usr/local/bin/docker-compose. Cannot stop services." + # Exit with error because compose is expected if the directory exists and type isn't ollama + if [ "$SERVICE_TYPE" != "ollama" ]; then + exit 1 + fi + fi + + if [ "$SERVICE_TYPE" == "server" ]; then + echo "Stopping server-related services (server, nginx-proxy, file-server)..." + # Stop services related to the 'server' type instance (as started in AfterInstall) + /usr/local/bin/docker-compose stop server nginx-proxy file-server || echo "Warning: Failed to stop server component(s), might already be stopped." + # Optional: Use 'down' if you want to remove networks etc. during stop, but 'stop' is usually sufficient here. + # /usr/local/bin/docker-compose down --remove-orphans server nginx-proxy file-server || echo "Warning..." + + elif [ "$SERVICE_TYPE" == "math" ]; then + echo "Stopping math service..." + /usr/local/bin/docker-compose stop math || echo "Warning: Failed to stop math service, might already be stopped." + + elif [ "$SERVICE_TYPE" == "delphi" ]; then + echo "Stopping delphi service..." + /usr/local/bin/docker-compose stop delphi || echo "Warning: Failed to stop delphi service, might already be stopped." + + elif [ "$SERVICE_TYPE" == "ollama" ]; then + echo "Stopping standalone ollama container..." + # Ollama runs via `docker run`, not compose on this instance type + # Check if the container exists before trying to stop it + if docker ps -q --filter name=^/ollama$ | grep -q .; then + docker stop ollama || echo "Warning: Failed to stop ollama container, might already be stopped." + else + echo "Ollama container 'ollama' not found or not running." + fi + # No docker-compose actions needed for the ollama instance type + + else + echo "Warning: Unknown service type '$SERVICE_TYPE' found in $SERVICE_TYPE_FILE. No specific services stopped." 
+ # Avoid running a generic 'down' as it might affect unrelated containers if any exist + fi + +else + echo "Warning: Deployment directory $DEPLOY_DIR not found. Assuming no services need stopping." + # Exit cleanly if the directory isn't there, as nothing from this app could be running + exit 0 +fi + +echo "ApplicationStop hook finished successfully for service type: $SERVICE_TYPE." \ No newline at end of file From 6f8136a6d4cdacd110cd0c06aab114ecba6f636c Mon Sep 17 00:00:00 2001 From: tevko Date: Wed, 7 May 2025 11:07:58 -0500 Subject: [PATCH 02/42] begin splitting up cdk code --- cdk/ec2.ts | 25 +++++++++++ cdk/lib/cdk-stack.ts | 100 ++++++++++-------------------------------- cdk/securityGroups.ts | 40 +++++++++++++++++ cdk/vpc.ts | 22 ++++++++++ 4 files changed, 110 insertions(+), 77 deletions(-) create mode 100644 cdk/ec2.ts create mode 100644 cdk/securityGroups.ts create mode 100644 cdk/vpc.ts diff --git a/cdk/ec2.ts b/cdk/ec2.ts new file mode 100644 index 0000000000..2f553c7e8f --- /dev/null +++ b/cdk/ec2.ts @@ -0,0 +1,25 @@ +import * as ec2 from 'aws-cdk-lib/aws-ec2'; + +export const instanceTypeWeb = ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.MEDIUM); +export const machineImageWeb = new ec2.AmazonLinuxImage({ generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023 }); +export const instanceTypeMathWorker = ec2.InstanceType.of(ec2.InstanceClass.R8G, ec2.InstanceSize.XLARGE4); +export const machineImageMathWorker = new ec2.AmazonLinuxImage({ + generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023, + cpuType: ec2.AmazonLinuxCpuType.ARM_64, +}); +// Delphi small instance +export const instanceTypeDelphiSmall = ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.LARGE); +export const machineImageDelphiSmall = new ec2.AmazonLinuxImage({ + generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023 +}); +// Delphi large instance +export const instanceTypeDelphiLarge = ec2.InstanceType.of(ec2.InstanceClass.C6G, ec2.InstanceSize.XLARGE4); 
+export const machineImageDelphiLarge = new ec2.AmazonLinuxImage({ + generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023, + cpuType: ec2.AmazonLinuxCpuType.ARM_64 +}); +// Ollama Instance +export const instanceTypeOllama = ec2.InstanceType.of(ec2.InstanceClass.G4DN, ec2.InstanceSize.XLARGE); // x86_64 GPU instance +export const machineImageOllama = ec2.MachineImage.genericLinux({ + 'us-east-1': 'ami-08e0cf6df13ae3ddb', +}); \ No newline at end of file diff --git a/cdk/lib/cdk-stack.ts b/cdk/lib/cdk-stack.ts index 59614df540..9d7b000145 100644 --- a/cdk/lib/cdk-stack.ts +++ b/cdk/lib/cdk-stack.ts @@ -17,6 +17,21 @@ import * as ecr from 'aws-cdk-lib/aws-ecr'; import * as ssm from 'aws-cdk-lib/aws-ssm'; import * as efs from 'aws-cdk-lib/aws-efs'; // Import EFS module import { Construct } from 'constructs'; +// custom constructs for code organization +import createPolisVPC from '../vpc'; +import { + instanceTypeWeb, + machineImageWeb, + instanceTypeMathWorker, + machineImageMathWorker, + instanceTypeDelphiSmall, + machineImageDelphiSmall, + instanceTypeDelphiLarge, + machineImageDelphiLarge, + instanceTypeOllama, + machineImageOllama +} from '../ec2'; +import createSecurityGroups from '../securityGroups'; interface PolisStackProps extends cdk.StackProps { enableSSHAccess?: boolean; // Make optional, default to false @@ -38,29 +53,7 @@ export class CdkStack extends cdk.Stack { const ollamaPort = 11434; const ollamaModelDirectory = '/efs/ollama-models'; const ollamaNamespace = 'OllamaMetrics'; // Custom namespace for GPU metrics - - // --- VPC Configuration - const vpc = new ec2.Vpc(this, 'Vpc', { - maxAzs: 2, - natGateways: 1, // Use 1 for non-prod/cost saving, 2+ for prod HA - subnetConfiguration: [ - { - cidrMask: 24, - name: 'Public', - subnetType: ec2.SubnetType.PUBLIC, - }, - { - cidrMask: 24, - name: 'Private', - subnetType: ec2.SubnetType.PRIVATE_ISOLATED, - }, - { - cidrMask: 24, - name: 'PrivateWithEgress', - subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS, 
- }, - ] - }); + const vpc = createPolisVPC(this); const alarmTopic = new sns.Topic(this, 'AlarmTopic', { displayName: 'Polis Application Alarms', @@ -71,60 +64,13 @@ export class CdkStack extends cdk.Stack { removalPolicy: cdk.RemovalPolicy.DESTROY, }); - // --- Instance Types & AMIs - const instanceTypeWeb = ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.MEDIUM); - const machineImageWeb = new ec2.AmazonLinuxImage({ generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023 }); - const instanceTypeMathWorker = ec2.InstanceType.of(ec2.InstanceClass.R8G, ec2.InstanceSize.XLARGE4); - const machineImageMathWorker = new ec2.AmazonLinuxImage({ - generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023, - cpuType: ec2.AmazonLinuxCpuType.ARM_64, - }); - // Delphi small instance - const instanceTypeDelphiSmall = ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.LARGE); - const machineImageDelphiSmall = new ec2.AmazonLinuxImage({ - generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023 - }); - // Delphi large instance - const instanceTypeDelphiLarge = ec2.InstanceType.of(ec2.InstanceClass.C6G, ec2.InstanceSize.XLARGE4); - const machineImageDelphiLarge = new ec2.AmazonLinuxImage({ - generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023, - cpuType: ec2.AmazonLinuxCpuType.ARM_64 - }); - // Ollama Instance - const instanceTypeOllama = ec2.InstanceType.of(ec2.InstanceClass.G4DN, ec2.InstanceSize.XLARGE); // x86_64 GPU instance - const machineImageOllama = ec2.MachineImage.genericLinux({ - 'us-east-1': 'ami-08e0cf6df13ae3ddb', - }); - - // --- Security Groups - const webSecurityGroup = new ec2.SecurityGroup(this, 'WebSecurityGroup', { - vpc, - description: 'Allow HTTP and SSH access to web instances', - allowAllOutbound: true - }); - const mathWorkerSecurityGroup = new ec2.SecurityGroup(this, 'MathWorkerSG', { - vpc, - description: 'Security group for Polis math worker', - allowAllOutbound: true - }); - // Delphi Security Group - const delphiSecurityGroup = 
new ec2.SecurityGroup(this, 'DelphiSecurityGroup', { - vpc, - description: 'SG for Delphi instances', - allowAllOutbound: true - }); - // Ollama Security Group - const ollamaSecurityGroup = new ec2.SecurityGroup(this, 'OllamaSecurityGroup', { - vpc, - description: 'SG for Ollama instance', - allowAllOutbound: true - }); - // EFS Security Group - const efsSecurityGroup = new ec2.SecurityGroup(this, 'EfsSecurityGroup', { - vpc, - description: 'SG for EFS mount targets', - allowAllOutbound: false - }); + const { + webSecurityGroup, + mathWorkerSecurityGroup, + delphiSecurityGroup, + ollamaSecurityGroup, + efsSecurityGroup, + } = createSecurityGroups(vpc, this); // Allow Delphi -> Ollama ollamaSecurityGroup.addIngressRule( diff --git a/cdk/securityGroups.ts b/cdk/securityGroups.ts new file mode 100644 index 0000000000..ad6c577117 --- /dev/null +++ b/cdk/securityGroups.ts @@ -0,0 +1,40 @@ +import * as ec2 from 'aws-cdk-lib/aws-ec2'; +import { Construct } from 'constructs'; + +export default (vpc: ec2.IVpc, self: Construct) => { + const webSecurityGroup = new ec2.SecurityGroup(self, 'WebSecurityGroup', { + vpc, + description: 'Allow HTTP and SSH access to web instances', + allowAllOutbound: true + }); + const mathWorkerSecurityGroup = new ec2.SecurityGroup(self, 'MathWorkerSG', { + vpc, + description: 'Security group for Polis math worker', + allowAllOutbound: true + }); + // Delphi Security Group + const delphiSecurityGroup = new ec2.SecurityGroup(self, 'DelphiSecurityGroup', { + vpc, + description: 'SG for Delphi instances', + allowAllOutbound: true + }); + // Ollama Security Group + const ollamaSecurityGroup = new ec2.SecurityGroup(self, 'OllamaSecurityGroup', { + vpc, + description: 'SG for Ollama instance', + allowAllOutbound: true + }); + // EFS Security Group + const efsSecurityGroup = new ec2.SecurityGroup(self, 'EfsSecurityGroup', { + vpc, + description: 'SG for EFS mount targets', + allowAllOutbound: false + }); + return { + webSecurityGroup, + 
mathWorkerSecurityGroup, + delphiSecurityGroup, + ollamaSecurityGroup, + efsSecurityGroup, + } +} \ No newline at end of file diff --git a/cdk/vpc.ts b/cdk/vpc.ts new file mode 100644 index 0000000000..c9b20dfede --- /dev/null +++ b/cdk/vpc.ts @@ -0,0 +1,22 @@ +import * as ec2 from 'aws-cdk-lib/aws-ec2'; +export default (self: any) => new ec2.Vpc(self, 'Vpc', { + maxAzs: 2, + natGateways: 1, // Use 1 for non-prod/cost saving, 2+ for prod HA + subnetConfiguration: [ + { + cidrMask: 24, + name: 'Public', + subnetType: ec2.SubnetType.PUBLIC, + }, + { + cidrMask: 24, + name: 'Private', + subnetType: ec2.SubnetType.PRIVATE_ISOLATED, + }, + { + cidrMask: 24, + name: 'PrivateWithEgress', + subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS, + }, + ] +}); \ No newline at end of file From a6021644c2136d9d39bd831e178e05261d6dbfac Mon Sep 17 00:00:00 2001 From: tevko Date: Wed, 7 May 2025 15:02:49 -0500 Subject: [PATCH 03/42] organize cdk --- cdk/autoscaling.ts | 153 +++++++++++ cdk/codedeploy.ts | 44 +++ cdk/db.ts | 48 ++++ cdk/dns.ts | 59 +++++ cdk/ecr.ts | 39 +++ cdk/iamRoles.ts | 29 ++ cdk/launchTemplates.ts | 189 +++++++++++++ cdk/lib/cdk-stack.ts | 590 ++++++++--------------------------------- cdk/secrets.ts | 50 ++++ 9 files changed, 723 insertions(+), 478 deletions(-) create mode 100644 cdk/autoscaling.ts create mode 100644 cdk/codedeploy.ts create mode 100644 cdk/db.ts create mode 100644 cdk/dns.ts create mode 100644 cdk/ecr.ts create mode 100644 cdk/iamRoles.ts create mode 100644 cdk/launchTemplates.ts create mode 100644 cdk/secrets.ts diff --git a/cdk/autoscaling.ts b/cdk/autoscaling.ts new file mode 100644 index 0000000000..24404e5fac --- /dev/null +++ b/cdk/autoscaling.ts @@ -0,0 +1,153 @@ + +import { Construct } from "constructs"; +import * as ec2 from 'aws-cdk-lib/aws-ec2'; +import * as autoscaling from 'aws-cdk-lib/aws-autoscaling'; +import * as cdk from 'aws-cdk-lib'; +import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; +import * as cloudwatch_actions from 
'aws-cdk-lib/aws-cloudwatch-actions'; + +export default ( + self: Construct, + vpc: cdk.aws_ec2.Vpc, + instanceRole: cdk.aws_iam.Role, + ollamaLaunchTemplate: cdk.aws_ec2.LaunchTemplate, + logGroup: cdk.aws_logs.LogGroup, + fileSystem: cdk.aws_efs.FileSystem, + webLaunchTemplate: cdk.aws_ec2.LaunchTemplate, + mathWorkerLaunchTemplate: cdk.aws_ec2.LaunchTemplate, + delphiSmallLaunchTemplate: cdk.aws_ec2.LaunchTemplate, + delphiLargeLaunchTemplate: cdk.aws_ec2.LaunchTemplate, + ollamaNamespace: string, + alarmTopic: cdk.aws_sns.Topic +) => { + const commonAsgProps = { vpc, role: instanceRole }; + + // Ollama ASG + const asgOllama = new autoscaling.AutoScalingGroup(self, 'AsgOllama', { + vpc, + launchTemplate: ollamaLaunchTemplate, + minCapacity: 1, + maxCapacity: 3, + desiredCapacity: 1, + vpcSubnets: { subnetGroupName: 'PrivateWithEgress' }, + healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(10) }), + }); + asgOllama.node.addDependency(logGroup); + asgOllama.node.addDependency(fileSystem); // Ensure EFS is ready before instances start + + // Web ASG + const asgWeb = new autoscaling.AutoScalingGroup(self, 'Asg', { + vpc, + launchTemplate: webLaunchTemplate, + minCapacity: 2, + maxCapacity: 10, + desiredCapacity: 2, + vpcSubnets: { subnetType: ec2.SubnetType.PUBLIC }, + healthCheck: autoscaling.HealthCheck.elb({grace: cdk.Duration.minutes(5)}) + }); + + // Math Worker ASG + const asgMathWorker = new autoscaling.AutoScalingGroup(self, 'AsgMathWorker', { + vpc, + launchTemplate: mathWorkerLaunchTemplate, + minCapacity: 1, + desiredCapacity: 1, + maxCapacity: 5, + vpcSubnets: { subnetType: ec2.SubnetType.PUBLIC }, + healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(2) }), + }); + + // Delphi Small ASG + const asgDelphiSmall = new autoscaling.AutoScalingGroup(self, 'AsgDelphiSmall', { + vpc, + launchTemplate: delphiSmallLaunchTemplate, + minCapacity: 1, + desiredCapacity: 1, + maxCapacity: 5, + vpcSubnets: { subnetType: 
ec2.SubnetType.PRIVATE_WITH_EGRESS }, + healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(5) }), + }); + + // Delphi Large ASG + const asgDelphiLarge = new autoscaling.AutoScalingGroup(self, 'AsgDelphiLarge', { + vpc, + launchTemplate: delphiLargeLaunchTemplate, + minCapacity: 1, + desiredCapacity: 1, + maxCapacity: 3, + vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS }, + healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(5) }), + }); + + + // --- Scaling Policies & Alarms + const mathWorkerCpuMetric = new cloudwatch.Metric({ + namespace: 'AWS/EC2', + metricName: 'CPUUtilization', + dimensionsMap: { + AutoScalingGroupName: asgMathWorker.autoScalingGroupName + }, + statistic: 'Average', + period: cdk.Duration.minutes(10), + }); + asgMathWorker.scaleToTrackMetric('CpuTracking', { + metric: mathWorkerCpuMetric, + targetValue: 50, + }); + + // Add Delphi CPU Scaling Policies & Alarms + const createDelphiCpuScaling = (asg: autoscaling.AutoScalingGroup, name: string, target: number): cloudwatch.Metric => { + const cpuMetric = new cloudwatch.Metric({ + namespace: 'AWS/EC2', + metricName: 'CPUUtilization', + dimensionsMap: { AutoScalingGroupName: asg.autoScalingGroupName }, + statistic: 'Average', + period: cdk.Duration.minutes(5), + }); + asg.scaleToTrackMetric(`${name}CpuTracking`, { + metric: cpuMetric, + targetValue: target + }); + + // High CPU Alarm + const alarm = new cloudwatch.Alarm(self, `${name}HighCpuAlarm`, { + metric: cpuMetric, + threshold: 80, // Alert if CPU > 80% + evaluationPeriods: 2, // for 2 consecutive periods (10 minutes total) + datapointsToAlarm: 2, // Ensure 2 datapoints are breaching + comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD, + alarmDescription: `Alert when ${name} instances CPU exceeds 80% for 10 minutes`, + treatMissingData: cloudwatch.TreatMissingData.IGNORE, // Or BREACHING/NOT_BREACHING as appropriate + }); + // Add SNS action to the alarm + 
alarm.addAlarmAction(new cloudwatch_actions.SnsAction(alarmTopic)); + return cpuMetric; + }; + const delphiSmallCpuMetric = createDelphiCpuScaling(asgDelphiSmall, 'DelphiSmall', 60); // Target 60% CPU + const delphiLargeCpuMetric = createDelphiCpuScaling(asgDelphiLarge, 'DelphiLarge', 60); // Target 60% CPU + + // Add Ollama GPU Scaling Policy + const ollamaGpuMetric = new cloudwatch.Metric({ + namespace: ollamaNamespace, // Custom namespace from CW Agent config + metricName: 'utilization_gpu', // GPU utilization metric name from CW Agent config + dimensionsMap: { AutoScalingGroupName: asgOllama.autoScalingGroupName }, + statistic: 'Average', + period: cdk.Duration.minutes(1), + }); + asgOllama.scaleToTrackMetric('OllamaGpuScaling', { + metric: ollamaGpuMetric, + targetValue: 75, + cooldown: cdk.Duration.minutes(5), // Prevent flapping + disableScaleIn: false, // Allow scaling down + estimatedInstanceWarmup: cdk.Duration.minutes(5), // Time until instance contributes metrics meaningfully + }); + + return { + asgOllama, + asgWeb, + asgMathWorker, + asgDelphiSmall, + asgDelphiLarge, + commonAsgProps + } +} \ No newline at end of file diff --git a/cdk/codedeploy.ts b/cdk/codedeploy.ts new file mode 100644 index 0000000000..e9880ed36d --- /dev/null +++ b/cdk/codedeploy.ts @@ -0,0 +1,44 @@ +import { Construct } from "constructs"; +import * as cdk from 'aws-cdk-lib'; +import * as codedeploy from 'aws-cdk-lib/aws-codedeploy'; +import * as s3 from 'aws-cdk-lib/aws-s3'; + +export default ( + self: Construct, + instanceRole: cdk.aws_iam.Role, + asgWeb: cdk.aws_autoscaling.AutoScalingGroup, + asgMathWorker: cdk.aws_autoscaling.AutoScalingGroup, + asgDelphiSmall: cdk.aws_autoscaling.AutoScalingGroup, + asgDelphiLarge: cdk.aws_autoscaling.AutoScalingGroup, + codeDeployRole: cdk.aws_iam.Role +) => { + const application = new codedeploy.ServerApplication(self, 'CodeDeployApplication', { + applicationName: 'PolisApplication', + }); + + const deploymentBucket = new s3.Bucket(self, 
'DeploymentPackageBucket', { + bucketName: `polis-deployment-packages-${cdk.Stack.of(self).account}-${cdk.Stack.of(self).region}`, + removalPolicy: cdk.RemovalPolicy.DESTROY, + autoDeleteObjects: true, + versioned: true, + publicReadAccess: false, + blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, + }); + deploymentBucket.grantRead(instanceRole); + + // Deployment Group + const deploymentGroup = new codedeploy.ServerDeploymentGroup(self, 'DeploymentGroup', { + application, + deploymentGroupName: 'PolisDeploymentGroup', + autoScalingGroups: [asgWeb, asgMathWorker, asgDelphiSmall, asgDelphiLarge], + deploymentConfig: codedeploy.ServerDeploymentConfig.ONE_AT_A_TIME, + role: codeDeployRole, + installAgent: true, + }); + + return { + application, + deploymentBucket, + deploymentGroup + } +} \ No newline at end of file diff --git a/cdk/db.ts b/cdk/db.ts new file mode 100644 index 0000000000..28dd3e6cad --- /dev/null +++ b/cdk/db.ts @@ -0,0 +1,48 @@ +import { Construct } from "constructs"; +import * as ssm from 'aws-cdk-lib/aws-ssm'; +import * as rds from 'aws-cdk-lib/aws-rds'; +import * as cdk from 'aws-cdk-lib'; +import * as ec2 from 'aws-cdk-lib/aws-ec2'; + +export default (self: Construct, vpc: cdk.aws_ec2.IVpc) => { + const dbSubnetGroup = new rds.SubnetGroup(self, 'DatabaseSubnetGroup', { + vpc, + subnetGroupName: 'PolisDatabaseSubnetGroup', + description: 'Subnet group for the postgres database', + vpcSubnets: { subnetGroupName: 'Private' }, + removalPolicy: cdk.RemovalPolicy.RETAIN, + }); + + const db = new rds.DatabaseInstance(self, 'Database', { + engine: rds.DatabaseInstanceEngine.postgres({version: rds.PostgresEngineVersion.VER_17 }), + instanceType: ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.LARGE), + vpc, + allocatedStorage: 20, + storageType: rds.StorageType.GP2, + credentials: rds.Credentials.fromGeneratedSecret('dbUser'), + databaseName: 'polisdb', + removalPolicy: cdk.RemovalPolicy.SNAPSHOT, + deletionProtection: true, + 
publiclyAccessible: false, + subnetGroup: dbSubnetGroup, + }); + + // SSM Parameters for DB connection + const dbSecretArnParam = new ssm.StringParameter(self, 'DBSecretArnParameter', { + parameterName: '/polis/db-secret-arn', + stringValue: db.secret!.secretArn, + description: 'SSM Parameter storing the ARN of the Polis Database Secret', + }); + const dbHostParam = new ssm.StringParameter(self, 'DBHostParameter', { + parameterName: '/polis/db-host', + stringValue: db.dbInstanceEndpointAddress, + description: 'SSM Parameter storing the Polis Database Host', + }); + const dbPortParam = new ssm.StringParameter(self, 'DBPortParameter', { + parameterName: '/polis/db-port', + stringValue: db.dbInstanceEndpointPort, + description: 'SSM Parameter storing the Polis Database Port', + }); + + return { dbSubnetGroup, db, dbSecretArnParam, dbHostParam, dbPortParam } +} diff --git a/cdk/dns.ts b/cdk/dns.ts new file mode 100644 index 0000000000..3baddecc28 --- /dev/null +++ b/cdk/dns.ts @@ -0,0 +1,59 @@ +import { Construct } from "constructs"; +import * as cdk from 'aws-cdk-lib'; +import * as elbv2 from 'aws-cdk-lib/aws-elasticloadbalancingv2'; +import * as acm from 'aws-cdk-lib/aws-certificatemanager'; + +export default ( + self: Construct, + vpc: cdk.aws_ec2.Vpc, + lbSecurityGroup: cdk.aws_ec2.SecurityGroup, + asgWeb: cdk.aws_autoscaling.AutoScalingGroup +) => { + const lb = new elbv2.ApplicationLoadBalancer(self, 'Lb', { + vpc, + internetFacing: true, + securityGroup: lbSecurityGroup, // Use the dedicated ALB security group + idleTimeout: cdk.Duration.seconds(300), + }); + + const webTargetGroup = new elbv2.ApplicationTargetGroup(self, 'WebAppTargetGroup', { + vpc, + port: 80, + protocol: elbv2.ApplicationProtocol.HTTP, + targets: [asgWeb], + healthCheck: { + path: "/api/v3/testConnection", + interval: cdk.Duration.seconds(300) + } + }); + + const httpListener = lb.addListener('HttpListener', { + port: 80, + open: true, + defaultTargetGroups: [webTargetGroup], + }); + + const 
certificate = new acm.Certificate(self, 'WebAppCertificate', { + domainName: 'pol.is', + validation: acm.CertificateValidation.fromDns(), + }); + + const httpsListener = lb.addListener('HttpsListener', { + port: 443, + certificates: [certificate], + open: true, + defaultTargetGroups: [webTargetGroup], + }); + + const webScalingPolicy = asgWeb.scaleOnRequestCount('WebScalingPolicy', { + targetRequestsPerMinute: 600, + }); + + return { + lb, + webTargetGroup, + httpListener, + httpsListener, + webScalingPolicy + } +} \ No newline at end of file diff --git a/cdk/ecr.ts b/cdk/ecr.ts new file mode 100644 index 0000000000..2733c401e3 --- /dev/null +++ b/cdk/ecr.ts @@ -0,0 +1,39 @@ +import { Construct } from "constructs"; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as ecr from 'aws-cdk-lib/aws-ecr'; +import * as cdk from 'aws-cdk-lib'; +import * as ssm from 'aws-cdk-lib/aws-ssm'; + +export default (self: Construct, instanceRole: iam.IGrantable) => { + const createEcrRepo = (name: string): ecr.Repository => { + const repo = new ecr.Repository(self, `PolisRepository${name}`, { + repositoryName: `polis/${name.toLowerCase()}`, + removalPolicy: cdk.RemovalPolicy.RETAIN, + imageScanOnPush: true, + }); + + repo.addToResourcePolicy(new iam.PolicyStatement({ + sid: 'AllowPublicPull', + effect: iam.Effect.ALLOW, + principals: [new iam.AnyPrincipal()], + actions: [ + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + ], + })); + repo.grantPull(instanceRole); // Grant pull to the shared instance role + return repo; + }; + const ecrWebRepository = createEcrRepo('Server'); + const ecrMathRepository = createEcrRepo('Math'); + const ecrDelphiRepository = createEcrRepo('Delphi'); + + // --- SSM Parameter for Image Tag + const imageTagParameter = new ssm.StringParameter(self, 'ImageTagParameter', { + parameterName: '/polis/image-tag', + stringValue: 'initial-tag', //CI/CD will update this + }); + + return { ecrWebRepository, 
ecrMathRepository, ecrDelphiRepository, imageTagParameter } +} \ No newline at end of file diff --git a/cdk/iamRoles.ts b/cdk/iamRoles.ts new file mode 100644 index 0000000000..5d628275a7 --- /dev/null +++ b/cdk/iamRoles.ts @@ -0,0 +1,29 @@ +import * as iam from 'aws-cdk-lib/aws-iam'; +import { Construct } from 'constructs'; + +export default (self: Construct) => { + const instanceRole = new iam.Role(self, 'InstanceRole', { + assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com'), + managedPolicies: [ + iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore'), + iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonEC2RoleforAWSCodeDeploy'), + iam.ManagedPolicy.fromAwsManagedPolicyName('SecretsManagerReadWrite'), + iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonEC2ContainerRegistryReadOnly'), + iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchLogsFullAccess'), + iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchAgentServerPolicy'), + ], + }); + instanceRole.addToPolicy(new iam.PolicyStatement({ + actions: ['s3:PutObject', 's3:PutObjectAcl', 's3:AbortMultipartUpload'], + resources: ['arn:aws:s3:::*', 'arn:aws:s3:::*/*'], + })); + + // IAM Role for CodeDeploy + const codeDeployRole = new iam.Role(self, 'CodeDeployRole', { + assumedBy: new iam.ServicePrincipal('codedeploy.amazonaws.com'), + managedPolicies: [ + iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSCodeDeployRole'), + ], + }); + return { instanceRole, codeDeployRole } +} \ No newline at end of file diff --git a/cdk/launchTemplates.ts b/cdk/launchTemplates.ts new file mode 100644 index 0000000000..3645725271 --- /dev/null +++ b/cdk/launchTemplates.ts @@ -0,0 +1,189 @@ +import * as ec2 from 'aws-cdk-lib/aws-ec2'; +import * as cdk from 'aws-cdk-lib'; +import { Construct } from 'constructs'; + + +export default ( + self: Construct, + logGroup: cdk.aws_logs.LogGroup, + ollamaNamespace: string, + ollamaModelDirectory: string, + fileSystem: 
cdk.aws_efs.FileSystem, + machineImageWeb: ec2.IMachineImage, + instanceTypeWeb: ec2.InstanceType, + webSecurityGroup: ec2.ISecurityGroup, + webKeyPair: ec2.IKeyPair | undefined, + instanceRole: cdk.aws_iam.IRole, + machineImageMathWorker: ec2.IMachineImage, + instanceTypeMathWorker: ec2.InstanceType, + mathWorkerSecurityGroup: ec2.ISecurityGroup, + mathWorkerKeyPair: ec2.IKeyPair | undefined, + machineImageDelphiSmall: ec2.IMachineImage, + instanceTypeDelphiSmall: ec2.InstanceType, + delphiSmallKeyPair: ec2.IKeyPair | undefined, + machineImageDelphiLarge: ec2.IMachineImage, + instanceTypeDelphiLarge: ec2.InstanceType, + delphiSecurityGroup: ec2.ISecurityGroup, + delphiLargeKeyPair: ec2.IKeyPair | undefined, + machineImageOllama: ec2.IMachineImage, + instanceTypeOllama: ec2.InstanceType, + ollamaKeyPair: ec2.IKeyPair | undefined, + ollamaSecurityGroup: ec2.ISecurityGroup +) => { + // Generic User Data function (Works with NAT Gateway for internet) + const usrdata = (CLOUDWATCH_LOG_GROUP_NAME: string, service: string, instanceSize?: string) => { + let ld: ec2.UserData; + ld = ec2.UserData.forLinux(); + ld.addCommands( + '#!/bin/bash', + 'set -e', + 'set -x', + `echo "Writing service type '${service}' to /tmp/service_type.txt"`, + `echo "${service}" > /tmp/service_type.txt`, + `echo "Contents of /tmp/service_type.txt: $(cat /tmp/service_type.txt)"`, + // If instanceSize is provided, write it to a file + instanceSize ? `echo "Writing instance size '${instanceSize}' to /tmp/instance_size.txt"` : '', + instanceSize ? `echo "${instanceSize}" > /tmp/instance_size.txt` : '', + instanceSize ? 
`echo "Contents of /tmp/instance_size.txt: $(cat /tmp/instance_size.txt)"` : '',
+      'sudo yum update -y',
+      'sudo yum install -y amazon-cloudwatch-agent',
+      'sudo dnf install -y wget ruby docker',
+      'sudo systemctl start docker',
+      'sudo systemctl enable docker',
+      'sudo usermod -a -G docker ec2-user',
+      'sudo curl -L https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose',
+      'sudo chmod +x /usr/local/bin/docker-compose',
+      'docker-compose --version', // Verify installation
+      'sudo yum install -y jq',
+      `export SERVICE=${service}`,
+      instanceSize ? `export INSTANCE_SIZE=${instanceSize}` : '',
+      'exec 1>>/var/log/user-data.log 2>&1',
+      'echo "Finished User Data Execution at $(date)"',
+      'sudo mkdir -p /etc/docker', // Ensure /etc/docker directory exists
+      `sudo tee /etc/docker/daemon.json << EOF
+{
+  "log-driver": "awslogs",
+  "log-opts": {
+    "awslogs-group": "${CLOUDWATCH_LOG_GROUP_NAME}",
+    "awslogs-region": "${cdk.Stack.of(self).region}",
+    "awslogs-stream": "${service}"
+  }
+}
+EOF`,
+      'sudo systemctl restart docker',
+      'sudo systemctl status docker'
+    );
+    return ld;
+  };
+
+  const ollamaUsrData = ec2.UserData.forLinux();
+  const cwAgentConfigPath = '/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json';
+  ollamaUsrData.addCommands(
+    ...usrdata(logGroup.logGroupName, "ollama").render().split('\n').filter(line => line.trim() !== ''),
+    'echo "Installing EFS utilities for Ollama..."',
+    'sudo dnf install -y amazon-efs-utils nfs-utils',
+    'echo "Starting Ollama specific setup..."',
+    'echo "Configuring CloudWatch Agent for GPU metrics..."',
+    `sudo tee ${cwAgentConfigPath} << EOF
+{
+  "agent": { "metrics_collection_interval": 60, "run_as_user": "root" },
+  "metrics": {
+    "append_dimensions": { "AutoScalingGroupName": "\${aws:AutoScalingGroupName}", "ImageId": "\${aws:ImageId}", "InstanceId": "\${aws:InstanceId}", "InstanceType": "\${aws:InstanceType}" },
+ 
    "metrics_collected": {
+      "nvidia_gpu": { "measurement": [ {"name": "utilization_gpu", "unit": "Percent"}, {"name": "utilization_memory", "unit": "Percent"}, {"name": "memory_total", "unit": "Megabytes"}, {"name": "memory_used", "unit": "Megabytes"}, {"name": "memory_free", "unit": "Megabytes"}, {"name": "power_draw", "unit": "Watts"}, {"name": "temperature_gpu", "unit": "Count"} ], "metrics_collection_interval": 60, "nvidia_smi_path": "/usr/bin/nvidia-smi", "metrics_aggregation_interval": 60, "namespace": "${ollamaNamespace}" },
+      "disk": { "measurement": [ "used_percent" ], "metrics_collection_interval": 60, "resources": [ "/" ] },
+      "mem": { "measurement": [ "mem_used_percent" ], "metrics_collection_interval": 60 }
+    }
+  }
+}
+EOF`,
+    `sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:${cwAgentConfigPath} -s`,
+    'sudo systemctl enable amazon-cloudwatch-agent',
+    'echo "CloudWatch Agent configured and started."',
+    `echo "Mounting EFS filesystem ${fileSystem.fileSystemId}..."`,
+    `sudo mkdir -p ${ollamaModelDirectory}`,
+    `sudo mount -t efs -o tls ${fileSystem.fileSystemId}:/ ${ollamaModelDirectory}`,
+    `echo "${fileSystem.fileSystemId}:/ ${ollamaModelDirectory} efs _netdev,tls 0 0" | sudo tee -a /etc/fstab`,
+    `sudo chown ec2-user:ec2-user ${ollamaModelDirectory}`,
+    'echo "EFS mounted successfully."',
+    'echo "Starting Ollama container..."',
+    'sudo docker run -d --name ollama \\',
+    ' --gpus all \\',
+    ' -p 0.0.0.0:11434:11434 \\',
+    ` -v ${ollamaModelDirectory}:/root/.ollama \\`,
+    ' --restart unless-stopped \\',
+    ' ollama/ollama serve',
+    '(',
+    ' echo "Waiting for Ollama service (background task)..."',
+    ' sleep 60',
+    ' echo "Pulling default Ollama model (llama3.1:8b) in background..."',
+    ' sudo docker exec ollama ollama pull llama3.1:8b || echo "Failed to pull default model initially, may need manual pull later."',
+    ' echo "Background model pull task finished."',
+    ') &',
+    'disown',
+    'echo "Ollama 
setup script finished."' + ); + + + // --- Launch Templates + const webLaunchTemplate = new ec2.LaunchTemplate(self, 'WebLaunchTemplate', { + machineImage: machineImageWeb, + userData: usrdata(logGroup.logGroupName, "server"), + instanceType: instanceTypeWeb, + securityGroup: webSecurityGroup, + keyPair: webKeyPair, + role: instanceRole, + }); + const mathWorkerLaunchTemplate = new ec2.LaunchTemplate(self, 'MathWorkerLaunchTemplate', { + machineImage: machineImageMathWorker, + userData: usrdata(logGroup.logGroupName, "math"), + instanceType: instanceTypeMathWorker, + securityGroup: mathWorkerSecurityGroup, + keyPair: mathWorkerKeyPair, + role: instanceRole, + }); + // Delphi Small Launch Template + const delphiSmallLaunchTemplate = new ec2.LaunchTemplate(self, 'DelphiSmallLaunchTemplate', { + machineImage: machineImageDelphiSmall, + userData: usrdata(logGroup.logGroupName, "delphi", "small"), + instanceType: instanceTypeDelphiSmall, + securityGroup: delphiSecurityGroup, + keyPair: delphiSmallKeyPair, + role: instanceRole, + }); + // Delphi Large Launch Template + const delphiLargeLaunchTemplate = new ec2.LaunchTemplate(self, 'DelphiLargeLaunchTemplate', { + machineImage: machineImageDelphiLarge, + userData: usrdata(logGroup.logGroupName, "delphi", "large"), + instanceType: instanceTypeDelphiLarge, + securityGroup: delphiSecurityGroup, + keyPair: delphiLargeKeyPair, + role: instanceRole, + }); + // Ollama Launch Template + const ollamaLaunchTemplate = new ec2.LaunchTemplate(self, 'OllamaLaunchTemplate', { + machineImage: machineImageOllama, + userData: ollamaUsrData, + instanceType: instanceTypeOllama, + securityGroup: ollamaSecurityGroup, + keyPair: ollamaKeyPair, + role: instanceRole, + blockDevices: [ + { + deviceName: '/dev/xvda', // Adjust if needed for DLAMI + volume: ec2.BlockDeviceVolume.ebs(100, { + volumeType: ec2.EbsDeviceVolumeType.GP3, + deleteOnTermination: true, + }), + }, + ], + }); + + return { + webLaunchTemplate, + mathWorkerLaunchTemplate, + 
delphiSmallLaunchTemplate, + delphiLargeLaunchTemplate, + ollamaLaunchTemplate + } +} \ No newline at end of file diff --git a/cdk/lib/cdk-stack.ts b/cdk/lib/cdk-stack.ts index 9d7b000145..53a33da7cd 100644 --- a/cdk/lib/cdk-stack.ts +++ b/cdk/lib/cdk-stack.ts @@ -1,22 +1,13 @@ import * as cdk from 'aws-cdk-lib'; import * as ec2 from 'aws-cdk-lib/aws-ec2'; -import * as autoscaling from 'aws-cdk-lib/aws-autoscaling'; import * as elbv2 from 'aws-cdk-lib/aws-elasticloadbalancingv2'; -import * as rds from 'aws-cdk-lib/aws-rds'; -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as s3 from 'aws-cdk-lib/aws-s3'; import * as logs from 'aws-cdk-lib/aws-logs'; -import * as codedeploy from 'aws-cdk-lib/aws-codedeploy'; import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; import * as sns from 'aws-cdk-lib/aws-sns'; import * as subscriptions from 'aws-cdk-lib/aws-sns-subscriptions'; -import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; -import * as cloudwatch_actions from 'aws-cdk-lib/aws-cloudwatch-actions'; // Import actions submodule -import * as acm from 'aws-cdk-lib/aws-certificatemanager'; -import * as ecr from 'aws-cdk-lib/aws-ecr'; -import * as ssm from 'aws-cdk-lib/aws-ssm'; -import * as efs from 'aws-cdk-lib/aws-efs'; // Import EFS module +import * as efs from 'aws-cdk-lib/aws-efs'; import { Construct } from 'constructs'; + // custom constructs for code organization import createPolisVPC from '../vpc'; import { @@ -32,6 +23,14 @@ import { machineImageOllama } from '../ec2'; import createSecurityGroups from '../securityGroups'; +import createRoles from '../iamRoles'; +import createECRRepos from '../ecr'; +import createDBResources from '../db'; +import configureLaunchTemplates from '../launchTemplates'; +import createAutoScalingAndAlarms from '../autoscaling'; +import createCodedeployConfig from '../codedeploy'; +import createALBAndDNS from '../dns'; +import createSecretsAndDependencies from '../secrets'; interface PolisStackProps extends 
cdk.StackProps { enableSSHAccess?: boolean; // Make optional, default to false @@ -53,6 +52,8 @@ export class CdkStack extends cdk.Stack { const ollamaPort = 11434; const ollamaModelDirectory = '/efs/ollama-models'; const ollamaNamespace = 'OllamaMetrics'; // Custom namespace for GPU metrics + + // Create VPC const vpc = createPolisVPC(this); const alarmTopic = new sns.Topic(this, 'AlarmTopic', { @@ -64,6 +65,7 @@ export class CdkStack extends cdk.Stack { removalPolicy: cdk.RemovalPolicy.DESTROY, }); + // Create security group const { webSecurityGroup, mathWorkerSecurityGroup, @@ -90,8 +92,8 @@ export class CdkStack extends cdk.Stack { const sshPeer = ec2.Peer.ipv4(props.sshAllowedIpRange || defaultSSHRange); webSecurityGroup.addIngressRule(sshPeer, ec2.Port.tcp(22), 'Allow SSH access'); mathWorkerSecurityGroup.addIngressRule(sshPeer, ec2.Port.tcp(22), 'Allow SSH access'); - delphiSecurityGroup.addIngressRule(sshPeer, ec2.Port.tcp(22), 'Allow SSH access'); // NEW - ollamaSecurityGroup.addIngressRule(sshPeer, ec2.Port.tcp(22), 'Allow SSH access'); // NEW + delphiSecurityGroup.addIngressRule(sshPeer, ec2.Port.tcp(22), 'Allow SSH access'); + ollamaSecurityGroup.addIngressRule(sshPeer, ec2.Port.tcp(22), 'Allow SSH access'); } webSecurityGroup.addIngressRule(ec2.Peer.ipv4(props.sshAllowedIpRange || defaultSSHRange), ec2.Port.tcp(22), 'Allow SSH'); // Control SSH separately @@ -112,31 +114,7 @@ export class CdkStack extends cdk.Stack { const delphiLargeKeyPair = getKeyPair('DelphiLargeKeyPair', props.delphiLargeKeyPairName); const ollamaKeyPair = getKeyPair('OllamaKeyPair', props.ollamaKeyPairName); - - // --- IAM Role - const instanceRole = new iam.Role(this, 'InstanceRole', { - assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com'), - managedPolicies: [ - iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore'), - iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonEC2RoleforAWSCodeDeploy'), - 
iam.ManagedPolicy.fromAwsManagedPolicyName('SecretsManagerReadWrite'), - iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonEC2ContainerRegistryReadOnly'), - iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchLogsFullAccess'), - iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchAgentServerPolicy'), - ], - }); - instanceRole.addToPolicy(new iam.PolicyStatement({ - actions: ['s3:PutObject', 's3:PutObjectAcl', 's3:AbortMultipartUpload'], - resources: ['arn:aws:s3:::*', 'arn:aws:s3:::*/*'], - })); - - // IAM Role for CodeDeploy - const codeDeployRole = new iam.Role(this, 'CodeDeployRole', { - assumedBy: new iam.ServicePrincipal('codedeploy.amazonaws.com'), - managedPolicies: [ - iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSCodeDeployRole'), - ], - }); + const { instanceRole, codeDeployRole } = createRoles(this); // ALB Security Group const lbSecurityGroup = new ec2.SecurityGroup(this, 'LBSecurityGroup', { @@ -147,77 +125,11 @@ export class CdkStack extends cdk.Stack { lbSecurityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(80), 'Allow HTTP from anywhere'); lbSecurityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(443), 'Allow HTTPS from anywhere'); - // --- ECR Repositories - const createEcrRepo = (name: string): ecr.Repository => { - const repo = new ecr.Repository(this, `PolisRepository${name}`, { - repositoryName: `polis/${name.toLowerCase()}`, - removalPolicy: cdk.RemovalPolicy.RETAIN, - imageScanOnPush: true, - }); - - repo.addToResourcePolicy(new iam.PolicyStatement({ - sid: 'AllowPublicPull', - effect: iam.Effect.ALLOW, - principals: [new iam.AnyPrincipal()], - actions: [ - "ecr:BatchCheckLayerAvailability", - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - ], - })); - repo.grantPull(instanceRole); // Grant pull to the shared instance role - return repo; - }; - const ecrWebRepository = createEcrRepo('Server'); - const ecrMathRepository = createEcrRepo('Math'); - const ecrDelphiRepository = createEcrRepo('Delphi'); - - // 
--- SSM Parameter for Image Tag - const imageTagParameter = new ssm.StringParameter(this, 'ImageTagParameter', { - parameterName: '/polis/image-tag', - stringValue: 'initial-tag', //CI/CD will update this - }); - - // --- Postgres (PG17, GP2 in 'Private' ISOLATED subnet) --- - const dbSubnetGroup = new rds.SubnetGroup(this, 'DatabaseSubnetGroup', { - vpc, - subnetGroupName: 'PolisDatabaseSubnetGroup', - description: 'Subnet group for the postgres database', - vpcSubnets: { subnetGroupName: 'Private' }, - removalPolicy: cdk.RemovalPolicy.RETAIN, - }); - - const db = new rds.DatabaseInstance(this, 'Database', { - engine: rds.DatabaseInstanceEngine.postgres({version: rds.PostgresEngineVersion.VER_17 }), - instanceType: ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.LARGE), - vpc, - allocatedStorage: 20, - storageType: rds.StorageType.GP2, - credentials: rds.Credentials.fromGeneratedSecret('dbUser'), - databaseName: 'polisdb', - removalPolicy: cdk.RemovalPolicy.SNAPSHOT, - deletionProtection: true, - publiclyAccessible: false, - subnetGroup: dbSubnetGroup, - }); - - // SSM Parameters for DB connection - const dbSecretArnParam = new ssm.StringParameter(this, 'DBSecretArnParameter', { - parameterName: '/polis/db-secret-arn', - stringValue: db.secret!.secretArn, - description: 'SSM Parameter storing the ARN of the Polis Database Secret', - }); - const dbHostParam = new ssm.StringParameter(this, 'DBHostParameter', { - parameterName: '/polis/db-host', - stringValue: db.dbInstanceEndpointAddress, - description: 'SSM Parameter storing the Polis Database Host', - }); - const dbPortParam = new ssm.StringParameter(this, 'DBPortParameter', { - parameterName: '/polis/db-port', - stringValue: db.dbInstanceEndpointPort, - description: 'SSM Parameter storing the Polis Database Port', - }); + // Create ECR repos + const { ecrWebRepository, ecrDelphiRepository, ecrMathRepository, imageTagParameter } = createECRRepos(this, instanceRole); + // Create DB and related resources 
+ const { dbSubnetGroup, db, dbSecretArnParam, dbHostParam, dbPortParam } = createDBResources(this, vpc); // --- EFS for Ollama Models const fileSystem = new efs.FileSystem(this, 'OllamaModelFileSystem', { @@ -231,287 +143,83 @@ export class CdkStack extends cdk.Stack { vpcSubnets: { subnetGroupName: 'PrivateWithEgress' }, }); - - // --- User Data Scripts (Optimized function used by all) --- - // Generic User Data function (Works with NAT Gateway for internet) - const usrdata = (CLOUDWATCH_LOG_GROUP_NAME: string, service: string, instanceSize?: string) => { - let ld: ec2.UserData; - ld = ec2.UserData.forLinux(); - ld.addCommands( - '#!/bin/bash', - 'set -e', - 'set -x', - `echo "Writing service type '${service}' to /tmp/service_type.txt"`, - `echo "${service}" > /tmp/service_type.txt`, - `echo "Contents of /tmp/service_type.txt: $(cat /tmp/service_type.txt)"`, - // If instanceSize is provided, write it to a file - instanceSize ? `echo "Writing instance size '${instanceSize}' to /tmp/instance_size.txt"` : '', - instanceSize ? `echo "${instanceSize}" > /tmp/instance_size.txt` : '', - instanceSize ? `echo "Contents of /tmp/instance_size.txt: $(cat /tmp/instance_size.txt)"` : '', - 'sudo yum update -y', - 'sudo yum install -y amazon-cloudwatch-agent -y', - 'sudo dnf install -y wget ruby docker', - 'sudo systemctl start docker', - 'sudo systemctl enable docker', - 'sudo usermod -a -G docker ec2-user', - 'sudo curl -L https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose', - 'sudo chmod +x /usr/local/bin/docker-compose', - 'docker-compose --version', // Verify installation - 'sudo yum install -y jq', - `export SERVICE=${service}`, - instanceSize ? 
`export INSTANCE_SIZE=${instanceSize}` : '', - 'exec 1>>/var/log/user-data.log 2>&1', - 'echo "Finished User Data Execution at $(date)"', - 'sudo mkdir -p /etc/docker', // Ensure /etc/docker directory exists - `sudo tee /etc/docker/daemon.json << EOF -{ - "log-driver": "awslogs", - "log-opts": { - "awslogs-group": "${CLOUDWATCH_LOG_GROUP_NAME}", - "awslogs-region": "${cdk.Stack.of(this).region}", - "awslogs-stream": "${service}" - } -} -EOF`, - 'sudo systemctl restart docker', - 'sudo systemctl status docker' - ); - return ld; - }; - - const ollamaUsrData = ec2.UserData.forLinux(); - const cwAgentConfigPath = '/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json'; - ollamaUsrData.addCommands( - ...usrdata(logGroup.logGroupName, "ollama").render().split('\n').filter(line => line.trim() !== ''), - 'echo "Installing EFS utilities for Ollama..."', - 'sudo dnf install -y amazon-efs-utils nfs-utils', - 'echo "Starting Ollama specific setup..."', - 'echo "Configuring CloudWatch Agent for GPU metrics..."', - `sudo tee ${cwAgentConfigPath} << EOF -{ - "agent": { "metrics_collection_interval": 60, "run_as_user": "root" }, - "metrics": { - "append_dimensions": { "AutoScalingGroupName": "\${aws:AutoScalingGroupName}", "ImageId": "\${aws:ImageId}", "InstanceId": "\${aws:InstanceId}", "InstanceType": "\${aws:InstanceType}" }, - "metrics_collected": { - "nvidia_gpu": { "measurement": [ {"name": "utilization_gpu", "unit": "Percent"}, {"name": "utilization_memory", "unit": "Percent"}, {"name": "memory_total", "unit": "Megabytes"}, {"name": "memory_used", "unit": "Megabytes"}, {"name": "memory_free", "unit": "Megabytes"}, {"name": "power_draw", "unit": "Watts"}, {"name": "temperature_gpu", "unit": "Count"} ], "metrics_collection_interval": 60, "nvidia_smi_path": "/usr/bin/nvidia-smi", "metrics_aggregation_interval": 60, "namespace": "${ollamaNamespace}" }, - "disk": { "measurement": [ "used_percent" ], "metrics_collection_interval": 60, "resources": [ "/" ] }, - "mem": 
{ "measurement": [ "mem_used_percent" ], "metrics_collection_interval": 60 } - } - } -} -EOF`, - 'sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:${cwAgentConfigPath} -s', - 'sudo systemctl enable amazon-cloudwatch-agent', - 'echo "CloudWatch Agent configured and started."', - 'echo "Mounting EFS filesystem ${fileSystem.fileSystemId}..."', - `sudo mkdir -p ${ollamaModelDirectory}`, - `sudo mount -t efs -o tls ${fileSystem.fileSystemId}:/ ${ollamaModelDirectory}`, - `echo "${fileSystem.fileSystemId}:/ ${ollamaModelDirectory} efs _netdev,tls 0 0" | sudo tee -a /etc/fstab`, - `sudo chown ec2-user:ec2-user ${ollamaModelDirectory}`, - 'echo "EFS mounted successfully."', - 'echo "Starting Ollama container..."', - 'sudo docker run -d --name ollama \\', - ' --gpus all \\', - ' -p 0.0.0.0:11434:11434 \\', - ` -v ${ollamaModelDirectory}:/root/.ollama \\`, - ' --restart unless-stopped \\', - ' ollama/ollama serve', - '(', - ' echo "Waiting for Ollama service (background task)..."', - ' sleep 60', - ' echo "Pulling default Ollama model (llama3.1:8b) in background..."', - ' sudo docker exec ollama ollama pull llama3.1:8b || echo "Failed to pull default model initially, may need manual pull later."', - ' echo "Background model pull task finished."', - ') &', - 'disown', - 'echo "Ollama setup script finished."' + // launch templates + const { + webLaunchTemplate, + mathWorkerLaunchTemplate, + delphiSmallLaunchTemplate, + delphiLargeLaunchTemplate, + ollamaLaunchTemplate + } = configureLaunchTemplates(this, + logGroup, + ollamaNamespace, + ollamaModelDirectory, + fileSystem, + machineImageWeb, + instanceTypeWeb, + webSecurityGroup, + webKeyPair, + instanceRole, + machineImageMathWorker, + instanceTypeMathWorker, + mathWorkerSecurityGroup, + mathWorkerKeyPair, + machineImageDelphiSmall, + instanceTypeDelphiSmall, + delphiSmallKeyPair, + machineImageDelphiLarge, + instanceTypeDelphiLarge, + delphiSecurityGroup, + 
delphiLargeKeyPair, + machineImageOllama, + instanceTypeOllama, + ollamaKeyPair, + ollamaSecurityGroup ); - - // --- Launch Templates - const webLaunchTemplate = new ec2.LaunchTemplate(this, 'WebLaunchTemplate', { - machineImage: machineImageWeb, - userData: usrdata(logGroup.logGroupName, "server"), - instanceType: instanceTypeWeb, - securityGroup: webSecurityGroup, - keyPair: webKeyPair, - role: instanceRole, - }); - const mathWorkerLaunchTemplate = new ec2.LaunchTemplate(this, 'MathWorkerLaunchTemplate', { - machineImage: machineImageMathWorker, - userData: usrdata(logGroup.logGroupName, "math"), - instanceType: instanceTypeMathWorker, - securityGroup: mathWorkerSecurityGroup, - keyPair: mathWorkerKeyPair, - role: instanceRole, - }); - // Delphi Small Launch Template - const delphiSmallLaunchTemplate = new ec2.LaunchTemplate(this, 'DelphiSmallLaunchTemplate', { - machineImage: machineImageDelphiSmall, - userData: usrdata(logGroup.logGroupName, "delphi", "small"), - instanceType: instanceTypeDelphiSmall, - securityGroup: delphiSecurityGroup, - keyPair: delphiSmallKeyPair, - role: instanceRole, - }); - // Delphi Large Launch Template - const delphiLargeLaunchTemplate = new ec2.LaunchTemplate(this, 'DelphiLargeLaunchTemplate', { - machineImage: machineImageDelphiLarge, - userData: usrdata(logGroup.logGroupName, "delphi", "large"), - instanceType: instanceTypeDelphiLarge, - securityGroup: delphiSecurityGroup, - keyPair: delphiLargeKeyPair, - role: instanceRole, - }); - // Ollama Launch Template - const ollamaLaunchTemplate = new ec2.LaunchTemplate(this, 'OllamaLaunchTemplate', { - machineImage: machineImageOllama, - userData: ollamaUsrData, - instanceType: instanceTypeOllama, - securityGroup: ollamaSecurityGroup, - keyPair: ollamaKeyPair, - role: instanceRole, - blockDevices: [ - { - deviceName: '/dev/xvda', // Adjust if needed for DLAMI - volume: ec2.BlockDeviceVolume.ebs(100, { - volumeType: ec2.EbsDeviceVolumeType.GP3, - deleteOnTermination: true, - }), - }, - ], 
- }); - - - // --- Auto Scaling Groups - const commonAsgProps = { vpc, role: instanceRole }; - - // Ollama ASG - const asgOllama = new autoscaling.AutoScalingGroup(this, 'AsgOllama', { - vpc, - launchTemplate: ollamaLaunchTemplate, - minCapacity: 1, - maxCapacity: 3, - desiredCapacity: 1, - vpcSubnets: { subnetGroupName: 'PrivateWithEgress' }, - healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(10) }), - }); - asgOllama.node.addDependency(logGroup); - asgOllama.node.addDependency(fileSystem); // Ensure EFS is ready before instances start - - // Web ASG - const asgWeb = new autoscaling.AutoScalingGroup(this, 'Asg', { - vpc, - launchTemplate: webLaunchTemplate, - minCapacity: 2, - maxCapacity: 10, - desiredCapacity: 2, - vpcSubnets: { subnetType: ec2.SubnetType.PUBLIC }, - healthCheck: autoscaling.HealthCheck.elb({grace: cdk.Duration.minutes(5)}) - }); - - // Math Worker ASG - const asgMathWorker = new autoscaling.AutoScalingGroup(this, 'AsgMathWorker', { - vpc, - launchTemplate: mathWorkerLaunchTemplate, - minCapacity: 1, - desiredCapacity: 1, - maxCapacity: 5, - vpcSubnets: { subnetType: ec2.SubnetType.PUBLIC }, - healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(2) }), - }); - - // Delphi Small ASG - const asgDelphiSmall = new autoscaling.AutoScalingGroup(this, 'AsgDelphiSmall', { - vpc, - launchTemplate: delphiSmallLaunchTemplate, - minCapacity: 1, - desiredCapacity: 1, - maxCapacity: 5, - vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS }, - healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(5) }), - }); - - // Delphi Large ASG - const asgDelphiLarge = new autoscaling.AutoScalingGroup(this, 'AsgDelphiLarge', { + // Auto Scaling Groups and alarms + const { + asgOllama, + asgWeb, + asgMathWorker, + asgDelphiSmall, + asgDelphiLarge, + commonAsgProps + } = createAutoScalingAndAlarms( + this, vpc, - launchTemplate: delphiLargeLaunchTemplate, - minCapacity: 1, - desiredCapacity: 1, - 
maxCapacity: 3, - vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS }, - healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(5) }), - }); - - - // --- Scaling Policies & Alarms - const mathWorkerCpuMetric = new cloudwatch.Metric({ - namespace: 'AWS/EC2', - metricName: 'CPUUtilization', - dimensionsMap: { - AutoScalingGroupName: asgMathWorker.autoScalingGroupName - }, - statistic: 'Average', - period: cdk.Duration.minutes(10), - }); - asgMathWorker.scaleToTrackMetric('CpuTracking', { - metric: mathWorkerCpuMetric, - targetValue: 50, - }); - - // Add Delphi CPU Scaling Policies & Alarms - const createDelphiCpuScaling = (asg: autoscaling.AutoScalingGroup, name: string, target: number): cloudwatch.Metric => { - const cpuMetric = new cloudwatch.Metric({ - namespace: 'AWS/EC2', - metricName: 'CPUUtilization', - dimensionsMap: { AutoScalingGroupName: asg.autoScalingGroupName }, - statistic: 'Average', - period: cdk.Duration.minutes(5), - }); - asg.scaleToTrackMetric(`${name}CpuTracking`, { - metric: cpuMetric, - targetValue: target - }); - - // High CPU Alarm - const alarm = new cloudwatch.Alarm(this, `${name}HighCpuAlarm`, { - metric: cpuMetric, - threshold: 80, // Alert if CPU > 80% - evaluationPeriods: 2, // for 2 consecutive periods (10 minutes total) - datapointsToAlarm: 2, // Ensure 2 datapoints are breaching - comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD, - alarmDescription: `Alert when ${name} instances CPU exceeds 80% for 10 minutes`, - treatMissingData: cloudwatch.TreatMissingData.IGNORE, // Or BREACHING/NOT_BREACHING as appropriate - }); - // Add SNS action to the alarm - alarm.addAlarmAction(new cloudwatch_actions.SnsAction(alarmTopic)); - return cpuMetric; - }; - const delphiSmallCpuMetric = createDelphiCpuScaling(asgDelphiSmall, 'DelphiSmall', 60); // Target 60% CPU - const delphiLargeCpuMetric = createDelphiCpuScaling(asgDelphiLarge, 'DelphiLarge', 60); // Target 60% CPU + instanceRole, + 
ollamaLaunchTemplate, + logGroup, + fileSystem, + webLaunchTemplate, + mathWorkerLaunchTemplate, + delphiSmallLaunchTemplate, + delphiLargeLaunchTemplate, + ollamaNamespace, + alarmTopic + ); - // Add Ollama GPU Scaling Policy - const ollamaGpuMetric = new cloudwatch.Metric({ - namespace: ollamaNamespace, // Custom namespace from CW Agent config - metricName: 'utilization_gpu', // GPU utilization metric name from CW Agent config - dimensionsMap: { AutoScalingGroupName: asgOllama.autoScalingGroupName }, - statistic: 'Average', - period: cdk.Duration.minutes(1), - }); - asgOllama.scaleToTrackMetric('OllamaGpuScaling', { - metric: ollamaGpuMetric, - targetValue: 75, - cooldown: cdk.Duration.minutes(5), // Prevent flapping - disableScaleIn: false, // Allow scaling down - estimatedInstanceWarmup: cdk.Duration.minutes(5), // Time until instance contributes metrics meaningfully - }); + // --- DEPLOY STUFF + const { + application, + deploymentBucket, + deploymentGroup + } = createCodedeployConfig( + this, + instanceRole, + asgWeb, + asgMathWorker, + asgDelphiSmall, + asgDelphiLarge, + codeDeployRole + ); // --- Ollama Network Load Balancer (Internal, in Private+Egress) const ollamaNlb = new elbv2.NetworkLoadBalancer(this, 'OllamaNlb', { vpc, internetFacing: false, // Internal only crossZoneEnabled: true, - // Place NLB interfaces in PRIVATE_WITH_EGRESS subnets alongside Ollama instances vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS }, }); const ollamaListener = ollamaNlb.addListener('OllamaListener', { @@ -543,113 +251,39 @@ EOF`, }); ollamaServiceSecret.grantRead(instanceRole); - // --- DEPLOY STUFF - const application = new codedeploy.ServerApplication(this, 'CodeDeployApplication', { - applicationName: 'PolisApplication', - }); - - const deploymentBucket = new s3.Bucket(this, 'DeploymentPackageBucket', { - bucketName: `polis-deployment-packages-${cdk.Stack.of(this).account}-${cdk.Stack.of(this).region}`, - removalPolicy: cdk.RemovalPolicy.DESTROY, - 
autoDeleteObjects: true, - versioned: true, - publicReadAccess: false, - blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, - }); - deploymentBucket.grantRead(instanceRole); - - // Deployment Group - const deploymentGroup = new codedeploy.ServerDeploymentGroup(this, 'DeploymentGroup', { - application, - deploymentGroupName: 'PolisDeploymentGroup', - autoScalingGroups: [asgWeb, asgMathWorker, asgDelphiSmall, asgDelphiLarge], - deploymentConfig: codedeploy.ServerDeploymentConfig.ONE_AT_A_TIME, - role: codeDeployRole, - installAgent: true, - // Consider load balancer integration for blue/green (more complex) - }); - - // --- DB Access Rules db.connections.allowFrom(asgWeb, ec2.Port.tcp(5432), 'Allow database access from web ASG'); db.connections.allowFrom(asgMathWorker, ec2.Port.tcp(5432), 'Allow database access from math ASG'); db.connections.allowFrom(asgDelphiSmall, ec2.Port.tcp(5432), 'Allow database access from Delphi small ASG'); db.connections.allowFrom(asgDelphiLarge, ec2.Port.tcp(5432), 'Allow database access from Delphi large ASG'); - // --- Application Load Balancer - const lb = new elbv2.ApplicationLoadBalancer(this, 'Lb', { - vpc, - internetFacing: true, - securityGroup: lbSecurityGroup, // Use the dedicated ALB security group - idleTimeout: cdk.Duration.seconds(300), - }); - - const webTargetGroup = new elbv2.ApplicationTargetGroup(this, 'WebAppTargetGroup', { + // ALB & DNS + const { + lb, + webTargetGroup, + httpListener, + httpsListener, + webScalingPolicy + } = createALBAndDNS( + this, vpc, - port: 80, - protocol: elbv2.ApplicationProtocol.HTTP, - targets: [asgWeb], - healthCheck: { - path: "/api/v3/testConnection", - interval: cdk.Duration.seconds(300) - } - }); - - const httpListener = lb.addListener('HttpListener', { - port: 80, - open: true, - defaultTargetGroups: [webTargetGroup], - }); - - const certificate = new acm.Certificate(this, 'WebAppCertificate', { - domainName: 'pol.is', - validation: acm.CertificateValidation.fromDns(), - }); - - 
const httpsListener = lb.addListener('HttpsListener', { - port: 443, - certificates: [certificate], - open: true, - defaultTargetGroups: [webTargetGroup], - }); - - const webScalingPolicy = asgWeb.scaleOnRequestCount('WebScalingPolicy', { - targetRequestsPerMinute: 600, - }); - - // --- Secrets & Dependencies --- - const webAppEnvVarsSecret = new secretsmanager.Secret(this, 'WebAppEnvVarsSecret', { - secretName: 'polis-web-app-env-vars', - description: 'Environment variables for the Polis web application', - }); - const clientAdminEnvVarsSecret = new secretsmanager.Secret(this, 'ClientAdminEnvVarsSecret', { - secretName: 'polis-client-admin-env-vars', - description: 'Environment variables for the Polis client-admin web application', - }); - - const clientReportEnvVarsSecret = new secretsmanager.Secret(this, 'ClientReportEnvVarsSecret', { - secretName: 'polis-client-report-env-vars', - description: 'Environment variables for the Polis client-report web application', - }); - webAppEnvVarsSecret.grantRead(instanceRole); - clientAdminEnvVarsSecret.grantRead(instanceRole); - clientReportEnvVarsSecret.grantRead(instanceRole); - - // Dependencies (Add ASGs to loops/lists) - const addDbDependency = (asg: autoscaling.IAutoScalingGroup) => asg.node.addDependency(db); - const addLogDependency = (asg: autoscaling.IAutoScalingGroup) => asg.node.addDependency(logGroup); - const addSecretDependency = (asg: autoscaling.IAutoScalingGroup) => asg.node.addDependency(webAppEnvVarsSecret); + lbSecurityGroup, + asgWeb + ); - // Apply common dependencies to all ASGs - [asgWeb, asgMathWorker, asgDelphiSmall, asgDelphiLarge, asgOllama].forEach(asg => { - addLogDependency(asg); - addSecretDependency(asg); - // Only add DB dependency if the service needs it - if (asg !== asgOllama) { // Assuming Ollama doesn't directly need DB creds - addDbDependency(asg); - } - }); - asgOllama.node.addDependency(fileSystem); + // --- Secrets & Dependencies - creates secrets managed in SSM, grants services 
permission to interact with each other, etc. + createSecretsAndDependencies( + this, + instanceRole, + db, + logGroup, + asgWeb, + asgMathWorker, + asgDelphiSmall, + asgDelphiLarge, + asgOllama, + fileSystem + ); // --- Outputs new cdk.CfnOutput(this, 'LoadBalancerDNS', { value: lb.loadBalancerDnsName, description: 'Public DNS name of the Application Load Balancer' }); diff --git a/cdk/secrets.ts b/cdk/secrets.ts new file mode 100644 index 0000000000..cbd32c3f5d --- /dev/null +++ b/cdk/secrets.ts @@ -0,0 +1,50 @@ +import { Construct } from "constructs"; +import * as cdk from 'aws-cdk-lib'; +import * as autoscaling from 'aws-cdk-lib/aws-autoscaling'; +import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; + +export default ( + self: Construct, + instanceRole: cdk.aws_iam.Role, + db: cdk.aws_rds.DatabaseInstance, + logGroup: cdk.aws_logs.LogGroup, + asgWeb: cdk.aws_autoscaling.AutoScalingGroup, + asgMathWorker: cdk.aws_autoscaling.AutoScalingGroup, + asgDelphiSmall: cdk.aws_autoscaling.AutoScalingGroup, + asgDelphiLarge: cdk.aws_autoscaling.AutoScalingGroup, + asgOllama: cdk.aws_autoscaling.AutoScalingGroup, + fileSystem: cdk.aws_efs.FileSystem +) => { + const webAppEnvVarsSecret = new secretsmanager.Secret(self, 'WebAppEnvVarsSecret', { + secretName: 'polis-web-app-env-vars', + description: 'Environment variables for the Polis web application', + }); + const clientAdminEnvVarsSecret = new secretsmanager.Secret(self, 'ClientAdminEnvVarsSecret', { + secretName: 'polis-client-admin-env-vars', + description: 'Environment variables for the Polis client-admin web application', + }); + + const clientReportEnvVarsSecret = new secretsmanager.Secret(self, 'ClientReportEnvVarsSecret', { + secretName: 'polis-client-report-env-vars', + description: 'Environment variables for the Polis client-report web application', + }); + webAppEnvVarsSecret.grantRead(instanceRole); + clientAdminEnvVarsSecret.grantRead(instanceRole); + 
clientReportEnvVarsSecret.grantRead(instanceRole); + + // Dependencies (Add ASGs to loops/lists) + const addDbDependency = (asg: autoscaling.IAutoScalingGroup) => asg.node.addDependency(db); + const addLogDependency = (asg: autoscaling.IAutoScalingGroup) => asg.node.addDependency(logGroup); + const addSecretDependency = (asg: autoscaling.IAutoScalingGroup) => asg.node.addDependency(webAppEnvVarsSecret); + + // Apply common dependencies to all ASGs + [asgWeb, asgMathWorker, asgDelphiSmall, asgDelphiLarge, asgOllama].forEach(asg => { + addLogDependency(asg); + addSecretDependency(asg); + // Only add DB dependency if the service needs it + if (asg !== asgOllama) { + addDbDependency(asg); + } + }); + asgOllama.node.addDependency(fileSystem); +} From 880cd438d2a9d48ff493968b6d0507d1f66fda8e Mon Sep 17 00:00:00 2001 From: tevko Date: Wed, 7 May 2025 22:08:01 -0500 Subject: [PATCH 04/42] cdk efs and permissions fixes --- cdk/config/amazon-cloudwatch-agent.json | 37 +++++++ cdk/launchTemplates.ts | 141 +++++++++++++++--------- cdk/lib/cdk-stack.ts | 23 +++- 3 files changed, 144 insertions(+), 57 deletions(-) create mode 100644 cdk/config/amazon-cloudwatch-agent.json diff --git a/cdk/config/amazon-cloudwatch-agent.json b/cdk/config/amazon-cloudwatch-agent.json new file mode 100644 index 0000000000..b94922a2ab --- /dev/null +++ b/cdk/config/amazon-cloudwatch-agent.json @@ -0,0 +1,37 @@ +{ + "agent": { "metrics_collection_interval": 60, "run_as_user": "root" }, + "metrics": { + "append_dimensions": { + "AutoScalingGroupName": "${aws:AutoScalingGroupName}", + "ImageId": "${aws:ImageId}", + "InstanceId": "${aws:InstanceId}", + "InstanceType": "${aws:InstanceType}" + }, + "metrics_collected": { + "nvidia_gpu": { + "measurement": [ + {"name": "utilization_gpu", "unit": "Percent"}, + {"name": "utilization_memory", "unit": "Percent"}, + {"name": "memory_total", "unit": "Megabytes"}, + {"name": "memory_used", "unit": "Megabytes"}, + {"name": "memory_free", "unit": "Megabytes"}, + 
{"name": "power_draw", "unit": "Watts"}, + {"name": "temperature_gpu", "unit": "Count"} + ], + "metrics_collection_interval": 60, + "nvidia_smi_path": "/usr/bin/nvidia-smi", + "metrics_aggregation_interval": 60, + "namespace": "OllamaMetrics" + }, + "disk": { + "measurement": [ "used_percent" ], + "metrics_collection_interval": 60, + "resources": [ "/" ] + }, + "mem": { + "measurement": [ "mem_used_percent" ], + "metrics_collection_interval": 60 + } + } + } +} \ No newline at end of file diff --git a/cdk/launchTemplates.ts b/cdk/launchTemplates.ts index 3645725271..a647487269 100644 --- a/cdk/launchTemplates.ts +++ b/cdk/launchTemplates.ts @@ -1,7 +1,7 @@ import * as ec2 from 'aws-cdk-lib/aws-ec2'; import * as cdk from 'aws-cdk-lib'; import { Construct } from 'constructs'; - +import * as s3_assets from 'aws-cdk-lib/aws-s3-assets'; export default ( self: Construct, @@ -60,69 +60,100 @@ export default ( 'exec 1>>/var/log/user-data.log 2>&1', 'echo "Finished User Data Execution at $(date)"', 'sudo mkdir -p /etc/docker', // Ensure /etc/docker directory exists - `sudo tee /etc/docker/daemon.json << EOF - { +`cat << EOF | sudo tee /etc/docker/daemon.json +{ "log-driver": "awslogs", "log-opts": { - "awslogs-group": "${CLOUDWATCH_LOG_GROUP_NAME}", - "awslogs-region": "${cdk.Stack.of(self).region}", - "awslogs-stream": "${service}" - } + "awslogs-group": "${CLOUDWATCH_LOG_GROUP_NAME}", + "awslogs-region": "${cdk.Stack.of(self).region}", + "awslogs-stream": "${service}" } - EOF`, - 'sudo systemctl restart docker', +} +EOF`, // Ensure EOF is on a new line with no leading/trailing spaces + `sudo chmod 644 /etc/docker/daemon.json`, // Good practice to set permissions + 'sudo systemctl restart docker', 'sudo systemctl status docker' ); return ld; }; const ollamaUsrData = ec2.UserData.forLinux(); - const cwAgentConfigPath = '/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json'; - ollamaUsrData.addCommands( - ...usrdata(logGroup.logGroupName, 
"ollama").render().split('\n').filter(line => line.trim() !== ''), - 'echo "Installing EFS utilities for Ollama..."', - 'sudo dnf install -y amazon-efs-utils nfs-utils', - 'echo "Starting Ollama specific setup..."', - 'echo "Configuring CloudWatch Agent for GPU metrics..."', - `sudo tee ${cwAgentConfigPath} << EOF - { - "agent": { "metrics_collection_interval": 60, "run_as_user": "root" }, - "metrics": { - "append_dimensions": { "AutoScalingGroupName": "\${aws:AutoScalingGroupName}", "ImageId": "\${aws:ImageId}", "InstanceId": "\${aws:InstanceId}", "InstanceType": "\${aws:InstanceType}" }, - "metrics_collected": { - "nvidia_gpu": { "measurement": [ {"name": "utilization_gpu", "unit": "Percent"}, {"name": "utilization_memory", "unit": "Percent"}, {"name": "memory_total", "unit": "Megabytes"}, {"name": "memory_used", "unit": "Megabytes"}, {"name": "memory_free", "unit": "Megabytes"}, {"name": "power_draw", "unit": "Watts"}, {"name": "temperature_gpu", "unit": "Count"} ], "metrics_collection_interval": 60, "nvidia_smi_path": "/usr/bin/nvidia-smi", "metrics_aggregation_interval": 60, "namespace": "${ollamaNamespace}" }, - "disk": { "measurement": [ "used_percent" ], "metrics_collection_interval": 60, "resources": [ "/" ] }, - "mem": { "measurement": [ "mem_used_percent" ], "metrics_collection_interval": 60 } - } - } - } - EOF`, - 'sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -c file:${cwAgentConfigPath} -s', - 'sudo systemctl enable amazon-cloudwatch-agent', - 'echo "CloudWatch Agent configured and started."', - 'echo "Mounting EFS filesystem ${fileSystem.fileSystemId}..."', - `sudo mkdir -p ${ollamaModelDirectory}`, - `sudo mount -t efs -o tls ${fileSystem.fileSystemId}:/ ${ollamaModelDirectory}`, - `echo "${fileSystem.fileSystemId}:/ ${ollamaModelDirectory} efs _netdev,tls 0 0" | sudo tee -a /etc/fstab`, - `sudo chown ec2-user:ec2-user ${ollamaModelDirectory}`, - 'echo "EFS mounted successfully."', - 'echo "Starting 
Ollama container..."', - 'sudo docker run -d --name ollama \\', - ' --gpus all \\', - ' -p 0.0.0.0:11434:11434 \\', - ` -v ${ollamaModelDirectory}:/root/.ollama \\`, - ' --restart unless-stopped \\', - ' ollama/ollama serve', - '(', - ' echo "Waiting for Ollama service (background task)..."', - ' sleep 60', - ' echo "Pulling default Ollama model (llama3.1:8b) in background..."', - ' sudo docker exec ollama ollama pull llama3.1:8b || echo "Failed to pull default model initially, may need manual pull later."', - ' echo "Background model pull task finished."', - ') &', - 'disown', - 'echo "Ollama setup script finished."' - ); +// Define path for CloudWatch Agent config +// --- CloudWatch Agent Config Asset --- +const cwAgentConfigAsset = new s3_assets.Asset(self, 'CwAgentConfigAsset', { + path: 'config/amazon-cloudwatch-agent.json' // Adjust path relative to cdk project root +}); + +// Grant the instance role read access to the asset bucket +cwAgentConfigAsset.grantRead(instanceRole); +const cwAgentConfigPath = '/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json'; +const cwAgentTempPath = '/tmp/amazon-cloudwatch-agent.json'; // Temporary download location +const efsDnsName = `${fileSystem.fileSystemId}.efs.${cdk.Stack.of(self).region}.${cdk.Stack.of(self).urlSuffix}`; + +// Add commands to the Ollama UserData +ollamaUsrData.addCommands( + // Spread the base user data commands + ...usrdata(logGroup.logGroupName, "ollama").render().split('\n').filter(line => line.trim() !== ''), + + // Install EFS utilities + 'echo "Installing EFS utilities for Ollama..."', + 'sudo dnf install -y amazon-efs-utils nfs-utils', + + // Start Ollama-specific setup + 'echo "Starting Ollama specific setup..."', + 'echo "Configuring CloudWatch Agent for GPU metrics..."', + + // --- Download CW Agent config from S3 Asset --- + `echo "Downloading CW Agent config from S3..."`, + // Use aws cli to copy from the S3 location provided by the asset object + // The instance needs NAT 
access (which it has) and S3 permissions (granted above) + `aws s3 cp ${cwAgentConfigAsset.s3ObjectUrl} ${cwAgentTempPath}`, + // Ensure target directory exists and move the file into place + `sudo mkdir -p $(dirname ${cwAgentConfigPath})`, + `sudo mv ${cwAgentTempPath} ${cwAgentConfigPath}`, + `sudo chmod 644 ${cwAgentConfigPath}`, + `sudo chown root:root ${cwAgentConfigPath}`, // Ensure root ownership + 'echo "CW Agent config downloaded and placed."', + + // --- Enable and Start the CloudWatch Agent Service --- + 'echo "Enabling CloudWatch Agent service..."', + 'sudo systemctl enable amazon-cloudwatch-agent', + 'echo "Starting CloudWatch Agent service..."', + 'sudo systemctl start amazon-cloudwatch-agent', + 'echo "CloudWatch Agent service started."', + + // --- Mount EFS using standard NFSv4.1 --- + // Use the manually constructed EFS DNS name + `echo "Mounting EFS filesystem using NFSv4.1 and DNS Name: ${efsDnsName}"...`, // Use variable here + `sudo mkdir -p ${ollamaModelDirectory}`, // Ensure mount point exists + // Standard NFS mount command with recommended options for EFS + `sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport ${efsDnsName}:/ ${ollamaModelDirectory}`, // Use variable here + // Update fstab to use NFS4 and the DNS name for persistence + `echo "${efsDnsName}:/ ${ollamaModelDirectory} nfs4 nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport,_netdev 0 0" | sudo tee -a /etc/fstab`, // Use variable here + // Set ownership for the application user + `sudo chown ec2-user:ec2-user ${ollamaModelDirectory}`, + 'echo "EFS mounted successfully."', + + // --- Start Ollama container --- + 'echo "Starting Ollama container..."', + 'sudo docker run -d --name ollama \\', + ' --gpus all \\', + ' -p 0.0.0.0:11434:11434 \\', + ` -v ${ollamaModelDirectory}:/root/.ollama \\`, + ' --restart unless-stopped \\', + ' ollama/ollama serve', + + // --- Pull initial model in background --- + '(', + ' 
echo "Waiting for Ollama service (background task)..."', + ' sleep 60', + ' echo "Pulling default Ollama model (llama3.1:8b) in background..."', + ' sudo docker exec ollama ollama pull llama3.1:8b || echo "Failed to pull default model initially, may need manual pull later."', + ' echo "Background model pull task finished."', + ') &', + 'disown', + 'echo "Ollama setup script finished."' +); // End of ollamaUsrData.addCommands // --- Launch Templates diff --git a/cdk/lib/cdk-stack.ts b/cdk/lib/cdk-stack.ts index 53a33da7cd..3aa2f3030e 100644 --- a/cdk/lib/cdk-stack.ts +++ b/cdk/lib/cdk-stack.ts @@ -3,6 +3,7 @@ import * as ec2 from 'aws-cdk-lib/aws-ec2'; import * as elbv2 from 'aws-cdk-lib/aws-elasticloadbalancingv2'; import * as logs from 'aws-cdk-lib/aws-logs'; import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; +import * as iam from 'aws-cdk-lib/aws-iam'; import * as sns from 'aws-cdk-lib/aws-sns'; import * as subscriptions from 'aws-cdk-lib/aws-sns-subscriptions'; import * as efs from 'aws-cdk-lib/aws-efs'; @@ -76,9 +77,9 @@ export class CdkStack extends cdk.Stack { // Allow Delphi -> Ollama ollamaSecurityGroup.addIngressRule( - delphiSecurityGroup, + ec2.Peer.ipv4(vpc.vpcCidrBlock), // Allows traffic from any private IP within the VPC ec2.Port.tcp(ollamaPort), - `Allow Delphi access on ${ollamaPort}` + `Allow NLB traffic on ${ollamaPort} from VPC` ); // Allow Ollama -> EFS efsSecurityGroup.addIngressRule( @@ -132,6 +133,23 @@ export class CdkStack extends cdk.Stack { const { dbSubnetGroup, db, dbSecretArnParam, dbHostParam, dbPortParam } = createDBResources(this, vpc); // --- EFS for Ollama Models + const fileSystemPolicyDocument = new iam.PolicyDocument({ + statements: [ + new iam.PolicyStatement({ + effect: iam.Effect.ALLOW, + actions: [ + "elasticfilesystem:ClientMount", + "elasticfilesystem:ClientWrite", + "elasticfilesystem:ClientRootAccess", + ], + principals: [new iam.AnyPrincipal()], + resources: ["*"], // Applies to the filesystem this 
policy is attached to + conditions: { + Bool: { "elasticfilesystem:AccessedViaMountTarget": "true" } + } + }) + ] + }); const fileSystem = new efs.FileSystem(this, 'OllamaModelFileSystem', { vpc, encrypted: true, @@ -141,6 +159,7 @@ export class CdkStack extends cdk.Stack { removalPolicy: cdk.RemovalPolicy.RETAIN, securityGroup: efsSecurityGroup, vpcSubnets: { subnetGroupName: 'PrivateWithEgress' }, + fileSystemPolicy: fileSystemPolicyDocument, }); // launch templates From ef99be6ea1d76fa39a051f1aaf8d157dd8331792 Mon Sep 17 00:00:00 2001 From: tevko Date: Thu, 8 May 2025 18:23:43 -0500 Subject: [PATCH 05/42] docker fixes, add block storage --- cdk/launchTemplates.ts | 18 ++++++++++ docker-compose.yml | 3 -- scripts/after_install.sh | 74 +++++++++++++++++++++------------------- 3 files changed, 56 insertions(+), 39 deletions(-) diff --git a/cdk/launchTemplates.ts b/cdk/launchTemplates.ts index a647487269..eca0ba1bed 100644 --- a/cdk/launchTemplates.ts +++ b/cdk/launchTemplates.ts @@ -181,6 +181,15 @@ ollamaUsrData.addCommands( securityGroup: delphiSecurityGroup, keyPair: delphiSmallKeyPair, role: instanceRole, + blockDevices: [ + { + deviceName: '/dev/xvda', + volume: ec2.BlockDeviceVolume.ebs(50, { + volumeType: ec2.EbsDeviceVolumeType.GP3, + deleteOnTermination: true, + }), + }, + ], }); // Delphi Large Launch Template const delphiLargeLaunchTemplate = new ec2.LaunchTemplate(self, 'DelphiLargeLaunchTemplate', { @@ -190,6 +199,15 @@ ollamaUsrData.addCommands( securityGroup: delphiSecurityGroup, keyPair: delphiLargeKeyPair, role: instanceRole, + blockDevices: [ + { + deviceName: '/dev/xvda', + volume: ec2.BlockDeviceVolume.ebs(100, { + volumeType: ec2.EbsDeviceVolumeType.GP3, + deleteOnTermination: true, + }), + }, + ], }); // Ollama Launch Template const ollamaLaunchTemplate = new ec2.LaunchTemplate(self, 'OllamaLaunchTemplate', { diff --git a/docker-compose.yml b/docker-compose.yml index 4621d2ddaf..c0f50bd1b2 100644 --- a/docker-compose.yml +++ 
b/docker-compose.yml @@ -103,9 +103,6 @@ services: - DATABASE_USER=${POSTGRES_USER:-christian} - DATABASE_PASSWORD=${POSTGRES_PASSWORD:-polis123} - DATABASE_SSL_MODE=${DATABASE_SSL_MODE:-disable} - depends_on: - - dynamodb - - ollama networks: - "polis-net" extra_hosts: diff --git a/scripts/after_install.sh b/scripts/after_install.sh index cd11404499..9849482e37 100644 --- a/scripts/after_install.sh +++ b/scripts/after_install.sh @@ -2,6 +2,9 @@ set -e set -x +# MINIMAL CHANGE: Ensure parent directory exists before trying to cd into it +sudo mkdir -p /opt/polis + cd /opt/polis sudo yum install -y git GIT_REPO_URL="https://github.com/compdemocracy/polis.git" @@ -9,9 +12,11 @@ GIT_BRANCH="stable" if [ ! -d "polis" ]; then echo "Cloning public repository from $GIT_REPO_URL, branch: $GIT_BRANCH (HTTPS - Public Repo)" - git clone --depth 1 -b "$GIT_BRANCH" "$GIT_REPO_URL" polis + # MINIMAL CHANGE: Add sudo to the clone command + sudo git clone --depth 1 -b "$GIT_BRANCH" "$GIT_REPO_URL" polis else echo "Polis directory already exists, skipping cloning, pulling instead" + # No change needed here if 'else' block is entered, as subsequent commands already use sudo fi cd polis @@ -22,7 +27,7 @@ sudo git reset --hard origin/$GIT_BRANCH && sudo git pull # --- Fetch pre-configured .env from SSM Parameter Store --- PRE_CONFIGURED_ENV=$(aws secretsmanager get-secret-value --secret-id polis-web-app-env-vars --query SecretString --output text --region us-east-1) - +# Original check if [ -z "$PRE_CONFIGURED_ENV" ]; then echo "Error: Could not retrieve pre-configured .env from SSM Parameter polis-web-app-env-vars" exit 1 @@ -32,10 +37,11 @@ echo "Retrieved pre-configured .env from SSM Parameter" # --- Create/Overwrite .env file with pre-configured content --- echo "Creating/Overwriting .env file with pre-configured content from SSM" -echo "$PRE_CONFIGURED_ENV" > .env +echo "$PRE_CONFIGURED_ENV" | sudo tee .env > /dev/null echo ".env file created/overwritten with pre-configured 
content." # --- Database Configuration and Environment Variables from Secrets Manager --- +# Original logic and commands preserved # 1. Get Secret ARN from SSM Parameter SECRET_ARN=$(aws ssm get-parameter --name /polis/db-secret-arn --query 'Parameter.Value' --output text --region us-east-1) @@ -67,35 +73,37 @@ DB_PORT=$(aws ssm get-parameter --name "/polis/db-port" --query 'Parameter.Value # --- Construct DATABASE_URL using values from Secrets Manager AND SSM Parameters --- DATABASE_URL="postgres://${DB_USERNAME}:${DB_PASSWORD}@${DB_HOST}:${DB_PORT}/${DB_NAME}" -echo "Constructed DATABASE_URL: $DATABASE_URL" +echo "Constructed DATABASE_URL: $DATABASE_URL" # Original logging # --- Append DATABASE_URL to the end of .env --- echo "Appending DATABASE_URL to .env" -echo "DATABASE_URL=$DATABASE_URL" >> .env +printf "\nDATABASE_URL=%s\n" "$DATABASE_URL" | sudo tee -a .env > /dev/null +# Original service detection SERVICE_FROM_FILE=$(cat /tmp/service_type.txt) echo "DEBUG: Service type read from /tmp/service_type.txt: [$SERVICE_FROM_FILE]" +# Original Docker cleanup/start logic echo "Stopping and removing existing Docker containers..." -/usr/local/bin/docker-compose down || true # Stop all services, ignore errors if none running -docker rm -f $(docker ps -aq) || true # Forcefully remove all containers, ignore errors +sudo /usr/local/bin/docker-compose down || true +sudo docker rm -f $(docker ps -aq) || true echo "Docker containers stopped and removed." 
-yes | docker system prune -a --filter "until=72h" +yes | sudo docker system prune -a --filter "until=72h" echo "Docker cache cleared" -/usr/local/bin/docker-compose config +sudo /usr/local/bin/docker-compose config if [ "$SERVICE_FROM_FILE" == "server" ]; then echo "Starting docker-compose up for 'server' and 'nginx-proxy' services" - /usr/local/bin/docker-compose up -d server nginx-proxy --build --force-recreate + sudo /usr/local/bin/docker-compose up -d server nginx-proxy --build --force-recreate elif [ "$SERVICE_FROM_FILE" == "math" ]; then echo "Starting docker-compose up for 'math' service" - /usr/local/bin/docker-compose up -d math --build --force-recreate + sudo /usr/local/bin/docker-compose up -d math --build --force-recreate elif [ "$SERVICE_FROM_FILE" == "delphi" ]; then echo "Starting docker-compose up for 'delphi' service" echo "Fetching Ollama Service URL for Delphi..." - OLLAMA_URL=$(aws secretsmanager get-secret-value --secret-id /polis/ollama-service-url --query SecretString --output text --region us-east-1) + OLLAMA_URL=$(aws secretsmanager get-secret-value --secret-id /polis/ollama-service-url --query SecretString --output text --region us-east-1) if [ -z "$OLLAMA_URL" ]; then echo "Error: Could not retrieve Ollama Service URL from Secrets Manager: /polis/ollama-service-url" @@ -103,18 +111,14 @@ elif [ "$SERVICE_FROM_FILE" == "delphi" ]; then fi echo "Retrieved Ollama Service URL." - # Append OLLAMA_HOST to .env echo "Appending OLLAMA_HOST to .env for Delphi" - # Using printf for slightly safer appending in case of special characters in URL - printf "\nOLLAMA_HOST=%s\n" "$OLLAMA_URL" >> .env + printf "\nOLLAMA_HOST=%s\n" "$OLLAMA_URL" | sudo tee -a .env > /dev/null echo "OLLAMA_HOST appended." 
- - # Check if instance size file exists + if [ -f "/tmp/instance_size.txt" ]; then INSTANCE_SIZE=$(cat /tmp/instance_size.txt) echo "Instance size detected: $INSTANCE_SIZE" - - # Set environment variables based on instance size + if [ "$INSTANCE_SIZE" == "small" ]; then echo "Configuring delphi for small instance" export DELPHI_INSTANCE_TYPE="small" @@ -137,13 +141,12 @@ elif [ "$SERVICE_FROM_FILE" == "delphi" ]; then export DELPHI_CONTAINER_MEMORY="4g" export DELPHI_CONTAINER_CPUS="1" fi - - # Add environment variables to .env file - echo "DELPHI_INSTANCE_TYPE=$DELPHI_INSTANCE_TYPE" >> .env - echo "DELPHI_MAX_WORKERS=$DELPHI_MAX_WORKERS" >> .env - echo "DELPHI_WORKER_MEMORY=$DELPHI_WORKER_MEMORY" >> .env - echo "DELPHI_CONTAINER_MEMORY=$DELPHI_CONTAINER_MEMORY" >> .env - echo "DELPHI_CONTAINER_CPUS=$DELPHI_CONTAINER_CPUS" >> .env + + printf "\nDELPHI_INSTANCE_TYPE=%s\n" "$DELPHI_INSTANCE_TYPE" | sudo tee -a .env > /dev/null + printf "DELPHI_MAX_WORKERS=%s\n" "$DELPHI_MAX_WORKERS" | sudo tee -a .env > /dev/null + printf "DELPHI_WORKER_MEMORY=%s\n" "$DELPHI_WORKER_MEMORY" | sudo tee -a .env > /dev/null + printf "DELPHI_CONTAINER_MEMORY=%s\n" "$DELPHI_CONTAINER_MEMORY" | sudo tee -a .env > /dev/null + printf "DELPHI_CONTAINER_CPUS=%s\n" "$DELPHI_CONTAINER_CPUS" | sudo tee -a .env > /dev/null else echo "Instance size file not found, using default configuration" export DELPHI_INSTANCE_TYPE="default" @@ -151,17 +154,16 @@ elif [ "$SERVICE_FROM_FILE" == "delphi" ]; then export DELPHI_WORKER_MEMORY="1g" export DELPHI_CONTAINER_MEMORY="4g" export DELPHI_CONTAINER_CPUS="1" - - echo "DELPHI_INSTANCE_TYPE=$DELPHI_INSTANCE_TYPE" >> .env - echo "DELPHI_MAX_WORKERS=$DELPHI_MAX_WORKERS" >> .env - echo "DELPHI_WORKER_MEMORY=$DELPHI_WORKER_MEMORY" >> .env - echo "DELPHI_CONTAINER_MEMORY=$DELPHI_CONTAINER_MEMORY" >> .env - echo "DELPHI_CONTAINER_CPUS=$DELPHI_CONTAINER_CPUS" >> .env + + printf "\nDELPHI_INSTANCE_TYPE=%s\n" "$DELPHI_INSTANCE_TYPE" | sudo tee -a .env > /dev/null + 
printf "DELPHI_MAX_WORKERS=%s\n" "$DELPHI_MAX_WORKERS" | sudo tee -a .env > /dev/null + printf "DELPHI_WORKER_MEMORY=%s\n" "$DELPHI_WORKER_MEMORY" | sudo tee -a .env > /dev/null + printf "DELPHI_CONTAINER_MEMORY=%s\n" "$DELPHI_CONTAINER_MEMORY" | sudo tee -a .env > /dev/null + printf "DELPHI_CONTAINER_CPUS=%s\n" "$DELPHI_CONTAINER_CPUS" | sudo tee -a .env > /dev/null fi - - # Start delphi service - /usr/local/bin/docker-compose up -d delphi --build --force-recreate + + sudo /usr/local/bin/docker-compose up -d delphi --build --force-recreate else echo "Error: Unknown service type: [$SERVICE_FROM_FILE]. Starting all services (default docker-compose up -d)" - /usr/local/bin/docker-compose up -d --build --force-recreate # Fallback + sudo /usr/local/bin/docker-compose up -d --build --force-recreate fi \ No newline at end of file From 501e027bf878a27d186f18e3bb59f82627f89c61 Mon Sep 17 00:00:00 2001 From: tevko Date: Thu, 8 May 2025 19:32:18 -0500 Subject: [PATCH 06/42] remove default dynamo endpoint --- delphi/create_dynamodb_tables.py | 6 +++--- delphi/polismath/database/dynamodb.py | 2 +- delphi/polismath/run_math_pipeline.py | 2 +- delphi/scripts/delphi_cli.py | 2 +- delphi/scripts/job_poller.py | 6 +++--- delphi/start_poller.sh | 2 +- .../umap_narrative/500_generate_embedding_umap_cluster.py | 5 ----- delphi/umap_narrative/600_generate_llm_topic_names.py | 6 +----- delphi/umap_narrative/700_datamapplot_for_layer.py | 5 +---- delphi/umap_narrative/702_consensus_divisive_datamapplot.py | 2 +- delphi/umap_narrative/polismath_commentgraph/cli.py | 3 --- delphi/umap_narrative/run_pipeline.py | 5 ----- 12 files changed, 13 insertions(+), 33 deletions(-) diff --git a/delphi/create_dynamodb_tables.py b/delphi/create_dynamodb_tables.py index 174ebcf474..32cc3a3d4b 100644 --- a/delphi/create_dynamodb_tables.py +++ b/delphi/create_dynamodb_tables.py @@ -9,7 +9,7 @@ python create_dynamodb_tables.py [options] Options: - --endpoint-url ENDPOINT_URL DynamoDB endpoint URL (default: 
http://localhost:8000) + --endpoint-url ENDPOINT_URL DynamoDB endpoint URL --region REGION AWS region (default: us-west-2) --delete-existing Delete existing tables before creating new ones --evoc-only Create only EVōC tables @@ -399,7 +399,7 @@ def create_tables(endpoint_url=None, region_name='us-west-2', aws_profile=None): # Use the environment variable if endpoint_url is not provided if endpoint_url is None: - endpoint_url = os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000') + endpoint_url = os.environ.get('DYNAMODB_ENDPOINT') logger.info(f"Creating tables with DynamoDB endpoint: {endpoint_url}") """ @@ -471,7 +471,7 @@ def main(): # Parse arguments parser = argparse.ArgumentParser(description='Create DynamoDB tables for Delphi system') parser.add_argument('--endpoint-url', type=str, default=None, - help='DynamoDB endpoint URL (default: use DYNAMODB_ENDPOINT env var or http://localhost:8000)') + help='DynamoDB endpoint URL (default: use DYNAMODB_ENDPOINT env var)') parser.add_argument('--region', type=str, default='us-west-2', help='AWS region (default: us-west-2)') parser.add_argument('--delete-existing', action='store_true', diff --git a/delphi/polismath/database/dynamodb.py b/delphi/polismath/database/dynamodb.py index 86cb093b41..cb7b07cad9 100644 --- a/delphi/polismath/database/dynamodb.py +++ b/delphi/polismath/database/dynamodb.py @@ -22,7 +22,7 @@ class DynamoDBClient: """Client for interacting with DynamoDB for Polis math data.""" def __init__(self, - endpoint_url: Optional[str] = 'http://localhost:8000', + endpoint_url: Optional[str] = None, region_name: str = 'us-west-2', aws_access_key_id: Optional[str] = None, aws_secret_access_key: Optional[str] = None): diff --git a/delphi/polismath/run_math_pipeline.py b/delphi/polismath/run_math_pipeline.py index 584d0f6b8f..f4518b39be 100644 --- a/delphi/polismath/run_math_pipeline.py +++ b/delphi/polismath/run_math_pipeline.py @@ -330,7 +330,7 @@ def main(): logger.info(f"[{time.time() - 
start_time:.2f}s] Initializing DynamoDB client...") from polismath.database.dynamodb import DynamoDBClient # Use environment variables or sensible defaults for local/test - endpoint_url = os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000') + endpoint_url = os.environ.get('DYNAMODB_ENDPOINT') region_name = os.environ.get('AWS_REGION', 'us-west-2') aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID', 'dummy') aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY', 'dummy') diff --git a/delphi/scripts/delphi_cli.py b/delphi/scripts/delphi_cli.py index f3b5ecdd32..10d855e799 100755 --- a/delphi/scripts/delphi_cli.py +++ b/delphi/scripts/delphi_cli.py @@ -54,7 +54,7 @@ def setup_dynamodb(endpoint_url=None, region='us-west-2'): """Set up DynamoDB connection.""" # Use environment variable if endpoint not provided if endpoint_url is None: - endpoint_url = os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000') + endpoint_url = os.environ.get('DYNAMODB_ENDPOINT') # For local development if 'localhost' in endpoint_url or 'host.docker.internal' in endpoint_url: diff --git a/delphi/scripts/job_poller.py b/delphi/scripts/job_poller.py index d9b581f98c..42c686792d 100755 --- a/delphi/scripts/job_poller.py +++ b/delphi/scripts/job_poller.py @@ -9,7 +9,7 @@ python job_poller.py [options] Options: - --endpoint-url=URL DynamoDB endpoint URL (default: http://localhost:8000) + --endpoint-url=URL DynamoDB endpoint URL --region=REGION AWS region (default: us-west-2) --interval=SECONDS Polling interval in seconds (default: 10) --max-workers=N Maximum number of concurrent workers (default: 1) @@ -43,7 +43,7 @@ class JobProcessor: def __init__(self, endpoint_url=None, region='us-west-2'): """Initialize the job processor.""" - self.endpoint_url = endpoint_url or os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000') + self.endpoint_url = endpoint_url or os.environ.get('DYNAMODB_ENDPOINT') self.region = region self.worker_id = str(uuid.uuid4()) @@ -587,7 +587,7 
@@ def main(): signal.signal(signal.SIGTERM, signal_handler) logger.info("Starting Delphi Job Poller Service") - logger.info(f"Endpoint URL: {args.endpoint_url or os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000')}") + logger.info(f"Endpoint URL: {args.endpoint_url or os.environ.get('DYNAMODB_ENDPOINT')}") logger.info(f"Region: {args.region}") logger.info(f"Polling interval: {args.interval} seconds") logger.info(f"Maximum workers: {args.max_workers}") diff --git a/delphi/start_poller.sh b/delphi/start_poller.sh index 47e9aa86c4..d31e56fb3b 100755 --- a/delphi/start_poller.sh +++ b/delphi/start_poller.sh @@ -8,7 +8,7 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" POLLER_SCRIPT="$SCRIPT_DIR/scripts/job_poller.py" # Default options -ENDPOINT_URL="${DYNAMODB_ENDPOINT:-http://localhost:8000}" +ENDPOINT_URL="${DYNAMODB_ENDPOINT}" POLL_INTERVAL="${POLL_INTERVAL:-10}" LOG_LEVEL="${LOG_LEVEL:-INFO}" MAX_WORKERS="${MAX_WORKERS:-1}" diff --git a/delphi/umap_narrative/500_generate_embedding_umap_cluster.py b/delphi/umap_narrative/500_generate_embedding_umap_cluster.py index a3b67857f9..b82836620c 100755 --- a/delphi/umap_narrative/500_generate_embedding_umap_cluster.py +++ b/delphi/umap_narrative/500_generate_embedding_umap_cluster.py @@ -68,11 +68,6 @@ def setup_environment(db_host=None, db_port=None, db_name=None, db_user=None, db logger.info(f"- DATABASE: {os.environ.get('DATABASE_NAME')}") logger.info(f"- USER: {os.environ.get('DATABASE_USER')}") - # DynamoDB settings (for local DynamoDB) - # Only set if not already in environment - if not os.environ.get('DYNAMODB_ENDPOINT'): - os.environ['DYNAMODB_ENDPOINT'] = 'http://localhost:8000' - # Log the endpoint being used endpoint = os.environ.get('DYNAMODB_ENDPOINT') logger.info(f"Using DynamoDB endpoint: {endpoint}") diff --git a/delphi/umap_narrative/600_generate_llm_topic_names.py b/delphi/umap_narrative/600_generate_llm_topic_names.py index 9c937cbb2a..8cd6ebfb14 100755 --- 
a/delphi/umap_narrative/600_generate_llm_topic_names.py +++ b/delphi/umap_narrative/600_generate_llm_topic_names.py @@ -47,10 +47,6 @@ def setup_environment(dynamo_endpoint=None): if dynamo_endpoint: os.environ['DYNAMODB_ENDPOINT'] = dynamo_endpoint elif not os.environ.get('DYNAMODB_ENDPOINT'): - # Only set if not already in environment - if not os.environ.get('DYNAMODB_ENDPOINT'): - os.environ['DYNAMODB_ENDPOINT'] = 'http://localhost:8000' - # Log the endpoint being used endpoint = os.environ.get('DYNAMODB_ENDPOINT') logger.info(f"Using DynamoDB endpoint: {endpoint}") @@ -773,7 +769,7 @@ def main(): parser.add_argument('--output_dir', type=str, default="polis_data", help='Base directory for output files (default: polis_data)') parser.add_argument('--dynamo_endpoint', type=str, default=None, - help='DynamoDB endpoint URL (default: http://localhost:8000)') + help='DynamoDB endpoint URL') parser.add_argument('--start_cluster', type=int, default=None, help='Starting cluster ID for processing a range (inclusive)') parser.add_argument('--end_cluster', type=int, default=None, diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index accb68c86e..74c7a1c913 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -240,9 +240,6 @@ def setup_environment(db_host=None, db_port=None, db_name=None, db_user=None, db # DynamoDB settings (for local DynamoDB) if not os.environ.get('DYNAMODB_ENDPOINT'): - # Only set if not already in environment - if not os.environ.get('DYNAMODB_ENDPOINT'): - os.environ['DYNAMODB_ENDPOINT'] = 'http://localhost:8000' # Log the endpoint being used endpoint = os.environ.get('DYNAMODB_ENDPOINT') @@ -939,7 +936,7 @@ def main(): parser.add_argument('--output_dir', type=str, default=None, help='Directory to save the visualization') parser.add_argument('--dynamo_endpoint', type=str, default=None, - help='DynamoDB endpoint URL 
(default: http://localhost:8000)') + help='DynamoDB endpoint URL') args = parser.parse_args() diff --git a/delphi/umap_narrative/702_consensus_divisive_datamapplot.py b/delphi/umap_narrative/702_consensus_divisive_datamapplot.py index f97f0b7ec9..2f285d4bef 100755 --- a/delphi/umap_narrative/702_consensus_divisive_datamapplot.py +++ b/delphi/umap_narrative/702_consensus_divisive_datamapplot.py @@ -31,7 +31,7 @@ } DYNAMODB_CONFIG = { - 'endpoint_url': os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000'), + 'endpoint_url': os.environ.get('DYNAMODB_ENDPOINT'), 'region': os.environ.get('AWS_REGION', 'us-west-2'), 'access_key': os.environ.get('AWS_ACCESS_KEY_ID', 'fakeMyKeyId'), 'secret_key': os.environ.get('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey') diff --git a/delphi/umap_narrative/polismath_commentgraph/cli.py b/delphi/umap_narrative/polismath_commentgraph/cli.py index 763119fa7c..61dad431a2 100644 --- a/delphi/umap_narrative/polismath_commentgraph/cli.py +++ b/delphi/umap_narrative/polismath_commentgraph/cli.py @@ -305,9 +305,6 @@ def lambda_local(args): os.environ['DATABASE_PASSWORD'] = args.pg_password # Set up DynamoDB environment variables for local testing - # Only set if not already in environment - if not os.environ.get('DYNAMODB_ENDPOINT'): - os.environ['DYNAMODB_ENDPOINT'] = 'http://localhost:8000' # Log the endpoint being used logger.info(f"Using DynamoDB endpoint: {os.environ.get('DYNAMODB_ENDPOINT')}") diff --git a/delphi/umap_narrative/run_pipeline.py b/delphi/umap_narrative/run_pipeline.py index 76bce6e35d..b6aeb2a001 100755 --- a/delphi/umap_narrative/run_pipeline.py +++ b/delphi/umap_narrative/run_pipeline.py @@ -73,11 +73,6 @@ def setup_environment(db_host=None, db_port=None, db_name=None, db_user=None, db # DynamoDB settings (for local DynamoDB) # Don't override if already set in environment dynamo_endpoint = os.environ.get('DYNAMODB_ENDPOINT') - if not dynamo_endpoint: - os.environ['DYNAMODB_ENDPOINT'] = 'http://localhost:8000' - 
logger.info("Setting default DynamoDB endpoint: http://localhost:8000") - else: - logger.info(f"Using existing DynamoDB endpoint: {dynamo_endpoint}") # Always set these credentials for local development if not already set if not os.environ.get('AWS_ACCESS_KEY_ID'): From 6b9fe93ed524f1ca4a4a0b0f082de0a7edb931cc Mon Sep 17 00:00:00 2001 From: tevko Date: Thu, 8 May 2025 20:06:16 -0500 Subject: [PATCH 07/42] endpoint check fix --- delphi/scripts/delphi_cli.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/delphi/scripts/delphi_cli.py b/delphi/scripts/delphi_cli.py index 10d855e799..f4ea331080 100755 --- a/delphi/scripts/delphi_cli.py +++ b/delphi/scripts/delphi_cli.py @@ -57,9 +57,10 @@ def setup_dynamodb(endpoint_url=None, region='us-west-2'): endpoint_url = os.environ.get('DYNAMODB_ENDPOINT') # For local development - if 'localhost' in endpoint_url or 'host.docker.internal' in endpoint_url: - os.environ.setdefault('AWS_ACCESS_KEY_ID', 'fakeMyKeyId') - os.environ.setdefault('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey') + if endpoint_url: + if 'localhost' in endpoint_url or 'host.docker.internal' in endpoint_url: + os.environ.setdefault('AWS_ACCESS_KEY_ID', 'fakeMyKeyId') + os.environ.setdefault('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey') return boto3.resource('dynamodb', endpoint_url=endpoint_url, region_name=region) From ee0ade3da51b017d6db3b070e9713921ec1dbeb9 Mon Sep 17 00:00:00 2001 From: tevko Date: Fri, 9 May 2025 10:25:23 -0500 Subject: [PATCH 08/42] use us-east-1 --- cdk/iamRoles.ts | 26 +++ delphi/CLAUDE.md | 6 +- delphi/create_dynamodb_tables.py | 8 +- delphi/docker-compose.yml.bak | 2 +- delphi/polismath/database/dynamodb.py | 2 +- delphi/polismath/run_math_pipeline.py | 2 +- delphi/scripts/delphi_cli.py | 4 +- delphi/scripts/job_poller.py | 6 +- delphi/tests/test_postgres_real_data.py | 2 +- .../500_generate_embedding_umap_cluster.py | 6 +- .../600_generate_llm_topic_names.py | 2 +- .../700_datamapplot_for_layer.py | 2 +- 
.../701_static_datamapplot_for_layer.py | 2 +- .../702_CONSENSUS_DIVISIVE_README.md | 2 +- .../702_consensus_divisive_datamapplot.py | 4 +- .../800_report_topic_clusters.py | 4 +- .../polismath_commentgraph/DEPLOYMENT.md | 22 +- .../polismath_commentgraph/README.md | 14 +- .../polismath_commentgraph/cli.py | 6 +- .../polismath_commentgraph/utils/storage.py | 2 +- delphi/umap_narrative/run_pipeline.py | 4 +- example.env | 2 +- server/src/routes/delphi/jobs.ts | 200 ++++++++++-------- 23 files changed, 193 insertions(+), 137 deletions(-) diff --git a/cdk/iamRoles.ts b/cdk/iamRoles.ts index 5d628275a7..a1e080c812 100644 --- a/cdk/iamRoles.ts +++ b/cdk/iamRoles.ts @@ -1,5 +1,6 @@ import * as iam from 'aws-cdk-lib/aws-iam'; import { Construct } from 'constructs'; +import * as cdk from 'aws-cdk-lib'; export default (self: Construct) => { const instanceRole = new iam.Role(self, 'InstanceRole', { @@ -25,5 +26,30 @@ export default (self: Construct) => { iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSCodeDeployRole'), ], }); + const delphiJobQueueTableArn = cdk.Arn.format({ + service: 'dynamodb', + region: 'us-east-1', + account: cdk.Stack.of(self).account, + resource: 'table', + resourceName: 'Delphi_JobQueue', + }, cdk.Stack.of(self)); + + const delphiJobQueueTableIndexesArn = `${delphiJobQueueTableArn}/index/*`; + + instanceRole.addToPolicy(new iam.PolicyStatement({ + effect: iam.Effect.ALLOW, + actions: [ + "dynamodb:PutItem", + "dynamodb:GetItem", + "dynamodb:UpdateItem", + "dynamodb:DeleteItem", + "dynamodb:Query", + "dynamodb:Scan" + ], + resources: [ + delphiJobQueueTableArn, + delphiJobQueueTableIndexesArn + ], + })); return { instanceRole, codeDeployRole } } \ No newline at end of file diff --git a/delphi/CLAUDE.md b/delphi/CLAUDE.md index 8b352bed0a..7544d91443 100644 --- a/delphi/CLAUDE.md +++ b/delphi/CLAUDE.md @@ -84,7 +84,7 @@ Always use the commands above to determine the most substantial conversation whe ```bash docker exec polis-dev-delphi-1 
python -c " import boto3, json - dynamodb = boto3.resource('dynamodb', endpoint_url='http://dynamodb:8000', region_name='us-west-2') + dynamodb = boto3.resource('dynamodb', endpoint_url='http://dynamodb:8000', region_name='us-east-1') table = dynamodb.Table('Delphi_JobQueue') job_id = '' # Replace with your job ID job = table.get_item(Key={'job_id': job_id})['Item'] @@ -98,7 +98,7 @@ Always use the commands above to determine the most substantial conversation whe ```bash docker exec polis-dev-delphi-1 python -c " import boto3, json - dynamodb = boto3.resource('dynamodb', endpoint_url='http://dynamodb:8000', region_name='us-west-2') + dynamodb = boto3.resource('dynamodb', endpoint_url='http://dynamodb:8000', region_name='us-east-1') table = dynamodb.Table('Delphi_JobQueue') job_id = '' # Replace with your job ID job = table.get_item(Key={'job_id': job_id})['Item'] @@ -138,7 +138,7 @@ When connecting to DynamoDB from the Delphi container, use these settings: DYNAMODB_ENDPOINT=http://host.docker.internal:8000 AWS_ACCESS_KEY_ID=dummy AWS_SECRET_ACCESS_KEY=dummy -AWS_REGION=us-west-2 +AWS_REGION=us-east-1 ``` These are configured in run_delphi.sh for all DynamoDB operations. 
diff --git a/delphi/create_dynamodb_tables.py b/delphi/create_dynamodb_tables.py index 32cc3a3d4b..01b51f8bcc 100644 --- a/delphi/create_dynamodb_tables.py +++ b/delphi/create_dynamodb_tables.py @@ -10,7 +10,7 @@ Options: --endpoint-url ENDPOINT_URL DynamoDB endpoint URL - --region REGION AWS region (default: us-west-2) + --region REGION AWS region (default: us-east-1) --delete-existing Delete existing tables before creating new ones --evoc-only Create only EVōC tables --polismath-only Create only Polis math tables @@ -394,7 +394,7 @@ def _create_tables(dynamodb, tables, existing_tables): return created_tables -def create_tables(endpoint_url=None, region_name='us-west-2', +def create_tables(endpoint_url=None, region_name='us-east-1', delete_existing=False, evoc_only=False, polismath_only=False, aws_profile=None): # Use the environment variable if endpoint_url is not provided @@ -472,8 +472,8 @@ def main(): parser = argparse.ArgumentParser(description='Create DynamoDB tables for Delphi system') parser.add_argument('--endpoint-url', type=str, default=None, help='DynamoDB endpoint URL (default: use DYNAMODB_ENDPOINT env var)') - parser.add_argument('--region', type=str, default='us-west-2', - help='AWS region (default: us-west-2)') + parser.add_argument('--region', type=str, default='us-east-1', + help='AWS region (default: us-east-1)') parser.add_argument('--delete-existing', action='store_true', help='Delete existing tables before creating new ones') parser.add_argument('--evoc-only', action='store_true', diff --git a/delphi/docker-compose.yml.bak b/delphi/docker-compose.yml.bak index ac6512efda..e04b62fb47 100644 --- a/delphi/docker-compose.yml.bak +++ b/delphi/docker-compose.yml.bak @@ -56,7 +56,7 @@ services: - DYNAMODB_ENDPOINT=http://host.docker.internal:8000 - AWS_ACCESS_KEY_ID=dummy - AWS_SECRET_ACCESS_KEY=dummy - - AWS_REGION=us-west-2 + - AWS_REGION=us-east-1 # Ollama settings - configurable via environment variables - 
OLLAMA_MODEL=${OLLAMA_MODEL:-llama3.1:8b} - OLLAMA_HOST=http://host.docker.internal:11434 # Connect to Ollama on host network diff --git a/delphi/polismath/database/dynamodb.py b/delphi/polismath/database/dynamodb.py index cb7b07cad9..a52480aedd 100644 --- a/delphi/polismath/database/dynamodb.py +++ b/delphi/polismath/database/dynamodb.py @@ -23,7 +23,7 @@ class DynamoDBClient: def __init__(self, endpoint_url: Optional[str] = None, - region_name: str = 'us-west-2', + region_name: str = 'us-east-1', aws_access_key_id: Optional[str] = None, aws_secret_access_key: Optional[str] = None): """ diff --git a/delphi/polismath/run_math_pipeline.py b/delphi/polismath/run_math_pipeline.py index f4518b39be..13e411dbae 100644 --- a/delphi/polismath/run_math_pipeline.py +++ b/delphi/polismath/run_math_pipeline.py @@ -331,7 +331,7 @@ def main(): from polismath.database.dynamodb import DynamoDBClient # Use environment variables or sensible defaults for local/test endpoint_url = os.environ.get('DYNAMODB_ENDPOINT') - region_name = os.environ.get('AWS_REGION', 'us-west-2') + region_name = os.environ.get('AWS_REGION', 'us-east-1') aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID', 'dummy') aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY', 'dummy') dynamodb_client = DynamoDBClient( diff --git a/delphi/scripts/delphi_cli.py b/delphi/scripts/delphi_cli.py index f4ea331080..2c880bbfc3 100755 --- a/delphi/scripts/delphi_cli.py +++ b/delphi/scripts/delphi_cli.py @@ -50,7 +50,7 @@ def create_elegant_header(): console.print(header) console.print() -def setup_dynamodb(endpoint_url=None, region='us-west-2'): +def setup_dynamodb(endpoint_url=None, region='us-east-1'): """Set up DynamoDB connection.""" # Use environment variable if endpoint not provided if endpoint_url is None: @@ -777,7 +777,7 @@ def main(): # Common options parser.add_argument("--endpoint-url", help="DynamoDB endpoint URL") - parser.add_argument("--region", default="us-west-2", help="AWS region") + 
parser.add_argument("--region", default="us-east-1", help="AWS region") # Interactive mode is the default when no arguments are provided parser.add_argument("--interactive", action="store_true", diff --git a/delphi/scripts/job_poller.py b/delphi/scripts/job_poller.py index 42c686792d..76c13e40ea 100755 --- a/delphi/scripts/job_poller.py +++ b/delphi/scripts/job_poller.py @@ -10,7 +10,7 @@ Options: --endpoint-url=URL DynamoDB endpoint URL - --region=REGION AWS region (default: us-west-2) + --region=REGION AWS region (default: us-east-1) --interval=SECONDS Polling interval in seconds (default: 10) --max-workers=N Maximum number of concurrent workers (default: 1) --log-level=LEVEL Logging level (default: INFO) @@ -41,7 +41,7 @@ class JobProcessor: """Process jobs from the Delphi_JobQueue.""" - def __init__(self, endpoint_url=None, region='us-west-2'): + def __init__(self, endpoint_url=None, region='us-east-1'): """Initialize the job processor.""" self.endpoint_url = endpoint_url or os.environ.get('DYNAMODB_ENDPOINT') self.region = region @@ -567,7 +567,7 @@ def main(): parser = argparse.ArgumentParser(description='Delphi Job Poller Service') parser.add_argument('--endpoint-url', type=str, default=None, help='DynamoDB endpoint URL') - parser.add_argument('--region', type=str, default='us-west-2', + parser.add_argument('--region', type=str, default='us-east-1', help='AWS region') parser.add_argument('--interval', type=int, default=10, help='Polling interval in seconds') diff --git a/delphi/tests/test_postgres_real_data.py b/delphi/tests/test_postgres_real_data.py index 73515e9f78..ba573b5646 100644 --- a/delphi/tests/test_postgres_real_data.py +++ b/delphi/tests/test_postgres_real_data.py @@ -40,7 +40,7 @@ def init_dynamodb(): # Create and initialize the client client = DynamoDBClient( endpoint_url='http://localhost:8000', - region_name='us-west-2', + region_name='us-east-1', aws_access_key_id='dummy', aws_secret_access_key='dummy' ) diff --git 
a/delphi/umap_narrative/500_generate_embedding_umap_cluster.py b/delphi/umap_narrative/500_generate_embedding_umap_cluster.py index b82836620c..0127494099 100755 --- a/delphi/umap_narrative/500_generate_embedding_umap_cluster.py +++ b/delphi/umap_narrative/500_generate_embedding_umap_cluster.py @@ -78,7 +78,7 @@ def setup_environment(db_host=None, db_port=None, db_name=None, db_user=None, db if not os.environ.get('AWS_SECRET_ACCESS_KEY'): os.environ['AWS_SECRET_ACCESS_KEY'] = 'fakeSecretAccessKey' if not os.environ.get('AWS_REGION') and not os.environ.get('AWS_DEFAULT_REGION'): - os.environ['AWS_DEFAULT_REGION'] = 'us-west-2' + os.environ['AWS_DEFAULT_REGION'] = 'us-east-1' def fetch_conversation_data(zid): """ @@ -373,7 +373,7 @@ def process_conversation(zid, export_dynamo=True): dynamo_storage = None if export_dynamo: dynamo_storage = DynamoDBStorage( - region_name='us-west-2', + region_name='us-east-1', endpoint_url=os.environ.get('DYNAMODB_ENDPOINT') ) @@ -513,7 +513,7 @@ def main(): # Process with mock data (store in DynamoDB if requested) if not args.no_dynamo: dynamo_storage = DynamoDBStorage( - region_name='us-west-2', + region_name='us-east-1', endpoint_url=os.environ.get('DYNAMODB_ENDPOINT') ) diff --git a/delphi/umap_narrative/600_generate_llm_topic_names.py b/delphi/umap_narrative/600_generate_llm_topic_names.py index 8cd6ebfb14..20a4cfcbe5 100755 --- a/delphi/umap_narrative/600_generate_llm_topic_names.py +++ b/delphi/umap_narrative/600_generate_llm_topic_names.py @@ -59,7 +59,7 @@ def setup_environment(dynamo_endpoint=None): os.environ['AWS_SECRET_ACCESS_KEY'] = 'fakeSecretAccessKey' if not os.environ.get('AWS_DEFAULT_REGION'): - os.environ['AWS_DEFAULT_REGION'] = 'us-west-2' + os.environ['AWS_DEFAULT_REGION'] = 'us-east-1' logger.info(f"DynamoDB endpoint: {os.environ.get('DYNAMODB_ENDPOINT')}") logger.info(f"AWS region: {os.environ.get('AWS_DEFAULT_REGION')}") diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py 
b/delphi/umap_narrative/700_datamapplot_for_layer.py index 74c7a1c913..7827c41b52 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -249,7 +249,7 @@ def setup_environment(db_host=None, db_port=None, db_name=None, db_user=None, db if not os.environ.get('AWS_SECRET_ACCESS_KEY'): os.environ['AWS_SECRET_ACCESS_KEY'] = 'fakeSecretAccessKey' if not os.environ.get('AWS_DEFAULT_REGION'): - os.environ['AWS_DEFAULT_REGION'] = 'us-west-2' + os.environ['AWS_DEFAULT_REGION'] = 'us-east-1' # S3 settings if not os.environ.get('AWS_S3_ENDPOINT'): diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index ad5a1b85a5..6c50b358e5 100755 --- a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ -35,7 +35,7 @@ class DynamoDBStorage: def __init__(self, endpoint_url=None): self.endpoint_url = endpoint_url or os.environ.get("DYNAMODB_ENDPOINT", "http://dynamodb-local:8000") - self.region = os.environ.get("AWS_REGION", "us-west-2") + self.region = os.environ.get("AWS_REGION", "us-east-1") self.dynamodb = boto3.resource('dynamodb', endpoint_url=self.endpoint_url, region_name=self.region) # Define table names using the new Delphi_ naming scheme diff --git a/delphi/umap_narrative/702_CONSENSUS_DIVISIVE_README.md b/delphi/umap_narrative/702_CONSENSUS_DIVISIVE_README.md index 2b00c01abb..eaf94b8b31 100644 --- a/delphi/umap_narrative/702_CONSENSUS_DIVISIVE_README.md +++ b/delphi/umap_narrative/702_CONSENSUS_DIVISIVE_README.md @@ -44,7 +44,7 @@ export DATABASE_SSL_MODE="disable" # DynamoDB configuration export DYNAMODB_ENDPOINT="http://localhost:8000" -export AWS_REGION="us-west-2" +export AWS_REGION="us-east-1" export AWS_ACCESS_KEY_ID="fakeMyKeyId" export AWS_SECRET_ACCESS_KEY="fakeSecretAccessKey" diff --git a/delphi/umap_narrative/702_consensus_divisive_datamapplot.py 
b/delphi/umap_narrative/702_consensus_divisive_datamapplot.py index 2f285d4bef..039dc7d414 100755 --- a/delphi/umap_narrative/702_consensus_divisive_datamapplot.py +++ b/delphi/umap_narrative/702_consensus_divisive_datamapplot.py @@ -32,7 +32,7 @@ DYNAMODB_CONFIG = { 'endpoint_url': os.environ.get('DYNAMODB_ENDPOINT'), - 'region': os.environ.get('AWS_REGION', 'us-west-2'), + 'region': os.environ.get('AWS_REGION', 'us-east-1'), 'access_key': os.environ.get('AWS_ACCESS_KEY_ID', 'fakeMyKeyId'), 'secret_key': os.environ.get('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey') } @@ -103,7 +103,7 @@ def load_data_from_dynamodb(zid, layer_num=0): endpoint_url = os.environ.get('DYNAMODB_ENDPOINT', 'http://dynamodb-local:8000') dynamodb = boto3.resource('dynamodb', endpoint_url=endpoint_url, - region_name=os.environ.get('AWS_REGION', 'us-west-2'), + region_name=os.environ.get('AWS_REGION', 'us-east-1'), aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', 'fakeMyKeyId'), aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey')) diff --git a/delphi/umap_narrative/800_report_topic_clusters.py b/delphi/umap_narrative/800_report_topic_clusters.py index 58d6c1fad4..5c28519bf0 100755 --- a/delphi/umap_narrative/800_report_topic_clusters.py +++ b/delphi/umap_narrative/800_report_topic_clusters.py @@ -66,7 +66,7 @@ def __init__(self, table_name="Delphi_NarrativeReports", disable_cache=False): self.dynamodb = boto3.resource( 'dynamodb', endpoint_url=os.environ.get('DYNAMODB_ENDPOINT'), - region_name=os.environ.get('AWS_DEFAULT_REGION', 'us-west-2'), + region_name=os.environ.get('AWS_DEFAULT_REGION', 'us-east-1'), aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', 'fakeMyKeyId'), aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey') ) @@ -615,7 +615,7 @@ def load_comment_clusters_from_dynamodb(self, conversation_id): dynamodb = boto3.resource( 'dynamodb', endpoint_url=os.environ.get('DYNAMODB_ENDPOINT', 
'http://host.docker.internal:8000'), - region_name=os.environ.get('AWS_REGION', 'us-west-2'), + region_name=os.environ.get('AWS_REGION', 'us-east-1'), aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', 'fakeMyKeyId'), aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey') ) diff --git a/delphi/umap_narrative/polismath_commentgraph/DEPLOYMENT.md b/delphi/umap_narrative/polismath_commentgraph/DEPLOYMENT.md index 0a685a93a8..a6af337caf 100644 --- a/delphi/umap_narrative/polismath_commentgraph/DEPLOYMENT.md +++ b/delphi/umap_narrative/polismath_commentgraph/DEPLOYMENT.md @@ -101,13 +101,13 @@ docker build -t polis-comment-graph-lambda . ```bash # Get the ECR login -aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 123456789012.dkr.ecr.us-west-2.amazonaws.com +aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 123456789012.dkr.ecr.us-east-1.amazonaws.com # Tag the image -docker tag polis-comment-graph-lambda:latest 123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lambda:latest +docker tag polis-comment-graph-lambda:latest 123456789012.dkr.ecr.us-east-1.amazonaws.com/polis-comment-graph-lambda:latest # Push the image -docker push 123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lambda:latest +docker push 123456789012.dkr.ecr.us-east-1.amazonaws.com/polis-comment-graph-lambda:latest ``` ## Creating the Lambda Function @@ -118,9 +118,9 @@ docker push 123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lam aws lambda create-function \ --function-name polis-comment-graph-lambda \ --package-type Image \ - --code ImageUri=123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lambda:latest \ + --code ImageUri=123456789012.dkr.ecr.us-east-1.amazonaws.com/polis-comment-graph-lambda:latest \ --role arn:aws:iam::123456789012:role/lambda-execution-role \ - --environment 
"Variables={DATABASE_HOST=polis-db.cluster-xyz.us-west-2.rds.amazonaws.com,DATABASE_NAME=polis,DATABASE_USER=polis}" \ + --environment "Variables={DATABASE_HOST=polis-db.cluster-xyz.us-east-1.rds.amazonaws.com,DATABASE_NAME=polis,DATABASE_USER=polis}" \ --timeout 300 \ --memory-size 1024 ``` @@ -141,9 +141,9 @@ aws sns create-topic --name polis-new-comment-topic # Create a subscription for the Lambda function aws sns subscribe \ - --topic-arn arn:aws:sns:us-west-2:123456789012:polis-new-comment-topic \ + --topic-arn arn:aws:sns:us-east-1:123456789012:polis-new-comment-topic \ --protocol lambda \ - --notification-endpoint arn:aws:lambda:us-west-2:123456789012:function:polis-comment-graph-lambda + --notification-endpoint arn:aws:lambda:us-east-1:123456789012:function:polis-comment-graph-lambda # Grant permission for SNS to invoke the Lambda aws lambda add-permission \ @@ -151,7 +151,7 @@ aws lambda add-permission \ --statement-id sns-new-comment \ --action lambda:InvokeFunction \ --principal sns.amazonaws.com \ - --source-arn arn:aws:sns:us-west-2:123456789012:polis-new-comment-topic + --source-arn arn:aws:sns:us-east-1:123456789012:polis-new-comment-topic ``` ### 2. 
CloudWatch Scheduled Event for Batch Processing @@ -165,7 +165,7 @@ aws events put-rule \ # Add the Lambda function as a target aws events put-targets \ --rule polis-daily-processing \ - --targets "Id"="1","Arn"="arn:aws:lambda:us-west-2:123456789012:function:polis-comment-graph-lambda","Input"="{\"event_type\":\"process_conversation\",\"conversation_id\":\"all\"}" + --targets "Id"="1","Arn"="arn:aws:lambda:us-east-1:123456789012:function:polis-comment-graph-lambda","Input"="{\"event_type\":\"process_conversation\",\"conversation_id\":\"all\"}" # Grant permission for CloudWatch Events to invoke the Lambda aws lambda add-permission \ @@ -173,7 +173,7 @@ aws lambda add-permission \ --statement-id cloudwatch-daily \ --action lambda:InvokeFunction \ --principal events.amazonaws.com \ - --source-arn arn:aws:events:us-west-2:123456789012:rule/polis-daily-processing + --source-arn arn:aws:events:us-east-1:123456789012:rule/polis-daily-processing ``` ## Testing the Deployment @@ -207,7 +207,7 @@ When you need to update the Lambda function: ```bash aws lambda update-function-code \ --function-name polis-comment-graph-lambda \ - --image-uri 123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lambda:latest + --image-uri 123456789012.dkr.ecr.us-east-1.amazonaws.com/polis-comment-graph-lambda:latest ``` ## Technical Notes diff --git a/delphi/umap_narrative/polismath_commentgraph/README.md b/delphi/umap_narrative/polismath_commentgraph/README.md index 8e301a78e8..1f2a14a50a 100644 --- a/delphi/umap_narrative/polismath_commentgraph/README.md +++ b/delphi/umap_narrative/polismath_commentgraph/README.md @@ -96,9 +96,9 @@ The service follows a serverless architecture: 2. 
Push to ECR: ```bash - aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 123456789012.dkr.ecr.us-west-2.amazonaws.com - docker tag polis-comment-graph-lambda:latest 123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lambda:latest - docker push 123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lambda:latest + aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 123456789012.dkr.ecr.us-east-1.amazonaws.com + docker tag polis-comment-graph-lambda:latest 123456789012.dkr.ecr.us-east-1.amazonaws.com/polis-comment-graph-lambda:latest + docker push 123456789012.dkr.ecr.us-east-1.amazonaws.com/polis-comment-graph-lambda:latest ``` 3. Create Lambda function using the AWS CLI: @@ -106,9 +106,9 @@ The service follows a serverless architecture: aws lambda create-function \ --function-name polis-comment-graph-lambda \ --package-type Image \ - --code ImageUri=123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lambda:latest \ + --code ImageUri=123456789012.dkr.ecr.us-east-1.amazonaws.com/polis-comment-graph-lambda:latest \ --role arn:aws:iam::123456789012:role/lambda-execution-role \ - --environment "Variables={DATABASE_HOST=polis-db.cluster-xyz.us-west-2.rds.amazonaws.com,DATABASE_NAME=polis,DATABASE_USER=polis}" \ + --environment "Variables={DATABASE_HOST=polis-db.cluster-xyz.us-east-1.rds.amazonaws.com,DATABASE_NAME=polis,DATABASE_USER=polis}" \ --timeout 300 \ --memory-size 1024 ``` @@ -121,7 +121,7 @@ The service follows a serverless architecture: - `DATABASE_USER`: PostgreSQL username - `DATABASE_PASSWORD`: PostgreSQL password - `DYNAMODB_ENDPOINT`: Optional DynamoDB endpoint for local development -- `AWS_REGION`: AWS region for DynamoDB and other services (default: us-west-2) +- `AWS_REGION`: AWS region for DynamoDB and other services (default: us-east-1) - `MODEL_CACHE_DIR`: Directory to cache SentenceTransformer models (default: /tmp/model_cache) 
- `LOG_LEVEL`: Logging level (default: INFO) @@ -178,7 +178,7 @@ Then create the required tables: python -c " import boto3 dynamodb = boto3.resource('dynamodb', endpoint_url='http://localhost:8000', - region_name='us-west-2', + region_name='us-east-1', aws_access_key_id='fakeMyKeyId', aws_secret_access_key='fakeSecretAccessKey') diff --git a/delphi/umap_narrative/polismath_commentgraph/cli.py b/delphi/umap_narrative/polismath_commentgraph/cli.py index 61dad431a2..3b4679cbaa 100644 --- a/delphi/umap_narrative/polismath_commentgraph/cli.py +++ b/delphi/umap_narrative/polismath_commentgraph/cli.py @@ -289,7 +289,7 @@ def lambda_local(args): context = type('obj', (object,), { 'function_name': 'lambda_local', 'aws_request_id': '12345', - 'invoked_function_arn': 'arn:aws:lambda:us-west-2:123456789012:function:lambda_local' + 'invoked_function_arn': 'arn:aws:lambda:us-east-1:123456789012:function:lambda_local' }) # Override environment variables if provided @@ -310,13 +310,13 @@ def lambda_local(args): logger.info(f"Using DynamoDB endpoint: {os.environ.get('DYNAMODB_ENDPOINT')}") os.environ['AWS_ACCESS_KEY_ID'] = 'fakeMyKeyId' os.environ['AWS_SECRET_ACCESS_KEY'] = 'fakeSecretAccessKey' - os.environ['AWS_DEFAULT_REGION'] = 'us-west-2' + os.environ['AWS_DEFAULT_REGION'] = 'us-east-1' # Reinitialize the DynamoDB storage with direct credentials from .utils.storage import DynamoDBStorage global dynamo_storage dynamo_storage = DynamoDBStorage( - region_name='us-west-2', + region_name='us-east-1', endpoint_url=os.environ.get('DYNAMODB_ENDPOINT') ) diff --git a/delphi/umap_narrative/polismath_commentgraph/utils/storage.py b/delphi/umap_narrative/polismath_commentgraph/utils/storage.py index 237f44b11e..187dc9afaa 100644 --- a/delphi/umap_narrative/polismath_commentgraph/utils/storage.py +++ b/delphi/umap_narrative/polismath_commentgraph/utils/storage.py @@ -379,7 +379,7 @@ def __init__(self, region_name: str = None, endpoint_url: str = None): endpoint_url: Optional endpoint URL 
for local DynamoDB """ # Get settings from environment variables with fallbacks - self.region_name = region_name or os.environ.get('AWS_REGION', 'us-west-2') + self.region_name = region_name or os.environ.get('AWS_REGION', 'us-east-1') self.endpoint_url = endpoint_url or os.environ.get('DYNAMODB_ENDPOINT') # Get AWS credentials from environment variables diff --git a/delphi/umap_narrative/run_pipeline.py b/delphi/umap_narrative/run_pipeline.py index b6aeb2a001..3ab7b7f687 100755 --- a/delphi/umap_narrative/run_pipeline.py +++ b/delphi/umap_narrative/run_pipeline.py @@ -82,7 +82,7 @@ def setup_environment(db_host=None, db_port=None, db_name=None, db_user=None, db os.environ['AWS_SECRET_ACCESS_KEY'] = 'fakeSecretAccessKey' if not os.environ.get('AWS_DEFAULT_REGION') and not os.environ.get('AWS_REGION'): - os.environ['AWS_DEFAULT_REGION'] = 'us-west-2' + os.environ['AWS_DEFAULT_REGION'] = 'us-east-1' def fetch_conversation_data(zid): """ @@ -1149,7 +1149,7 @@ def process_conversation(zid, export_dynamo=True, use_ollama=False): logger.info(f"Using DynamoDB endpoint from environment: {endpoint_url}") dynamo_storage = DynamoDBStorage( - region_name='us-west-2', + region_name='us-east-1', endpoint_url=endpoint_url ) diff --git a/example.env b/example.env index 62d3307a3c..7bd6dd9de9 100644 --- a/example.env +++ b/example.env @@ -127,7 +127,7 @@ MAX_REPORT_CACHE_DURATION= ###### DYNAMODB ###### # When using local DynamoDB, this should be http://dynamodb:8000. -# In production, set DYNAMODB_ENDPOINT to the cloud endpoint (e.g. https://dynamodb.us-west-2.amazonaws.com), +# In production, set DYNAMODB_ENDPOINT to the cloud endpoint (e.g. https://dynamodb.us-east-1.amazonaws.com), # or simply leave it blank. 
DYNAMODB_ENDPOINT=http://dynamodb:8000 diff --git a/server/src/routes/delphi/jobs.ts b/server/src/routes/delphi/jobs.ts index ade3883e5a..ede6fb1464 100644 --- a/server/src/routes/delphi/jobs.ts +++ b/server/src/routes/delphi/jobs.ts @@ -1,19 +1,19 @@ -import { v4 as uuidv4 } from 'uuid'; -import { Request, Response } from 'express'; -import { DynamoDB } from '@aws-sdk/client-dynamodb'; -import { DynamoDBDocument } from '@aws-sdk/lib-dynamodb'; -import logger from '../../utils/logger'; -import { getZidFromReport } from '../../utils/parameter'; +import { v4 as uuidv4 } from "uuid"; +import { Request, Response } from "express"; +import { DynamoDB } from "@aws-sdk/client-dynamodb"; +import { DynamoDBDocument } from "@aws-sdk/lib-dynamodb"; +import logger from "../../utils/logger"; +import { getZidFromReport } from "../../utils/parameter"; // Initialize DynamoDB client const dynamoDbClient = new DynamoDB({ // Use environment variables for endpoint and region, or the docker service name - endpoint: process.env.DYNAMODB_ENDPOINT || 'http://dynamodb:8000', - region: process.env.AWS_REGION || 'us-west-2', + endpoint: process.env.DYNAMODB_ENDPOINT || "http://dynamodb:8000", + region: process.env.AWS_REGION || "us-east-1", // For local development or Docker container credentials: { - accessKeyId: process.env.AWS_ACCESS_KEY_ID || 'DUMMYIDEXAMPLE', - secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || 'DUMMYEXAMPLEKEY', + accessKeyId: process.env.AWS_ACCESS_KEY_ID || "DUMMYIDEXAMPLE", + secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || "DUMMYEXAMPLEKEY", }, }); @@ -21,56 +21,64 @@ const dynamoDbClient = new DynamoDB({ const docClient = DynamoDBDocument.from(dynamoDbClient); // Handler for POST /api/v3/delphi/jobs - Create a new Delphi job -export async function handle_POST_delphi_jobs(req: Request, res: Response): Promise { +export async function handle_POST_delphi_jobs( + req: Request, + res: Response +): Promise { try { - logger.info(`Creating Delphi job with 
parameters: ${JSON.stringify(req.body)}`); - + logger.info( + `Creating Delphi job with parameters: ${JSON.stringify(req.body)}` + ); + // Extract parameters from request body - const { + const { report_id, conversation_id, - job_type = 'FULL_PIPELINE', + job_type = "FULL_PIPELINE", priority = 50, max_votes, batch_size, - model = 'claude-3-7-sonnet-20250219', - include_topics = true + model = "claude-3-7-sonnet-20250219", + include_topics = true, } = req.body; - + // Validate required parameters - if ((!report_id && !conversation_id)) { + if (!report_id && !conversation_id) { res.status(400).json({ - status: 'error', - error: 'Missing required parameter: either report_id or conversation_id must be provided', + status: "error", + error: + "Missing required parameter: either report_id or conversation_id must be provided", }); return; } - + // Convert report_id to conversation_id if needed // Assuming there's a mapping function or service to do this - const zid = conversation_id || (report_id ? await getConversationIdFromReportId(report_id) : null); - + const zid = + conversation_id || + (report_id ? 
await getConversationIdFromReportId(report_id) : null); + if (!zid) { res.status(400).json({ - status: 'error', - error: 'Could not determine conversation ID', + status: "error", + error: "Could not determine conversation ID", }); return; } - + // Generate a unique job ID const job_id = uuidv4(); - + // Current timestamp in ISO format const now = new Date().toISOString(); - + // Build job configuration based on the Python CLI implementation const jobConfig: any = {}; - - if (job_type === 'FULL_PIPELINE') { + + if (job_type === "FULL_PIPELINE") { // Full pipeline configs const stages = []; - + // PCA stage const pcaConfig: any = {}; if (max_votes) { @@ -80,120 +88,138 @@ export async function handle_POST_delphi_jobs(req: Request, res: Response): Prom pcaConfig.batch_size = parseInt(batch_size, 10); } stages.push({ stage: "PCA", config: pcaConfig }); - + // UMAP stage stages.push({ stage: "UMAP", config: { n_neighbors: 15, - min_dist: 0.1 - } + min_dist: 0.1, + }, }); - + // Report stage stages.push({ stage: "REPORT", config: { model: model, - include_topics: include_topics - } + include_topics: include_topics, + }, }); - + // Add stages and visualizations to job config jobConfig.stages = stages; jobConfig.visualizations = ["basic", "enhanced", "multilayer"]; } - + // Create job item with version number for optimistic locking const jobItem = { - job_id: job_id, // Primary key - status: 'PENDING', // Secondary index key - created_at: now, // Secondary index key + job_id: job_id, // Primary key + status: "PENDING", // Secondary index key + created_at: now, // Secondary index key updated_at: now, - version: 1, // Version for optimistic locking - started_at: "", // Using empty strings for nullable fields + version: 1, // Version for optimistic locking + started_at: "", // Using empty strings for nullable fields completed_at: "", - worker_id: "none", // Non-empty placeholder for index + worker_id: "none", // Non-empty placeholder for index job_type: job_type, priority: 
parseInt(String(priority), 10), - conversation_id: String(zid), // Using conversation_id + conversation_id: String(zid), // Using conversation_id retry_count: 0, max_retries: 3, - timeout_seconds: 7200, // 2 hours default timeout + timeout_seconds: 7200, // 2 hours default timeout job_config: JSON.stringify(jobConfig), job_results: JSON.stringify({}), logs: JSON.stringify({ entries: [ { timestamp: now, - level: 'INFO', - message: `Job created for conversation ${zid}` - } + level: "INFO", + message: `Job created for conversation ${zid}`, + }, ], - log_location: "" + log_location: "", }), - created_by: 'api' + created_by: "api", }; - + // Put item in DynamoDB try { - logger.info(`Putting job item in DynamoDB: ${JSON.stringify({ - TableName: 'Delphi_JobQueue', - Item: { job_id: jobItem.job_id, conversation_id: jobItem.conversation_id } - })}`); - + logger.info( + `Putting job item in DynamoDB: ${JSON.stringify({ + TableName: "Delphi_JobQueue", + Item: { + job_id: jobItem.job_id, + conversation_id: jobItem.conversation_id, + }, + })}` + ); + await docClient.put({ - TableName: 'Delphi_JobQueue', - Item: jobItem + TableName: "Delphi_JobQueue", + Item: jobItem, }); - + // Return success with job ID res.json({ - status: 'success', + status: "success", job_id: job_id, - conversation_id: zid + conversation_id: zid, }); } catch (dbError) { - logger.error(`Error writing to DynamoDB: ${dbError instanceof Error ? dbError.message : dbError}`); + logger.error( + `Error writing to DynamoDB: ${ + dbError instanceof Error ? dbError.message : dbError + }` + ); throw dbError; // Let the outer catch handle it } - } catch (error) { - logger.error(`Error creating Delphi job: ${error instanceof Error ? error.message : error}`); + logger.error( + `Error creating Delphi job: ${ + error instanceof Error ? 
error.message : error + }` + ); // Log more details for better debugging if (error instanceof Error) { logger.error(`Error name: ${error.name}`); logger.error(`Error stack: ${error.stack}`); } - + // Return detailed error for debugging res.status(500).json({ - status: 'error', - error: error instanceof Error ? error.message : 'Unknown error', - code: error instanceof Error && 'code' in error ? (error as any).code : undefined, - details: process.env.NODE_ENV === 'development' ? String(error) : undefined + status: "error", + error: error instanceof Error ? error.message : "Unknown error", + code: + error instanceof Error && "code" in error + ? (error as any).code + : undefined, + details: + process.env.NODE_ENV === "development" ? String(error) : undefined, }); } } // Helper function to get conversation_id from report_id -async function getConversationIdFromReportId(report_id: string): Promise { +async function getConversationIdFromReportId( + report_id: string +): Promise { try { logger.info(`Getting conversation_id for report_id: ${report_id}`); - + // Use the existing util function if available, otherwise implement here - if (typeof getZidFromReport === 'function') { + if (typeof getZidFromReport === "function") { const zid = await getZidFromReport(report_id); // Ensure we return a string or null to match the function signature return zid !== null ? 
zid.toString() : null; } - + // Strip the 'r' prefix if it exists (e.g., r123abc -> 123abc) let normalized_report_id = report_id; - if (report_id.startsWith('r') && report_id.length > 1) { + if (report_id.startsWith("r") && report_id.length > 1) { normalized_report_id = report_id.substring(1); } - + // In this case, we need to query the zid from the zinvites table // The report_id is the same as the zinvite const query = ` @@ -201,25 +227,29 @@ async function getConversationIdFromReportId(report_id: string): Promise Date: Fri, 9 May 2025 10:58:57 -0500 Subject: [PATCH 09/42] remove more hardcoded values --- delphi/Dockerfile | 16 +- delphi/run_delphi.sh | 4 +- docker-compose.yml | 2 +- example.env | 3 +- server/src/routes/delphi.ts | 400 +++++++++++---------- server/src/routes/delphi/jobs.ts | 2 +- server/src/routes/delphi/topics.ts | 400 +++++++++++---------- server/src/routes/delphi/visualizations.ts | 192 +++++----- 8 files changed, 561 insertions(+), 458 deletions(-) diff --git a/delphi/Dockerfile b/delphi/Dockerfile index 738514777d..f9e8055eb9 100644 --- a/delphi/Dockerfile +++ b/delphi/Dockerfile @@ -62,4 +62,18 @@ RUN chmod +x start_poller.sh run_delphi.sh setup_ollama.sh # 1. Initialize DynamoDB tables # 2. Set up the Ollama model based on environment variables # 3. Start the job poller with a 2-second polling interval -CMD ["bash", "-c", "echo 'Setting up DynamoDB tables...' && python create_dynamodb_tables.py --endpoint-url=${DYNAMODB_ENDPOINT:-http://dynamodb:8000} && echo 'Setting up MinIO bucket...' && python setup_minio.py && echo 'Setting up Ollama model...' && ./setup_ollama.sh && echo 'Starting job poller...' && POLL_INTERVAL=2 ./start_poller.sh"] + +CMD ["bash", "-c", "\ + if [ -n \"${DYNAMODB_ENDPOINT}\" ]; then \ + echo 'Local/Dev environment detected (DELPHI_DEV_OR_PROD=${DELPHI_DEV_OR_PROD}), running setup scripts...'; \ + echo 'Setting up DynamoDB tables...' && python create_dynamodb_tables.py && \ + echo 'Setting up MinIO bucket...' 
&& python setup_minio.py && \ + echo 'Setting up Ollama model...' && ./setup_ollama.sh; \ + else \ + echo 'Production environment detected (DELPHI_DEV_OR_PROD=${DELPHI_DEV_OR_PROD}), skipping local setup scripts.'; \ + fi && \ + echo 'Setting up DynamoDB tables... && \ + python create_dynamodb_tables.py && \ + echo 'Starting job poller...' && \ + POLL_INTERVAL=${POLL_INTERVAL:-2} ./start_poller.sh \ +"] diff --git a/delphi/run_delphi.sh b/delphi/run_delphi.sh index 02598c58ac..6d580eeaa0 100755 --- a/delphi/run_delphi.sh +++ b/delphi/run_delphi.sh @@ -71,9 +71,9 @@ echo -e "${YELLOW}Using Ollama model: $MODEL${NC}" # Set up environment for the pipeline export PYTHONPATH="/app:$PYTHONPATH" -export OLLAMA_HOST=${OLLAMA_HOST:-http://ollama:11434} +export OLLAMA_HOST=${OLLAMA_HOST} export OLLAMA_MODEL=$MODEL -export DYNAMODB_ENDPOINT=${DYNAMODB_ENDPOINT:-http://dynamodb:8000} +export DYNAMODB_ENDPOINT=${DYNAMODB_ENDPOINT} # For testing with limited votes if [ -n "$MAX_VOTES" ]; then diff --git a/docker-compose.yml b/docker-compose.yml index c0f50bd1b2..b587a5e9e0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -80,7 +80,7 @@ services: - LOG_LEVEL=${DELPHI_LOG_LEVEL:-INFO} - DELPHI_DEV_OR_PROD=${DELPHI_DEV_OR_PROD:-prod} # DynamoDB connection settings for local mode (will be overridden in prod) - - DYNAMODB_ENDPOINT=${DYNAMODB_ENDPOINT:-http://dynamodb:8000} + - DYNAMODB_ENDPOINT=${DYNAMODB_ENDPOINT} - POLL_INTERVAL=${POLL_INTERVAL:-2} # Ollama connection - OLLAMA_HOST=${OLLAMA_HOST:-http://ollama:11434} diff --git a/example.env b/example.env index 7bd6dd9de9..a53875d002 100644 --- a/example.env +++ b/example.env @@ -127,8 +127,7 @@ MAX_REPORT_CACHE_DURATION= ###### DYNAMODB ###### # When using local DynamoDB, this should be http://dynamodb:8000. -# In production, set DYNAMODB_ENDPOINT to the cloud endpoint (e.g. https://dynamodb.us-east-1.amazonaws.com), -# or simply leave it blank. +# In production leave it blank. 
DYNAMODB_ENDPOINT=http://dynamodb:8000 ###### S3 STORAGE ###### diff --git a/server/src/routes/delphi.ts b/server/src/routes/delphi.ts index 8f61b32ac8..bbf4fe233f 100644 --- a/server/src/routes/delphi.ts +++ b/server/src/routes/delphi.ts @@ -9,47 +9,47 @@ import { getZidFromReport } from "../utils/parameter"; */ export function handle_GET_delphi(req: Request, res: Response) { logger.info("Delphi API request received"); - + // Get report_id from request const report_id = req.query.report_id as string; - + if (!report_id) { - return res.json({ - status: "error", - message: "report_id is required" + return res.json({ + status: "error", + message: "report_id is required", }); } // Extract zid from report_id - we need this to query DynamoDB getZidFromReport(report_id) - .then(zid => { + .then((zid) => { if (!zid) { return res.json({ status: "error", message: "Could not find conversation for report_id", - report_id: report_id + report_id: report_id, }); } const conversation_id = zid.toString(); - logger.info(`Fetching Delphi LLM topics for conversation_id: ${conversation_id}`); + logger.info( + `Fetching Delphi LLM topics for conversation_id: ${conversation_id}` + ); // Force using local DynamoDB by hardcoding the endpoint const dynamoDBConfig: any = { region: process.env.AWS_REGION || "us-east-1", - // Force to use the local DynamoDB endpoint - endpoint: "http://dynamodb:8000" }; - + // Log what we're using logger.info(`Forcing local DynamoDB connection: Endpoint: ${dynamoDBConfig.endpoint} Region: ${dynamoDBConfig.region}`); - + // For local DynamoDB, use dummy credentials dynamoDBConfig.credentials = { - accessKeyId: 'DUMMYIDEXAMPLE', - secretAccessKey: 'DUMMYEXAMPLEKEY' + accessKeyId: "DUMMYIDEXAMPLE", + secretAccessKey: "DUMMYEXAMPLEKEY", }; // Log connection config for debugging @@ -57,7 +57,9 @@ export function handle_GET_delphi(req: Request, res: Response) { Region: ${dynamoDBConfig.region} Endpoint: ${dynamoDBConfig.endpoint || "Default AWS endpoint"} 
AWS_ACCESS_KEY_ID: ${process.env.AWS_ACCESS_KEY_ID ? "Set" : "Not set"} - AWS_SECRET_ACCESS_KEY: ${process.env.AWS_SECRET_ACCESS_KEY ? "Set" : "Not set"} + AWS_SECRET_ACCESS_KEY: ${ + process.env.AWS_SECRET_ACCESS_KEY ? "Set" : "Not set" + } `); // Create DynamoDB clients @@ -66,7 +68,7 @@ export function handle_GET_delphi(req: Request, res: Response) { marshallOptions: { convertEmptyValues: true, removeUndefinedValues: true, - } + }, }); // Table name for LLM topic names @@ -76,50 +78,64 @@ export function handle_GET_delphi(req: Request, res: Response) { try { // Create a command to list all tables const listTablesCommand = new ListTablesCommand({}); - + // Log that we're checking tables logger.info(`Checking DynamoDB tables...`); - + // Execute the command and handle results - client.send(listTablesCommand) - .then(tableData => { + client + .send(listTablesCommand) + .then((tableData) => { // Make sure TableNames is defined const tableNames = tableData.TableNames || []; - logger.info(`Found ${tableNames.length} DynamoDB tables: ${JSON.stringify(tableNames)}`); - + logger.info( + `Found ${tableNames.length} DynamoDB tables: ${JSON.stringify( + tableNames + )}` + ); + // Check if our table exists const tableExists = tableNames.includes(tableName); logger.info(`Table ${tableName} exists: ${tableExists}`); - + if (!tableExists) { // If table doesn't exist, return a helpful message // Also provide info on how to create the table return res.json({ status: "success", message: `Table ${tableName} not found in DynamoDB.`, - hint: "The table may need to be created by running the Delphi pipeline", + hint: + "The table may need to be created by running the Delphi pipeline", report_id: report_id, conversation_id: conversation_id, available_tables: tableNames, - topics: {} + topics: {}, }); } - + // If we get here, the table exists, proceed with query proceedWithQuery(); }) - .catch(err => { + .catch((err) => { logger.error(`Error listing DynamoDB tables: ${err.message}`); 
logger.error(`Error type: ${err.name}`); if (err.code === "UnrecognizedClientException") { - logger.error("This error usually indicates an authentication issue with DynamoDB"); + logger.error( + "This error usually indicates an authentication issue with DynamoDB" + ); logger.error("Check AWS credentials and region settings"); } else if (err.name === "NetworkingError") { - logger.error(`Cannot connect to DynamoDB endpoint: ${dynamoDBConfig.endpoint}`); - logger.error("Check if the DynamoDB service is running and accessible from the server container"); - logger.error("Consider testing with: curl " + dynamoDBConfig.endpoint); + logger.error( + `Cannot connect to DynamoDB endpoint: ${dynamoDBConfig.endpoint}` + ); + logger.error( + "Check if the DynamoDB service is running and accessible from the server container" + ); + logger.error( + "Consider testing with: curl " + dynamoDBConfig.endpoint + ); } - + // If we can't list tables, we should still try the query // It might be a permissions issue where we can query but not list logger.info("Proceeding with query anyway..."); @@ -133,7 +149,7 @@ export function handle_GET_delphi(req: Request, res: Response) { logger.info("Proceeding with query anyway..."); proceedWithQuery(); } - + // Function to execute the actual query function proceedWithQuery() { // Query parameters to get LLM topic names for the conversation @@ -141,181 +157,197 @@ export function handle_GET_delphi(req: Request, res: Response) { TableName: tableName, KeyConditionExpression: "conversation_id = :cid", ExpressionAttributeValues: { - ":cid": conversation_id - } + ":cid": conversation_id, + }, }; - + // Log that we're executing the query logger.info(`Executing DynamoDB query: ${JSON.stringify(params)}`); // Query DynamoDB - docClient.send(new QueryCommand(params)) - .then(data => { - // Early return if no items found - if (!data.Items || data.Items.length === 0) { + docClient + .send(new QueryCommand(params)) + .then((data) => { + // Early return if no 
items found + if (!data.Items || data.Items.length === 0) { + return res.json({ + status: "success", + message: "No LLM topics found for this conversation", + report_id: report_id, + conversation_id: conversation_id, + topics: {}, + }); + } + + // Process results - organize topics by run, then by layer, then by cluster + // Group by creation timestamp and model to identify different runs + const items = data.Items; + + // First group by model and creation date (truncate to day for grouping) + const runGroups: Record = {}; + + items.forEach((item) => { + const modelName = item.model_name || "unknown"; + const createdAt = item.created_at || ""; + const createdDate = createdAt.substring(0, 10); // Take just the date part YYYY-MM-DD + + // Create a run key based on model and creation date + const runKey = `${modelName}_${createdDate}`; + + if (!runGroups[runKey]) { + runGroups[runKey] = []; + } + + runGroups[runKey].push(item); + }); + + // Now organize each run into layers and clusters + const allRuns: Record = {}; + + Object.entries(runGroups).forEach(([runKey, runItems]) => { + const topicsByLayer: Record> = {}; + + // Process each item in this run + runItems.forEach((item) => { + const layerId = item.layer_id; + const clusterId = item.cluster_id; + + // Initialize layer if it doesn't exist + if (!topicsByLayer[layerId]) { + topicsByLayer[layerId] = {}; + } + + // Add topic to its layer + topicsByLayer[layerId][clusterId] = { + topic_name: item.topic_name, + model_name: item.model_name, + created_at: item.created_at, + topic_key: item.topic_key, + }; + }); + + // Get sample data to represent the run + const sampleItem = runItems[0]; + + // Add run with metadata + allRuns[runKey] = { + model_name: sampleItem.model_name, + created_date: sampleItem.created_at?.substring(0, 10), + topics_by_layer: topicsByLayer, + item_count: runItems.length, + }; + }); + + // Return all runs, with the most recent runs first + const sortedRuns = Object.entries(allRuns) + .sort(([keyA, 
runA], [keyB, runB]) => { + // Sort by created_date in descending order (newest first) + const dateA = runA.created_date || ""; + const dateB = runB.created_date || ""; + return dateB.localeCompare(dateA); + }) + .reduce((acc, [key, value]) => { + acc[key] = value; + return acc; + }, {} as Record); + + // Return the results return res.json({ status: "success", - message: "No LLM topics found for this conversation", + message: "LLM topics retrieved successfully", report_id: report_id, conversation_id: conversation_id, - topics: {} + runs: sortedRuns, }); - } - - // Process results - organize topics by run, then by layer, then by cluster - // Group by creation timestamp and model to identify different runs - const items = data.Items; - - // First group by model and creation date (truncate to day for grouping) - const runGroups: Record = {}; - - items.forEach(item => { - const modelName = item.model_name || 'unknown'; - const createdAt = item.created_at || ''; - const createdDate = createdAt.substring(0, 10); // Take just the date part YYYY-MM-DD - - // Create a run key based on model and creation date - const runKey = `${modelName}_${createdDate}`; - - if (!runGroups[runKey]) { - runGroups[runKey] = []; + }) + .catch((err) => { + // Check if this is a "table not found" error + if (err.name === "ResourceNotFoundException") { + logger.warn( + `DynamoDB table not found: Delphi_CommentClustersLLMTopicNames` + ); + return res.json({ + status: "success", + message: "Delphi topic service not available yet", + hint: + "The table may need to be created by running the Delphi pipeline", + report_id: report_id, + conversation_id: conversation_id, + topics: {}, + }); } - - runGroups[runKey].push(item); - }); - // Now organize each run into layers and clusters - const allRuns: Record = {}; - - Object.entries(runGroups).forEach(([runKey, runItems]) => { - const topicsByLayer: Record> = {}; - - // Process each item in this run - runItems.forEach(item => { - const layerId = 
item.layer_id; - const clusterId = item.cluster_id; - - // Initialize layer if it doesn't exist - if (!topicsByLayer[layerId]) { - topicsByLayer[layerId] = {}; - } - - // Add topic to its layer - topicsByLayer[layerId][clusterId] = { - topic_name: item.topic_name, - model_name: item.model_name, - created_at: item.created_at, - topic_key: item.topic_key - }; - }); - - // Get sample data to represent the run - const sampleItem = runItems[0]; - - // Add run with metadata - allRuns[runKey] = { - model_name: sampleItem.model_name, - created_date: sampleItem.created_at?.substring(0, 10), - topics_by_layer: topicsByLayer, - item_count: runItems.length - }; - }); + // Log detailed error information + logger.error(`Error querying DynamoDB: ${err.message}`); + logger.error(`Error type: ${err.name}`); + logger.error(`Error code: ${err.$metadata?.httpStatusCode}`); + + // Format a helpful message based on the error type + let helpMessage = ""; + + // Check credentials error + if (err.name === "CredentialsProviderError") { + logger.error( + "AWS credential issue - check environment variables" + ); + helpMessage = + "AWS credential issue - check AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables"; + } + + // Check connection error + if (err.name === "NetworkingError") { + logger.error( + `Network error connecting to DynamoDB endpoint: ${ + dynamoDBConfig.endpoint || "default" + }` + ); + helpMessage = `Network error connecting to DynamoDB at ${ + dynamoDBConfig.endpoint || "default" + } - check if DynamoDB service is running and accessible`; + } + + // Check permissions error + if (err.name === "AccessDeniedException") { + logger.error( + "AWS permissions issue - credentials do not have access to this DynamoDB table" + ); + helpMessage = + "AWS permissions issue - credentials do not have access to this DynamoDB table"; + } + + // If validation error + if (err.name === "ValidationException") { + logger.error(`DynamoDB validation error: ${err.message}`); + 
helpMessage = `DynamoDB validation error: ${err.message} - check table schema or partition key`; + } + + // Try to log more details if available + try { + logger.error(JSON.stringify(err, null, 2)); + } catch (e) { + logger.error("Could not stringify error object"); + } - // Return all runs, with the most recent runs first - const sortedRuns = Object.entries(allRuns) - .sort(([keyA, runA], [keyB, runB]) => { - // Sort by created_date in descending order (newest first) - const dateA = runA.created_date || ''; - const dateB = runB.created_date || ''; - return dateB.localeCompare(dateA); - }) - .reduce((acc, [key, value]) => { - acc[key] = value; - return acc; - }, {} as Record); - - // Return the results - return res.json({ - status: "success", - message: "LLM topics retrieved successfully", - report_id: report_id, - conversation_id: conversation_id, - runs: sortedRuns - }); - }) - .catch(err => { - // Check if this is a "table not found" error - if (err.name === "ResourceNotFoundException") { - logger.warn(`DynamoDB table not found: Delphi_CommentClustersLLMTopicNames`); return res.json({ - status: "success", - message: "Delphi topic service not available yet", - hint: "The table may need to be created by running the Delphi pipeline", + status: "success", // Use success to avoid frontend errors + message: "Error querying DynamoDB", + error: err.message, + error_type: err.name, + help: helpMessage, report_id: report_id, conversation_id: conversation_id, - topics: {} + topics: {}, // Return empty topics to avoid client-side errors }); - } - - // Log detailed error information - logger.error(`Error querying DynamoDB: ${err.message}`); - logger.error(`Error type: ${err.name}`); - logger.error(`Error code: ${err.$metadata?.httpStatusCode}`); - - // Format a helpful message based on the error type - let helpMessage = ""; - - // Check credentials error - if (err.name === "CredentialsProviderError") { - logger.error("AWS credential issue - check environment variables"); - 
helpMessage = "AWS credential issue - check AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables"; - } - - // Check connection error - if (err.name === "NetworkingError") { - logger.error(`Network error connecting to DynamoDB endpoint: ${dynamoDBConfig.endpoint || "default"}`); - helpMessage = `Network error connecting to DynamoDB at ${dynamoDBConfig.endpoint || "default"} - check if DynamoDB service is running and accessible`; - } - - // Check permissions error - if (err.name === "AccessDeniedException") { - logger.error("AWS permissions issue - credentials do not have access to this DynamoDB table"); - helpMessage = "AWS permissions issue - credentials do not have access to this DynamoDB table"; - } - - // If validation error - if (err.name === "ValidationException") { - logger.error(`DynamoDB validation error: ${err.message}`); - helpMessage = `DynamoDB validation error: ${err.message} - check table schema or partition key`; - } - - // Try to log more details if available - try { - logger.error(JSON.stringify(err, null, 2)); - } catch (e) { - logger.error("Could not stringify error object"); - } - - return res.json({ - status: "success", // Use success to avoid frontend errors - message: "Error querying DynamoDB", - error: err.message, - error_type: err.name, - help: helpMessage, - report_id: report_id, - conversation_id: conversation_id, - topics: {} // Return empty topics to avoid client-side errors }); - }); } }) - .catch(err => { + .catch((err) => { logger.error(`Error in delphi endpoint: ${err}`); return res.json({ status: "error", message: "Error processing request", error: err.message, - report_id: report_id + report_id: report_id, }); }); -} \ No newline at end of file +} diff --git a/server/src/routes/delphi/jobs.ts b/server/src/routes/delphi/jobs.ts index ede6fb1464..773dd012c2 100644 --- a/server/src/routes/delphi/jobs.ts +++ b/server/src/routes/delphi/jobs.ts @@ -8,7 +8,7 @@ import { getZidFromReport } from "../../utils/parameter"; // 
Initialize DynamoDB client const dynamoDbClient = new DynamoDB({ // Use environment variables for endpoint and region, or the docker service name - endpoint: process.env.DYNAMODB_ENDPOINT || "http://dynamodb:8000", + endpoint: process.env.DYNAMODB_ENDPOINT, region: process.env.AWS_REGION || "us-east-1", // For local development or Docker container credentials: { diff --git a/server/src/routes/delphi/topics.ts b/server/src/routes/delphi/topics.ts index ae41c697c8..f5e45abcfb 100644 --- a/server/src/routes/delphi/topics.ts +++ b/server/src/routes/delphi/topics.ts @@ -9,47 +9,47 @@ import { getZidFromReport } from "../../utils/parameter"; */ export function handle_GET_delphi(req: Request, res: Response) { logger.info("Delphi API request received"); - + // Get report_id from request const report_id = req.query.report_id as string; - + if (!report_id) { - return res.json({ - status: "error", - message: "report_id is required" + return res.json({ + status: "error", + message: "report_id is required", }); } // Extract zid from report_id - we need this to query DynamoDB getZidFromReport(report_id) - .then(zid => { + .then((zid) => { if (!zid) { return res.json({ status: "error", message: "Could not find conversation for report_id", - report_id: report_id + report_id: report_id, }); } const conversation_id = zid.toString(); - logger.info(`Fetching Delphi LLM topics for conversation_id: ${conversation_id}`); + logger.info( + `Fetching Delphi LLM topics for conversation_id: ${conversation_id}` + ); // Force using local DynamoDB by hardcoding the endpoint const dynamoDBConfig: any = { region: process.env.AWS_REGION || "us-east-1", - // Force to use the local DynamoDB endpoint - endpoint: "http://dynamodb:8000" }; - + // Log what we're using logger.info(`Forcing local DynamoDB connection: Endpoint: ${dynamoDBConfig.endpoint} Region: ${dynamoDBConfig.region}`); - + // For local DynamoDB, use dummy credentials dynamoDBConfig.credentials = { - accessKeyId: 'DUMMYIDEXAMPLE', - 
secretAccessKey: 'DUMMYEXAMPLEKEY' + accessKeyId: "DUMMYIDEXAMPLE", + secretAccessKey: "DUMMYEXAMPLEKEY", }; // Log connection config for debugging @@ -57,7 +57,9 @@ export function handle_GET_delphi(req: Request, res: Response) { Region: ${dynamoDBConfig.region} Endpoint: ${dynamoDBConfig.endpoint || "Default AWS endpoint"} AWS_ACCESS_KEY_ID: ${process.env.AWS_ACCESS_KEY_ID ? "Set" : "Not set"} - AWS_SECRET_ACCESS_KEY: ${process.env.AWS_SECRET_ACCESS_KEY ? "Set" : "Not set"} + AWS_SECRET_ACCESS_KEY: ${ + process.env.AWS_SECRET_ACCESS_KEY ? "Set" : "Not set" + } `); // Create DynamoDB clients @@ -66,7 +68,7 @@ export function handle_GET_delphi(req: Request, res: Response) { marshallOptions: { convertEmptyValues: true, removeUndefinedValues: true, - } + }, }); // Table name for LLM topic names @@ -76,50 +78,64 @@ export function handle_GET_delphi(req: Request, res: Response) { try { // Create a command to list all tables const listTablesCommand = new ListTablesCommand({}); - + // Log that we're checking tables logger.info(`Checking DynamoDB tables...`); - + // Execute the command and handle results - client.send(listTablesCommand) - .then(tableData => { + client + .send(listTablesCommand) + .then((tableData) => { // Make sure TableNames is defined const tableNames = tableData.TableNames || []; - logger.info(`Found ${tableNames.length} DynamoDB tables: ${JSON.stringify(tableNames)}`); - + logger.info( + `Found ${tableNames.length} DynamoDB tables: ${JSON.stringify( + tableNames + )}` + ); + // Check if our table exists const tableExists = tableNames.includes(tableName); logger.info(`Table ${tableName} exists: ${tableExists}`); - + if (!tableExists) { // If table doesn't exist, return a helpful message // Also provide info on how to create the table return res.json({ status: "success", message: `Table ${tableName} not found in DynamoDB.`, - hint: "The table may need to be created by running the Delphi pipeline", + hint: + "The table may need to be created by running 
the Delphi pipeline", report_id: report_id, conversation_id: conversation_id, available_tables: tableNames, - topics: {} + topics: {}, }); } - + // If we get here, the table exists, proceed with query proceedWithQuery(); }) - .catch(err => { + .catch((err) => { logger.error(`Error listing DynamoDB tables: ${err.message}`); logger.error(`Error type: ${err.name}`); if (err.code === "UnrecognizedClientException") { - logger.error("This error usually indicates an authentication issue with DynamoDB"); + logger.error( + "This error usually indicates an authentication issue with DynamoDB" + ); logger.error("Check AWS credentials and region settings"); } else if (err.name === "NetworkingError") { - logger.error(`Cannot connect to DynamoDB endpoint: ${dynamoDBConfig.endpoint}`); - logger.error("Check if the DynamoDB service is running and accessible from the server container"); - logger.error("Consider testing with: curl " + dynamoDBConfig.endpoint); + logger.error( + `Cannot connect to DynamoDB endpoint: ${dynamoDBConfig.endpoint}` + ); + logger.error( + "Check if the DynamoDB service is running and accessible from the server container" + ); + logger.error( + "Consider testing with: curl " + dynamoDBConfig.endpoint + ); } - + // If we can't list tables, we should still try the query // It might be a permissions issue where we can query but not list logger.info("Proceeding with query anyway..."); @@ -133,7 +149,7 @@ export function handle_GET_delphi(req: Request, res: Response) { logger.info("Proceeding with query anyway..."); proceedWithQuery(); } - + // Function to execute the actual query function proceedWithQuery() { // Query parameters to get LLM topic names for the conversation @@ -141,181 +157,197 @@ export function handle_GET_delphi(req: Request, res: Response) { TableName: tableName, KeyConditionExpression: "conversation_id = :cid", ExpressionAttributeValues: { - ":cid": conversation_id - } + ":cid": conversation_id, + }, }; - + // Log that we're executing the 
query logger.info(`Executing DynamoDB query: ${JSON.stringify(params)}`); // Query DynamoDB - docClient.send(new QueryCommand(params)) - .then(data => { - // Early return if no items found - if (!data.Items || data.Items.length === 0) { + docClient + .send(new QueryCommand(params)) + .then((data) => { + // Early return if no items found + if (!data.Items || data.Items.length === 0) { + return res.json({ + status: "success", + message: "No LLM topics found for this conversation", + report_id: report_id, + conversation_id: conversation_id, + topics: {}, + }); + } + + // Process results - organize topics by run, then by layer, then by cluster + // Group by creation timestamp and model to identify different runs + const items = data.Items; + + // First group by model and creation date (truncate to day for grouping) + const runGroups: Record = {}; + + items.forEach((item) => { + const modelName = item.model_name || "unknown"; + const createdAt = item.created_at || ""; + const createdDate = createdAt.substring(0, 10); // Take just the date part YYYY-MM-DD + + // Create a run key based on model and creation date + const runKey = `${modelName}_${createdDate}`; + + if (!runGroups[runKey]) { + runGroups[runKey] = []; + } + + runGroups[runKey].push(item); + }); + + // Now organize each run into layers and clusters + const allRuns: Record = {}; + + Object.entries(runGroups).forEach(([runKey, runItems]) => { + const topicsByLayer: Record> = {}; + + // Process each item in this run + runItems.forEach((item) => { + const layerId = item.layer_id; + const clusterId = item.cluster_id; + + // Initialize layer if it doesn't exist + if (!topicsByLayer[layerId]) { + topicsByLayer[layerId] = {}; + } + + // Add topic to its layer + topicsByLayer[layerId][clusterId] = { + topic_name: item.topic_name, + model_name: item.model_name, + created_at: item.created_at, + topic_key: item.topic_key, + }; + }); + + // Get sample data to represent the run + const sampleItem = runItems[0]; + + // Add 
run with metadata + allRuns[runKey] = { + model_name: sampleItem.model_name, + created_date: sampleItem.created_at?.substring(0, 10), + topics_by_layer: topicsByLayer, + item_count: runItems.length, + }; + }); + + // Return all runs, with the most recent runs first + const sortedRuns = Object.entries(allRuns) + .sort(([keyA, runA], [keyB, runB]) => { + // Sort by created_date in descending order (newest first) + const dateA = runA.created_date || ""; + const dateB = runB.created_date || ""; + return dateB.localeCompare(dateA); + }) + .reduce((acc, [key, value]) => { + acc[key] = value; + return acc; + }, {} as Record); + + // Return the results return res.json({ status: "success", - message: "No LLM topics found for this conversation", + message: "LLM topics retrieved successfully", report_id: report_id, conversation_id: conversation_id, - topics: {} + runs: sortedRuns, }); - } - - // Process results - organize topics by run, then by layer, then by cluster - // Group by creation timestamp and model to identify different runs - const items = data.Items; - - // First group by model and creation date (truncate to day for grouping) - const runGroups: Record = {}; - - items.forEach(item => { - const modelName = item.model_name || 'unknown'; - const createdAt = item.created_at || ''; - const createdDate = createdAt.substring(0, 10); // Take just the date part YYYY-MM-DD - - // Create a run key based on model and creation date - const runKey = `${modelName}_${createdDate}`; - - if (!runGroups[runKey]) { - runGroups[runKey] = []; + }) + .catch((err) => { + // Check if this is a "table not found" error + if (err.name === "ResourceNotFoundException") { + logger.warn( + `DynamoDB table not found: Delphi_CommentClustersLLMTopicNames` + ); + return res.json({ + status: "success", + message: "Delphi topic service not available yet", + hint: + "The table may need to be created by running the Delphi pipeline", + report_id: report_id, + conversation_id: conversation_id, + topics: 
{}, + }); } - - runGroups[runKey].push(item); - }); - // Now organize each run into layers and clusters - const allRuns: Record = {}; - - Object.entries(runGroups).forEach(([runKey, runItems]) => { - const topicsByLayer: Record> = {}; - - // Process each item in this run - runItems.forEach(item => { - const layerId = item.layer_id; - const clusterId = item.cluster_id; - - // Initialize layer if it doesn't exist - if (!topicsByLayer[layerId]) { - topicsByLayer[layerId] = {}; - } - - // Add topic to its layer - topicsByLayer[layerId][clusterId] = { - topic_name: item.topic_name, - model_name: item.model_name, - created_at: item.created_at, - topic_key: item.topic_key - }; - }); - - // Get sample data to represent the run - const sampleItem = runItems[0]; - - // Add run with metadata - allRuns[runKey] = { - model_name: sampleItem.model_name, - created_date: sampleItem.created_at?.substring(0, 10), - topics_by_layer: topicsByLayer, - item_count: runItems.length - }; - }); + // Log detailed error information + logger.error(`Error querying DynamoDB: ${err.message}`); + logger.error(`Error type: ${err.name}`); + logger.error(`Error code: ${err.$metadata?.httpStatusCode}`); + + // Format a helpful message based on the error type + let helpMessage = ""; + + // Check credentials error + if (err.name === "CredentialsProviderError") { + logger.error( + "AWS credential issue - check environment variables" + ); + helpMessage = + "AWS credential issue - check AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables"; + } + + // Check connection error + if (err.name === "NetworkingError") { + logger.error( + `Network error connecting to DynamoDB endpoint: ${ + dynamoDBConfig.endpoint || "default" + }` + ); + helpMessage = `Network error connecting to DynamoDB at ${ + dynamoDBConfig.endpoint || "default" + } - check if DynamoDB service is running and accessible`; + } + + // Check permissions error + if (err.name === "AccessDeniedException") { + logger.error( + "AWS 
permissions issue - credentials do not have access to this DynamoDB table" + ); + helpMessage = + "AWS permissions issue - credentials do not have access to this DynamoDB table"; + } + + // If validation error + if (err.name === "ValidationException") { + logger.error(`DynamoDB validation error: ${err.message}`); + helpMessage = `DynamoDB validation error: ${err.message} - check table schema or partition key`; + } + + // Try to log more details if available + try { + logger.error(JSON.stringify(err, null, 2)); + } catch (e) { + logger.error("Could not stringify error object"); + } - // Return all runs, with the most recent runs first - const sortedRuns = Object.entries(allRuns) - .sort(([keyA, runA], [keyB, runB]) => { - // Sort by created_date in descending order (newest first) - const dateA = runA.created_date || ''; - const dateB = runB.created_date || ''; - return dateB.localeCompare(dateA); - }) - .reduce((acc, [key, value]) => { - acc[key] = value; - return acc; - }, {} as Record); - - // Return the results - return res.json({ - status: "success", - message: "LLM topics retrieved successfully", - report_id: report_id, - conversation_id: conversation_id, - runs: sortedRuns - }); - }) - .catch(err => { - // Check if this is a "table not found" error - if (err.name === "ResourceNotFoundException") { - logger.warn(`DynamoDB table not found: Delphi_CommentClustersLLMTopicNames`); return res.json({ - status: "success", - message: "Delphi topic service not available yet", - hint: "The table may need to be created by running the Delphi pipeline", + status: "success", // Use success to avoid frontend errors + message: "Error querying DynamoDB", + error: err.message, + error_type: err.name, + help: helpMessage, report_id: report_id, conversation_id: conversation_id, - topics: {} + topics: {}, // Return empty topics to avoid client-side errors }); - } - - // Log detailed error information - logger.error(`Error querying DynamoDB: ${err.message}`); - logger.error(`Error 
type: ${err.name}`); - logger.error(`Error code: ${err.$metadata?.httpStatusCode}`); - - // Format a helpful message based on the error type - let helpMessage = ""; - - // Check credentials error - if (err.name === "CredentialsProviderError") { - logger.error("AWS credential issue - check environment variables"); - helpMessage = "AWS credential issue - check AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables"; - } - - // Check connection error - if (err.name === "NetworkingError") { - logger.error(`Network error connecting to DynamoDB endpoint: ${dynamoDBConfig.endpoint || "default"}`); - helpMessage = `Network error connecting to DynamoDB at ${dynamoDBConfig.endpoint || "default"} - check if DynamoDB service is running and accessible`; - } - - // Check permissions error - if (err.name === "AccessDeniedException") { - logger.error("AWS permissions issue - credentials do not have access to this DynamoDB table"); - helpMessage = "AWS permissions issue - credentials do not have access to this DynamoDB table"; - } - - // If validation error - if (err.name === "ValidationException") { - logger.error(`DynamoDB validation error: ${err.message}`); - helpMessage = `DynamoDB validation error: ${err.message} - check table schema or partition key`; - } - - // Try to log more details if available - try { - logger.error(JSON.stringify(err, null, 2)); - } catch (e) { - logger.error("Could not stringify error object"); - } - - return res.json({ - status: "success", // Use success to avoid frontend errors - message: "Error querying DynamoDB", - error: err.message, - error_type: err.name, - help: helpMessage, - report_id: report_id, - conversation_id: conversation_id, - topics: {} // Return empty topics to avoid client-side errors }); - }); } }) - .catch(err => { + .catch((err) => { logger.error(`Error in delphi endpoint: ${err}`); return res.json({ status: "error", message: "Error processing request", error: err.message, - report_id: report_id + report_id: report_id, 
}); }); -} \ No newline at end of file +} diff --git a/server/src/routes/delphi/visualizations.ts b/server/src/routes/delphi/visualizations.ts index 854da926db..b4b541c435 100644 --- a/server/src/routes/delphi/visualizations.ts +++ b/server/src/routes/delphi/visualizations.ts @@ -2,32 +2,39 @@ import { Request, Response } from "express"; import logger from "../../utils/logger"; import { getZidFromReport } from "../../utils/parameter"; import { DynamoDBClient } from "@aws-sdk/client-dynamodb"; -import { DynamoDBDocumentClient, QueryCommand, ScanCommand } from "@aws-sdk/lib-dynamodb"; -import { - S3Client, - ListObjectsV2Command, - GetObjectCommand +import { + DynamoDBDocumentClient, + QueryCommand, + ScanCommand, +} from "@aws-sdk/lib-dynamodb"; +import { + S3Client, + ListObjectsV2Command, + GetObjectCommand, } from "@aws-sdk/client-s3"; import { getSignedUrl } from "@aws-sdk/s3-request-presigner"; /** * Handler for Delphi API route that retrieves visualization information */ -export async function handle_GET_delphi_visualizations(req: Request, res: Response) { +export async function handle_GET_delphi_visualizations( + req: Request, + res: Response +) { logger.info("Delphi visualizations API request received"); - + try { // Get report_id from request const report_id = req.query.report_id as string; const jobId = req.query.job_id as string; - + if (!report_id) { - return res.json({ - status: "error", - message: "report_id is required" + return res.json({ + status: "error", + message: "report_id is required", }); } - + // Extract zid from report_id let zid; try { @@ -37,20 +44,22 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon return res.json({ status: "error", message: "Could not find conversation for report_id", - report_id + report_id, }); } - + if (!zid) { return res.json({ status: "error", message: "Could not find conversation for report_id", - report_id + report_id, }); } const conversation_id = zid.toString(); - 
logger.info(`Fetching visualizations for report_id: ${report_id}, conversation_id: ${conversation_id}`); + logger.info( + `Fetching visualizations for report_id: ${report_id}, conversation_id: ${conversation_id}` + ); // Configure S3 client const s3Config: any = { @@ -58,9 +67,9 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon endpoint: process.env.AWS_S3_ENDPOINT || "http://minio:9000", credentials: { accessKeyId: process.env.AWS_S3_ACCESS_KEY_ID || "minioadmin", - secretAccessKey: process.env.AWS_S3_SECRET_ACCESS_KEY || "minioadmin" + secretAccessKey: process.env.AWS_S3_SECRET_ACCESS_KEY || "minioadmin", }, - forcePathStyle: true // Required for MinIO + forcePathStyle: true, // Required for MinIO }; // Log S3 connection info @@ -80,16 +89,16 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon status: "error", message: "Failed to initialize S3 client", error: err.message || String(err), - report_id + report_id, }); } - + const bucketName = process.env.AWS_S3_BUCKET_NAME || "delphi"; // Define S3 path prefix to search // Use conversation_id instead of report_id since files are stored by conversation_id - const prefix = jobId - ? `visualizations/${conversation_id}/${jobId}/` + const prefix = jobId + ? 
`visualizations/${conversation_id}/${jobId}/` : `visualizations/${conversation_id}/`; // Get job metadata from DynamoDB if available @@ -107,20 +116,28 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon const listObjectsParams = { Bucket: bucketName, Prefix: prefix, - MaxKeys: 1000 // Increase if you expect more than 1000 objects + MaxKeys: 1000, // Increase if you expect more than 1000 objects }; - + // Enhanced logging for debugging - logger.info(`Listing S3 objects with params: ${JSON.stringify(listObjectsParams)}`); + logger.info( + `Listing S3 objects with params: ${JSON.stringify(listObjectsParams)}` + ); try { - s3Response = await s3Client.send(new ListObjectsV2Command(listObjectsParams)); - + s3Response = await s3Client.send( + new ListObjectsV2Command(listObjectsParams) + ); + // Log successful response - logger.info(`S3 listing successful. Found ${s3Response.Contents?.length || 0} objects.`); + logger.info( + `S3 listing successful. Found ${ + s3Response.Contents?.length || 0 + } objects.` + ); if (s3Response.Contents && s3Response.Contents.length > 0) { // Log first few keys for debugging - const keys = s3Response.Contents.slice(0, 3).map(obj => obj.Key); + const keys = s3Response.Contents.slice(0, 3).map((obj) => obj.Key); logger.info(`Sample object keys: ${JSON.stringify(keys)}`); } } catch (s3Error: any) { @@ -136,10 +153,10 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon message: "Error listing visualizations", error: err.message || String(err), report_id, - conversation_id + conversation_id, }); } - + // Check if files were found if (!s3Response.Contents || s3Response.Contents.length === 0) { return res.json({ @@ -148,53 +165,59 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon report_id, conversation_id, visualizations: [], - jobs: jobMetadata + jobs: jobMetadata, }); } // Group visualizations by job const visualizationsByJob: Record = {}; - + // 
Process each object for (const obj of s3Response.Contents) { const key = obj.Key || ""; - + // Parse job ID from the key // Expected format: visualizations/{report_id}/{job_id}/layer_{layer_id}_datamapplot.html - const keyParts = key.split('/'); - + const keyParts = key.split("/"); + if (keyParts.length < 4) continue; // Skip if doesn't match expected format - + const currentJobId = keyParts[2]; const fileName = keyParts[3]; - + // Skip if not an HTML file - if (!fileName.endsWith('.html') && !fileName.endsWith('.png') && !fileName.endsWith('.svg')) { + if ( + !fileName.endsWith(".html") && + !fileName.endsWith(".png") && + !fileName.endsWith(".svg") + ) { continue; } - + // Parse layer ID const layerMatch = fileName.match(/layer_(\d+)/); const layerId = layerMatch ? parseInt(layerMatch[1]) : null; - + if (layerId === null) continue; // Skip if can't determine layer - + // Generate a signed URL for this object let url; try { const getObjectParams = { Bucket: bucketName, - Key: key + Key: key, }; - + // Instead of using presigned URLs that don't work across network boundaries, // just return a direct URL to the object that can be accessed from the browser url = `http://localhost:9000/${bucketName}/${key}`; } catch (err: any) { - logger.error(`Error generating signed URL for ${key}: ${err.message || err}`); + logger.error( + `Error generating signed URL for ${key}: ${err.message || err}` + ); continue; // Skip this file and continue } - + // Determine visualization type let type = "unknown"; if (fileName.includes("datamapplot.html")) { @@ -206,12 +229,12 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon } else if (fileName.includes("static.svg")) { type = "static_svg"; } - + // Initialize job array if needed if (!visualizationsByJob[currentJobId]) { visualizationsByJob[currentJobId] = []; } - + // Add to the job's visualizations visualizationsByJob[currentJobId].push({ key, @@ -219,29 +242,31 @@ export async function 
handle_GET_delphi_visualizations(req: Request, res: Respon layerId, type, lastModified: obj.LastModified, - size: obj.Size + size: obj.Size, }); } - + // Sort visualizations by layer ID - Object.values(visualizationsByJob).forEach(visArray => { + Object.values(visualizationsByJob).forEach((visArray) => { visArray.sort((a, b) => (a.layerId || 0) - (b.layerId || 0)); }); - + // Combine job metadata with visualizations - const jobsWithVisualizations = Object.keys(visualizationsByJob).map(jobId => { - const jobInfo = jobMetadata[jobId] || { - jobId, - status: "unknown", - createdAt: null - }; - - return { - ...jobInfo, - visualizations: visualizationsByJob[jobId] - }; - }); - + const jobsWithVisualizations = Object.keys(visualizationsByJob).map( + (jobId) => { + const jobInfo = jobMetadata[jobId] || { + jobId, + status: "unknown", + createdAt: null, + }; + + return { + ...jobInfo, + visualizations: visualizationsByJob[jobId], + }; + } + ); + // Sort jobs by createdAt (newest first) jobsWithVisualizations.sort((a, b) => { const dateA = a.createdAt ? 
new Date(a.createdAt).getTime() : 0; @@ -255,9 +280,8 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon message: "Visualizations retrieved successfully", report_id, conversation_id, - jobs: jobsWithVisualizations + jobs: jobsWithVisualizations, }); - } catch (err: any) { logger.error(`Error in delphi visualizations endpoint: ${err.message}`); logger.error(err.stack); @@ -265,7 +289,7 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon status: "error", message: "Error processing request", error: err.message, - report_id: req.query.report_id as string + report_id: req.query.report_id as string, }); } } @@ -273,16 +297,18 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon /** * Fetch job metadata from DynamoDB */ -async function fetchJobMetadata(report_id: string, conversation_id: string): Promise> { +async function fetchJobMetadata( + report_id: string, + conversation_id: string +): Promise> { try { // Configure DynamoDB client const dynamoDBConfig: any = { region: process.env.AWS_REGION || "us-east-1", - endpoint: "http://dynamodb:8000", // Local DynamoDB credentials: { - accessKeyId: 'DUMMYIDEXAMPLE', - secretAccessKey: 'DUMMYEXAMPLEKEY' - } + accessKeyId: "DUMMYIDEXAMPLE", + secretAccessKey: "DUMMYEXAMPLEKEY", + }, }; // Create DynamoDB clients @@ -291,7 +317,7 @@ async function fetchJobMetadata(report_id: string, conversation_id: string): Pro marshallOptions: { convertEmptyValues: true, removeUndefinedValues: true, - } + }, }); // Scan for jobs by conversation ID (using scan instead of query since the index may not exist) @@ -300,19 +326,19 @@ async function fetchJobMetadata(report_id: string, conversation_id: string): Pro TableName: "Delphi_JobQueue", FilterExpression: "conversation_id = :cid", ExpressionAttributeValues: { - ":cid": conversation_id - } + ":cid": conversation_id, + }, }; try { logger.info(`Scanning for jobs with conversation_id: 
${conversation_id}`); const scanResponse = await docClient.send(new ScanCommand(scanParams)); - + if (!scanResponse.Items || scanResponse.Items.length === 0) { logger.info(`No jobs found for conversation ${conversation_id}`); return {}; } - + // Process jobs from scan return processJobItems(scanResponse.Items); } catch (err: any) { @@ -330,19 +356,19 @@ async function fetchJobMetadata(report_id: string, conversation_id: string): Pro */ function processJobItems(items: any[]): Record { const jobMap: Record = {}; - + for (const item of items) { const job_id = item.job_id; - + jobMap[job_id] = { jobId: job_id, status: item.status || "unknown", createdAt: item.created_at || null, startedAt: item.started_at || null, completedAt: item.completed_at || null, - results: item.job_results ? JSON.parse(item.job_results) : null + results: item.job_results ? JSON.parse(item.job_results) : null, }; } - + return jobMap; -} \ No newline at end of file +} From b44422eed13bdc432ae8b4228420bf0fda316046 Mon Sep 17 00:00:00 2001 From: tevko Date: Fri, 9 May 2025 15:09:16 -0500 Subject: [PATCH 10/42] adjust delphi dockerfile --- delphi/Dockerfile | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/delphi/Dockerfile b/delphi/Dockerfile index f9e8055eb9..da45b8fd5d 100644 --- a/delphi/Dockerfile +++ b/delphi/Dockerfile @@ -64,16 +64,18 @@ RUN chmod +x start_poller.sh run_delphi.sh setup_ollama.sh # 3. Start the job poller with a 2-second polling interval CMD ["bash", "-c", "\ + echo 'Ensuring DynamoDB tables are set up (runs in all environments)...'; \ + python create_dynamodb_tables.py && \ + echo 'DynamoDB table setup script finished.'; \ + \ if [ -n \"${DYNAMODB_ENDPOINT}\" ]; then \ - echo 'Local/Dev environment detected (DELPHI_DEV_OR_PROD=${DELPHI_DEV_OR_PROD}), running setup scripts...'; \ - echo 'Setting up DynamoDB tables...' 
&& python create_dynamodb_tables.py && \ + echo 'DYNAMODB_ENDPOINT is set (value: \"${DYNAMODB_ENDPOINT}\"), assuming local/dev environment. Running additional local setup scripts...'; \ echo 'Setting up MinIO bucket...' && python setup_minio.py && \ - echo 'Setting up Ollama model...' && ./setup_ollama.sh; \ + echo 'Setting up Ollama model (local script)...' && ./setup_ollama.sh; \ else \ - echo 'Production environment detected (DELPHI_DEV_OR_PROD=${DELPHI_DEV_OR_PROD}), skipping local setup scripts.'; \ + echo 'DYNAMODB_ENDPOINT is not set, assuming production-like environment. Skipping MinIO and local Ollama setup scripts.'; \ fi && \ - echo 'Setting up DynamoDB tables... && \ - python create_dynamodb_tables.py && \ + \ echo 'Starting job poller...' && \ POLL_INTERVAL=${POLL_INTERVAL:-2} ./start_poller.sh \ -"] +"] \ No newline at end of file From 5502c0e5f74f9e59c286a9c2c06107cb437aec16 Mon Sep 17 00:00:00 2001 From: tevko Date: Fri, 9 May 2025 15:39:40 -0500 Subject: [PATCH 11/42] fix up setupdynamo func --- delphi/scripts/delphi_cli.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/delphi/scripts/delphi_cli.py b/delphi/scripts/delphi_cli.py index 2c880bbfc3..4e2d629b99 100755 --- a/delphi/scripts/delphi_cli.py +++ b/delphi/scripts/delphi_cli.py @@ -51,14 +51,15 @@ def create_elegant_header(): console.print() def setup_dynamodb(endpoint_url=None, region='us-east-1'): - """Set up DynamoDB connection.""" - # Use environment variable if endpoint not provided if endpoint_url is None: endpoint_url = os.environ.get('DYNAMODB_ENDPOINT') - # For local development + if endpoint_url == "": + endpoint_url = None + if endpoint_url: - if 'localhost' in endpoint_url or 'host.docker.internal' in endpoint_url: + local_patterns = ['localhost', 'host.docker.internal', 'dynamodb:'] + if any(pattern in endpoint_url for pattern in local_patterns): os.environ.setdefault('AWS_ACCESS_KEY_ID', 'fakeMyKeyId') os.environ.setdefault('AWS_SECRET_ACCESS_KEY', 
'fakeSecretAccessKey') From a4403439fff0a09c76aad8f0b4e567656e93c743 Mon Sep 17 00:00:00 2001 From: tevko Date: Fri, 9 May 2025 16:04:12 -0500 Subject: [PATCH 12/42] fix reference --- delphi/scripts/job_poller.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/delphi/scripts/job_poller.py b/delphi/scripts/job_poller.py index 76c13e40ea..0b27ebe245 100755 --- a/delphi/scripts/job_poller.py +++ b/delphi/scripts/job_poller.py @@ -52,6 +52,10 @@ def __init__(self, endpoint_url=None, region='us-east-1'): # For local development os.environ.setdefault('AWS_ACCESS_KEY_ID', 'fakeMyKeyId') os.environ.setdefault('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey') + + if self.endpoint_url == "": + logger.info("DynamoDB: DYNAMODB_ENDPOINT was an empty string, treating as None for AWS default endpoint.") + self.endpoint_url = None logger.info(f"Connecting to DynamoDB at {self.endpoint_url}") self.dynamodb = boto3.resource('dynamodb', From d9409d344f3af418427d0173b5fd3263aa3aff78 Mon Sep 17 00:00:00 2001 From: tevko Date: Fri, 9 May 2025 16:34:23 -0500 Subject: [PATCH 13/42] add ssl mode hardcoded --- delphi/polismath/database/postgres.py | 4 ++-- scripts/after_install.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/delphi/polismath/database/postgres.py b/delphi/polismath/database/postgres.py index f43c5ad2ed..0112fe75a9 100644 --- a/delphi/polismath/database/postgres.py +++ b/delphi/polismath/database/postgres.py @@ -132,9 +132,9 @@ def get_uri(self) -> str: password_str = f":{self.password}" if self.password else "" # Build URI - uri = f"postgresql://{self.user}{password_str}@{self.host}:{self.port}/{self.database}" + uri = f"postgresql://{self.user}{password_str}@{self.host}:{self.port}/{self.database}?sslmode=require" - # Add SSL mode if needed + # todo - remove ssl mode if local or dev if self.ssl_mode and self.ssl_mode != "prefer": uri += f"?sslmode={self.ssl_mode}" diff --git a/scripts/after_install.sh b/scripts/after_install.sh index 
9849482e37..9fa64d0348 100644 --- a/scripts/after_install.sh +++ b/scripts/after_install.sh @@ -71,7 +71,7 @@ DB_PORT=$(aws ssm get-parameter --name "/polis/db-port" --query 'Parameter.Value # --- Construct DATABASE_URL using values from Secrets Manager AND SSM Parameters --- -DATABASE_URL="postgres://${DB_USERNAME}:${DB_PASSWORD}@${DB_HOST}:${DB_PORT}/${DB_NAME}" +DATABASE_URL="postgres://${DB_USERNAME}:${DB_PASSWORD}@${DB_HOST}:${DB_PORT}/${DB_NAME}?sslmode=require" echo "Constructed DATABASE_URL: $DATABASE_URL" # Original logging From bbaf00d1a8f55de7a40004fc13f1240dc4f7376f Mon Sep 17 00:00:00 2001 From: tevko Date: Fri, 9 May 2025 21:22:17 -0500 Subject: [PATCH 14/42] another place to add secure mode --- delphi/umap_narrative/polismath_commentgraph/utils/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/delphi/umap_narrative/polismath_commentgraph/utils/storage.py b/delphi/umap_narrative/polismath_commentgraph/utils/storage.py index 187dc9afaa..fc7e9a9336 100644 --- a/delphi/umap_narrative/polismath_commentgraph/utils/storage.py +++ b/delphi/umap_narrative/polismath_commentgraph/utils/storage.py @@ -111,7 +111,7 @@ def get_uri(self) -> str: password_str = f":{self.password}" if self.password else "" # Build URI - uri = f"postgresql://{self.user}{password_str}@{self.host}:{self.port}/{self.database}" + uri = f"postgresql://{self.user}{password_str}@{self.host}:{self.port}/{self.database}?sslmode=require" # Add SSL mode if needed if self.ssl_mode and self.ssl_mode != 'prefer': From ecdae202c7c04d16098b74771677f80936651e12 Mon Sep 17 00:00:00 2001 From: tevko Date: Fri, 9 May 2025 21:36:08 -0500 Subject: [PATCH 15/42] ssl fix --- delphi/polismath/database/postgres.py | 2 -- delphi/umap_narrative/polismath_commentgraph/utils/storage.py | 4 ---- 2 files changed, 6 deletions(-) diff --git a/delphi/polismath/database/postgres.py b/delphi/polismath/database/postgres.py index 0112fe75a9..abe6843ead 100644 --- 
a/delphi/polismath/database/postgres.py +++ b/delphi/polismath/database/postgres.py @@ -135,8 +135,6 @@ def get_uri(self) -> str: uri = f"postgresql://{self.user}{password_str}@{self.host}:{self.port}/{self.database}?sslmode=require" # todo - remove ssl mode if local or dev - if self.ssl_mode and self.ssl_mode != "prefer": - uri += f"?sslmode={self.ssl_mode}" return uri diff --git a/delphi/umap_narrative/polismath_commentgraph/utils/storage.py b/delphi/umap_narrative/polismath_commentgraph/utils/storage.py index fc7e9a9336..ad8c5d1cd1 100644 --- a/delphi/umap_narrative/polismath_commentgraph/utils/storage.py +++ b/delphi/umap_narrative/polismath_commentgraph/utils/storage.py @@ -113,10 +113,6 @@ def get_uri(self) -> str: # Build URI uri = f"postgresql://{self.user}{password_str}@{self.host}:{self.port}/{self.database}?sslmode=require" - # Add SSL mode if needed - if self.ssl_mode and self.ssl_mode != 'prefer': - uri += f"?sslmode={self.ssl_mode}" - return uri @classmethod From efdce6330a1ed934c4a563d2286623e01bb3339b Mon Sep 17 00:00:00 2001 From: tevko Date: Fri, 9 May 2025 22:17:14 -0500 Subject: [PATCH 16/42] update iam roles --- cdk/iamRoles.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cdk/iamRoles.ts b/cdk/iamRoles.ts index a1e080c812..49802c6aff 100644 --- a/cdk/iamRoles.ts +++ b/cdk/iamRoles.ts @@ -31,7 +31,7 @@ export default (self: Construct) => { region: 'us-east-1', account: cdk.Stack.of(self).account, resource: 'table', - resourceName: 'Delphi_JobQueue', + resourceName: 'Delphi_*', }, cdk.Stack.of(self)); const delphiJobQueueTableIndexesArn = `${delphiJobQueueTableArn}/index/*`; @@ -51,5 +51,6 @@ export default (self: Construct) => { delphiJobQueueTableIndexesArn ], })); + return { instanceRole, codeDeployRole } } \ No newline at end of file From c52c5bf986083937d2ba9cc8792ba5bdb79090ce Mon Sep 17 00:00:00 2001 From: tevko Date: Sat, 10 May 2025 09:35:07 -0500 Subject: [PATCH 17/42] remove hardcoded minio from docker --- 
delphi/umap_narrative/700_datamapplot_for_layer.py | 6 ------ delphi/umap_narrative/701_static_datamapplot_for_layer.py | 8 -------- docker-compose.yml | 2 +- 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index 7827c41b52..7b5f77d8ad 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -252,12 +252,6 @@ def setup_environment(db_host=None, db_port=None, db_name=None, db_user=None, db os.environ['AWS_DEFAULT_REGION'] = 'us-east-1' # S3 settings - if not os.environ.get('AWS_S3_ENDPOINT'): - os.environ['AWS_S3_ENDPOINT'] = 'http://localhost:9000' - if not os.environ.get('AWS_S3_ACCESS_KEY_ID'): - os.environ['AWS_S3_ACCESS_KEY_ID'] = 'minioadmin' - if not os.environ.get('AWS_S3_SECRET_ACCESS_KEY'): - os.environ['AWS_S3_SECRET_ACCESS_KEY'] = 'minioadmin' if not os.environ.get('AWS_S3_BUCKET_NAME'): os.environ['AWS_S3_BUCKET_NAME'] = 'delphi' diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index 6c50b358e5..50a5ce2163 100755 --- a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ -414,14 +414,6 @@ def generate_static_datamapplot(zid, layer_num=0, output_dir=None): os.makedirs(container_dir, exist_ok=True) if os.path.exists("/visualizations"): os.makedirs(host_dir, exist_ok=True) - - # Make sure S3 environment variables are set - if not os.environ.get('AWS_S3_ENDPOINT'): - os.environ['AWS_S3_ENDPOINT'] = 'http://localhost:9000' - if not os.environ.get('AWS_S3_ACCESS_KEY_ID'): - os.environ['AWS_S3_ACCESS_KEY_ID'] = 'minioadmin' - if not os.environ.get('AWS_S3_SECRET_ACCESS_KEY'): - os.environ['AWS_S3_SECRET_ACCESS_KEY'] = 'minioadmin' if not os.environ.get('AWS_S3_BUCKET_NAME'): os.environ['AWS_S3_BUCKET_NAME'] = 'delphi' if not 
os.environ.get('AWS_REGION'): diff --git a/docker-compose.yml b/docker-compose.yml index b587a5e9e0..d0204aaf62 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -90,7 +90,7 @@ services: - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-} - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-} # S3 storage for visualization files - - AWS_S3_ENDPOINT=${AWS_S3_ENDPOINT:-http://minio:9000} + - AWS_S3_ENDPOINT=${AWS_S3_ENDPOINT} - AWS_S3_ACCESS_KEY_ID=${AWS_S3_ACCESS_KEY_ID:-minioadmin} - AWS_S3_SECRET_ACCESS_KEY=${AWS_S3_SECRET_ACCESS_KEY:-minioadmin} - AWS_S3_BUCKET_NAME=${AWS_S3_BUCKET_NAME:-delphi} From 8398905af602f8a001e0ab99814d7e4948d0cef4 Mon Sep 17 00:00:00 2001 From: tevko Date: Sat, 10 May 2025 18:19:39 -0500 Subject: [PATCH 18/42] remove s3 check --- delphi/umap_narrative/700_datamapplot_for_layer.py | 4 ---- delphi/umap_narrative/701_static_datamapplot_for_layer.py | 4 ---- 2 files changed, 8 deletions(-) diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index 7b5f77d8ad..3750f7f0f1 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -42,10 +42,6 @@ def s3_upload_file(local_file_path, s3_key): bucket_name = os.environ.get('AWS_S3_BUCKET_NAME') region = os.environ.get('AWS_REGION') - if not all([endpoint_url, access_key, secret_key, bucket_name]): - logger.error("Missing S3 configuration. 
Cannot upload file.") - return False - try: # Create S3 client s3_client = boto3.client( diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index 50a5ce2163..923e0fd32b 100755 --- a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ -276,10 +276,6 @@ def s3_upload_file(local_file_path, s3_key): bucket_name = os.environ.get('AWS_S3_BUCKET_NAME') region = os.environ.get('AWS_REGION') - if not all([endpoint_url, access_key, secret_key, bucket_name]): - logger.error("Missing S3 configuration. Cannot upload file.") - return False - try: # Create S3 client s3_client = boto3.client( From 4959ce9ad5af243c478ffa6ddd59054204ea6657 Mon Sep 17 00:00:00 2001 From: tevko Date: Sat, 10 May 2025 20:10:00 -0500 Subject: [PATCH 19/42] change s3 endpoint to none if missingf --- delphi/umap_narrative/700_datamapplot_for_layer.py | 7 +++++-- delphi/umap_narrative/701_static_datamapplot_for_layer.py | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index 3750f7f0f1..21c0b04597 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -41,6 +41,9 @@ def s3_upload_file(local_file_path, s3_key): secret_key = os.environ.get('AWS_S3_SECRET_ACCESS_KEY') bucket_name = os.environ.get('AWS_S3_BUCKET_NAME') region = os.environ.get('AWS_REGION') + + if endpoint_url == "": + endpoint_url = None try: # Create S3 client @@ -51,8 +54,8 @@ def s3_upload_file(local_file_path, s3_key): aws_secret_access_key=secret_key, region_name=region, # For MinIO/local development, these settings help - config=boto3.session.Config(signature_version='s3v4'), - verify=False + # config=boto3.session.Config(signature_version='s3v4'), + # verify=False ) # Check if bucket exists, 
create if it doesn't diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index 923e0fd32b..9765097305 100755 --- a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ -275,6 +275,9 @@ def s3_upload_file(local_file_path, s3_key): secret_key = os.environ.get('AWS_S3_SECRET_ACCESS_KEY') bucket_name = os.environ.get('AWS_S3_BUCKET_NAME') region = os.environ.get('AWS_REGION') + + if endpoint_url == "": + endpoint_url = None try: # Create S3 client @@ -285,8 +288,8 @@ def s3_upload_file(local_file_path, s3_key): aws_secret_access_key=secret_key, region_name=region, # For MinIO/local development, these settings help - config=boto3.session.Config(signature_version='s3v4'), - verify=False + # config=boto3.session.Config(signature_version='s3v4'), + # verify=False ) # Check if bucket exists, create if it doesn't From c8373482108a44decccee8a161891c945195e7f0 Mon Sep 17 00:00:00 2001 From: tevko Date: Mon, 12 May 2025 17:21:57 -0500 Subject: [PATCH 20/42] more s3 config --- delphi/umap_narrative/700_datamapplot_for_layer.py | 6 ++++++ delphi/umap_narrative/701_static_datamapplot_for_layer.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index 21c0b04597..55598ed0f1 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -44,6 +44,12 @@ def s3_upload_file(local_file_path, s3_key): if endpoint_url == "": endpoint_url = None + + if aws_access_key_id == "": + aws_access_key_id = None + + if aws_secret_access_key == "": + aws_secret_access_key = None try: # Create S3 client diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index 9765097305..de651c7469 100755 --- 
a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ -278,6 +278,12 @@ def s3_upload_file(local_file_path, s3_key): if endpoint_url == "": endpoint_url = None + + if aws_access_key_id == "": + aws_access_key_id = None + + if aws_secret_access_key == "": + aws_secret_access_key = None try: # Create S3 client From 062eab7c5377e6a2c2fd00a2c4410e900aad465b Mon Sep 17 00:00:00 2001 From: tevko Date: Mon, 12 May 2025 21:20:28 -0500 Subject: [PATCH 21/42] fix indentation --- delphi/umap_narrative/700_datamapplot_for_layer.py | 2 +- delphi/umap_narrative/701_static_datamapplot_for_layer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index 55598ed0f1..b6a18104ee 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -45,7 +45,7 @@ def s3_upload_file(local_file_path, s3_key): if endpoint_url == "": endpoint_url = None - if aws_access_key_id == "": + if aws_access_key_id == "": aws_access_key_id = None if aws_secret_access_key == "": diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index de651c7469..4d78ac6488 100755 --- a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ -279,7 +279,7 @@ def s3_upload_file(local_file_path, s3_key): if endpoint_url == "": endpoint_url = None - if aws_access_key_id == "": + if aws_access_key_id == "": aws_access_key_id = None if aws_secret_access_key == "": From be5f941258c8a75d2559aac26dc2e935d8126eaf Mon Sep 17 00:00:00 2001 From: tevko Date: Tue, 13 May 2025 13:07:36 -0500 Subject: [PATCH 22/42] s3 logic fix --- delphi/umap_narrative/700_datamapplot_for_layer.py | 13 +++++++------ .../701_static_datamapplot_for_layer.py 
| 13 +++++++------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index b6a18104ee..82346db0e3 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -45,11 +45,11 @@ def s3_upload_file(local_file_path, s3_key): if endpoint_url == "": endpoint_url = None - if aws_access_key_id == "": - aws_access_key_id = None + if access_key == "": + access_key = None - if aws_secret_access_key == "": - aws_secret_access_key = None + if secret_key == "": + secret_key = None try: # Create S3 client @@ -73,8 +73,9 @@ def s3_upload_file(local_file_path, s3_key): try: # Create the bucket - for MinIO local we don't need LocationConstraint - if region == 'us-east-1' or 'localhost' in endpoint_url or 'minio' in endpoint_url: - s3_client.create_bucket(Bucket=bucket_name) + if endpoint_url: + if region == 'us-east-1' or 'localhost' in endpoint_url or 'minio' in endpoint_url: + s3_client.create_bucket(Bucket=bucket_name) else: s3_client.create_bucket( Bucket=bucket_name, diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index 4d78ac6488..77022d743c 100755 --- a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ -279,11 +279,11 @@ def s3_upload_file(local_file_path, s3_key): if endpoint_url == "": endpoint_url = None - if aws_access_key_id == "": - aws_access_key_id = None + if access_key == "": + access_key = None - if aws_secret_access_key == "": - aws_secret_access_key = None + if secret_key == "": + secret_key = None try: # Create S3 client @@ -307,8 +307,9 @@ def s3_upload_file(local_file_path, s3_key): try: # Create the bucket - for MinIO local we don't need LocationConstraint - if region == 'us-east-1' or 'localhost' in endpoint_url or 'minio' in 
endpoint_url: - s3_client.create_bucket(Bucket=bucket_name) + if endpoint_url: + if region == 'us-east-1' or 'localhost' in endpoint_url or 'minio' in endpoint_url: + s3_client.create_bucket(Bucket=bucket_name) else: s3_client.create_bucket( Bucket=bucket_name, From a197a4371e2947933bce2ddbf9edf5fdd8b0e4e0 Mon Sep 17 00:00:00 2001 From: tevko Date: Tue, 13 May 2025 14:52:29 -0500 Subject: [PATCH 23/42] indentation fix --- delphi/umap_narrative/700_datamapplot_for_layer.py | 2 +- delphi/umap_narrative/701_static_datamapplot_for_layer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index 82346db0e3..c0689df6a8 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -45,7 +45,7 @@ def s3_upload_file(local_file_path, s3_key): if endpoint_url == "": endpoint_url = None - if access_key == "": + if access_key == "": access_key = None if secret_key == "": diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index 77022d743c..bb3ebe260f 100755 --- a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ -279,7 +279,7 @@ def s3_upload_file(local_file_path, s3_key): if endpoint_url == "": endpoint_url = None - if access_key == "": + if access_key == "": access_key = None if secret_key == "": From b5aee63d61d5b0849ed57bea7a21d0eb4d6512bd Mon Sep 17 00:00:00 2001 From: tevko Date: Tue, 13 May 2025 15:39:39 -0500 Subject: [PATCH 24/42] remove env check --- delphi/umap_narrative/700_datamapplot_for_layer.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index c0689df6a8..67fd3dc4a9 100644 --- 
a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -250,10 +250,6 @@ def setup_environment(db_host=None, db_port=None, db_name=None, db_user=None, db # Log the endpoint being used endpoint = os.environ.get('DYNAMODB_ENDPOINT') logger.info(f"Using DynamoDB endpoint: {endpoint}") - if not os.environ.get('AWS_ACCESS_KEY_ID'): - os.environ['AWS_ACCESS_KEY_ID'] = 'fakeMyKeyId' - if not os.environ.get('AWS_SECRET_ACCESS_KEY'): - os.environ['AWS_SECRET_ACCESS_KEY'] = 'fakeSecretAccessKey' if not os.environ.get('AWS_DEFAULT_REGION'): os.environ['AWS_DEFAULT_REGION'] = 'us-east-1' From 279c34bf954c9f006b98c57cecc15d7837a5c517 Mon Sep 17 00:00:00 2001 From: tevko Date: Tue, 13 May 2025 16:47:46 -0500 Subject: [PATCH 25/42] comment out s3 creds --- delphi/umap_narrative/700_datamapplot_for_layer.py | 6 +++--- delphi/umap_narrative/701_static_datamapplot_for_layer.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index 67fd3dc4a9..0ae34eebc8 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -55,9 +55,9 @@ def s3_upload_file(local_file_path, s3_key): # Create S3 client s3_client = boto3.client( 's3', - endpoint_url=endpoint_url, - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, + # endpoint_url=endpoint_url, + # aws_access_key_id=access_key, + # aws_secret_access_key=secret_key, region_name=region, # For MinIO/local development, these settings help # config=boto3.session.Config(signature_version='s3v4'), diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index bb3ebe260f..2789224ca5 100755 --- a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ 
-289,9 +289,9 @@ def s3_upload_file(local_file_path, s3_key): # Create S3 client s3_client = boto3.client( 's3', - endpoint_url=endpoint_url, - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, + # endpoint_url=endpoint_url, + # aws_access_key_id=access_key, + # aws_secret_access_key=secret_key, region_name=region, # For MinIO/local development, these settings help # config=boto3.session.Config(signature_version='s3v4'), From 5852ea514849cf9a12e66dabd8d617517ee14fab Mon Sep 17 00:00:00 2001 From: tevko Date: Tue, 13 May 2025 17:08:53 -0500 Subject: [PATCH 26/42] remove location constraint --- delphi/umap_narrative/700_datamapplot_for_layer.py | 2 +- delphi/umap_narrative/701_static_datamapplot_for_layer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index 0ae34eebc8..86ba5e91fb 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -79,7 +79,7 @@ def s3_upload_file(local_file_path, s3_key): else: s3_client.create_bucket( Bucket=bucket_name, - CreateBucketConfiguration={'LocationConstraint': region} + # CreateBucketConfiguration={'LocationConstraint': region} - not in us-east-1 - but in other regions ) # Apply bucket policy to make objects public-read diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index 2789224ca5..688172643b 100755 --- a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ -313,7 +313,7 @@ def s3_upload_file(local_file_path, s3_key): else: s3_client.create_bucket( Bucket=bucket_name, - CreateBucketConfiguration={'LocationConstraint': region} + # CreateBucketConfiguration={'LocationConstraint': region} - not in us-east-1 - but in other regions ) # Apply bucket policy to make objects 
public-read From bdafb6b8f00ba4e236bbb3fe948b32486bdcbc3f Mon Sep 17 00:00:00 2001 From: tevko Date: Tue, 13 May 2025 17:26:47 -0500 Subject: [PATCH 27/42] unique bucket name --- delphi/umap_narrative/700_datamapplot_for_layer.py | 2 +- delphi/umap_narrative/701_static_datamapplot_for_layer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index 86ba5e91fb..02685438fa 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -255,7 +255,7 @@ def setup_environment(db_host=None, db_port=None, db_name=None, db_user=None, db # S3 settings if not os.environ.get('AWS_S3_BUCKET_NAME'): - os.environ['AWS_S3_BUCKET_NAME'] = 'delphi' + os.environ['AWS_S3_BUCKET_NAME'] = 'polis-delphi' logger.info(f"S3 Storage settings:") logger.info(f"- Endpoint: {os.environ.get('AWS_S3_ENDPOINT')}") diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index 688172643b..7a1f7e6d49 100755 --- a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ -421,7 +421,7 @@ def generate_static_datamapplot(zid, layer_num=0, output_dir=None): if os.path.exists("/visualizations"): os.makedirs(host_dir, exist_ok=True) if not os.environ.get('AWS_S3_BUCKET_NAME'): - os.environ['AWS_S3_BUCKET_NAME'] = 'delphi' + os.environ['AWS_S3_BUCKET_NAME'] = 'polis-delphi' if not os.environ.get('AWS_REGION'): os.environ['AWS_REGION'] = 'us-east-1' From 8571b5e462d7b1d7d669fa4b569f3bbd576eaea3 Mon Sep 17 00:00:00 2001 From: tevko Date: Tue, 13 May 2025 18:01:20 -0500 Subject: [PATCH 28/42] remove public read on aws --- delphi/scripts/job_poller.py | 2 +- delphi/umap_narrative/700_datamapplot_for_layer.py | 6 +++--- delphi/umap_narrative/701_static_datamapplot_for_layer.py | 6 +++--- 3 
files changed, 7 insertions(+), 7 deletions(-) diff --git a/delphi/scripts/job_poller.py b/delphi/scripts/job_poller.py index 0b27ebe245..8c19055254 100755 --- a/delphi/scripts/job_poller.py +++ b/delphi/scripts/job_poller.py @@ -505,7 +505,7 @@ def process_job(self, job): 'visualization_path': f'visualizations/{report_id}/{job_id}', 'report_id': report_id, 'visualization_urls': { - 'interactive': f"{os.environ.get('AWS_S3_ENDPOINT', '')}/{os.environ.get('AWS_S3_BUCKET_NAME', 'delphi')}/visualizations/{report_id}/{job_id}/layer_0_datamapplot.html" + 'interactive': f"{os.environ.get('AWS_S3_ENDPOINT', '')}/{os.environ.get('AWS_S3_BUCKET_NAME', 'polis-delphi')}/visualizations/{report_id}/{job_id}/layer_0_datamapplot.html" } } diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index 02685438fa..c4abad1d4c 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -114,9 +114,9 @@ def s3_upload_file(local_file_path, s3_key): logger.info(f"Uploading {local_file_path} to s3://{bucket_name}/{s3_key}") # For HTML files, set content type correctly - extra_args = { - 'ACL': 'public-read' # Make object publicly readable - } + # extra_args = { + # 'ACL': 'public-read' # Make object publicly readable - we don't want this, hence why we have signed urls + # } # Set the correct content type based on file extension if local_file_path.endswith('.html'): diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index 7a1f7e6d49..02ae19744c 100755 --- a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ -348,9 +348,9 @@ def s3_upload_file(local_file_path, s3_key): logger.info(f"Uploading {local_file_path} to s3://{bucket_name}/{s3_key}") # For HTML files, set content type correctly - extra_args = { - 'ACL': 
'public-read' # Make object publicly readable - } + # extra_args = { + # 'ACL': 'public-read' # Make object publicly readable - probably don't want this + # } # Set the correct content type based on file extension if local_file_path.endswith('.html'): From 7f8291fa30ee22276c821ae8afd4c5afe8ff676e Mon Sep 17 00:00:00 2001 From: tevko Date: Tue, 13 May 2025 18:05:46 -0500 Subject: [PATCH 29/42] fix comment --- delphi/umap_narrative/700_datamapplot_for_layer.py | 4 ++-- delphi/umap_narrative/701_static_datamapplot_for_layer.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index c4abad1d4c..756418b398 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -114,9 +114,9 @@ def s3_upload_file(local_file_path, s3_key): logger.info(f"Uploading {local_file_path} to s3://{bucket_name}/{s3_key}") # For HTML files, set content type correctly - # extra_args = { + extra_args = { # 'ACL': 'public-read' # Make object publicly readable - we don't want this, hence why we have signed urls - # } + } # Set the correct content type based on file extension if local_file_path.endswith('.html'): diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index 02ae19744c..3142a48ffc 100755 --- a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ -348,9 +348,9 @@ def s3_upload_file(local_file_path, s3_key): logger.info(f"Uploading {local_file_path} to s3://{bucket_name}/{s3_key}") # For HTML files, set content type correctly - # extra_args = { + extra_args = { # 'ACL': 'public-read' # Make object publicly readable - probably don't want this - # } + } # Set the correct content type based on file extension if local_file_path.endswith('.html'): From 
99f6d12d47eaefd3ce38594587c19cc58754b0dc Mon Sep 17 00:00:00 2001 From: tevko Date: Tue, 13 May 2025 18:17:42 -0500 Subject: [PATCH 30/42] endpoint urls in s3 --- .../700_datamapplot_for_layer.py | 39 +++++++++++-------- .../701_static_datamapplot_for_layer.py | 39 +++++++++++-------- 2 files changed, 44 insertions(+), 34 deletions(-) diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index 756418b398..52e16e2820 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -132,25 +132,30 @@ def s3_upload_file(local_file_path, s3_key): s3_key, ExtraArgs=extra_args ) + + if endpoint_url: - # Generate a URL for the uploaded file - if endpoint_url.startswith('http://localhost') or endpoint_url.startswith('http://127.0.0.1'): - # For local development with MinIO - url = f"{endpoint_url}/{bucket_name}/{s3_key}" - # Clean up URL if needed - url = url.replace('///', '//') - elif 'minio' in endpoint_url: - # For Docker container access to MinIO - url = f"{endpoint_url}/{bucket_name}/{s3_key}" - url = url.replace('///', '//') - else: - # For AWS S3 - if endpoint_url.startswith('https://s3.'): - # Standard AWS S3 endpoint - url = f"https://{bucket_name}.s3.amazonaws.com/{s3_key}" - else: - # Custom S3 endpoint + # Generate a URL for the uploaded file + if endpoint_url.startswith('http://localhost') or endpoint_url.startswith('http://127.0.0.1'): + # For local development with MinIO + url = f"{endpoint_url}/{bucket_name}/{s3_key}" + # Clean up URL if needed + url = url.replace('///', '//') + elif 'minio' in endpoint_url: + # For Docker container access to MinIO url = f"{endpoint_url}/{bucket_name}/{s3_key}" + url = url.replace('///', '//') + else: + # For AWS S3 + if endpoint_url.startswith('https://s3.'): + # Standard AWS S3 endpoint + url = f"https://{bucket_name}.s3.amazonaws.com/{s3_key}" + else: + # Custom S3 endpoint + url = 
f"{endpoint_url}/{bucket_name}/{s3_key}" + else: + # Custom S3 endpoint + url = f"{bucket_name}/{s3_key}" logger.info(f"File uploaded successfully to {url}") return url diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index 3142a48ffc..50279ecc9d 100755 --- a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ -367,24 +367,29 @@ def s3_upload_file(local_file_path, s3_key): ExtraArgs=extra_args ) - # Generate a URL for the uploaded file - if endpoint_url.startswith('http://localhost') or endpoint_url.startswith('http://127.0.0.1'): - # For local development with MinIO - url = f"{endpoint_url}/{bucket_name}/{s3_key}" - # Clean up URL if needed - url = url.replace('///', '//') - elif 'minio' in endpoint_url: - # For Docker container access to MinIO - url = f"{endpoint_url}/{bucket_name}/{s3_key}" - url = url.replace('///', '//') - else: - # For AWS S3 - if endpoint_url.startswith('https://s3.'): - # Standard AWS S3 endpoint - url = f"https://{bucket_name}.s3.amazonaws.com/{s3_key}" - else: - # Custom S3 endpoint + if endpoint_url: + + # Generate a URL for the uploaded file + if endpoint_url.startswith('http://localhost') or endpoint_url.startswith('http://127.0.0.1'): + # For local development with MinIO + url = f"{endpoint_url}/{bucket_name}/{s3_key}" + # Clean up URL if needed + url = url.replace('///', '//') + elif 'minio' in endpoint_url: + # For Docker container access to MinIO url = f"{endpoint_url}/{bucket_name}/{s3_key}" + url = url.replace('///', '//') + else: + # For AWS S3 + if endpoint_url.startswith('https://s3.'): + # Standard AWS S3 endpoint + url = f"https://{bucket_name}.s3.amazonaws.com/{s3_key}" + else: + # Custom S3 endpoint + url = f"{endpoint_url}/{bucket_name}/{s3_key}" + else: + # Custom S3 endpoint + url = f"{bucket_name}/{s3_key}" logger.info(f"File uploaded successfully to {url}") return 
url From 0fb27b7f4331188e7faf53b6913c451bff050f41 Mon Sep 17 00:00:00 2001 From: tevko Date: Tue, 13 May 2025 18:24:04 -0500 Subject: [PATCH 31/42] remove another dynamo default --- delphi/umap_narrative/702_consensus_divisive_datamapplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/delphi/umap_narrative/702_consensus_divisive_datamapplot.py b/delphi/umap_narrative/702_consensus_divisive_datamapplot.py index 039dc7d414..e67903db21 100755 --- a/delphi/umap_narrative/702_consensus_divisive_datamapplot.py +++ b/delphi/umap_narrative/702_consensus_divisive_datamapplot.py @@ -100,7 +100,7 @@ def load_data_from_dynamodb(zid, layer_num=0): logger.info(f'Loading UMAP positions and cluster data for conversation {zid}, layer {layer_num}') # Set up DynamoDB client - endpoint_url = os.environ.get('DYNAMODB_ENDPOINT', 'http://dynamodb-local:8000') + endpoint_url = os.environ.get('DYNAMODB_ENDPOINT') dynamodb = boto3.resource('dynamodb', endpoint_url=endpoint_url, region_name=os.environ.get('AWS_REGION', 'us-east-1'), From dd1f063ff3e5d502f78643d826749055a7443b49 Mon Sep 17 00:00:00 2001 From: tevko Date: Wed, 14 May 2025 14:58:55 -0500 Subject: [PATCH 32/42] more endpoint config --- .../umap_narrative/702_consensus_divisive_datamapplot.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/delphi/umap_narrative/702_consensus_divisive_datamapplot.py b/delphi/umap_narrative/702_consensus_divisive_datamapplot.py index e67903db21..e09d9bc616 100755 --- a/delphi/umap_narrative/702_consensus_divisive_datamapplot.py +++ b/delphi/umap_narrative/702_consensus_divisive_datamapplot.py @@ -33,8 +33,8 @@ DYNAMODB_CONFIG = { 'endpoint_url': os.environ.get('DYNAMODB_ENDPOINT'), 'region': os.environ.get('AWS_REGION', 'us-east-1'), - 'access_key': os.environ.get('AWS_ACCESS_KEY_ID', 'fakeMyKeyId'), - 'secret_key': os.environ.get('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey') + 'access_key': os.environ.get('AWS_ACCESS_KEY_ID', None), + 
'secret_key': os.environ.get('AWS_SECRET_ACCESS_KEY', None) } # Visualization settings - controls the extremity scale and color mapping @@ -59,7 +59,10 @@ # Define minimal versions of the required classes if imports fail class DynamoDBStorage: def __init__(self, endpoint_url=None): - self.endpoint_url = endpoint_url or DYNAMODB_CONFIG['endpoint_url'] + if endpoint_url: # Checks if endpoint_url is a truthy value (not None, not empty string) + self.endpoint_url = endpoint_url + else: + self.endpoint_url = None self.region = DYNAMODB_CONFIG['region'] self.dynamodb = boto3.resource('dynamodb', endpoint_url=self.endpoint_url, From cf247a30fb71f6dc08b2fc3f99d6837b54feb85e Mon Sep 17 00:00:00 2001 From: tevko Date: Fri, 16 May 2025 14:48:27 -0500 Subject: [PATCH 33/42] lint and test cleanup --- .github/workflows/cypress-tests.yml | 2 +- scripts/application_stop.sh | 11 ----------- scripts/before_install.sh | 3 +++ server/app.ts | 5 ----- server/src/config.ts | 4 ++++ server/src/routes/delphi.ts | 7 ++++--- server/src/routes/delphi/batchReports.ts | 9 +++++---- 7 files changed, 17 insertions(+), 24 deletions(-) diff --git a/.github/workflows/cypress-tests.yml b/.github/workflows/cypress-tests.yml index 7846d23905..b9936a8ba3 100644 --- a/.github/workflows/cypress-tests.yml +++ b/.github/workflows/cypress-tests.yml @@ -27,7 +27,7 @@ jobs: - name: Build and start Docker containers run: | - docker compose -f docker-compose.yml -f docker-compose.test.yml --env-file test.env --profile postgres up -d --build + docker compose -f docker-compose.yml -f docker-compose.test.yml --env-file test.env --profile postgres --profile local-services up -d --build - name: Health Check the Server http response uses: jtalk/url-health-check-action@v4 diff --git a/scripts/application_stop.sh b/scripts/application_stop.sh index 847502cc3a..e107dd744e 100644 --- a/scripts/application_stop.sh +++ b/scripts/application_stop.sh @@ -56,17 +56,6 @@ if [ -d "$DEPLOY_DIR" ]; then echo "Stopping delphi 
service..." /usr/local/bin/docker-compose stop delphi || echo "Warning: Failed to stop delphi service, might already be stopped." - elif [ "$SERVICE_TYPE" == "ollama" ]; then - echo "Stopping standalone ollama container..." - # Ollama runs via `docker run`, not compose on this instance type - # Check if the container exists before trying to stop it - if docker ps -q --filter name=^/ollama$ | grep -q .; then - docker stop ollama || echo "Warning: Failed to stop ollama container, might already be stopped." - else - echo "Ollama container 'ollama' not found or not running." - fi - # No docker-compose actions needed for the ollama instance type - else echo "Warning: Unknown service type '$SERVICE_TYPE' found in $SERVICE_TYPE_FILE. No specific services stopped." # Avoid running a generic 'down' as it might affect unrelated containers if any exist diff --git a/scripts/before_install.sh b/scripts/before_install.sh index 26a04b7849..d471403864 100644 --- a/scripts/before_install.sh +++ b/scripts/before_install.sh @@ -8,4 +8,7 @@ if docker ps -q --filter "name=polis-server" | grep -q .; then fi if docker ps -q --filter "name=polis-math" | grep -q .; then docker stop polis-math-1 +fi +if docker ps -q --filter "name=polis-delphi" | grep -q .; then + docker stop polis-delphi-1 fi \ No newline at end of file diff --git a/server/app.ts b/server/app.ts index 9a6612b32b..a2cad9db23 100644 --- a/server/app.ts +++ b/server/app.ts @@ -791,7 +791,6 @@ helpersInitialized.then( try { handle_POST_delphi_jobs(req, res); } catch (err) { - console.error("Error in delphi jobs creation route:", err); res.json({ status: "error", message: "Internal server error in job creation endpoint", @@ -805,7 +804,6 @@ helpersInitialized.then( try { handle_GET_delphi_reports(req, res); } catch (err) { - console.error("Error in delphi reports route:", err); res.json({ status: "error", message: "Internal server error in reports endpoint", @@ -821,7 +819,6 @@ helpersInitialized.then( try { 
handle_GET_delphi_visualizations(req, res); } catch (err) { - console.error("Error in delphi visualizations route:", err); res.json({ status: "error", message: "Internal server error in visualizations endpoint", @@ -835,7 +832,6 @@ helpersInitialized.then( try { handle_POST_delphi_batch_reports(req, res); } catch (err) { - console.error("Error in delphi batch reports route:", err); res.json({ status: "error", message: "Internal server error in batch reports endpoint", @@ -1586,7 +1582,6 @@ helpersInitialized.then( app.get( /^\/commentsReport\/r?[0-9][0-9A-Za-z]+(\/.*)?/, function(req, res, next) { - console.log("ROUTE DEBUG: CommentsReport route matched!"); return fetchIndexForReportPage(req, res, next); } ); diff --git a/server/src/config.ts b/server/src/config.ts index 96c7182c46..df9bc8ae21 100644 --- a/server/src/config.ts +++ b/server/src/config.ts @@ -115,6 +115,10 @@ export default { useNetworkHost: isTrue(process.env.USE_NETWORK_HOST), webserverPass: process.env.WEBSERVER_PASS as string, webserverUsername: process.env.WEBSERVER_USERNAME as string, + DYNAMODB_ENDPOINT: process.env.DYNAMODB_ENDPOINT, + AWS_REGION: process.env.AWS_REGION, + AWS_ACCESS_KEY_ID: process.env.AWS_ACCESS_KEY_ID, + AWS_SECRET_ACCESS_KEY: process.env.AWS_SECRET_ACCESS_KEY, whitelistItems: [ process.env.DOMAIN_WHITELIST_ITEM_01 || null, diff --git a/server/src/routes/delphi.ts b/server/src/routes/delphi.ts index bbf4fe233f..5e55bf9202 100644 --- a/server/src/routes/delphi.ts +++ b/server/src/routes/delphi.ts @@ -3,6 +3,7 @@ import logger from "../utils/logger"; import { DynamoDBClient, ListTablesCommand } from "@aws-sdk/client-dynamodb"; import { DynamoDBDocumentClient, QueryCommand } from "@aws-sdk/lib-dynamodb"; import { getZidFromReport } from "../utils/parameter"; +import Config from "../config"; /** * Handler for Delphi API route that retrieves LLM topic names from DynamoDB @@ -38,7 +39,7 @@ export function handle_GET_delphi(req: Request, res: Response) { // Force using local 
DynamoDB by hardcoding the endpoint const dynamoDBConfig: any = { - region: process.env.AWS_REGION || "us-east-1", + region: Config.AWS_REGION || "us-east-1", }; // Log what we're using @@ -56,9 +57,9 @@ export function handle_GET_delphi(req: Request, res: Response) { logger.info(`DynamoDB Config: Region: ${dynamoDBConfig.region} Endpoint: ${dynamoDBConfig.endpoint || "Default AWS endpoint"} - AWS_ACCESS_KEY_ID: ${process.env.AWS_ACCESS_KEY_ID ? "Set" : "Not set"} + AWS_ACCESS_KEY_ID: ${Config.AWS_ACCESS_KEY_ID ? "Set" : "Not set"} AWS_SECRET_ACCESS_KEY: ${ - process.env.AWS_SECRET_ACCESS_KEY ? "Set" : "Not set" + Config.AWS_SECRET_ACCESS_KEY ? "Set" : "Not set" } `); diff --git a/server/src/routes/delphi/batchReports.ts b/server/src/routes/delphi/batchReports.ts index d682f53a9b..e3ab3b2c93 100644 --- a/server/src/routes/delphi/batchReports.ts +++ b/server/src/routes/delphi/batchReports.ts @@ -4,14 +4,15 @@ import { DynamoDB } from '@aws-sdk/client-dynamodb'; import { DynamoDBDocument } from '@aws-sdk/lib-dynamodb'; import logger from "../../utils/logger"; import { getZidFromReport } from "../../utils/parameter"; +import Config from "../../config"; // Initialize DynamoDB client const dynamoDbClient = new DynamoDB({ - endpoint: process.env.DYNAMODB_ENDPOINT || 'http://dynamodb:8000', - region: process.env.AWS_REGION || 'us-west-2', + endpoint: Config.DYNAMODB_ENDPOINT || 'http://dynamodb:8000', + region: Config.AWS_REGION || 'us-west-2', credentials: { - accessKeyId: process.env.AWS_ACCESS_KEY_ID || 'DUMMYIDEXAMPLE', - secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || 'DUMMYEXAMPLEKEY', + accessKeyId: Config.AWS_ACCESS_KEY_ID || 'DUMMYIDEXAMPLE', + secretAccessKey: Config.AWS_SECRET_ACCESS_KEY || 'DUMMYEXAMPLEKEY', }, }); From 7ebda5ca84b58211cfb6cd159a0a8975c3ee6c3f Mon Sep 17 00:00:00 2001 From: tevko Date: Fri, 16 May 2025 15:16:59 -0500 Subject: [PATCH 34/42] more lint fix --- server/src/config.ts | 4 ++++ server/src/routes/delphi/jobs.ts | 11 ++++++----- 
server/src/routes/delphi/reports.ts | 3 ++- server/src/routes/delphi/topics.ts | 7 ++++--- server/src/routes/delphi/visualizations.ts | 17 +++++++++-------- 5 files changed, 25 insertions(+), 17 deletions(-) diff --git a/server/src/config.ts b/server/src/config.ts index df9bc8ae21..b9804641b2 100644 --- a/server/src/config.ts +++ b/server/src/config.ts @@ -119,6 +119,10 @@ export default { AWS_REGION: process.env.AWS_REGION, AWS_ACCESS_KEY_ID: process.env.AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY: process.env.AWS_SECRET_ACCESS_KEY, + AWS_S3_ENDPOINT: process.env.AWS_S3_ENDPOINT, + AWS_S3_ACCESS_KEY_ID: process.env.AWS_S3_ACCESS_KEY_ID, + AWS_S3_SECRET_ACCESS_KEY: process.env.AWS_S3_SECRET_ACCESS_KEY, + AWS_S3_BUCKET_NAME: process.env.AWS_S3_BUCKET_NAME, whitelistItems: [ process.env.DOMAIN_WHITELIST_ITEM_01 || null, diff --git a/server/src/routes/delphi/jobs.ts b/server/src/routes/delphi/jobs.ts index 773dd012c2..92ba1208c8 100644 --- a/server/src/routes/delphi/jobs.ts +++ b/server/src/routes/delphi/jobs.ts @@ -4,16 +4,17 @@ import { DynamoDB } from "@aws-sdk/client-dynamodb"; import { DynamoDBDocument } from "@aws-sdk/lib-dynamodb"; import logger from "../../utils/logger"; import { getZidFromReport } from "../../utils/parameter"; +import Config from "../../config"; // Initialize DynamoDB client const dynamoDbClient = new DynamoDB({ // Use environment variables for endpoint and region, or the docker service name - endpoint: process.env.DYNAMODB_ENDPOINT, - region: process.env.AWS_REGION || "us-east-1", + endpoint: Config.DYNAMODB_ENDPOINT, + region: Config.AWS_REGION || "us-east-1", // For local development or Docker container credentials: { - accessKeyId: process.env.AWS_ACCESS_KEY_ID || "DUMMYIDEXAMPLE", - secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || "DUMMYEXAMPLEKEY", + accessKeyId: Config.AWS_ACCESS_KEY_ID || "DUMMYIDEXAMPLE", + secretAccessKey: Config.AWS_SECRET_ACCESS_KEY || "DUMMYEXAMPLEKEY", }, }); @@ -195,7 +196,7 @@ export async function 
handle_POST_delphi_jobs( ? (error as any).code : undefined, details: - process.env.NODE_ENV === "development" ? String(error) : undefined, + Config.NODE_ENV === "development" ? String(error) : undefined, }); } } diff --git a/server/src/routes/delphi/reports.ts b/server/src/routes/delphi/reports.ts index af63ba5ee4..70268f9143 100644 --- a/server/src/routes/delphi/reports.ts +++ b/server/src/routes/delphi/reports.ts @@ -3,6 +3,7 @@ import logger from "../../utils/logger"; import { DynamoDBClient } from "@aws-sdk/client-dynamodb"; import { DynamoDBDocumentClient, ScanCommand } from "@aws-sdk/lib-dynamodb"; import { getZidFromReport } from "../../utils/parameter"; +import Config from "../../config"; /** * Handler for Delphi API route that retrieves LLM-generated reports from DynamoDB @@ -36,7 +37,7 @@ export async function handle_GET_delphi_reports(req: Request, res: Response) { // Force using local DynamoDB by hardcoding the endpoint const dynamoDBConfig: any = { - region: process.env.AWS_REGION || "us-east-1", + region: Config.AWS_REGION || "us-east-1", // Force to use the local DynamoDB endpoint endpoint: "http://dynamodb:8000" }; diff --git a/server/src/routes/delphi/topics.ts b/server/src/routes/delphi/topics.ts index f5e45abcfb..5063379c0a 100644 --- a/server/src/routes/delphi/topics.ts +++ b/server/src/routes/delphi/topics.ts @@ -3,6 +3,7 @@ import logger from "../../utils/logger"; import { DynamoDBClient, ListTablesCommand } from "@aws-sdk/client-dynamodb"; import { DynamoDBDocumentClient, QueryCommand } from "@aws-sdk/lib-dynamodb"; import { getZidFromReport } from "../../utils/parameter"; +import Config from "../../config"; /** * Handler for Delphi API route that retrieves LLM topic names from DynamoDB @@ -38,7 +39,7 @@ export function handle_GET_delphi(req: Request, res: Response) { // Force using local DynamoDB by hardcoding the endpoint const dynamoDBConfig: any = { - region: process.env.AWS_REGION || "us-east-1", + region: Config.AWS_REGION || 
"us-east-1", }; // Log what we're using @@ -56,9 +57,9 @@ export function handle_GET_delphi(req: Request, res: Response) { logger.info(`DynamoDB Config: Region: ${dynamoDBConfig.region} Endpoint: ${dynamoDBConfig.endpoint || "Default AWS endpoint"} - AWS_ACCESS_KEY_ID: ${process.env.AWS_ACCESS_KEY_ID ? "Set" : "Not set"} + AWS_ACCESS_KEY_ID: ${Config.AWS_ACCESS_KEY_ID ? "Set" : "Not set"} AWS_SECRET_ACCESS_KEY: ${ - process.env.AWS_SECRET_ACCESS_KEY ? "Set" : "Not set" + Config.AWS_SECRET_ACCESS_KEY ? "Set" : "Not set" } `); diff --git a/server/src/routes/delphi/visualizations.ts b/server/src/routes/delphi/visualizations.ts index b4b541c435..44b3342002 100644 --- a/server/src/routes/delphi/visualizations.ts +++ b/server/src/routes/delphi/visualizations.ts @@ -13,6 +13,7 @@ import { GetObjectCommand, } from "@aws-sdk/client-s3"; import { getSignedUrl } from "@aws-sdk/s3-request-presigner"; +import Config from "../../config"; /** * Handler for Delphi API route that retrieves visualization information @@ -63,11 +64,11 @@ export async function handle_GET_delphi_visualizations( // Configure S3 client const s3Config: any = { - region: process.env.AWS_REGION || "us-east-1", - endpoint: process.env.AWS_S3_ENDPOINT || "http://minio:9000", + region: Config.AWS_REGION || "us-east-1", + endpoint: Config.AWS_S3_ENDPOINT || "http://minio:9000", credentials: { - accessKeyId: process.env.AWS_S3_ACCESS_KEY_ID || "minioadmin", - secretAccessKey: process.env.AWS_S3_SECRET_ACCESS_KEY || "minioadmin", + accessKeyId: Config.AWS_S3_ACCESS_KEY_ID || "minioadmin", + secretAccessKey: Config.AWS_S3_SECRET_ACCESS_KEY || "minioadmin", }, forcePathStyle: true, // Required for MinIO }; @@ -76,7 +77,7 @@ export async function handle_GET_delphi_visualizations( logger.info(`S3 Config: Endpoint: ${s3Config.endpoint} Region: ${s3Config.region} - Bucket: ${process.env.AWS_S3_BUCKET_NAME || "delphi"} + Bucket: ${Config.AWS_S3_BUCKET_NAME || "polis-delphi"} `); // Create S3 client @@ -93,7 +94,7 @@ 
export async function handle_GET_delphi_visualizations( }); } - const bucketName = process.env.AWS_S3_BUCKET_NAME || "delphi"; + const bucketName = Config.AWS_S3_BUCKET_NAME || "polis-delphi"; // Define S3 path prefix to search // Use conversation_id instead of report_id since files are stored by conversation_id @@ -137,7 +138,7 @@ export async function handle_GET_delphi_visualizations( ); if (s3Response.Contents && s3Response.Contents.length > 0) { // Log first few keys for debugging - const keys = s3Response.Contents.slice(0, 3).map((obj) => obj.Key); + const keys = s3Response.Contents.slice(0, 3).map((obj: { Key: string}) => obj.Key); logger.info(`Sample object keys: ${JSON.stringify(keys)}`); } } catch (s3Error: any) { @@ -304,7 +305,7 @@ async function fetchJobMetadata( try { // Configure DynamoDB client const dynamoDBConfig: any = { - region: process.env.AWS_REGION || "us-east-1", + region: Config.AWS_REGION || "us-east-1", credentials: { accessKeyId: "DUMMYIDEXAMPLE", secretAccessKey: "DUMMYEXAMPLEKEY", From 3d7bf405ec94d6d098da5b83f0c61e61c0256fd2 Mon Sep 17 00:00:00 2001 From: tevko Date: Fri, 16 May 2025 15:21:13 -0500 Subject: [PATCH 35/42] defaults fixes --- server/src/routes/delphi.ts | 4 ++-- server/src/routes/delphi/batchReports.ts | 8 ++++---- server/src/routes/delphi/jobs.ts | 2 +- server/src/routes/delphi/reports.ts | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/server/src/routes/delphi.ts b/server/src/routes/delphi.ts index 5e55bf9202..480bbd760f 100644 --- a/server/src/routes/delphi.ts +++ b/server/src/routes/delphi.ts @@ -49,8 +49,8 @@ export function handle_GET_delphi(req: Request, res: Response) { // For local DynamoDB, use dummy credentials dynamoDBConfig.credentials = { - accessKeyId: "DUMMYIDEXAMPLE", - secretAccessKey: "DUMMYEXAMPLEKEY", + accessKeyId: Config.AWS_ACCESS_KEY_ID, + secretAccessKey: Config.AWS_SECRET_ACCESS_KEY, }; // Log connection config for debugging diff --git 
a/server/src/routes/delphi/batchReports.ts b/server/src/routes/delphi/batchReports.ts index e3ab3b2c93..0125d367a0 100644 --- a/server/src/routes/delphi/batchReports.ts +++ b/server/src/routes/delphi/batchReports.ts @@ -8,11 +8,11 @@ import Config from "../../config"; // Initialize DynamoDB client const dynamoDbClient = new DynamoDB({ - endpoint: Config.DYNAMODB_ENDPOINT || 'http://dynamodb:8000', - region: Config.AWS_REGION || 'us-west-2', + endpoint: Config.DYNAMODB_ENDPOINT as string, + region: Config.AWS_REGION as string, credentials: { - accessKeyId: Config.AWS_ACCESS_KEY_ID || 'DUMMYIDEXAMPLE', - secretAccessKey: Config.AWS_SECRET_ACCESS_KEY || 'DUMMYEXAMPLEKEY', + accessKeyId: Config.AWS_ACCESS_KEY_ID as string, + secretAccessKey: Config.AWS_SECRET_ACCESS_KEY as string, }, }); diff --git a/server/src/routes/delphi/jobs.ts b/server/src/routes/delphi/jobs.ts index 92ba1208c8..73cc2b9e62 100644 --- a/server/src/routes/delphi/jobs.ts +++ b/server/src/routes/delphi/jobs.ts @@ -196,7 +196,7 @@ export async function handle_POST_delphi_jobs( ? (error as any).code : undefined, details: - Config.NODE_ENV === "development" ? String(error) : undefined, + Config.nodeEnv === "development" ? 
String(error) : undefined, }); } } diff --git a/server/src/routes/delphi/reports.ts b/server/src/routes/delphi/reports.ts index 70268f9143..cb67161d29 100644 --- a/server/src/routes/delphi/reports.ts +++ b/server/src/routes/delphi/reports.ts @@ -39,7 +39,7 @@ export async function handle_GET_delphi_reports(req: Request, res: Response) { const dynamoDBConfig: any = { region: Config.AWS_REGION || "us-east-1", // Force to use the local DynamoDB endpoint - endpoint: "http://dynamodb:8000" + endpoint: Config.DYNAMODB_ENDPOINT, }; // Log what we're using @@ -49,8 +49,8 @@ export async function handle_GET_delphi_reports(req: Request, res: Response) { // For local DynamoDB, use dummy credentials dynamoDBConfig.credentials = { - accessKeyId: 'DUMMYIDEXAMPLE', - secretAccessKey: 'DUMMYEXAMPLEKEY' + accessKeyId: Config.AWS_ACCESS_KEY_ID, + secretAccessKey: Config.AWS_SECRET_ACCESS_KEY }; // Create DynamoDB clients From 271b53b4fd86d44b889d8a1a5b299994810ff6fd Mon Sep 17 00:00:00 2001 From: tevko Date: Mon, 19 May 2025 14:59:10 -0500 Subject: [PATCH 36/42] tsc fix --- server/src/routes/delphi/visualizations.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/routes/delphi/visualizations.ts b/server/src/routes/delphi/visualizations.ts index 44b3342002..949998fa7f 100644 --- a/server/src/routes/delphi/visualizations.ts +++ b/server/src/routes/delphi/visualizations.ts @@ -138,7 +138,7 @@ export async function handle_GET_delphi_visualizations( ); if (s3Response.Contents && s3Response.Contents.length > 0) { // Log first few keys for debugging - const keys = s3Response.Contents.slice(0, 3).map((obj: { Key: string}) => obj.Key); + const keys = s3Response.Contents.slice(0, 3).map(obj => obj.Key); logger.info(`Sample object keys: ${JSON.stringify(keys)}`); } } catch (s3Error: any) { From d127c9c4cb34cebe08488416e857c76203425bbb Mon Sep 17 00:00:00 2001 From: tevko Date: Mon, 19 May 2025 16:42:34 -0500 Subject: [PATCH 37/42] attempt free up disk space ---
.github/workflows/cypress-tests.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/cypress-tests.yml b/.github/workflows/cypress-tests.yml index b9936a8ba3..2b677d4ce5 100644 --- a/.github/workflows/cypress-tests.yml +++ b/.github/workflows/cypress-tests.yml @@ -22,6 +22,23 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up runner space + run: | + echo "Initial disk space:" + df -h + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + # sudo rm -rf "$AGENT_TOOLSDIRECTORY" + + sudo apt-get clean + + # Prune Docker system aggressively + docker system prune -af --volumes + + echo "Disk space after cleanup:" + df -h + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 From f2ae529f7e43b39400da4596ad556bb4e8178fb6 Mon Sep 17 00:00:00 2001 From: tevko Date: Mon, 19 May 2025 19:20:09 -0500 Subject: [PATCH 38/42] multi stage builds --- delphi/Dockerfile | 64 ++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/delphi/Dockerfile b/delphi/Dockerfile index d573fb12d0..9620b584ad 100644 --- a/delphi/Dockerfile +++ b/delphi/Dockerfile @@ -1,56 +1,57 @@ -# Use an official Python runtime as a parent image -# Using 3.12 as recommended in QUICK_START.md -FROM python:3.12-slim - -# Set environment variables -# Prevent Python from writing pyc files to disc +FROM python:3.12-slim AS builder ENV PYTHONDONTWRITEBYTECODE=1 -# Ensure Python output is sent straight to terminal ENV PYTHONUNBUFFERED=1 -# Install dependencies needed for building packages and cloning evoc RUN apt-get update && \ - apt-get install -y git build-essential cmake \ - gcc g++ gfortran libopenblas-dev curl && \ - apt-get clean && \ + apt-get install -y --no-install-recommends \ + git \ + build-essential \ + cmake \ + gcc \ + g++ \ + gfortran \ + libopenblas-dev \ + curl \ + && apt-get clean && \ rm -rf /var/lib/apt/lists/* - -# Set the working 
directory in the container WORKDIR /app -# Copy requirements file COPY requirements.txt . -# Install dependencies +# Install Python dependencies RUN pip install --no-cache-dir -r requirements.txt -# Install additional dependencies needed for the orchestrator and make sure they're in the path RUN pip install --no-cache-dir colorlog fastapi==0.115.0 pydantic -# Verify FastAPI installation -RUN pip list | grep fastapi -# Clone and install evoc RUN git clone https://github.com/TutteInstitute/evoc && \ cd evoc && \ - pip install . + pip install --no-cache-dir . + +FROM python:3.12-slim AS final + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 -# Copy all the necessary files for the application +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + curl \ + && apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages +COPY --from=builder /usr/local/bin /usr/local/bin COPY polismath/ ./polismath/ -COPY scripts/ ./scripts/ +COPY scripts/ ./scripts/ # This copies your local 'scripts' dir to /app/scripts/ COPY umap_narrative/ ./umap_narrative/ -# Create data directory RUN mkdir -p data -# Make port 8080 available to the world outside this container -# Default port seems to be 8080 based on server.py -# Use ARG/ENV to make this configurable if needed EXPOSE 8080 -# Set PYTHONPATH to include current directory ENV PYTHONPATH "${PYTHONPATH}:/app" -# Copy the required scripts and make them executable COPY start_poller.sh . COPY run_delphi.sh . COPY create_dynamodb_tables.py . @@ -58,11 +59,6 @@ COPY setup_minio.py . COPY scripts/setup_ollama.sh ./setup_ollama.sh RUN chmod +x start_poller.sh run_delphi.sh setup_ollama.sh -# Command to: -# 1. Initialize DynamoDB tables -# 2. Set up the Ollama model based on environment variables -# 3. 
Start the job poller with a 2-second polling interval - CMD ["bash", "-c", "\ echo 'Ensuring DynamoDB tables are set up (runs in all environments)...'; \ python create_dynamodb_tables.py && \ @@ -78,4 +74,4 @@ CMD ["bash", "-c", "\ \ echo 'Starting job poller...' && \ python delphi/scripts/job_poller.py --interval=2\ -"] +"] \ No newline at end of file From 8417d1d566989f1b2aaefd901fd7a9b7bb616c16 Mon Sep 17 00:00:00 2001 From: tevko Date: Mon, 19 May 2025 20:00:17 -0500 Subject: [PATCH 39/42] more space optimization --- .github/workflows/cypress-tests.yml | 8 ++++++++ delphi/Dockerfile | 1 + 2 files changed, 9 insertions(+) diff --git a/.github/workflows/cypress-tests.yml b/.github/workflows/cypress-tests.yml index 2b677d4ce5..5b161c5bef 100644 --- a/.github/workflows/cypress-tests.yml +++ b/.github/workflows/cypress-tests.yml @@ -19,6 +19,14 @@ jobs: cypress-run: runs-on: ubuntu-latest steps: + - name: Maximize Build Space + uses: easimon/maximize-build-space@v10 + with: + root-reserve-mb: 8192 + remove-dotnet: 'true' + remove-android: 'true' + remove-haskell: 'true' + remove-docker-images: 'true' - name: Checkout uses: actions/checkout@v4 diff --git a/delphi/Dockerfile b/delphi/Dockerfile index f1b429265f..ac5bea81c0 100644 --- a/delphi/Dockerfile +++ b/delphi/Dockerfile @@ -20,6 +20,7 @@ WORKDIR /app COPY requirements.txt . 
# Install Python dependencies +RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch torchvision torchaudio RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir colorlog fastapi==0.115.0 pydantic From 70f30ac9e21958a911fc5bd091d5ad85d288283a Mon Sep 17 00:00:00 2001 From: tevko Date: Mon, 19 May 2025 20:18:06 -0500 Subject: [PATCH 40/42] more optimization --- .github/workflows/cypress-tests.yml | 30 ++--- delphi/Dockerfile | 176 ++++++++++++++++------------ delphi/requirements.txt | 2 +- 3 files changed, 108 insertions(+), 100 deletions(-) diff --git a/.github/workflows/cypress-tests.yml b/.github/workflows/cypress-tests.yml index 5b161c5bef..ee6db76b61 100644 --- a/.github/workflows/cypress-tests.yml +++ b/.github/workflows/cypress-tests.yml @@ -19,32 +19,20 @@ jobs: cypress-run: runs-on: ubuntu-latest steps: - - name: Maximize Build Space - uses: easimon/maximize-build-space@v10 - with: - root-reserve-mb: 8192 - remove-dotnet: 'true' - remove-android: 'true' - remove-haskell: 'true' - remove-docker-images: 'true' - name: Checkout uses: actions/checkout@v4 - - name: Clean up runner space + - name: Clean up runner space (Targeted) run: | - echo "Initial disk space:" + echo "Initial disk space (before cleanup):" df -h - sudo rm -rf /usr/share/dotnet - sudo rm -rf /opt/ghc - sudo rm -rf "/usr/local/share/boost" - # sudo rm -rf "$AGENT_TOOLSDIRECTORY" - - sudo apt-get clean - - # Prune Docker system aggressively - docker system prune -af --volumes - - echo "Disk space after cleanup:" + echo "Removing large pre-installed software..." + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/share/boost "$AGENT_TOOLSDIRECTORY" /opt/hostedtoolcache /usr/local/lib/android/* || echo "Some paths not found or removal failed, continuing." + echo "Cleaning apt cache..." + sudo apt-get clean -y || echo "apt-get clean failed" + echo "Pruning Docker system..." 
+ docker system prune -af --volumes || echo "docker system prune failed" + echo "Disk space after targeted cleanup:" df -h - name: Set up Docker Buildx diff --git a/delphi/Dockerfile b/delphi/Dockerfile index ac5bea81c0..e4130168e3 100644 --- a/delphi/Dockerfile +++ b/delphi/Dockerfile @@ -1,79 +1,99 @@ -FROM python:3.12-slim AS builder -ENV PYTHONDONTWRITEBYTECODE=1 -ENV PYTHONUNBUFFERED=1 +# ---- Stage 1: Builder ---- + FROM python:3.12-slim AS builder -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - git \ - build-essential \ - cmake \ - gcc \ - g++ \ - gfortran \ - libopenblas-dev \ - curl \ - && apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR /app - -COPY requirements.txt . - -# Install Python dependencies -RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch torchvision torchaudio -RUN pip install --no-cache-dir -r requirements.txt -RUN pip install --no-cache-dir colorlog fastapi==0.115.0 pydantic - -RUN git clone https://github.com/TutteInstitute/evoc && \ - cd evoc && \ - pip install --no-cache-dir . - -FROM python:3.12-slim AS final - -ENV PYTHONDONTWRITEBYTECODE=1 -ENV PYTHONUNBUFFERED=1 - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - curl \ - && apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR /app - -COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages -COPY --from=builder /usr/local/bin /usr/local/bin -COPY polismath/ ./polismath/ -COPY scripts/ ./scripts/ -COPY umap_narrative/ ./umap_narrative/ - -RUN mkdir -p data - -EXPOSE 8080 - -ENV PYTHONPATH "${PYTHONPATH}:/app" - -COPY start_poller.sh . -COPY run_delphi.sh . -COPY run_delphi.py . -COPY create_dynamodb_tables.py . -COPY setup_minio.py . 
-COPY scripts/setup_ollama.sh ./setup_ollama.sh -RUN chmod +x start_poller.sh run_delphi.sh run_delphi.py setup_ollama.sh - -CMD ["bash", "-c", "\ - echo 'Ensuring DynamoDB tables are set up (runs in all environments)...'; \ - python create_dynamodb_tables.py && \ - echo 'DynamoDB table setup script finished.'; \ - \ - if [ -n \"${DYNAMODB_ENDPOINT}\" ]; then \ - echo 'DYNAMODB_ENDPOINT is set (value: \"${DYNAMODB_ENDPOINT}\"), assuming local/dev environment. Running additional local setup scripts...'; \ - echo 'Setting up MinIO bucket...' && python setup_minio.py && \ - echo 'Setting up Ollama model (local script)...' && ./setup_ollama.sh; \ - else \ - echo 'DYNAMODB_ENDPOINT is not set, assuming production-like environment. Skipping MinIO and local Ollama setup scripts.'; \ - fi && \ - \ - echo 'Starting job poller...' && \ - python scripts/job_poller.py --interval=2\ -"] + ENV PYTHONDONTWRITEBYTECODE=1 + ENV PYTHONUNBUFFERED=1 + + RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + git \ + build-essential \ + cmake \ + gcc \ + g++ \ + gfortran \ + libopenblas-dev \ + curl \ + && apt-get clean && \ + rm -rf /var/lib/apt/lists/* + + WORKDIR /app + + COPY requirements.txt . 
+ + RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu \ + torch==2.3.1+cpu \ + torchvision==0.18.1+cpu \ + torchaudio==2.3.1+cpu + + RUN pip install --no-cache-dir -r requirements.txt + + RUN pip install --no-cache-dir colorlog fastapi==0.115.0 pydantic + + RUN echo "--- PyTorch Check (after requirements.txt) ---" && \ + pip show torch torchvision torchaudio && \ + python -c "import torch; print(f'Torch version: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}')" && \ + echo "--- Looking for NVIDIA/CUDA libs ---" && \ + (ls -lhR /usr/local/lib/python3.12/site-packages/nvidia || echo "NVIDIA directory not found.") && \ + (ls -lhR /usr/local/lib/python3.12/site-packages/torch/lib/*cuda* || echo "No CUDA libs in torch/lib.") + + + RUN git clone https://github.com/TutteInstitute/evoc && \ + cd evoc && \ + pip install --no-cache-dir . + + RUN echo "--- PyTorch Check (after evoc install) ---" && \ + pip show torch torchvision torchaudio && \ + python -c "import torch; print(f'Torch version: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}')" && \ + echo "--- Looking for NVIDIA/CUDA libs (after evoc) ---" && \ + (ls -lhR /usr/local/lib/python3.12/site-packages/nvidia || echo "NVIDIA directory not found.") && \ + (ls -lhR /usr/local/lib/python3.12/site-packages/torch/lib/*cuda* || echo "No CUDA libs in torch/lib.") + + FROM python:3.12-slim AS final + + ENV PYTHONDONTWRITEBYTECODE=1 + ENV PYTHONUNBUFFERED=1 + + RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + curl \ + && apt-get clean && \ + rm -rf /var/lib/apt/lists/* + + WORKDIR /app + + COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages + COPY --from=builder /usr/local/bin /usr/local/bin # For any CLI tools installed by pip + + COPY polismath/ ./polismath/ + COPY scripts/ ./scripts/ + COPY umap_narrative/ ./umap_narrative/ + + RUN mkdir -p data + EXPOSE 8080 + ENV 
PYTHONPATH "${PYTHONPATH}:/app" + + COPY start_poller.sh . + COPY run_delphi.sh . + COPY run_delphi.py . + COPY create_dynamodb_tables.py . + COPY setup_minio.py . + COPY scripts/setup_ollama.sh ./setup_ollama.sh + RUN chmod +x start_poller.sh run_delphi.sh run_delphi.py setup_ollama.sh + + CMD ["bash", "-c", "\ + echo 'Ensuring DynamoDB tables are set up (runs in all environments)...'; \ + python create_dynamodb_tables.py && \ + echo 'DynamoDB table setup script finished.'; \ + \ + if [ -n \"${DYNAMODB_ENDPOINT}\" ]; then \ + echo 'DYNAMODB_ENDPOINT is set (value: \"${DYNAMODB_ENDPOINT}\"), assuming local/dev environment. Running additional local setup scripts...'; \ + echo 'Setting up MinIO bucket...' && python setup_minio.py && \ + echo 'Setting up Ollama model (local script)...' && ./setup_ollama.sh; \ + else \ + echo 'DYNAMODB_ENDPOINT is not set, assuming production-like environment. Skipping MinIO and local Ollama setup scripts.'; \ + fi && \ + \ + echo 'Starting job poller...' && \ + python scripts/job_poller.py --interval=2\ + "] \ No newline at end of file diff --git a/delphi/requirements.txt b/delphi/requirements.txt index d277ad770d..0c299b3ee7 100644 --- a/delphi/requirements.txt +++ b/delphi/requirements.txt @@ -18,7 +18,7 @@ colorlog>=6.9.0 umap-learn>=0.5.2 sentence-transformers>=2.2.0 hdbscan>=0.8.40 -torch>=1.11.0 +# torch>=1.11.0 numba>=0.56.4 llvmlite>=0.39.0 From b9afc0492cfe464f989e6b6e709d5b0e7b3b8c1e Mon Sep 17 00:00:00 2001 From: tevko Date: Mon, 19 May 2025 20:58:02 -0500 Subject: [PATCH 41/42] debug --- delphi/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/delphi/Dockerfile b/delphi/Dockerfile index e4130168e3..133e81fdca 100644 --- a/delphi/Dockerfile +++ b/delphi/Dockerfile @@ -63,7 +63,7 @@ WORKDIR /app COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages - COPY --from=builder /usr/local/bin /usr/local/bin # For any CLI tools installed by pip + # COPY 
--from=builder /usr/local/bin /usr/local/bin # For any CLI tools installed by pip COPY polismath/ ./polismath/ COPY scripts/ ./scripts/ From c187a8b2a4d14d7462f6b6bb769810ab16b19dfe Mon Sep 17 00:00:00 2001 From: tevko Date: Mon, 19 May 2025 21:22:49 -0500 Subject: [PATCH 42/42] remove commented out docker code, readme and makefile updates --- Makefile | 2 +- README.md | 2 +- delphi/Dockerfile | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index a4f32c4e39..7d434d6518 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ DETACH ?= false DETACH_ARG = $(if $(filter true,$(DETACH)),-d,) # Default compose file args -export COMPOSE_FILE_ARGS = -f docker-compose.yml -f docker-compose.dev.yml +export COMPOSE_FILE_ARGS = -f docker-compose.yml -f docker-compose.dev.yml --profile local-services COMPOSE_FILE_ARGS += $(if $(POSTGRES_DOCKER),--profile postgres,) # Set up environment-specific values diff --git a/README.md b/README.md index 781221325f..77ff539c64 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ If you're trying to set up a Polis deployment or development environment, then p Polis comes with Docker infrastructure for running a complete system, whether for a [production deployment](#-production-deployment) or a [development environment](#-development-tooling) (details for each can be found in later sections of this document). As a consequence, the only prerequisite to running Polis is that you install a recent `docker` (and Docker Desktop if you are on Mac or Windows). -If you aren't able to use Docker for some reason, the various Dockerfiles found in subdirectories (`math`, `server`, `*-client`) of this repository _can_ be used as a reference for how you'd set up a system manually. +If you aren't able to use Docker for some reason, the various Dockerfiles found in subdirectories (`math`, `server`, `delphi`, `*-client`) of this repository _can_ be used as a reference for how you'd set up a system manually. 
If you're interested in doing the legwork to support alternative infrastructure, please [let us know in an issue](https://github.com/compdemocracy.org/issues). ### Quick Start diff --git a/delphi/Dockerfile b/delphi/Dockerfile index 133e81fdca..f6e68a99ad 100644 --- a/delphi/Dockerfile +++ b/delphi/Dockerfile @@ -63,7 +63,6 @@ WORKDIR /app COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages - # COPY --from=builder /usr/local/bin /usr/local/bin # For any CLI tools installed by pip COPY polismath/ ./polismath/ COPY scripts/ ./scripts/