diff --git a/.github/workflows/cypress-tests.yml b/.github/workflows/cypress-tests.yml index 7846d23905..ee6db76b61 100644 --- a/.github/workflows/cypress-tests.yml +++ b/.github/workflows/cypress-tests.yml @@ -22,12 +22,25 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Clean up runner space (Targeted) + run: | + echo "Initial disk space (before cleanup):" + df -h + echo "Removing large pre-installed software..." + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/share/boost "$AGENT_TOOLSDIRECTORY" /opt/hostedtoolcache /usr/local/lib/android/* || echo "Some paths not found or removal failed, continuing." + echo "Cleaning apt cache..." + sudo apt-get clean -y || echo "apt-get clean failed" + echo "Pruning Docker system..." + docker system prune -af --volumes || echo "docker system prune failed" + echo "Disk space after targeted cleanup:" + df -h + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Build and start Docker containers run: | - docker compose -f docker-compose.yml -f docker-compose.test.yml --env-file test.env --profile postgres up -d --build + docker compose -f docker-compose.yml -f docker-compose.test.yml --env-file test.env --profile postgres --profile local-services up -d --build - name: Health Check the Server http response uses: jtalk/url-health-check-action@v4 diff --git a/Makefile b/Makefile index a4f32c4e39..7d434d6518 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ DETACH ?= false DETACH_ARG = $(if $(filter true,$(DETACH)),-d,) # Default compose file args -export COMPOSE_FILE_ARGS = -f docker-compose.yml -f docker-compose.dev.yml +export COMPOSE_FILE_ARGS = -f docker-compose.yml -f docker-compose.dev.yml --profile local-services COMPOSE_FILE_ARGS += $(if $(POSTGRES_DOCKER),--profile postgres,) # Set up environment-specific values diff --git a/README.md b/README.md index 8e9e4b0ec7..77ff539c64 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ If you're trying to set up a Polis deployment or development environment, then p Polis comes with Docker infrastructure for running a complete system, whether for a [production deployment](#-production-deployment) or a [development environment](#-development-tooling) (details for each can be found in later sections of this document). As a consequence, the only prerequisite to running Polis is that you install a recent `docker` (and Docker Desktop if you are on Mac or Windows). -If you aren't able to use Docker for some reason, the various Dockerfiles found in subdirectories (`math`, `server`, `*-client`) of this repository _can_ be used as a reference for how you'd set up a system manually. +If you aren't able to use Docker for some reason, the various Dockerfiles found in subdirectories (`math`, `server`, `delphi`, `*-client`) of this repository _can_ be used as a reference for how you'd set up a system manually. If you're interested in doing the legwork to support alternative infrastructure, please [let us know in an issue](https://github.com/compdemocracy.org/issues). ### Quick Start @@ -79,7 +79,7 @@ cp example.env .env ```sh -docker compose --profile postgres up --build +docker compose --profile postgres --profile local-services up --build ``` If you get a permission error, try running this command with `sudo`. @@ -89,7 +89,7 @@ To avoid having to use `sudo` in the future (on a Linux or Windows machine with Once you've built the docker images, you can run without `--build`, which may be faster. 
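To preview exactly which services a given set of profile flags will start, you can ask Compose to list them (this is the standard `config --services` subcommand, shown here with the same flags used above):

```sh
docker compose --profile postgres --profile local-services config --services
```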
Run ```sh -docker compose --profile postgres up +docker compose --profile postgres --profile local-services up ``` or simply @@ -105,14 +105,14 @@ If you have only changed configuration values in .env, you can recreate your con fully rebuilding them with `--force-recreate`. For example: ```sh -docker compose --profile postgres down -docker compose --profile postgres up --force-recreate +docker compose --profile postgres --profile local-services down +docker compose --profile postgres --profile local-services up --force-recreate ``` To see what the environment of your containers is going to look like, run: ```sh -docker compose --profile postgres convert +docker compose --profile postgres --profile local-services convert ``` #### Using a local or remote (non-docker) database @@ -139,6 +139,22 @@ make PROD start make PROD start-rebuild ``` +### Running without Local Cloud Service Emulators +If you want to run the stack without the local MinIO and DynamoDB services (e.g., to test connecting to real AWS services configured in your `.env` file), simply omit the `--profile local-services` flag. + +Example: Run with the containerized DB but connect to external/real cloud services: + +```sh +docker compose --profile postgres up +``` + +Example: Run with an external DB and external/real cloud services (closest to production): + +```sh +docker compose up +``` + + ### Testing out your instance You can now test your setup by visiting `http://localhost:80/home`. @@ -146,7 +162,7 @@ Once the index page loads, you can create an account using the `/createuser` path. You'll be logged in right away; email validation is not required. -When you're done working, you can end the process using `Ctrl+C`, or typing `docker compose --profile postgres down` +When you're done working, you can end the process using `Ctrl+C`, or by typing `docker compose --profile postgres --profile local-services down` if you are running in "detached mode". ### Updating the system @@ -227,7 +243,7 @@ git config --local include.path ../.gitconfig #### Running as a background process -If you would like to run docker compose as a background process, run the `up` commands with the `--detach` flag, and use `docker compose --profile postgres down` to stop. +If you would like to run docker compose as a background process, run the `up` commands with the `--detach` flag, and use `docker compose --profile postgres --profile local-services down` to stop. 
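While the stack is running detached, the usual Compose commands work for inspecting it, for example `ps` for container status and `logs` for output (service names are whatever `config --services` reports for your profiles):

```sh
docker compose --profile postgres --profile local-services ps
docker compose --profile postgres --profile local-services logs -f
```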
#### Using Docker Machine as your development environment diff --git a/cdk/autoscaling.ts b/cdk/autoscaling.ts new file mode 100644 index 0000000000..24404e5fac --- /dev/null +++ b/cdk/autoscaling.ts @@ -0,0 +1,153 @@ + +import { Construct } from "constructs"; +import * as ec2 from 'aws-cdk-lib/aws-ec2'; +import * as autoscaling from 'aws-cdk-lib/aws-autoscaling'; +import * as cdk from 'aws-cdk-lib'; +import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; +import * as cloudwatch_actions from 'aws-cdk-lib/aws-cloudwatch-actions'; + +export default ( + self: Construct, + vpc: cdk.aws_ec2.Vpc, + instanceRole: cdk.aws_iam.Role, + ollamaLaunchTemplate: cdk.aws_ec2.LaunchTemplate, + logGroup: cdk.aws_logs.LogGroup, + fileSystem: cdk.aws_efs.FileSystem, + webLaunchTemplate: cdk.aws_ec2.LaunchTemplate, + mathWorkerLaunchTemplate: cdk.aws_ec2.LaunchTemplate, + delphiSmallLaunchTemplate: cdk.aws_ec2.LaunchTemplate, + delphiLargeLaunchTemplate: cdk.aws_ec2.LaunchTemplate, + ollamaNamespace: string, + alarmTopic: cdk.aws_sns.Topic +) => { + const commonAsgProps = { vpc, role: instanceRole }; + + // Ollama ASG + const asgOllama = new autoscaling.AutoScalingGroup(self, 'AsgOllama', { + vpc, + launchTemplate: ollamaLaunchTemplate, + minCapacity: 1, + maxCapacity: 3, + desiredCapacity: 1, + vpcSubnets: { subnetGroupName: 'PrivateWithEgress' }, + healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(10) }), + }); + asgOllama.node.addDependency(logGroup); + asgOllama.node.addDependency(fileSystem); // Ensure EFS is ready before instances start + + // Web ASG + const asgWeb = new autoscaling.AutoScalingGroup(self, 'Asg', { + vpc, + launchTemplate: webLaunchTemplate, + minCapacity: 2, + maxCapacity: 10, + desiredCapacity: 2, + vpcSubnets: { subnetType: ec2.SubnetType.PUBLIC }, + healthCheck: autoscaling.HealthCheck.elb({grace: cdk.Duration.minutes(5)}) + }); + + // Math Worker ASG + const asgMathWorker = new autoscaling.AutoScalingGroup(self, 'AsgMathWorker', { + vpc, + launchTemplate: mathWorkerLaunchTemplate, + minCapacity: 1, + desiredCapacity: 1, + maxCapacity: 5, + vpcSubnets: { subnetType: ec2.SubnetType.PUBLIC }, + healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(2) }), + }); + + // Delphi Small ASG + const asgDelphiSmall = new autoscaling.AutoScalingGroup(self, 'AsgDelphiSmall', { + vpc, + launchTemplate: delphiSmallLaunchTemplate, + minCapacity: 1, + desiredCapacity: 1, + maxCapacity: 5, + vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS }, + healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(5) }), + }); + + // Delphi Large ASG + const asgDelphiLarge = new autoscaling.AutoScalingGroup(self, 'AsgDelphiLarge', { + vpc, + launchTemplate: delphiLargeLaunchTemplate, + minCapacity: 1, + desiredCapacity: 1, + maxCapacity: 3, + vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS }, + healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(5) }), + }); + + + // --- Scaling Policies & Alarms + const mathWorkerCpuMetric = new cloudwatch.Metric({ + namespace: 'AWS/EC2', + metricName: 'CPUUtilization', + dimensionsMap: { + AutoScalingGroupName: asgMathWorker.autoScalingGroupName + }, + statistic: 'Average', + period: cdk.Duration.minutes(10), + }); + asgMathWorker.scaleToTrackMetric('CpuTracking', { + metric: mathWorkerCpuMetric, + targetValue: 50, + }); + + // Add Delphi CPU Scaling Policies & Alarms + const createDelphiCpuScaling = (asg: autoscaling.AutoScalingGroup, name: string, target: number): 
cloudwatch.Metric => { + const cpuMetric = new cloudwatch.Metric({ + namespace: 'AWS/EC2', + metricName: 'CPUUtilization', + dimensionsMap: { AutoScalingGroupName: asg.autoScalingGroupName }, + statistic: 'Average', + period: cdk.Duration.minutes(5), + }); + asg.scaleToTrackMetric(`${name}CpuTracking`, { + metric: cpuMetric, + targetValue: target + }); + + // High CPU Alarm + const alarm = new cloudwatch.Alarm(self, `${name}HighCpuAlarm`, { + metric: cpuMetric, + threshold: 80, // Alert if CPU > 80% + evaluationPeriods: 2, // for 2 consecutive periods (10 minutes total) + datapointsToAlarm: 2, // Ensure 2 datapoints are breaching + comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD, + alarmDescription: `Alert when ${name} instances CPU exceeds 80% for 10 minutes`, + treatMissingData: cloudwatch.TreatMissingData.IGNORE, // Or BREACHING/NOT_BREACHING as appropriate + }); + // Add SNS action to the alarm + alarm.addAlarmAction(new cloudwatch_actions.SnsAction(alarmTopic)); + return cpuMetric; + }; + const delphiSmallCpuMetric = createDelphiCpuScaling(asgDelphiSmall, 'DelphiSmall', 60); // Target 60% CPU + const delphiLargeCpuMetric = createDelphiCpuScaling(asgDelphiLarge, 'DelphiLarge', 60); // Target 60% CPU + + // Add Ollama GPU Scaling Policy + const ollamaGpuMetric = new cloudwatch.Metric({ + namespace: ollamaNamespace, // Custom namespace from CW Agent config + metricName: 'utilization_gpu', // GPU utilization metric name from CW Agent config + dimensionsMap: { AutoScalingGroupName: asgOllama.autoScalingGroupName }, + statistic: 'Average', + period: cdk.Duration.minutes(1), + }); + asgOllama.scaleToTrackMetric('OllamaGpuScaling', { + metric: ollamaGpuMetric, + targetValue: 75, + cooldown: cdk.Duration.minutes(5), // Prevent flapping + disableScaleIn: false, // Allow scaling down + estimatedInstanceWarmup: cdk.Duration.minutes(5), // Time until instance contributes metrics meaningfully + }); + + return { + asgOllama, + asgWeb, + asgMathWorker, + asgDelphiSmall, + asgDelphiLarge, + commonAsgProps + } +} \ No newline at end of file diff --git a/cdk/codedeploy.ts b/cdk/codedeploy.ts new file mode 100644 index 0000000000..e9880ed36d --- /dev/null +++ b/cdk/codedeploy.ts @@ -0,0 +1,44 @@ +import { Construct } from "constructs"; +import * as cdk from 'aws-cdk-lib'; +import * as codedeploy from 'aws-cdk-lib/aws-codedeploy'; +import * as s3 from 'aws-cdk-lib/aws-s3'; + +export default ( + self: Construct, + instanceRole: cdk.aws_iam.Role, + asgWeb: cdk.aws_autoscaling.AutoScalingGroup, + asgMathWorker: cdk.aws_autoscaling.AutoScalingGroup, + asgDelphiSmall: cdk.aws_autoscaling.AutoScalingGroup, + asgDelphiLarge: cdk.aws_autoscaling.AutoScalingGroup, + codeDeployRole: cdk.aws_iam.Role +) => { + const application = new codedeploy.ServerApplication(self, 'CodeDeployApplication', { + applicationName: 'PolisApplication', + }); + + const deploymentBucket = new s3.Bucket(self, 'DeploymentPackageBucket', { + bucketName: `polis-deployment-packages-${cdk.Stack.of(self).account}-${cdk.Stack.of(self).region}`, + removalPolicy: cdk.RemovalPolicy.DESTROY, + autoDeleteObjects: true, + versioned: true, + publicReadAccess: false, + blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, + }); + deploymentBucket.grantRead(instanceRole); + + // Deployment Group + const deploymentGroup = new codedeploy.ServerDeploymentGroup(self, 'DeploymentGroup', { + application, + deploymentGroupName: 'PolisDeploymentGroup', + autoScalingGroups: [asgWeb, asgMathWorker, asgDelphiSmall, asgDelphiLarge], + 
deploymentConfig: codedeploy.ServerDeploymentConfig.ONE_AT_A_TIME, + role: codeDeployRole, + installAgent: true, + }); + + return { + application, + deploymentBucket, + deploymentGroup + } +} \ No newline at end of file diff --git a/cdk/config/amazon-cloudwatch-agent.json b/cdk/config/amazon-cloudwatch-agent.json new file mode 100644 index 0000000000..b94922a2ab --- /dev/null +++ b/cdk/config/amazon-cloudwatch-agent.json @@ -0,0 +1,37 @@ +{ + "agent": { "metrics_collection_interval": 60, "run_as_user": "root" }, + "metrics": { + "namespace": "OllamaMetrics", + "append_dimensions": { + "AutoScalingGroupName": "${aws:AutoScalingGroupName}", + "ImageId": "${aws:ImageId}", + "InstanceId": "${aws:InstanceId}", + "InstanceType": "${aws:InstanceType}" + }, + "metrics_collected": { + "nvidia_gpu": { + "measurement": [ + {"name": "utilization_gpu", "unit": "Percent"}, + {"name": "utilization_memory", "unit": "Percent"}, + {"name": "memory_total", "unit": "Megabytes"}, + {"name": "memory_used", "unit": "Megabytes"}, + {"name": "memory_free", "unit": "Megabytes"}, + {"name": "power_draw", "unit": "Watts"}, + {"name": "temperature_gpu", "unit": "Count"} + ], + "metrics_collection_interval": 60, + "nvidia_smi_path": "/usr/bin/nvidia-smi", + "metrics_aggregation_interval": 60 + }, + "disk": { + "measurement": [ "used_percent" ], + "metrics_collection_interval": 60, + "resources": [ "/" ] + }, + "mem": { + "measurement": [ "mem_used_percent" ], + "metrics_collection_interval": 60 + } + } + } +} \ No newline at end of file diff --git a/cdk/db.ts b/cdk/db.ts new file mode 100644 index 0000000000..28dd3e6cad --- /dev/null +++ b/cdk/db.ts @@ -0,0 +1,48 @@ +import { Construct } from "constructs"; +import * as ssm from 'aws-cdk-lib/aws-ssm'; +import * as rds from 'aws-cdk-lib/aws-rds'; +import * as cdk from 'aws-cdk-lib'; +import * as ec2 from 'aws-cdk-lib/aws-ec2'; + +export default (self: Construct, vpc: cdk.aws_ec2.IVpc) => { + const dbSubnetGroup = new rds.SubnetGroup(self, 'DatabaseSubnetGroup', { + vpc, + subnetGroupName: 'PolisDatabaseSubnetGroup', + description: 'Subnet group for the postgres database', + vpcSubnets: { subnetGroupName: 'Private' }, + removalPolicy: cdk.RemovalPolicy.RETAIN, + }); + + const db = new rds.DatabaseInstance(self, 'Database', { + engine: rds.DatabaseInstanceEngine.postgres({version: rds.PostgresEngineVersion.VER_17 }), + instanceType: ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.LARGE), + vpc, + allocatedStorage: 20, + storageType: rds.StorageType.GP2, + credentials: rds.Credentials.fromGeneratedSecret('dbUser'), + databaseName: 'polisdb', + removalPolicy: cdk.RemovalPolicy.SNAPSHOT, + deletionProtection: true, + publiclyAccessible: false, + subnetGroup: dbSubnetGroup, + }); + + // SSM Parameters for DB connection + const dbSecretArnParam = new ssm.StringParameter(self, 'DBSecretArnParameter', { + parameterName: '/polis/db-secret-arn', + stringValue: db.secret!.secretArn, + description: 'SSM Parameter storing the ARN of the Polis Database Secret', + }); + const dbHostParam = new ssm.StringParameter(self, 'DBHostParameter', { + parameterName: '/polis/db-host', + stringValue: db.dbInstanceEndpointAddress, + description: 'SSM Parameter storing the Polis Database Host', + }); + const dbPortParam = new ssm.StringParameter(self, 'DBPortParameter', { + parameterName: '/polis/db-port', + stringValue: db.dbInstanceEndpointPort, + description: 'SSM Parameter storing the Polis Database Port', + }); + + return { dbSubnetGroup, db, dbSecretArnParam, dbHostParam, dbPortParam 
} +} diff --git a/cdk/dns.ts b/cdk/dns.ts new file mode 100644 index 0000000000..3baddecc28 --- /dev/null +++ b/cdk/dns.ts @@ -0,0 +1,59 @@ +import { Construct } from "constructs"; +import * as cdk from 'aws-cdk-lib'; +import * as elbv2 from 'aws-cdk-lib/aws-elasticloadbalancingv2'; +import * as acm from 'aws-cdk-lib/aws-certificatemanager'; + +export default ( + self: Construct, + vpc: cdk.aws_ec2.Vpc, + lbSecurityGroup: cdk.aws_ec2.SecurityGroup, + asgWeb: cdk.aws_autoscaling.AutoScalingGroup +) => { + const lb = new elbv2.ApplicationLoadBalancer(self, 'Lb', { + vpc, + internetFacing: true, + securityGroup: lbSecurityGroup, // Use the dedicated ALB security group + idleTimeout: cdk.Duration.seconds(300), + }); + + const webTargetGroup = new elbv2.ApplicationTargetGroup(self, 'WebAppTargetGroup', { + vpc, + port: 80, + protocol: elbv2.ApplicationProtocol.HTTP, + targets: [asgWeb], + healthCheck: { + path: "/api/v3/testConnection", + interval: cdk.Duration.seconds(300) + } + }); + + const httpListener = lb.addListener('HttpListener', { + port: 80, + open: true, + defaultTargetGroups: [webTargetGroup], + }); + + const certificate = new acm.Certificate(self, 'WebAppCertificate', { + domainName: 'pol.is', + validation: acm.CertificateValidation.fromDns(), + }); + + const httpsListener = lb.addListener('HttpsListener', { + port: 443, + certificates: [certificate], + open: true, + defaultTargetGroups: [webTargetGroup], + }); + + const webScalingPolicy = asgWeb.scaleOnRequestCount('WebScalingPolicy', { + targetRequestsPerMinute: 600, + }); + + return { + lb, + webTargetGroup, + httpListener, + httpsListener, + webScalingPolicy + } +} \ No newline at end of file diff --git a/cdk/ec2.ts b/cdk/ec2.ts new file mode 100644 index 0000000000..2f553c7e8f --- /dev/null +++ b/cdk/ec2.ts @@ -0,0 +1,25 @@ +import * as ec2 from 'aws-cdk-lib/aws-ec2'; + +export const instanceTypeWeb = ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.MEDIUM); +export const machineImageWeb = new ec2.AmazonLinuxImage({ generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023 }); +export const instanceTypeMathWorker = ec2.InstanceType.of(ec2.InstanceClass.R8G, ec2.InstanceSize.XLARGE4); +export const machineImageMathWorker = new ec2.AmazonLinuxImage({ + generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023, + cpuType: ec2.AmazonLinuxCpuType.ARM_64, +}); +// Delphi small instance +export const instanceTypeDelphiSmall = ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.LARGE); +export const machineImageDelphiSmall = new ec2.AmazonLinuxImage({ + generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023 +}); +// Delphi large instance +export const instanceTypeDelphiLarge = ec2.InstanceType.of(ec2.InstanceClass.C6G, ec2.InstanceSize.XLARGE4); +export const machineImageDelphiLarge = new ec2.AmazonLinuxImage({ + generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023, + cpuType: ec2.AmazonLinuxCpuType.ARM_64 +}); +// Ollama Instance +export const instanceTypeOllama = ec2.InstanceType.of(ec2.InstanceClass.G4DN, ec2.InstanceSize.XLARGE); // x86_64 GPU instance +export const machineImageOllama = ec2.MachineImage.genericLinux({ + 'us-east-1': 'ami-08e0cf6df13ae3ddb', +}); \ No newline at end of file diff --git a/cdk/ecr.ts b/cdk/ecr.ts new file mode 100644 index 0000000000..2733c401e3 --- /dev/null +++ b/cdk/ecr.ts @@ -0,0 +1,39 @@ +import { Construct } from "constructs"; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as ecr from 'aws-cdk-lib/aws-ecr'; +import * as cdk from 'aws-cdk-lib'; +import * as ssm from 
'aws-cdk-lib/aws-ssm'; + +export default (self: Construct, instanceRole: iam.IGrantable) => { + const createEcrRepo = (name: string): ecr.Repository => { + const repo = new ecr.Repository(self, `PolisRepository${name}`, { + repositoryName: `polis/${name.toLowerCase()}`, + removalPolicy: cdk.RemovalPolicy.RETAIN, + imageScanOnPush: true, + }); + + repo.addToResourcePolicy(new iam.PolicyStatement({ + sid: 'AllowPublicPull', + effect: iam.Effect.ALLOW, + principals: [new iam.AnyPrincipal()], + actions: [ + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + ], + })); + repo.grantPull(instanceRole); // Grant pull to the shared instance role + return repo; + }; + const ecrWebRepository = createEcrRepo('Server'); + const ecrMathRepository = createEcrRepo('Math'); + const ecrDelphiRepository = createEcrRepo('Delphi'); + + // --- SSM Parameter for Image Tag + const imageTagParameter = new ssm.StringParameter(self, 'ImageTagParameter', { + parameterName: '/polis/image-tag', + stringValue: 'initial-tag', //CI/CD will update this + }); + + return { ecrWebRepository, ecrMathRepository, ecrDelphiRepository, imageTagParameter } +} \ No newline at end of file diff --git a/cdk/iamRoles.ts b/cdk/iamRoles.ts new file mode 100644 index 0000000000..49802c6aff --- /dev/null +++ b/cdk/iamRoles.ts @@ -0,0 +1,56 @@ +import * as iam from 'aws-cdk-lib/aws-iam'; +import { Construct } from 'constructs'; +import * as cdk from 'aws-cdk-lib'; + +export default (self: Construct) => { + const instanceRole = new iam.Role(self, 'InstanceRole', { + assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com'), + managedPolicies: [ + iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore'), + iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonEC2RoleforAWSCodeDeploy'), + iam.ManagedPolicy.fromAwsManagedPolicyName('SecretsManagerReadWrite'), + iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonEC2ContainerRegistryReadOnly'), + iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchLogsFullAccess'), + iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchAgentServerPolicy'), + ], + }); + instanceRole.addToPolicy(new iam.PolicyStatement({ + actions: ['s3:PutObject', 's3:PutObjectAcl', 's3:AbortMultipartUpload'], + resources: ['arn:aws:s3:::*', 'arn:aws:s3:::*/*'], + })); + + // IAM Role for CodeDeploy + const codeDeployRole = new iam.Role(self, 'CodeDeployRole', { + assumedBy: new iam.ServicePrincipal('codedeploy.amazonaws.com'), + managedPolicies: [ + iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSCodeDeployRole'), + ], + }); + const delphiJobQueueTableArn = cdk.Arn.format({ + service: 'dynamodb', + region: 'us-east-1', + account: cdk.Stack.of(self).account, + resource: 'table', + resourceName: 'Delphi_*', + }, cdk.Stack.of(self)); + + const delphiJobQueueTableIndexesArn = `${delphiJobQueueTableArn}/index/*`; + + instanceRole.addToPolicy(new iam.PolicyStatement({ + effect: iam.Effect.ALLOW, + actions: [ + "dynamodb:PutItem", + "dynamodb:GetItem", + "dynamodb:UpdateItem", + "dynamodb:DeleteItem", + "dynamodb:Query", + "dynamodb:Scan" + ], + resources: [ + delphiJobQueueTableArn, + delphiJobQueueTableIndexesArn + ], + })); + + return { instanceRole, codeDeployRole } +} \ No newline at end of file diff --git a/cdk/launchTemplates.ts b/cdk/launchTemplates.ts new file mode 100644 index 0000000000..eca0ba1bed --- /dev/null +++ b/cdk/launchTemplates.ts @@ -0,0 +1,238 @@ +import * as ec2 from 'aws-cdk-lib/aws-ec2'; +import * as cdk from 'aws-cdk-lib'; +import { 
Construct } from 'constructs'; +import * as s3_assets from 'aws-cdk-lib/aws-s3-assets'; + +export default ( + self: Construct, + logGroup: cdk.aws_logs.LogGroup, + ollamaNamespace: string, + ollamaModelDirectory: string, + fileSystem: cdk.aws_efs.FileSystem, + machineImageWeb: ec2.IMachineImage, + instanceTypeWeb: ec2.InstanceType, + webSecurityGroup: ec2.ISecurityGroup, + webKeyPair: ec2.IKeyPair | undefined, + instanceRole: cdk.aws_iam.IRole, + machineImageMathWorker: ec2.IMachineImage, + instanceTypeMathWorker: ec2.InstanceType, + mathWorkerSecurityGroup: ec2.ISecurityGroup, + mathWorkerKeyPair: ec2.IKeyPair | undefined, + machineImageDelphiSmall: ec2.IMachineImage, + instanceTypeDelphiSmall: ec2.InstanceType, + delphiSmallKeyPair: ec2.IKeyPair | undefined, + machineImageDelphiLarge: ec2.IMachineImage, + instanceTypeDelphiLarge: ec2.InstanceType, + delphiSecurityGroup: ec2.ISecurityGroup, + delphiLargeKeyPair: ec2.IKeyPair | undefined, + machineImageOllama: ec2.IMachineImage, + instanceTypeOllama: ec2.InstanceType, + ollamaKeyPair: ec2.IKeyPair | undefined, + ollamaSecurityGroup: ec2.ISecurityGroup +) => { + // Generic User Data function (Works with NAT Gateway for internet) + const usrdata = (CLOUDWATCH_LOG_GROUP_NAME: string, service: string, instanceSize?: string) => { + const ld = ec2.UserData.forLinux(); + ld.addCommands( + '#!/bin/bash', + 'set -e', + 'set -x', + `echo "Writing service type '${service}' to /tmp/service_type.txt"`, + `echo "${service}" > /tmp/service_type.txt`, + `echo "Contents of /tmp/service_type.txt: $(cat /tmp/service_type.txt)"`, + // If instanceSize is provided, write it to a file + instanceSize ? `echo "Writing instance size '${instanceSize}' to /tmp/instance_size.txt"` : '', + instanceSize ? `echo "${instanceSize}" > /tmp/instance_size.txt` : '', + instanceSize ? `echo "Contents of /tmp/instance_size.txt: $(cat /tmp/instance_size.txt)"` : '', + 'sudo yum update -y', + 'sudo yum install -y amazon-cloudwatch-agent', + 'sudo dnf install -y wget ruby docker', + 'sudo systemctl start docker', + 'sudo systemctl enable docker', + 'sudo usermod -a -G docker ec2-user', + 'sudo curl -L https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose', + 'sudo chmod +x /usr/local/bin/docker-compose', + 'docker-compose --version', // Verify installation + 'sudo yum install -y jq', + `export SERVICE=${service}`, + instanceSize ? 
`export INSTANCE_SIZE=${instanceSize}` : '', + 'exec 1>>/var/log/user-data.log 2>&1', + 'sudo mkdir -p /etc/docker', // Ensure /etc/docker directory exists +`cat << EOF | sudo tee /etc/docker/daemon.json +{ + "log-driver": "awslogs", + "log-opts": { + "awslogs-group": "${CLOUDWATCH_LOG_GROUP_NAME}", + "awslogs-region": "${cdk.Stack.of(self).region}", + "awslogs-stream": "${service}" + } +} +EOF`, // Ensure EOF is on a new line with no leading/trailing spaces + `sudo chmod 644 /etc/docker/daemon.json`, // Good practice to set permissions + 'sudo systemctl restart docker', + 'sudo systemctl status docker', + 'echo "Finished User Data Execution at $(date)"' // Log completion once the base setup has actually run + ); + return ld; + }; + + const ollamaUsrData = ec2.UserData.forLinux(); +// --- CloudWatch Agent Config Asset --- +const cwAgentConfigAsset = new s3_assets.Asset(self, 'CwAgentConfigAsset', { + path: 'config/amazon-cloudwatch-agent.json' // Adjust path relative to cdk project root +}); + +// Grant the instance role read access to the asset bucket +cwAgentConfigAsset.grantRead(instanceRole); +// Define paths for the CloudWatch Agent config +const cwAgentConfigPath = '/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json'; +const cwAgentTempPath = '/tmp/amazon-cloudwatch-agent.json'; // Temporary download location +const efsDnsName = `${fileSystem.fileSystemId}.efs.${cdk.Stack.of(self).region}.${cdk.Stack.of(self).urlSuffix}`; + +// Add commands to the Ollama UserData +ollamaUsrData.addCommands( + // Spread the base user data commands + ...usrdata(logGroup.logGroupName, "ollama").render().split('\n').filter(line => line.trim() !== ''), + + // Install EFS utilities + 'echo "Installing EFS utilities for Ollama..."', + 'sudo dnf install -y amazon-efs-utils nfs-utils', + + // Start Ollama-specific setup + 'echo "Starting Ollama specific setup..."', + 'echo "Configuring CloudWatch Agent for GPU metrics..."', + + // --- Download CW Agent config from S3 Asset --- + `echo "Downloading CW Agent config from S3..."`, + // Use aws cli to copy from the S3 location provided by the asset object + // The instance needs NAT access (which it has) and S3 permissions (granted above) + `aws s3 cp ${cwAgentConfigAsset.s3ObjectUrl} ${cwAgentTempPath}`, + // Ensure target directory exists and move the file into place + `sudo mkdir -p $(dirname ${cwAgentConfigPath})`, + `sudo mv ${cwAgentTempPath} ${cwAgentConfigPath}`, + `sudo chmod 644 ${cwAgentConfigPath}`, + `sudo chown root:root ${cwAgentConfigPath}`, // Ensure root ownership + 'echo "CW Agent config downloaded and placed."', + + // --- Enable and Start the CloudWatch Agent Service --- + 'echo "Enabling CloudWatch Agent service..."', + 'sudo systemctl enable amazon-cloudwatch-agent', + 'echo "Starting CloudWatch Agent service..."', + 'sudo systemctl start amazon-cloudwatch-agent', + 'echo "CloudWatch Agent service started."', + + // --- Mount EFS using standard NFSv4.1 --- + // Use the manually constructed EFS DNS name + `echo "Mounting EFS filesystem using NFSv4.1 and DNS Name: ${efsDnsName}..."`, // Use variable here + `sudo mkdir -p ${ollamaModelDirectory}`, // Ensure mount point exists + // Standard NFS mount command with recommended options for EFS + `sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport ${efsDnsName}:/ ${ollamaModelDirectory}`, // Use variable here + // Update fstab to use NFS4 and the DNS name for persistence + `echo "${efsDnsName}:/ ${ollamaModelDirectory} nfs4 
nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport,_netdev 0 0" | sudo tee -a /etc/fstab`, // Use variable here + // Set ownership for the application user + `sudo chown ec2-user:ec2-user ${ollamaModelDirectory}`, + 'echo "EFS mounted successfully."', + + // --- Start Ollama container --- + 'echo "Starting Ollama container..."', + 'sudo docker run -d --name ollama \\', + ' --gpus all \\', + ' -p 0.0.0.0:11434:11434 \\', + ` -v ${ollamaModelDirectory}:/root/.ollama \\`, + ' --restart unless-stopped \\', + ' ollama/ollama serve', + + // --- Pull initial model in background --- + '(', + ' echo "Waiting for Ollama service (background task)..."', + ' sleep 60', + ' echo "Pulling default Ollama model (llama3.1:8b) in background..."', + ' sudo docker exec ollama ollama pull llama3.1:8b || echo "Failed to pull default model initially, may need manual pull later."', + ' echo "Background model pull task finished."', + ') &', + 'disown', + 'echo "Ollama setup script finished."' +); // End of ollamaUsrData.addCommands + + + // --- Launch Templates + const webLaunchTemplate = new ec2.LaunchTemplate(self, 'WebLaunchTemplate', { + machineImage: machineImageWeb, + userData: usrdata(logGroup.logGroupName, "server"), + instanceType: instanceTypeWeb, + securityGroup: webSecurityGroup, + keyPair: webKeyPair, + role: instanceRole, + }); + const mathWorkerLaunchTemplate = new ec2.LaunchTemplate(self, 'MathWorkerLaunchTemplate', { + machineImage: machineImageMathWorker, + userData: usrdata(logGroup.logGroupName, "math"), + instanceType: instanceTypeMathWorker, + securityGroup: mathWorkerSecurityGroup, + keyPair: mathWorkerKeyPair, + role: instanceRole, + }); + // Delphi Small Launch Template + const delphiSmallLaunchTemplate = new ec2.LaunchTemplate(self, 'DelphiSmallLaunchTemplate', { + machineImage: machineImageDelphiSmall, + userData: usrdata(logGroup.logGroupName, "delphi", "small"), + instanceType: instanceTypeDelphiSmall, + securityGroup: delphiSecurityGroup, + keyPair: delphiSmallKeyPair, + role: instanceRole, + blockDevices: [ + { + deviceName: '/dev/xvda', + volume: ec2.BlockDeviceVolume.ebs(50, { + volumeType: ec2.EbsDeviceVolumeType.GP3, + deleteOnTermination: true, + }), + }, + ], + }); + // Delphi Large Launch Template + const delphiLargeLaunchTemplate = new ec2.LaunchTemplate(self, 'DelphiLargeLaunchTemplate', { + machineImage: machineImageDelphiLarge, + userData: usrdata(logGroup.logGroupName, "delphi", "large"), + instanceType: instanceTypeDelphiLarge, + securityGroup: delphiSecurityGroup, + keyPair: delphiLargeKeyPair, + role: instanceRole, + blockDevices: [ + { + deviceName: '/dev/xvda', + volume: ec2.BlockDeviceVolume.ebs(100, { + volumeType: ec2.EbsDeviceVolumeType.GP3, + deleteOnTermination: true, + }), + }, + ], + }); + // Ollama Launch Template + const ollamaLaunchTemplate = new ec2.LaunchTemplate(self, 'OllamaLaunchTemplate', { + machineImage: machineImageOllama, + userData: ollamaUsrData, + instanceType: instanceTypeOllama, + securityGroup: ollamaSecurityGroup, + keyPair: ollamaKeyPair, + role: instanceRole, + blockDevices: [ + { + deviceName: '/dev/xvda', // Adjust if needed for DLAMI + volume: ec2.BlockDeviceVolume.ebs(100, { + volumeType: ec2.EbsDeviceVolumeType.GP3, + deleteOnTermination: true, + }), + }, + ], + }); + + return { + webLaunchTemplate, + mathWorkerLaunchTemplate, + delphiSmallLaunchTemplate, + delphiLargeLaunchTemplate, + ollamaLaunchTemplate + } +} \ No newline at end of file diff --git a/cdk/lib/cdk-stack.ts b/cdk/lib/cdk-stack.ts index 
c444da9cb6..3aa2f3030e 100644 --- a/cdk/lib/cdk-stack.ts +++ b/cdk/lib/cdk-stack.ts @@ -1,21 +1,38 @@ import * as cdk from 'aws-cdk-lib'; import * as ec2 from 'aws-cdk-lib/aws-ec2'; -import * as autoscaling from 'aws-cdk-lib/aws-autoscaling'; import * as elbv2 from 'aws-cdk-lib/aws-elasticloadbalancingv2'; -import * as rds from 'aws-cdk-lib/aws-rds'; -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as s3 from 'aws-cdk-lib/aws-s3'; import * as logs from 'aws-cdk-lib/aws-logs'; -import * as codedeploy from 'aws-cdk-lib/aws-codedeploy'; import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; +import * as iam from 'aws-cdk-lib/aws-iam'; import * as sns from 'aws-cdk-lib/aws-sns'; import * as subscriptions from 'aws-cdk-lib/aws-sns-subscriptions'; -import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; -import * as acm from 'aws-cdk-lib/aws-certificatemanager'; -import * as ecr from 'aws-cdk-lib/aws-ecr'; -import * as ssm from 'aws-cdk-lib/aws-ssm'; +import * as efs from 'aws-cdk-lib/aws-efs'; import { Construct } from 'constructs'; +// custom constructs for code organization +import createPolisVPC from '../vpc'; +import { + instanceTypeWeb, + machineImageWeb, + instanceTypeMathWorker, + machineImageMathWorker, + instanceTypeDelphiSmall, + machineImageDelphiSmall, + instanceTypeDelphiLarge, + machineImageDelphiLarge, + instanceTypeOllama, + machineImageOllama +} from '../ec2'; +import createSecurityGroups from '../securityGroups'; +import createRoles from '../iamRoles'; +import createECRRepos from '../ecr'; +import createDBResources from '../db'; +import configureLaunchTemplates from '../launchTemplates'; +import createAutoScalingAndAlarms from '../autoscaling'; +import createCodedeployConfig from '../codedeploy'; +import createALBAndDNS from '../dns'; +import createSecretsAndDependencies from '../secrets'; + interface PolisStackProps extends cdk.StackProps { enableSSHAccess?: boolean; // Make optional, default to false envFile: string; @@ -25,6 +42,7 @@ interface PolisStackProps extends cdk.StackProps { mathWorkerKeyPairName?: string; // Key pair for math worker delphiSmallKeyPairName?: string; // Key pair for small Delphi instances delphiLargeKeyPairName?: string; // Key pair for large Delphi instance + ollamaKeyPairName?: string; // Key pair for Ollama instance - NEW } export class CdkStack extends cdk.Stack { @@ -32,131 +50,74 @@ export class CdkStack extends cdk.Stack { super(scope, id, props); const defaultSSHRange = '0.0.0.0/0'; + const ollamaPort = 11434; + const ollamaModelDirectory = '/efs/ollama-models'; + const ollamaNamespace = 'OllamaMetrics'; // Custom namespace for GPU metrics - const vpc = new ec2.Vpc(this, 'Vpc', { - maxAzs: 2, - natGateways: 0, - subnetConfiguration: [ - { - cidrMask: 24, - name: 'Public', - subnetType: ec2.SubnetType.PUBLIC, - }, - { - cidrMask: 24, - name: 'Private', - subnetType: ec2.SubnetType.PRIVATE_ISOLATED, // Use PRIVATE_ISOLATED - }, - ] - }); + // Create VPC + const vpc = createPolisVPC(this); const alarmTopic = new sns.Topic(this, 'AlarmTopic', { displayName: 'Polis Application Alarms', }); - alarmTopic.addSubscription(new subscriptions.EmailSubscription('tim@compdemocracy.org')); - - const logGroup = new logs.LogGroup(this, 'LogGroup'); - - const instanceTypeWeb = ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.MEDIUM); - const machineImageWeb = new ec2.AmazonLinuxImage({ generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023 }); - const instanceTypeMathWorker = ec2.InstanceType.of(ec2.InstanceClass.R8G, 
ec2.InstanceSize.XLARGE4); - const machineImageMathWorker = new ec2.AmazonLinuxImage({ - generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023, - cpuType: ec2.AmazonLinuxCpuType.ARM_64, - }); - - // Delphi small instance (cost efficient) - const instanceTypeDelphiSmall = ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.LARGE); - const machineImageDelphiSmall = new ec2.AmazonLinuxImage({ - generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023 - }); - - // Delphi large instance (performance optimized) - const instanceTypeDelphiLarge = ec2.InstanceType.of(ec2.InstanceClass.C6G, ec2.InstanceSize.XLARGE4); - const machineImageDelphiLarge = new ec2.AmazonLinuxImage({ - generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2023, - cpuType: ec2.AmazonLinuxCpuType.ARM_64 - }); - - const webSecurityGroup = new ec2.SecurityGroup(this, 'WebSecurityGroup', { - vpc, - description: 'Allow HTTP and SSH access to web instances', - allowAllOutbound: true, - }); - - const mathWorkerSecurityGroup = new ec2.SecurityGroup(this, 'MathWorkerSG', { - vpc, - description: 'Security group for Polis math worker', - allowAllOutbound: true, - }); - - const delphiSecurityGroup = new ec2.SecurityGroup(this, 'DelphiSecurityGroup', { - vpc, - description: 'Security group for Delphi worker instances', - allowAllOutbound: true, + const logGroup = new logs.LogGroup(this, 'LogGroup', { + retention: logs.RetentionDays.ONE_MONTH, + removalPolicy: cdk.RemovalPolicy.DESTROY, }); + // Create security group + const { + webSecurityGroup, + mathWorkerSecurityGroup, + delphiSecurityGroup, + ollamaSecurityGroup, + efsSecurityGroup, + } = createSecurityGroups(vpc, this); + + // Allow Delphi -> Ollama + ollamaSecurityGroup.addIngressRule( + ec2.Peer.ipv4(vpc.vpcCidrBlock), // Allows traffic from any private IP within the VPC + ec2.Port.tcp(ollamaPort), + `Allow NLB traffic on ${ollamaPort} from VPC` + ); + // Allow Ollama -> EFS + efsSecurityGroup.addIngressRule( + ollamaSecurityGroup, + ec2.Port.tcp(2049), // NFS port + 'Allow NFS from Ollama instances' + ); + + // Conditional SSH Access if (props.enableSSHAccess) { - webSecurityGroup.addIngressRule(ec2.Peer.ipv4(props.sshAllowedIpRange || defaultSSHRange), ec2.Port.tcp(22), 'Allow SSH access'); - mathWorkerSecurityGroup.addIngressRule(ec2.Peer.ipv4(props.sshAllowedIpRange || defaultSSHRange), ec2.Port.tcp(22), 'Allow SSH access'); - delphiSecurityGroup.addIngressRule(ec2.Peer.ipv4(props.sshAllowedIpRange || defaultSSHRange), ec2.Port.tcp(22), 'Allow SSH access'); + const sshPeer = ec2.Peer.ipv4(props.sshAllowedIpRange || defaultSSHRange); + webSecurityGroup.addIngressRule(sshPeer, ec2.Port.tcp(22), 'Allow SSH access'); + mathWorkerSecurityGroup.addIngressRule(sshPeer, ec2.Port.tcp(22), 'Allow SSH access'); + delphiSecurityGroup.addIngressRule(sshPeer, ec2.Port.tcp(22), 'Allow SSH access'); + ollamaSecurityGroup.addIngressRule(sshPeer, ec2.Port.tcp(22), 'Allow SSH access'); } - // Key Pair Creation - let webKeyPair: ec2.IKeyPair | undefined; - if (props.enableSSHAccess) { - webKeyPair = props.webKeyPairName - ? ec2.KeyPair.fromKeyPairName(this, 'WebKeyPair', props.webKeyPairName) - : new ec2.KeyPair(this, 'WebKeyPair'); - } - - let mathWorkerKeyPair: ec2.IKeyPair | undefined; - if (props.enableSSHAccess) { - mathWorkerKeyPair = props.mathWorkerKeyPairName - ? 
ec2.KeyPair.fromKeyPairName(this, 'MathWorkerKeyPair', props.mathWorkerKeyPairName) - : new ec2.KeyPair(this, 'MathWorkerKeyPair'); - } - - let delphiSmallKeyPair: ec2.IKeyPair | undefined; - if (props.enableSSHAccess) { - delphiSmallKeyPair = props.delphiSmallKeyPairName - ? ec2.KeyPair.fromKeyPairName(this, 'DelphiSmallKeyPair', props.delphiSmallKeyPairName) - : new ec2.KeyPair(this, 'DelphiSmallKeyPair'); - } + // HTTP/HTTPS are always open; SSH is only opened via the enableSSHAccess block above + webSecurityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(80), 'Allow HTTP from anywhere'); + webSecurityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(443), 'Allow HTTPS from anywhere'); - let delphiLargeKeyPair: ec2.IKeyPair | undefined; - if (props.enableSSHAccess) { - delphiLargeKeyPair = props.delphiLargeKeyPairName - ? ec2.KeyPair.fromKeyPairName(this, 'DelphiLargeKeyPair', props.delphiLargeKeyPairName) - : new ec2.KeyPair(this, 'DelphiLargeKeyPair'); - } - const instanceRole = new iam.Role(this, 'InstanceRole', { - assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com'), - managedPolicies: [ - iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore'), - iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonEC2RoleforAWSCodeDeploy'), - iam.ManagedPolicy.fromAwsManagedPolicyName('SecretsManagerReadWrite'), - iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonEC2ContainerRegistryReadOnly'), - iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchLogsFullAccess') - ], - }); - - instanceRole.addToPolicy(new iam.PolicyStatement({ - actions: ['s3:PutObject', 's3:PutObjectAcl', 's3:AbortMultipartUpload'], - resources: ['arn:aws:s3:::*', 'arn:aws:s3:::*/*'], - })); + // --- Key Pairs + const getKeyPair = (name: string, requestedName?: string): ec2.IKeyPair | undefined => { + if (!props.enableSSHAccess) return undefined; + return requestedName + ? 
ec2.KeyPair.fromKeyPairName(this, name, requestedName) + : new ec2.KeyPair(this, name); + }; + const webKeyPair = getKeyPair('WebKeyPair', props.webKeyPairName); + const mathWorkerKeyPair = getKeyPair('MathWorkerKeyPair', props.mathWorkerKeyPairName); + const delphiSmallKeyPair = getKeyPair('DelphiSmallKeyPair', props.delphiSmallKeyPairName); + const delphiLargeKeyPair = getKeyPair('DelphiLargeKeyPair', props.delphiLargeKeyPairName); + const ollamaKeyPair = getKeyPair('OllamaKeyPair', props.ollamaKeyPairName); - // IAM Role for CodeDeploy - const codeDeployRole = new iam.Role(this, 'CodeDeployRole', { - assumedBy: new iam.ServicePrincipal('codedeploy.amazonaws.com'), - managedPolicies: [ - iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSCodeDeployRole'), - ], - }); + const { instanceRole, codeDeployRole } = createRoles(this); - // ALB Security Group - Allow HTTP/HTTPS from anywhere + // ALB Security Group const lbSecurityGroup = new ec2.SecurityGroup(this, 'LBSecurityGroup', { vpc, description: 'Security group for the load balancer', @@ -165,416 +126,188 @@ export class CdkStack extends cdk.Stack { lbSecurityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(80), 'Allow HTTP from anywhere'); lbSecurityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(443), 'Allow HTTPS from anywhere'); - // things are dockerized so we need ECR - const ecrWebRepository = new ecr.Repository(this, 'PolisRepositoryServer', { - repositoryName: 'polis/server', - removalPolicy: cdk.RemovalPolicy.RETAIN, - imageScanOnPush: true, - }); - - ecrWebRepository.addToResourcePolicy(new iam.PolicyStatement({ // allow docker pull from anywhere - sid: 'AllowPublicPull', - effect: iam.Effect.ALLOW, - principals: [new iam.AnyPrincipal()], - actions: [ - "ecr:BatchCheckLayerAvailability", - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - ], - })); - - const ecrMathRepository = new ecr.Repository(this, 'PolisRepositoryMath', { - repositoryName: 'polis/math', - removalPolicy: cdk.RemovalPolicy.RETAIN, - imageScanOnPush: true, - }); - - ecrMathRepository.addToResourcePolicy(new iam.PolicyStatement({ - sid: 'AllowPublicPull', - effect: iam.Effect.ALLOW, - principals: [new iam.AnyPrincipal()], - actions: [ - "ecr:BatchCheckLayerAvailability", - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - ], - })); - - const ecrDelphiRepository = new ecr.Repository(this, 'PolisRepositoryDelphi', { - repositoryName: 'polis/delphi', - removalPolicy: cdk.RemovalPolicy.RETAIN, - imageScanOnPush: true, - }); - - ecrDelphiRepository.addToResourcePolicy(new iam.PolicyStatement({ - sid: 'AllowPublicPull', - effect: iam.Effect.ALLOW, - principals: [new iam.AnyPrincipal()], - actions: [ - "ecr:BatchCheckLayerAvailability", - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - ], - })); - - ecrWebRepository.grantPull(instanceRole); - ecrMathRepository.grantPull(instanceRole); - ecrDelphiRepository.grantPull(instanceRole); - - const imageTagParameter = new ssm.StringParameter(this, 'ImageTagParameter', { - parameterName: '/polis/image-tag', - stringValue: 'initial-tag', //CI/CD will update this - }); - - - // --- Web ASG --- - webSecurityGroup.addIngressRule(ec2.Peer.ipv4(props.sshAllowedIpRange || defaultSSHRange), ec2.Port.tcp(22), 'Allow SSH'); // Control SSH separately - webSecurityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(80), 'Allow HTTP from anywhere'); - webSecurityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(443), 'Allow HTTPS from anywhere'); - - // --- Postgres --- - - const dbSubnetGroup = 
new rds.SubnetGroup(this, 'DatabaseSubnetGroup', { - vpc, - subnetGroupName: 'PolisDatabaseSubnetGroup', - description: 'Subnet group for the postgres database', - vpcSubnets: { - subnetType: ec2.SubnetType.PRIVATE_ISOLATED, - }, - removalPolicy: cdk.RemovalPolicy.DESTROY, + // Create ECR repos + const { ecrWebRepository, ecrDelphiRepository, ecrMathRepository, imageTagParameter } = createECRRepos(this, instanceRole); + + // Create DB and related resources + const { dbSubnetGroup, db, dbSecretArnParam, dbHostParam, dbPortParam } = createDBResources(this, vpc); + + // --- EFS for Ollama Models + const fileSystemPolicyDocument = new iam.PolicyDocument({ + statements: [ + new iam.PolicyStatement({ + effect: iam.Effect.ALLOW, + actions: [ + "elasticfilesystem:ClientMount", + "elasticfilesystem:ClientWrite", + "elasticfilesystem:ClientRootAccess", + ], + principals: [new iam.AnyPrincipal()], + resources: ["*"], // Applies to the filesystem this policy is attached to + conditions: { + Bool: { "elasticfilesystem:AccessedViaMountTarget": "true" } + } + }) + ] }); - - const db = new rds.DatabaseInstance(this, 'Database', { - engine: rds.DatabaseInstanceEngine.postgres({version: rds.PostgresEngineVersion.VER_17 }), - instanceType: ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.LARGE), + const fileSystem = new efs.FileSystem(this, 'OllamaModelFileSystem', { vpc, - allocatedStorage: 20, - storageType: rds.StorageType.GP2, - credentials: rds.Credentials.fromGeneratedSecret('dbUser'), - databaseName: 'polisdb', - removalPolicy: cdk.RemovalPolicy.SNAPSHOT, - deletionProtection: true, - publiclyAccessible: false, - subnetGroup: dbSubnetGroup, - }); - - const dbSecretArnParam = new ssm.StringParameter(this, 'DBSecretArnParameter', { - parameterName: '/polis/db-secret-arn', - stringValue: db.secret!.secretArn, - description: 'SSM Parameter storing the ARN of the Polis Database Secret', - }); - - const dbHostParam = new ssm.StringParameter(this, 'DBHostParameter', { - parameterName: '/polis/db-host', - stringValue: db.dbInstanceEndpointAddress, - description: 'SSM Parameter storing the Polis Database Host', - }); - - const dbPortParam = new ssm.StringParameter(this, 'DBPortParameter', { - parameterName: '/polis/db-port', - stringValue: db.dbInstanceEndpointPort, - description: 'SSM Parameter storing the Polis Database Port', - }); - - const usrdata = (CLOUDWATCH_LOG_GROUP_NAME: string, service: string, instanceSize?: string) => { - let ld; - ld = ec2.UserData.forLinux(); - ld.addCommands( - '#!/bin/bash', - 'set -e', - 'set -x', - `echo "Writing service type '${service}' to /tmp/service_type.txt"`, - `echo "${service}" > /tmp/service_type.txt`, - `echo "Contents of /tmp/service_type.txt: $(cat /tmp/service_type.txt)"`, - // If instanceSize is provided, write it to a file - instanceSize ? `echo "Writing instance size '${instanceSize}' to /tmp/instance_size.txt"` : '', - instanceSize ? `echo "${instanceSize}" > /tmp/instance_size.txt` : '', - instanceSize ? 
`echo "Contents of /tmp/instance_size.txt: $(cat /tmp/instance_size.txt)"` : '', - 'sudo yum update -y', - 'sudo yum install -y amazon-cloudwatch-agent -y', - 'sudo dnf install -y wget ruby docker', - 'sudo systemctl start docker', - 'sudo systemctl enable docker', - 'sudo usermod -a -G docker ec2-user', - 'sudo curl -L https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose', - 'sudo chmod +x /usr/local/bin/docker-compose', - 'docker-compose --version', // Verify installation - 'sudo yum install -y jq', - `export SERVICE=${service}`, - instanceSize ? `export INSTANCE_SIZE=${instanceSize}` : '', - 'exec 1>>/var/log/user-data.log 2>&1', - 'echo "Finished User Data Execution at $(date)"', - 'sudo mkdir -p /etc/docker', // Ensure /etc/docker directory exists - `sudo tee /etc/docker/daemon.json << EOF -{ - "log-driver": "awslogs", - "log-opts": { - "awslogs-group": "${CLOUDWATCH_LOG_GROUP_NAME}", - "awslogs-region": "${cdk.Stack.of(this).region}", - "awslogs-stream": "${service}" - } -} -EOF`, - 'sudo systemctl restart docker', - 'sudo systemctl status docker' - ); - return ld; - }; - - // --- Launch Templates --- - const webLaunchTemplate = new ec2.LaunchTemplate(this, 'WebLaunchTemplate', { - machineImage: machineImageWeb, - userData: usrdata(logGroup.logGroupName, "server"), - instanceType: instanceTypeWeb, - securityGroup: webSecurityGroup, - keyPair: props.enableSSHAccess ? webKeyPair : undefined, // Conditionally add key pair - role: instanceRole, - }); - - const mathWorkerLaunchTemplate = new ec2.LaunchTemplate(this, 'MathWorkerLaunchTemplate', { - machineImage: machineImageMathWorker, - userData: usrdata(logGroup.logGroupName, "math"), - instanceType: instanceTypeMathWorker, - securityGroup: mathWorkerSecurityGroup, - keyPair: props.enableSSHAccess ? mathWorkerKeyPair : undefined, - role: instanceRole, - }); - - const delphiSmallLaunchTemplate = new ec2.LaunchTemplate(this, 'DelphiSmallLaunchTemplate', { - machineImage: machineImageDelphiSmall, - userData: usrdata(logGroup.logGroupName, "delphi", "small"), - instanceType: instanceTypeDelphiSmall, - securityGroup: delphiSecurityGroup, - keyPair: props.enableSSHAccess ? delphiSmallKeyPair : undefined, - role: instanceRole, - }); - - const delphiLargeLaunchTemplate = new ec2.LaunchTemplate(this, 'DelphiLargeLaunchTemplate', { - machineImage: machineImageDelphiLarge, - userData: usrdata(logGroup.logGroupName, "delphi", "large"), - instanceType: instanceTypeDelphiLarge, - securityGroup: delphiSecurityGroup, - keyPair: props.enableSSHAccess ? 
delphiLargeKeyPair : undefined, - role: instanceRole, - }); - - const asgWeb = new autoscaling.AutoScalingGroup(this, 'Asg', { + encrypted: true, + lifecyclePolicy: efs.LifecyclePolicy.AFTER_14_DAYS, + performanceMode: efs.PerformanceMode.GENERAL_PURPOSE, + throughputMode: efs.ThroughputMode.ELASTIC, + removalPolicy: cdk.RemovalPolicy.RETAIN, + securityGroup: efsSecurityGroup, + vpcSubnets: { subnetGroupName: 'PrivateWithEgress' }, + fileSystemPolicy: fileSystemPolicyDocument, + }); + + // launch templates + const { + webLaunchTemplate, + mathWorkerLaunchTemplate, + delphiSmallLaunchTemplate, + delphiLargeLaunchTemplate, + ollamaLaunchTemplate + } = configureLaunchTemplates(this, + logGroup, + ollamaNamespace, + ollamaModelDirectory, + fileSystem, + machineImageWeb, + instanceTypeWeb, + webSecurityGroup, + webKeyPair, + instanceRole, + machineImageMathWorker, + instanceTypeMathWorker, + mathWorkerSecurityGroup, + mathWorkerKeyPair, + machineImageDelphiSmall, + instanceTypeDelphiSmall, + delphiSmallKeyPair, + machineImageDelphiLarge, + instanceTypeDelphiLarge, + delphiSecurityGroup, + delphiLargeKeyPair, + machineImageOllama, + instanceTypeOllama, + ollamaKeyPair, + ollamaSecurityGroup + ); + + // Auto Scaling Groups and alarms + const { + asgOllama, + asgWeb, + asgMathWorker, + asgDelphiSmall, + asgDelphiLarge, + commonAsgProps + } = createAutoScalingAndAlarms( + this, vpc, - launchTemplate: webLaunchTemplate, - minCapacity: 2, - maxCapacity: 10, - desiredCapacity: 2, - vpcSubnets: { subnetType: ec2.SubnetType.PUBLIC }, - healthCheck: autoscaling.HealthCheck.elb({grace: cdk.Duration.minutes(5)}) - }); - - const asgMathWorker = new autoscaling.AutoScalingGroup(this, 'AsgMathWorker', { + instanceRole, + ollamaLaunchTemplate, + logGroup, + fileSystem, + webLaunchTemplate, + mathWorkerLaunchTemplate, + delphiSmallLaunchTemplate, + delphiLargeLaunchTemplate, + ollamaNamespace, + alarmTopic + ); + + // --- DEPLOY STUFF + const { + application, + deploymentBucket, + deploymentGroup + } = createCodedeployConfig( + this, + instanceRole, + asgWeb, + asgMathWorker, + asgDelphiSmall, + asgDelphiLarge, + codeDeployRole + ); + + // --- Ollama Network Load Balancer (Internal, in Private+Egress) + const ollamaNlb = new elbv2.NetworkLoadBalancer(this, 'OllamaNlb', { vpc, - launchTemplate: mathWorkerLaunchTemplate, - minCapacity: 1, - desiredCapacity: 1, - maxCapacity: 5, - vpcSubnets: { - subnetType: ec2.SubnetType.PUBLIC, - }, - healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(2) }), + internetFacing: false, // Internal only + crossZoneEnabled: true, + vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS }, }); - - const mathWorkerCpuMetric = new cloudwatch.Metric({ - namespace: 'AWS/EC2', - metricName: 'CPUUtilization', - dimensionsMap: { - AutoScalingGroupName: asgMathWorker.autoScalingGroupName, - }, - statistic: 'Average', // default, config if necessary - period: cdk.Duration.minutes(10), + const ollamaListener = ollamaNlb.addListener('OllamaListener', { + port: ollamaPort, + protocol: elbv2.Protocol.TCP, }); - - asgMathWorker.scaleToTrackMetric('CpuTracking', { - metric: mathWorkerCpuMetric, - targetValue: 50, // Target 50% CPU utilization - disableScaleIn: true, // unneeded hosts will be disabled manualy - }); - - // Delphi Small Instance Auto Scaling Group - const asgDelphiSmall = new autoscaling.AutoScalingGroup(this, 'AsgDelphiSmall', { + const ollamaTargetGroup = new elbv2.NetworkTargetGroup(this, 'OllamaTargetGroup', { vpc, - launchTemplate: 
delphiSmallLaunchTemplate, - minCapacity: 1, - desiredCapacity: 2, - maxCapacity: 5, - vpcSubnets: { - subnetType: ec2.SubnetType.PUBLIC, - }, - healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(2) }), - }); - - // Delphi Large Instance Auto Scaling Group - const asgDelphiLarge = new autoscaling.AutoScalingGroup(this, 'AsgDelphiLarge', { - vpc, - launchTemplate: delphiLargeLaunchTemplate, - minCapacity: 0, - desiredCapacity: 1, - maxCapacity: 3, - vpcSubnets: { - subnetType: ec2.SubnetType.PUBLIC, - }, - healthCheck: autoscaling.HealthCheck.ec2({ grace: cdk.Duration.minutes(2) }), - }); - - // CPU metrics for Delphi small instances - const delphiSmallCpuMetric = new cloudwatch.Metric({ - namespace: 'AWS/EC2', - metricName: 'CPUUtilization', - dimensionsMap: { - AutoScalingGroupName: asgDelphiSmall.autoScalingGroupName, - }, - statistic: 'Average', - period: cdk.Duration.minutes(5), - }); - - // CPU metrics for Delphi large instances - const delphiLargeCpuMetric = new cloudwatch.Metric({ - namespace: 'AWS/EC2', - metricName: 'CPUUtilization', - dimensionsMap: { - AutoScalingGroupName: asgDelphiLarge.autoScalingGroupName, + port: ollamaPort, + protocol: elbv2.Protocol.TCP, + targetType: elbv2.TargetType.INSTANCE, + targets: [asgOllama], + healthCheck: { + protocol: elbv2.Protocol.TCP, + interval: cdk.Duration.seconds(30), + healthyThresholdCount: 2, + unhealthyThresholdCount: 2, }, - statistic: 'Average', - period: cdk.Duration.minutes(5), - }); - - // Scale small Delphi instances based on CPU usage - asgDelphiSmall.scaleToTrackMetric('DelphiSmallCpuTracking', { - metric: delphiSmallCpuMetric, - targetValue: 60, // Target 60% CPU utilization - scaleInCooldown: cdk.Duration.minutes(5), - scaleOutCooldown: cdk.Duration.minutes(2), + deregistrationDelay: cdk.Duration.seconds(60), }); - - // Scale large Delphi instances based on CPU usage - asgDelphiLarge.scaleToTrackMetric('DelphiLargeCpuTracking', { - metric: delphiLargeCpuMetric, - targetValue: 60, // Target 60% CPU utilization - scaleInCooldown: cdk.Duration.minutes(5), - scaleOutCooldown: cdk.Duration.minutes(2), - }); - - // CloudWatch alarms for Delphi small instances - const delphiSmallHighCpuAlarm = new cloudwatch.Alarm(this, 'DelphiSmallHighCpuAlarm', { - metric: delphiSmallCpuMetric, - threshold: 80, - evaluationPeriods: 2, - datapointsToAlarm: 2, - comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD, - alarmDescription: 'Alert when Delphi small instances CPU exceeds 80% for 10 minutes', - }); - - delphiSmallHighCpuAlarm.addAlarmAction(new cdk.aws_cloudwatch_actions.SnsAction(alarmTopic)); - - // CloudWatch alarms for Delphi large instances - const delphiLargeHighCpuAlarm = new cloudwatch.Alarm(this, 'DelphiLargeHighCpuAlarm', { - metric: delphiLargeCpuMetric, - threshold: 80, - evaluationPeriods: 2, - datapointsToAlarm: 2, - comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD, - alarmDescription: 'Alert when Delphi large instances CPU exceeds 80% for 10 minutes', - }); - - delphiLargeHighCpuAlarm.addAlarmAction(new cdk.aws_cloudwatch_actions.SnsAction(alarmTopic)); + ollamaListener.addTargetGroups('OllamaTg', ollamaTargetGroup); - // DEPLOY STUFF - const application = new codedeploy.ServerApplication(this, 'CodeDeployApplication', { - applicationName: 'PolisApplication', + // Secret for Ollama NLB endpoint + const ollamaServiceSecret = new secretsmanager.Secret(this, 'OllamaServiceSecret', { + secretName: '/polis/ollama-service-url', + description: 'URL for the internal 
Ollama service endpoint (NLB)', + // Store the NLB DNS name and port + secretStringValue: cdk.SecretValue.unsafePlainText(`http://${ollamaNlb.loadBalancerDnsName}:${ollamaPort}`), }); + ollamaServiceSecret.grantRead(instanceRole); - const deploymentBucket = new s3.Bucket(this, 'DeploymentPackageBucket', { - bucketName: `polis-deployment-packages-${cdk.Stack.of(this).account}-${cdk.Stack.of(this).region}`, - removalPolicy: cdk.RemovalPolicy.DESTROY, - autoDeleteObjects: true, - versioned: true, - publicReadAccess: false, - blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, - }); - - deploymentBucket.grantRead(instanceRole); - - const deploymentGroup = new codedeploy.ServerDeploymentGroup(this, 'DeploymentGroup', { - application, - deploymentGroupName: 'PolisDeploymentGroup', - autoScalingGroups: [asgWeb, asgMathWorker, asgDelphiSmall, asgDelphiLarge], - deploymentConfig: codedeploy.ServerDeploymentConfig.ONE_AT_A_TIME, - role: codeDeployRole, - installAgent: true, - }); - - // Allow traffic from the web ASG to the database + // --- DB Access Rules db.connections.allowFrom(asgWeb, ec2.Port.tcp(5432), 'Allow database access from web ASG'); db.connections.allowFrom(asgMathWorker, ec2.Port.tcp(5432), 'Allow database access from math ASG'); db.connections.allowFrom(asgDelphiSmall, ec2.Port.tcp(5432), 'Allow database access from Delphi small ASG'); db.connections.allowFrom(asgDelphiLarge, ec2.Port.tcp(5432), 'Allow database access from Delphi large ASG'); - // ELB - const lb = new elbv2.ApplicationLoadBalancer(this, 'Lb', { - vpc, - internetFacing: true, - securityGroup: lbSecurityGroup, // Use the dedicated ALB security group - idleTimeout: cdk.Duration.seconds(300), - }); - - const webTargetGroup = new elbv2.ApplicationTargetGroup(this, 'WebAppTargetGroup', { + // ALB & DNS + const { + lb, + webTargetGroup, + httpListener, + httpsListener, + webScalingPolicy + } = createALBAndDNS( + this, vpc, - port: 80, - protocol: elbv2.ApplicationProtocol.HTTP, - targets: [asgWeb], - healthCheck: { - path: "/api/v3/testConnection", - interval: cdk.Duration.seconds(300) - } - }); - - const httpListener = lb.addListener('HttpListener', { - port: 80, - open: true, - defaultTargetGroups: [webTargetGroup], - }); - - // ACM Certificate Request - const certificate = new acm.Certificate(this, 'WebAppCertificate', { - domainName: 'pol.is', - validation: acm.CertificateValidation.fromDns(), - }); - - const httpsListener = lb.addListener('HttpsListener', { - port: 443, - certificates: [certificate], - open: true, - defaultTargetGroups: [webTargetGroup], - }); - - // Web Server - Target Tracking Scaling based on ALB Request Count - const webScalingPolicy = asgWeb.scaleOnRequestCount('WebScalingPolicy', { - targetRequestsPerMinute: 600, - disableScaleIn: true, // unneeded hosts will be disabled manualy - }); - - const webAppEnvVarsSecret = new secretsmanager.Secret(this, 'WebAppEnvVarsSecret', { - secretName: 'polis-web-app-env-vars', - description: 'Environment variables for the Polis web application', - }); - - asgWeb.node.addDependency(logGroup); - asgWeb.node.addDependency(webAppEnvVarsSecret); - asgMathWorker.node.addDependency(logGroup); - asgMathWorker.node.addDependency(webAppEnvVarsSecret); - asgDelphiSmall.node.addDependency(logGroup); - asgDelphiSmall.node.addDependency(webAppEnvVarsSecret); - asgDelphiLarge.node.addDependency(logGroup); - asgDelphiLarge.node.addDependency(webAppEnvVarsSecret); - asgWeb.node.addDependency(db); - asgMathWorker.node.addDependency(db); - asgDelphiSmall.node.addDependency(db); - 
asgDelphiLarge.node.addDependency(db); + lbSecurityGroup, + asgWeb + ); + + // --- Secrets & Dependencies - creates secrets in Secrets Manager, grants services permission to interact with each other, etc. + createSecretsAndDependencies( + this, + instanceRole, + db, + logGroup, + asgWeb, + asgMathWorker, + asgDelphiSmall, + asgDelphiLarge, + asgOllama, + fileSystem + ); + + // --- Outputs + new cdk.CfnOutput(this, 'LoadBalancerDNS', { value: lb.loadBalancerDnsName, description: 'Public DNS name of the Application Load Balancer' }); + new cdk.CfnOutput(this, 'OllamaNlbDnsName', { value: ollamaNlb.loadBalancerDnsName, description: 'Internal DNS Name for the Ollama Network Load Balancer'}); + new cdk.CfnOutput(this, 'OllamaServiceSecretArn', { value: ollamaServiceSecret.secretArn, description: 'ARN of the Secret containing the Ollama service URL' }); + new cdk.CfnOutput(this, 'EfsFileSystemId', { value: fileSystem.fileSystemId, description: 'ID of the EFS File System for Ollama models' }); } -} +} \ No newline at end of file diff --git a/cdk/secrets.ts b/cdk/secrets.ts new file mode 100644 index 0000000000..cbd32c3f5d --- /dev/null +++ b/cdk/secrets.ts @@ -0,0 +1,50 @@ +import { Construct } from "constructs"; +import * as cdk from 'aws-cdk-lib'; +import * as autoscaling from 'aws-cdk-lib/aws-autoscaling'; +import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; + +export default ( + self: Construct, + instanceRole: cdk.aws_iam.Role, + db: cdk.aws_rds.DatabaseInstance, + logGroup: cdk.aws_logs.LogGroup, + asgWeb: cdk.aws_autoscaling.AutoScalingGroup, + asgMathWorker: cdk.aws_autoscaling.AutoScalingGroup, + asgDelphiSmall: cdk.aws_autoscaling.AutoScalingGroup, + asgDelphiLarge: cdk.aws_autoscaling.AutoScalingGroup, + asgOllama: cdk.aws_autoscaling.AutoScalingGroup, + fileSystem: cdk.aws_efs.FileSystem +) => { + const webAppEnvVarsSecret = new secretsmanager.Secret(self, 'WebAppEnvVarsSecret', { + secretName: 'polis-web-app-env-vars', + description: 'Environment variables for the Polis web application', + }); + const clientAdminEnvVarsSecret = new secretsmanager.Secret(self, 'ClientAdminEnvVarsSecret', { + secretName: 'polis-client-admin-env-vars', + description: 'Environment variables for the Polis client-admin web application', + }); + + const clientReportEnvVarsSecret = new secretsmanager.Secret(self, 'ClientReportEnvVarsSecret', { + secretName: 'polis-client-report-env-vars', + description: 'Environment variables for the Polis client-report web application', + }); + webAppEnvVarsSecret.grantRead(instanceRole); + clientAdminEnvVarsSecret.grantRead(instanceRole); + clientReportEnvVarsSecret.grantRead(instanceRole); + + // Dependencies (Add ASGs to loops/lists) + const addDbDependency = (asg: autoscaling.IAutoScalingGroup) => asg.node.addDependency(db); + const addLogDependency = (asg: autoscaling.IAutoScalingGroup) => asg.node.addDependency(logGroup); + const addSecretDependency = (asg: autoscaling.IAutoScalingGroup) => asg.node.addDependency(webAppEnvVarsSecret); + + // Apply common dependencies to all ASGs + [asgWeb, asgMathWorker, asgDelphiSmall, asgDelphiLarge, asgOllama].forEach(asg => { + addLogDependency(asg); + addSecretDependency(asg); + // Only add DB dependency if the service needs it + if (asg !== asgOllama) { + addDbDependency(asg); + } + }); + asgOllama.node.addDependency(fileSystem); +} diff --git a/cdk/securityGroups.ts b/cdk/securityGroups.ts new file mode 100644 index 0000000000..ad6c577117 --- /dev/null +++ b/cdk/securityGroups.ts @@ -0,0 +1,40 @@ +import * as ec2 from 
'aws-cdk-lib/aws-ec2'; +import { Construct } from 'constructs'; + +export default (vpc: ec2.IVpc, self: Construct) => { + const webSecurityGroup = new ec2.SecurityGroup(self, 'WebSecurityGroup', { + vpc, + description: 'Allow HTTP and SSH access to web instances', + allowAllOutbound: true + }); + const mathWorkerSecurityGroup = new ec2.SecurityGroup(self, 'MathWorkerSG', { + vpc, + description: 'Security group for Polis math worker', + allowAllOutbound: true + }); + // Delphi Security Group + const delphiSecurityGroup = new ec2.SecurityGroup(self, 'DelphiSecurityGroup', { + vpc, + description: 'SG for Delphi instances', + allowAllOutbound: true + }); + // Ollama Security Group + const ollamaSecurityGroup = new ec2.SecurityGroup(self, 'OllamaSecurityGroup', { + vpc, + description: 'SG for Ollama instance', + allowAllOutbound: true + }); + // EFS Security Group + const efsSecurityGroup = new ec2.SecurityGroup(self, 'EfsSecurityGroup', { + vpc, + description: 'SG for EFS mount targets', + allowAllOutbound: false + }); + return { + webSecurityGroup, + mathWorkerSecurityGroup, + delphiSecurityGroup, + ollamaSecurityGroup, + efsSecurityGroup, + } +} \ No newline at end of file diff --git a/cdk/vpc.ts b/cdk/vpc.ts new file mode 100644 index 0000000000..c9b20dfede --- /dev/null +++ b/cdk/vpc.ts @@ -0,0 +1,22 @@ +import * as ec2 from 'aws-cdk-lib/aws-ec2'; +export default (self: any) => new ec2.Vpc(self, 'Vpc', { + maxAzs: 2, + natGateways: 1, // Use 1 for non-prod/cost saving, 2+ for prod HA + subnetConfiguration: [ + { + cidrMask: 24, + name: 'Public', + subnetType: ec2.SubnetType.PUBLIC, + }, + { + cidrMask: 24, + name: 'Private', + subnetType: ec2.SubnetType.PRIVATE_ISOLATED, + }, + { + cidrMask: 24, + name: 'PrivateWithEgress', + subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS, + }, + ] +}); \ No newline at end of file diff --git a/delphi/CLAUDE.md b/delphi/CLAUDE.md index ad701e9416..d959339d6a 100644 --- a/delphi/CLAUDE.md +++ b/delphi/CLAUDE.md @@ -89,7 +89,7 @@ Always use the commands above to determine the most substantial conversation whe ```bash docker exec polis-dev-delphi-1 python -c " import boto3, json - dynamodb = boto3.resource('dynamodb', endpoint_url='http://dynamodb:8000', region_name='us-west-2') + dynamodb = boto3.resource('dynamodb', endpoint_url='http://dynamodb:8000', region_name='us-east-1') table = dynamodb.Table('Delphi_JobQueue') job_id = '' # Replace with your job ID job = table.get_item(Key={'job_id': job_id})['Item'] @@ -103,7 +103,7 @@ Always use the commands above to determine the most substantial conversation whe ```bash docker exec polis-dev-delphi-1 python -c " import boto3, json - dynamodb = boto3.resource('dynamodb', endpoint_url='http://dynamodb:8000', region_name='us-west-2') + dynamodb = boto3.resource('dynamodb', endpoint_url='http://dynamodb:8000', region_name='us-east-1') table = dynamodb.Table('Delphi_JobQueue') job_id = '' # Replace with your job ID job = table.get_item(Key={'job_id': job_id})['Item'] @@ -143,7 +143,7 @@ When connecting to DynamoDB from the Delphi container, use these settings: DYNAMODB_ENDPOINT=http://host.docker.internal:8000 AWS_ACCESS_KEY_ID=dummy AWS_SECRET_ACCESS_KEY=dummy -AWS_REGION=us-west-2 +AWS_REGION=us-east-1 ``` These are configured in run_delphi.sh for all DynamoDB operations. 
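The endpoint/region convention these changes converge on — an unset or empty `DYNAMODB_ENDPOINT` means "talk to real AWS", anything else is a local emulator, and dummy credentials are only filled in for local endpoints — can be summarized in one place. A minimal sketch, assuming `us-east-1` as the fallback region; the `dynamodb_resource` helper name is illustrative and does not exist in the codebase:

```python
import os
import boto3

def dynamodb_resource():
    # Unset or empty DYNAMODB_ENDPOINT -> None, so boto3 uses the real AWS endpoint.
    endpoint = os.environ.get("DYNAMODB_ENDPOINT") or None
    region = os.environ.get("AWS_REGION", "us-east-1")
    # Patterns mirroring delphi_cli.py's local-development detection.
    local_patterns = ("localhost", "host.docker.internal", "dynamodb:")
    if endpoint and any(p in endpoint for p in local_patterns):
        # Local DynamoDB accepts any credentials; fill in dummies if absent.
        os.environ.setdefault("AWS_ACCESS_KEY_ID", "fakeMyKeyId")
        os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "fakeSecretAccessKey")
    return boto3.resource("dynamodb", endpoint_url=endpoint, region_name=region)
```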
diff --git a/delphi/Dockerfile b/delphi/Dockerfile index 3e25ef3e93..f6e68a99ad 100644 --- a/delphi/Dockerfile +++ b/delphi/Dockerfile @@ -1,66 +1,98 @@ -# Use an official Python runtime as a parent image -# Using 3.12 as recommended in QUICK_START.md -FROM python:3.12-slim +# ---- Stage 1: Builder ---- + FROM python:3.12-slim AS builder -# Set environment variables -# Prevent Python from writing pyc files to disc -ENV PYTHONDONTWRITEBYTECODE=1 -# Ensure Python output is sent straight to terminal -ENV PYTHONUNBUFFERED=1 - -# Install dependencies needed for building packages and cloning evoc -RUN apt-get update && \ - apt-get install -y git build-essential cmake \ - gcc g++ gfortran libopenblas-dev curl && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - - -# Set the working directory in the container -WORKDIR /app - -# Copy requirements file -COPY requirements.txt . - -# Install dependencies -RUN pip install --no-cache-dir -r requirements.txt -# Install additional dependencies needed for the orchestrator and make sure they're in the path -RUN pip install --no-cache-dir colorlog fastapi==0.115.0 pydantic -# Verify FastAPI installation -RUN pip list | grep fastapi - -# Clone and install evoc -RUN git clone https://github.com/TutteInstitute/evoc && \ - cd evoc && \ - pip install . - -# Copy all the necessary files for the application -COPY polismath/ ./polismath/ -COPY scripts/ ./scripts/ -COPY umap_narrative/ ./umap_narrative/ - -# Create data directory -RUN mkdir -p data - -# Make port 8080 available to the world outside this container -# Default port seems to be 8080 based on server.py -# Use ARG/ENV to make this configurable if needed -EXPOSE 8080 - -# Set PYTHONPATH to include current directory -ENV PYTHONPATH "${PYTHONPATH}:/app" - -# Copy the required scripts and make them executable -COPY start_poller.sh . -COPY run_delphi.sh . -COPY run_delphi.py . -COPY create_dynamodb_tables.py . -COPY setup_minio.py . -COPY scripts/setup_ollama.sh ./setup_ollama.sh -RUN chmod +x start_poller.sh run_delphi.sh run_delphi.py setup_ollama.sh - -# Command to: -# 1. Initialize DynamoDB tables -# 2. Set up the Ollama model based on environment variables -# 3. Start the job poller with a 2-second polling interval -CMD ["bash", "-c", "echo 'Setting up DynamoDB tables...' && python create_dynamodb_tables.py --endpoint-url=${DYNAMODB_ENDPOINT:-http://dynamodb:8000} && echo 'Setting up MinIO bucket...' && python setup_minio.py && echo 'Setting up Ollama model...' && ./setup_ollama.sh && echo 'Starting job poller...' && python scripts/job_poller.py --interval=2"] + ENV PYTHONDONTWRITEBYTECODE=1 + ENV PYTHONUNBUFFERED=1 + + RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + git \ + build-essential \ + cmake \ + gcc \ + g++ \ + gfortran \ + libopenblas-dev \ + curl \ + && apt-get clean && \ + rm -rf /var/lib/apt/lists/* + + WORKDIR /app + + COPY requirements.txt . 
+ + RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu \ + torch==2.3.1+cpu \ + torchvision==0.18.1+cpu \ + torchaudio==2.3.1+cpu + + RUN pip install --no-cache-dir -r requirements.txt + + RUN pip install --no-cache-dir colorlog fastapi==0.115.0 pydantic + + RUN echo "--- PyTorch Check (after requirements.txt) ---" && \ + pip show torch torchvision torchaudio && \ + python -c "import torch; print(f'Torch version: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}')" && \ + echo "--- Looking for NVIDIA/CUDA libs ---" && \ + (ls -lhR /usr/local/lib/python3.12/site-packages/nvidia || echo "NVIDIA directory not found.") && \ + (ls -lhR /usr/local/lib/python3.12/site-packages/torch/lib/*cuda* || echo "No CUDA libs in torch/lib.") + + + RUN git clone https://github.com/TutteInstitute/evoc && \ + cd evoc && \ + pip install --no-cache-dir . + + RUN echo "--- PyTorch Check (after evoc install) ---" && \ + pip show torch torchvision torchaudio && \ + python -c "import torch; print(f'Torch version: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}')" && \ + echo "--- Looking for NVIDIA/CUDA libs (after evoc) ---" && \ + (ls -lhR /usr/local/lib/python3.12/site-packages/nvidia || echo "NVIDIA directory not found.") && \ + (ls -lhR /usr/local/lib/python3.12/site-packages/torch/lib/*cuda* || echo "No CUDA libs in torch/lib.") + + FROM python:3.12-slim AS final + + ENV PYTHONDONTWRITEBYTECODE=1 + ENV PYTHONUNBUFFERED=1 + + RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + curl \ + && apt-get clean && \ + rm -rf /var/lib/apt/lists/* + + WORKDIR /app + + COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages + + COPY polismath/ ./polismath/ + COPY scripts/ ./scripts/ + COPY umap_narrative/ ./umap_narrative/ + + RUN mkdir -p data + EXPOSE 8080 + ENV PYTHONPATH "${PYTHONPATH}:/app" + + COPY start_poller.sh . + COPY run_delphi.sh . + COPY run_delphi.py . + COPY create_dynamodb_tables.py . + COPY setup_minio.py . + COPY scripts/setup_ollama.sh ./setup_ollama.sh + RUN chmod +x start_poller.sh run_delphi.sh run_delphi.py setup_ollama.sh + + CMD ["bash", "-c", "\ + echo 'Ensuring DynamoDB tables are set up (runs in all environments)...'; \ + python create_dynamodb_tables.py && \ + echo 'DynamoDB table setup script finished.'; \ + \ + if [ -n \"${DYNAMODB_ENDPOINT}\" ]; then \ + echo 'DYNAMODB_ENDPOINT is set (value: \"${DYNAMODB_ENDPOINT}\"), assuming local/dev environment. Running additional local setup scripts...'; \ + echo 'Setting up MinIO bucket...' && python setup_minio.py && \ + echo 'Setting up Ollama model (local script)...' && ./setup_ollama.sh; \ + else \ + echo 'DYNAMODB_ENDPOINT is not set, assuming production-like environment. Skipping MinIO and local Ollama setup scripts.'; \ + fi && \ + \ + echo 'Starting job poller...' 
&& \ + python scripts/job_poller.py --interval=2\ + "] \ No newline at end of file diff --git a/delphi/create_dynamodb_tables.py b/delphi/create_dynamodb_tables.py index 12d6d41fcb..2603deece0 100644 --- a/delphi/create_dynamodb_tables.py +++ b/delphi/create_dynamodb_tables.py @@ -9,8 +9,8 @@ python create_dynamodb_tables.py [options] Options: - --endpoint-url ENDPOINT_URL DynamoDB endpoint URL (default: http://localhost:8000) - --region REGION AWS region (default: us-west-2) + --endpoint-url ENDPOINT_URL DynamoDB endpoint URL + --region REGION AWS region (default: us-east-1) --delete-existing Delete existing tables before creating new ones --evoc-only Create only EVōC tables --polismath-only Create only Polis math tables @@ -421,12 +421,12 @@ def _create_tables(dynamodb, tables, existing_tables): return created_tables -def create_tables(endpoint_url=None, region_name='us-west-2', +def create_tables(endpoint_url=None, region_name='us-east-1', delete_existing=False, evoc_only=False, polismath_only=False, aws_profile=None): # Use the environment variable if endpoint_url is not provided if endpoint_url is None: - endpoint_url = os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000') + endpoint_url = os.environ.get('DYNAMODB_ENDPOINT') logger.info(f"Creating tables with DynamoDB endpoint: {endpoint_url}") """ @@ -498,9 +498,9 @@ def main(): # Parse arguments parser = argparse.ArgumentParser(description='Create DynamoDB tables for Delphi system') parser.add_argument('--endpoint-url', type=str, default=None, - help='DynamoDB endpoint URL (default: use DYNAMODB_ENDPOINT env var or http://localhost:8000)') - parser.add_argument('--region', type=str, default='us-west-2', - help='AWS region (default: us-west-2)') + help='DynamoDB endpoint URL (default: use DYNAMODB_ENDPOINT env var)') + parser.add_argument('--region', type=str, default='us-east-1', + help='AWS region (default: us-east-1)') parser.add_argument('--delete-existing', action='store_true', help='Delete existing tables before creating new ones') parser.add_argument('--evoc-only', action='store_true', diff --git a/delphi/docker-compose.yml.bak b/delphi/docker-compose.yml.bak index ac6512efda..e04b62fb47 100644 --- a/delphi/docker-compose.yml.bak +++ b/delphi/docker-compose.yml.bak @@ -56,7 +56,7 @@ services: - DYNAMODB_ENDPOINT=http://host.docker.internal:8000 - AWS_ACCESS_KEY_ID=dummy - AWS_SECRET_ACCESS_KEY=dummy - - AWS_REGION=us-west-2 + - AWS_REGION=us-east-1 # Ollama settings - configurable via environment variables - OLLAMA_MODEL=${OLLAMA_MODEL:-llama3.1:8b} - OLLAMA_HOST=http://host.docker.internal:11434 # Connect to Ollama on host network diff --git a/delphi/polismath/database/dynamodb.py b/delphi/polismath/database/dynamodb.py index 86cb093b41..a52480aedd 100644 --- a/delphi/polismath/database/dynamodb.py +++ b/delphi/polismath/database/dynamodb.py @@ -22,8 +22,8 @@ class DynamoDBClient: """Client for interacting with DynamoDB for Polis math data.""" def __init__(self, - endpoint_url: Optional[str] = 'http://localhost:8000', - region_name: str = 'us-west-2', + endpoint_url: Optional[str] = None, + region_name: str = 'us-east-1', aws_access_key_id: Optional[str] = None, aws_secret_access_key: Optional[str] = None): """ diff --git a/delphi/polismath/database/postgres.py b/delphi/polismath/database/postgres.py index f43c5ad2ed..abe6843ead 100644 --- a/delphi/polismath/database/postgres.py +++ b/delphi/polismath/database/postgres.py @@ -132,11 +132,9 @@ def get_uri(self) -> str: password_str = f":{self.password}" if self.password 
else "" # Build URI - uri = f"postgresql://{self.user}{password_str}@{self.host}:{self.port}/{self.database}" + uri = f"postgresql://{self.user}{password_str}@{self.host}:{self.port}/{self.database}?sslmode=require" - # Add SSL mode if needed - if self.ssl_mode and self.ssl_mode != "prefer": - uri += f"?sslmode={self.ssl_mode}" + # todo - remove ssl mode if local or dev return uri diff --git a/delphi/polismath/run_math_pipeline.py b/delphi/polismath/run_math_pipeline.py index 584d0f6b8f..13e411dbae 100644 --- a/delphi/polismath/run_math_pipeline.py +++ b/delphi/polismath/run_math_pipeline.py @@ -330,8 +330,8 @@ def main(): logger.info(f"[{time.time() - start_time:.2f}s] Initializing DynamoDB client...") from polismath.database.dynamodb import DynamoDBClient # Use environment variables or sensible defaults for local/test - endpoint_url = os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000') - region_name = os.environ.get('AWS_REGION', 'us-west-2') + endpoint_url = os.environ.get('DYNAMODB_ENDPOINT') + region_name = os.environ.get('AWS_REGION', 'us-east-1') aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID', 'dummy') aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY', 'dummy') dynamodb_client = DynamoDBClient( diff --git a/delphi/requirements.txt b/delphi/requirements.txt index d277ad770d..0c299b3ee7 100644 --- a/delphi/requirements.txt +++ b/delphi/requirements.txt @@ -18,7 +18,7 @@ colorlog>=6.9.0 umap-learn>=0.5.2 sentence-transformers>=2.2.0 hdbscan>=0.8.40 -torch>=1.11.0 +# torch>=1.11.0 numba>=0.56.4 llvmlite>=0.39.0 diff --git a/delphi/run_delphi.sh b/delphi/run_delphi.sh index 672b43cf3b..e2a374f9f1 100755 --- a/delphi/run_delphi.sh +++ b/delphi/run_delphi.sh @@ -71,9 +71,9 @@ echo -e "${YELLOW}Using Ollama model: $MODEL${NC}" # Set up environment for the pipeline export PYTHONPATH="/app:$PYTHONPATH" -export OLLAMA_HOST=${OLLAMA_HOST:-http://ollama:11434} +export OLLAMA_HOST=${OLLAMA_HOST} export OLLAMA_MODEL=$MODEL -export DYNAMODB_ENDPOINT=${DYNAMODB_ENDPOINT:-http://dynamodb:8000} +export DYNAMODB_ENDPOINT=${DYNAMODB_ENDPOINT} # For testing with limited votes if [ -n "$MAX_VOTES" ]; then diff --git a/delphi/scripts/delphi_cli.py b/delphi/scripts/delphi_cli.py index f3b5ecdd32..4e2d629b99 100755 --- a/delphi/scripts/delphi_cli.py +++ b/delphi/scripts/delphi_cli.py @@ -50,16 +50,18 @@ def create_elegant_header(): console.print(header) console.print() -def setup_dynamodb(endpoint_url=None, region='us-west-2'): - """Set up DynamoDB connection.""" - # Use environment variable if endpoint not provided +def setup_dynamodb(endpoint_url=None, region='us-east-1'): if endpoint_url is None: - endpoint_url = os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000') + endpoint_url = os.environ.get('DYNAMODB_ENDPOINT') - # For local development - if 'localhost' in endpoint_url or 'host.docker.internal' in endpoint_url: - os.environ.setdefault('AWS_ACCESS_KEY_ID', 'fakeMyKeyId') - os.environ.setdefault('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey') + if endpoint_url == "": + endpoint_url = None + + if endpoint_url: + local_patterns = ['localhost', 'host.docker.internal', 'dynamodb:'] + if any(pattern in endpoint_url for pattern in local_patterns): + os.environ.setdefault('AWS_ACCESS_KEY_ID', 'fakeMyKeyId') + os.environ.setdefault('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey') return boto3.resource('dynamodb', endpoint_url=endpoint_url, region_name=region) @@ -776,7 +778,7 @@ def main(): # Common options parser.add_argument("--endpoint-url", help="DynamoDB endpoint 
URL") - parser.add_argument("--region", default="us-west-2", help="AWS region") + parser.add_argument("--region", default="us-east-1", help="AWS region") # Interactive mode is the default when no arguments are provided parser.add_argument("--interactive", action="store_true", diff --git a/delphi/scripts/job_poller.py b/delphi/scripts/job_poller.py index 37d875fc57..278298dc84 100755 --- a/delphi/scripts/job_poller.py +++ b/delphi/scripts/job_poller.py @@ -9,8 +9,8 @@ python job_poller.py [options] Options: - --endpoint-url=URL DynamoDB endpoint URL (default: http://localhost:8000) - --region=REGION AWS region (default: us-west-2) + --endpoint-url=URL DynamoDB endpoint URL + --region=REGION AWS region (default: us-east-1) --interval=SECONDS Polling interval in seconds (default: 10) --max-workers=N Maximum number of concurrent workers (default: 1) --log-level=LEVEL Logging level (default: INFO) @@ -41,9 +41,9 @@ class JobProcessor: """Process jobs from the Delphi_JobQueue.""" - def __init__(self, endpoint_url=None, region='us-west-2'): + def __init__(self, endpoint_url=None, region='us-east-1'): """Initialize the job processor.""" - self.endpoint_url = endpoint_url or os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000') + self.endpoint_url = endpoint_url or os.environ.get('DYNAMODB_ENDPOINT') self.region = region self.worker_id = str(uuid.uuid4()) @@ -52,6 +52,10 @@ def __init__(self, endpoint_url=None, region='us-west-2'): # For local development os.environ.setdefault('AWS_ACCESS_KEY_ID', 'fakeMyKeyId') os.environ.setdefault('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey') + + if self.endpoint_url == "": + logger.info("DynamoDB: DYNAMODB_ENDPOINT was an empty string, treating as None for AWS default endpoint.") + self.endpoint_url = None logger.info(f"Connecting to DynamoDB at {self.endpoint_url}") self.dynamodb = boto3.resource('dynamodb', @@ -689,8 +693,8 @@ def process_job(self, job): 'visualization_path': f'visualizations/{report_id}/{job_id}', 'report_id': report_id, 'visualization_urls': { - 'interactive': f"{os.environ.get('AWS_S3_ENDPOINT', '')}/{os.environ.get('AWS_S3_BUCKET_NAME', 'delphi')}/visualizations/{report_id}/{job_id}/layer_0_datamapplot.html" - }, + 'interactive': f"{os.environ.get('AWS_S3_ENDPOINT', '')}/{os.environ.get('AWS_S3_BUCKET_NAME', 'polis-delphi')}/visualizations/{report_id}/{job_id}/layer_0_datamapplot.html" + } 'execution_finished_at': datetime.now().isoformat() } @@ -809,7 +813,7 @@ def main(): parser = argparse.ArgumentParser(description='Delphi Job Poller Service') parser.add_argument('--endpoint-url', type=str, default=None, help='DynamoDB endpoint URL') - parser.add_argument('--region', type=str, default='us-west-2', + parser.add_argument('--region', type=str, default='us-east-1', help='AWS region') parser.add_argument('--interval', type=int, default=10, help='Polling interval in seconds') @@ -829,7 +833,7 @@ def main(): signal.signal(signal.SIGTERM, signal_handler) logger.info("Starting Delphi Job Poller Service") - logger.info(f"Endpoint URL: {args.endpoint_url or os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000')}") + logger.info(f"Endpoint URL: {args.endpoint_url or os.environ.get('DYNAMODB_ENDPOINT')}") logger.info(f"Region: {args.region}") logger.info(f"Polling interval: {args.interval} seconds") logger.info(f"Maximum workers: {args.max_workers}") diff --git a/delphi/start_poller.sh b/delphi/start_poller.sh index 47e9aa86c4..d31e56fb3b 100755 --- a/delphi/start_poller.sh +++ b/delphi/start_poller.sh @@ -8,7 +8,7 @@ SCRIPT_DIR="$( 
cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" POLLER_SCRIPT="$SCRIPT_DIR/scripts/job_poller.py" # Default options -ENDPOINT_URL="${DYNAMODB_ENDPOINT:-http://localhost:8000}" +ENDPOINT_URL="${DYNAMODB_ENDPOINT}" POLL_INTERVAL="${POLL_INTERVAL:-10}" LOG_LEVEL="${LOG_LEVEL:-INFO}" MAX_WORKERS="${MAX_WORKERS:-1}" diff --git a/delphi/tests/test_postgres_real_data.py b/delphi/tests/test_postgres_real_data.py index 73515e9f78..ba573b5646 100644 --- a/delphi/tests/test_postgres_real_data.py +++ b/delphi/tests/test_postgres_real_data.py @@ -40,7 +40,7 @@ def init_dynamodb(): # Create and initialize the client client = DynamoDBClient( endpoint_url='http://localhost:8000', - region_name='us-west-2', + region_name='us-east-1', aws_access_key_id='dummy', aws_secret_access_key='dummy' ) diff --git a/delphi/umap_narrative/500_generate_embedding_umap_cluster.py b/delphi/umap_narrative/500_generate_embedding_umap_cluster.py index a3b67857f9..0127494099 100755 --- a/delphi/umap_narrative/500_generate_embedding_umap_cluster.py +++ b/delphi/umap_narrative/500_generate_embedding_umap_cluster.py @@ -68,11 +68,6 @@ def setup_environment(db_host=None, db_port=None, db_name=None, db_user=None, db logger.info(f"- DATABASE: {os.environ.get('DATABASE_NAME')}") logger.info(f"- USER: {os.environ.get('DATABASE_USER')}") - # DynamoDB settings (for local DynamoDB) - # Only set if not already in environment - if not os.environ.get('DYNAMODB_ENDPOINT'): - os.environ['DYNAMODB_ENDPOINT'] = 'http://localhost:8000' - # Log the endpoint being used endpoint = os.environ.get('DYNAMODB_ENDPOINT') logger.info(f"Using DynamoDB endpoint: {endpoint}") @@ -83,7 +78,7 @@ def setup_environment(db_host=None, db_port=None, db_name=None, db_user=None, db if not os.environ.get('AWS_SECRET_ACCESS_KEY'): os.environ['AWS_SECRET_ACCESS_KEY'] = 'fakeSecretAccessKey' if not os.environ.get('AWS_REGION') and not os.environ.get('AWS_DEFAULT_REGION'): - os.environ['AWS_DEFAULT_REGION'] = 'us-west-2' + os.environ['AWS_DEFAULT_REGION'] = 'us-east-1' def fetch_conversation_data(zid): """ @@ -378,7 +373,7 @@ def process_conversation(zid, export_dynamo=True): dynamo_storage = None if export_dynamo: dynamo_storage = DynamoDBStorage( - region_name='us-west-2', + region_name='us-east-1', endpoint_url=os.environ.get('DYNAMODB_ENDPOINT') ) @@ -518,7 +513,7 @@ def main(): # Process with mock data (store in DynamoDB if requested) if not args.no_dynamo: dynamo_storage = DynamoDBStorage( - region_name='us-west-2', + region_name='us-east-1', endpoint_url=os.environ.get('DYNAMODB_ENDPOINT') ) diff --git a/delphi/umap_narrative/600_generate_llm_topic_names.py b/delphi/umap_narrative/600_generate_llm_topic_names.py index d53f5f5064..927d88fbeb 100755 --- a/delphi/umap_narrative/600_generate_llm_topic_names.py +++ b/delphi/umap_narrative/600_generate_llm_topic_names.py @@ -47,10 +47,6 @@ def setup_environment(dynamo_endpoint=None): if dynamo_endpoint: os.environ['DYNAMODB_ENDPOINT'] = dynamo_endpoint elif not os.environ.get('DYNAMODB_ENDPOINT'): - # Only set if not already in environment - if not os.environ.get('DYNAMODB_ENDPOINT'): - os.environ['DYNAMODB_ENDPOINT'] = 'http://localhost:8000' - # Log the endpoint being used endpoint = os.environ.get('DYNAMODB_ENDPOINT') logger.info(f"Using DynamoDB endpoint: {endpoint}") @@ -63,7 +59,7 @@ def setup_environment(dynamo_endpoint=None): os.environ['AWS_SECRET_ACCESS_KEY'] = 'fakeSecretAccessKey' if not os.environ.get('AWS_DEFAULT_REGION'): - os.environ['AWS_DEFAULT_REGION'] = 'us-west-2' + os.environ['AWS_DEFAULT_REGION'] 
= 'us-east-1' logger.info(f"DynamoDB endpoint: {os.environ.get('DYNAMODB_ENDPOINT')}") logger.info(f"AWS region: {os.environ.get('AWS_DEFAULT_REGION')}") @@ -816,7 +812,7 @@ def main(): parser.add_argument('--output_dir', type=str, default="polis_data", help='Base directory for output files (default: polis_data)') parser.add_argument('--dynamo_endpoint', type=str, default=None, - help='DynamoDB endpoint URL (default: http://localhost:8000)') + help='DynamoDB endpoint URL') parser.add_argument('--start_cluster', type=int, default=None, help='Starting cluster ID for processing a range (inclusive)') parser.add_argument('--end_cluster', type=int, default=None, diff --git a/delphi/umap_narrative/700_datamapplot_for_layer.py b/delphi/umap_narrative/700_datamapplot_for_layer.py index ce6eaa10af..a3fbe30515 100644 --- a/delphi/umap_narrative/700_datamapplot_for_layer.py +++ b/delphi/umap_narrative/700_datamapplot_for_layer.py @@ -41,22 +41,27 @@ def s3_upload_file(local_file_path, s3_key): secret_key = os.environ.get('AWS_S3_SECRET_ACCESS_KEY') bucket_name = os.environ.get('AWS_S3_BUCKET_NAME') region = os.environ.get('AWS_REGION') - - if not all([endpoint_url, access_key, secret_key, bucket_name]): - logger.error("Missing S3 configuration. Cannot upload file.") - return False + + if endpoint_url == "": + endpoint_url = None + + if access_key == "": + access_key = None + + if secret_key == "": + secret_key = None try: # Create S3 client s3_client = boto3.client( 's3', - endpoint_url=endpoint_url, - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, + # endpoint_url=endpoint_url, + # aws_access_key_id=access_key, + # aws_secret_access_key=secret_key, region_name=region, # For MinIO/local development, these settings help - config=boto3.session.Config(signature_version='s3v4'), - verify=False + # config=boto3.session.Config(signature_version='s3v4'), + # verify=False ) # Check if bucket exists, create if it doesn't @@ -68,12 +73,13 @@ def s3_upload_file(local_file_path, s3_key): try: # Create the bucket - for MinIO local we don't need LocationConstraint - if region == 'us-east-1' or 'localhost' in endpoint_url or 'minio' in endpoint_url: - s3_client.create_bucket(Bucket=bucket_name) + if endpoint_url: + if region == 'us-east-1' or 'localhost' in endpoint_url or 'minio' in endpoint_url: + s3_client.create_bucket(Bucket=bucket_name) else: s3_client.create_bucket( Bucket=bucket_name, - CreateBucketConfiguration={'LocationConstraint': region} + # CreateBucketConfiguration={'LocationConstraint': region} - not in us-east-1 - but in other regions ) # Apply bucket policy to make objects public-read @@ -109,7 +115,7 @@ def s3_upload_file(local_file_path, s3_key): # For HTML files, set content type correctly extra_args = { - 'ACL': 'public-read' # Make object publicly readable + # 'ACL': 'public-read' # Make object publicly readable - we don't want this, hence why we have signed urls } # Set the correct content type based on file extension @@ -126,25 +132,30 @@ def s3_upload_file(local_file_path, s3_key): s3_key, ExtraArgs=extra_args ) + + if endpoint_url: - # Generate a URL for the uploaded file - if endpoint_url.startswith('http://localhost') or endpoint_url.startswith('http://127.0.0.1'): - # For local development with MinIO - url = f"{endpoint_url}/{bucket_name}/{s3_key}" - # Clean up URL if needed - url = url.replace('///', '//') - elif 'minio' in endpoint_url: - # For Docker container access to MinIO - url = f"{endpoint_url}/{bucket_name}/{s3_key}" - url = url.replace('///', '//') - else: 
- # For AWS S3 - if endpoint_url.startswith('https://s3.'): - # Standard AWS S3 endpoint - url = f"https://{bucket_name}.s3.amazonaws.com/{s3_key}" - else: - # Custom S3 endpoint + # Generate a URL for the uploaded file + if endpoint_url.startswith('http://localhost') or endpoint_url.startswith('http://127.0.0.1'): + # For local development with MinIO + url = f"{endpoint_url}/{bucket_name}/{s3_key}" + # Clean up URL if needed + url = url.replace('///', '//') + elif 'minio' in endpoint_url: + # For Docker container access to MinIO url = f"{endpoint_url}/{bucket_name}/{s3_key}" + url = url.replace('///', '//') + else: + # For AWS S3 + if endpoint_url.startswith('https://s3.'): + # Standard AWS S3 endpoint + url = f"https://{bucket_name}.s3.amazonaws.com/{s3_key}" + else: + # Custom S3 endpoint + url = f"{endpoint_url}/{bucket_name}/{s3_key}" + else: + # Custom S3 endpoint + url = f"{bucket_name}/{s3_key}" logger.info(f"File uploaded successfully to {url}") return url @@ -240,29 +251,16 @@ def setup_environment(db_host=None, db_port=None, db_name=None, db_user=None, db # DynamoDB settings (for local DynamoDB) if not os.environ.get('DYNAMODB_ENDPOINT'): - # Only set if not already in environment - if not os.environ.get('DYNAMODB_ENDPOINT'): - os.environ['DYNAMODB_ENDPOINT'] = 'http://localhost:8000' # Log the endpoint being used endpoint = os.environ.get('DYNAMODB_ENDPOINT') logger.info(f"Using DynamoDB endpoint: {endpoint}") - if not os.environ.get('AWS_ACCESS_KEY_ID'): - os.environ['AWS_ACCESS_KEY_ID'] = 'fakeMyKeyId' - if not os.environ.get('AWS_SECRET_ACCESS_KEY'): - os.environ['AWS_SECRET_ACCESS_KEY'] = 'fakeSecretAccessKey' if not os.environ.get('AWS_DEFAULT_REGION'): - os.environ['AWS_DEFAULT_REGION'] = 'us-west-2' + os.environ['AWS_DEFAULT_REGION'] = 'us-east-1' # S3 settings - if not os.environ.get('AWS_S3_ENDPOINT'): - os.environ['AWS_S3_ENDPOINT'] = 'http://localhost:9000' - if not os.environ.get('AWS_S3_ACCESS_KEY_ID'): - os.environ['AWS_S3_ACCESS_KEY_ID'] = 'minioadmin' - if not os.environ.get('AWS_S3_SECRET_ACCESS_KEY'): - os.environ['AWS_S3_SECRET_ACCESS_KEY'] = 'minioadmin' if not os.environ.get('AWS_S3_BUCKET_NAME'): - os.environ['AWS_S3_BUCKET_NAME'] = 'delphi' + os.environ['AWS_S3_BUCKET_NAME'] = 'polis-delphi' logger.info(f"S3 Storage settings:") logger.info(f"- Endpoint: {os.environ.get('AWS_S3_ENDPOINT')}") @@ -939,7 +937,7 @@ def main(): parser.add_argument('--output_dir', type=str, default=None, help='Directory to save the visualization') parser.add_argument('--dynamo_endpoint', type=str, default=None, - help='DynamoDB endpoint URL (default: http://localhost:8000)') + help='DynamoDB endpoint URL') args = parser.parse_args() diff --git a/delphi/umap_narrative/701_static_datamapplot_for_layer.py b/delphi/umap_narrative/701_static_datamapplot_for_layer.py index ad5a1b85a5..50279ecc9d 100755 --- a/delphi/umap_narrative/701_static_datamapplot_for_layer.py +++ b/delphi/umap_narrative/701_static_datamapplot_for_layer.py @@ -35,7 +35,7 @@ class DynamoDBStorage: def __init__(self, endpoint_url=None): self.endpoint_url = endpoint_url or os.environ.get("DYNAMODB_ENDPOINT", "http://dynamodb-local:8000") - self.region = os.environ.get("AWS_REGION", "us-west-2") + self.region = os.environ.get("AWS_REGION", "us-east-1") self.dynamodb = boto3.resource('dynamodb', endpoint_url=self.endpoint_url, region_name=self.region) # Define table names using the new Delphi_ naming scheme @@ -275,22 +275,27 @@ def s3_upload_file(local_file_path, s3_key): secret_key = 
os.environ.get('AWS_S3_SECRET_ACCESS_KEY') bucket_name = os.environ.get('AWS_S3_BUCKET_NAME') region = os.environ.get('AWS_REGION') - - if not all([endpoint_url, access_key, secret_key, bucket_name]): - logger.error("Missing S3 configuration. Cannot upload file.") - return False + + if endpoint_url == "": + endpoint_url = None + + if access_key == "": + access_key = None + + if secret_key == "": + secret_key = None try: # Create S3 client s3_client = boto3.client( 's3', - endpoint_url=endpoint_url, - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, + # endpoint_url=endpoint_url, + # aws_access_key_id=access_key, + # aws_secret_access_key=secret_key, region_name=region, # For MinIO/local development, these settings help - config=boto3.session.Config(signature_version='s3v4'), - verify=False + # config=boto3.session.Config(signature_version='s3v4'), + # verify=False ) # Check if bucket exists, create if it doesn't @@ -302,12 +307,13 @@ def s3_upload_file(local_file_path, s3_key): try: # Create the bucket - for MinIO local we don't need LocationConstraint - if region == 'us-east-1' or 'localhost' in endpoint_url or 'minio' in endpoint_url: - s3_client.create_bucket(Bucket=bucket_name) + if endpoint_url: + if region == 'us-east-1' or 'localhost' in endpoint_url or 'minio' in endpoint_url: + s3_client.create_bucket(Bucket=bucket_name) else: s3_client.create_bucket( Bucket=bucket_name, - CreateBucketConfiguration={'LocationConstraint': region} + # CreateBucketConfiguration={'LocationConstraint': region} - not in us-east-1 - but in other regions ) # Apply bucket policy to make objects public-read @@ -343,7 +349,7 @@ def s3_upload_file(local_file_path, s3_key): # For HTML files, set content type correctly extra_args = { - 'ACL': 'public-read' # Make object publicly readable + # 'ACL': 'public-read' # Make object publicly readable - probably don't want this } # Set the correct content type based on file extension @@ -361,24 +367,29 @@ def s3_upload_file(local_file_path, s3_key): ExtraArgs=extra_args ) - # Generate a URL for the uploaded file - if endpoint_url.startswith('http://localhost') or endpoint_url.startswith('http://127.0.0.1'): - # For local development with MinIO - url = f"{endpoint_url}/{bucket_name}/{s3_key}" - # Clean up URL if needed - url = url.replace('///', '//') - elif 'minio' in endpoint_url: - # For Docker container access to MinIO - url = f"{endpoint_url}/{bucket_name}/{s3_key}" - url = url.replace('///', '//') - else: - # For AWS S3 - if endpoint_url.startswith('https://s3.'): - # Standard AWS S3 endpoint - url = f"https://{bucket_name}.s3.amazonaws.com/{s3_key}" - else: - # Custom S3 endpoint + if endpoint_url: + + # Generate a URL for the uploaded file + if endpoint_url.startswith('http://localhost') or endpoint_url.startswith('http://127.0.0.1'): + # For local development with MinIO + url = f"{endpoint_url}/{bucket_name}/{s3_key}" + # Clean up URL if needed + url = url.replace('///', '//') + elif 'minio' in endpoint_url: + # For Docker container access to MinIO url = f"{endpoint_url}/{bucket_name}/{s3_key}" + url = url.replace('///', '//') + else: + # For AWS S3 + if endpoint_url.startswith('https://s3.'): + # Standard AWS S3 endpoint + url = f"https://{bucket_name}.s3.amazonaws.com/{s3_key}" + else: + # Custom S3 endpoint + url = f"{endpoint_url}/{bucket_name}/{s3_key}" + else: + # Custom S3 endpoint + url = f"{bucket_name}/{s3_key}" logger.info(f"File uploaded successfully to {url}") return url @@ -414,16 +425,8 @@ def generate_static_datamapplot(zid, 
layer_num=0, output_dir=None): os.makedirs(container_dir, exist_ok=True) if os.path.exists("/visualizations"): os.makedirs(host_dir, exist_ok=True) - - # Make sure S3 environment variables are set - if not os.environ.get('AWS_S3_ENDPOINT'): - os.environ['AWS_S3_ENDPOINT'] = 'http://localhost:9000' - if not os.environ.get('AWS_S3_ACCESS_KEY_ID'): - os.environ['AWS_S3_ACCESS_KEY_ID'] = 'minioadmin' - if not os.environ.get('AWS_S3_SECRET_ACCESS_KEY'): - os.environ['AWS_S3_SECRET_ACCESS_KEY'] = 'minioadmin' if not os.environ.get('AWS_S3_BUCKET_NAME'): - os.environ['AWS_S3_BUCKET_NAME'] = 'delphi' + os.environ['AWS_S3_BUCKET_NAME'] = 'polis-delphi' if not os.environ.get('AWS_REGION'): os.environ['AWS_REGION'] = 'us-east-1' diff --git a/delphi/umap_narrative/702_CONSENSUS_DIVISIVE_README.md b/delphi/umap_narrative/702_CONSENSUS_DIVISIVE_README.md index 2b00c01abb..eaf94b8b31 100644 --- a/delphi/umap_narrative/702_CONSENSUS_DIVISIVE_README.md +++ b/delphi/umap_narrative/702_CONSENSUS_DIVISIVE_README.md @@ -44,7 +44,7 @@ export DATABASE_SSL_MODE="disable" # DynamoDB configuration export DYNAMODB_ENDPOINT="http://localhost:8000" -export AWS_REGION="us-west-2" +export AWS_REGION="us-east-1" export AWS_ACCESS_KEY_ID="fakeMyKeyId" export AWS_SECRET_ACCESS_KEY="fakeSecretAccessKey" diff --git a/delphi/umap_narrative/702_consensus_divisive_datamapplot.py b/delphi/umap_narrative/702_consensus_divisive_datamapplot.py index 38df3e2410..5d3b69bc01 100755 --- a/delphi/umap_narrative/702_consensus_divisive_datamapplot.py +++ b/delphi/umap_narrative/702_consensus_divisive_datamapplot.py @@ -34,10 +34,10 @@ } DYNAMODB_CONFIG = { - 'endpoint_url': os.environ.get('DYNAMODB_ENDPOINT', 'http://localhost:8000'), - 'region': os.environ.get('AWS_REGION', 'us-west-2'), - 'access_key': os.environ.get('AWS_ACCESS_KEY_ID', 'fakeMyKeyId'), - 'secret_key': os.environ.get('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey') + 'endpoint_url': os.environ.get('DYNAMODB_ENDPOINT'), + 'region': os.environ.get('AWS_REGION', 'us-east-1'), + 'access_key': os.environ.get('AWS_ACCESS_KEY_ID', None), + 'secret_key': os.environ.get('AWS_SECRET_ACCESS_KEY', None) } # Visualization settings - controls the extremity scale and color mapping @@ -62,7 +62,10 @@ # Define minimal versions of the required classes if imports fail class DynamoDBStorage: def __init__(self, endpoint_url=None): - self.endpoint_url = endpoint_url or DYNAMODB_CONFIG['endpoint_url'] + if endpoint_url: # Checks if endpoint_url is a truthy value (not None, not empty string) + self.endpoint_url = endpoint_url + else: + self.endpoint_url = None self.region = DYNAMODB_CONFIG['region'] self.dynamodb = boto3.resource('dynamodb', endpoint_url=self.endpoint_url, @@ -103,10 +106,10 @@ def load_data_from_dynamodb(zid, layer_num=0): logger.info(f'Loading UMAP positions and cluster data for conversation {zid}, layer {layer_num}') # Set up DynamoDB client - endpoint_url = os.environ.get('DYNAMODB_ENDPOINT', 'http://dynamodb-local:8000') + endpoint_url = os.environ.get('DYNAMODB_ENDPOINT') dynamodb = boto3.resource('dynamodb', endpoint_url=endpoint_url, - region_name=os.environ.get('AWS_REGION', 'us-west-2'), + region_name=os.environ.get('AWS_REGION', 'us-east-1'), aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', 'fakeMyKeyId'), aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey')) diff --git a/delphi/umap_narrative/800_report_topic_clusters.py b/delphi/umap_narrative/800_report_topic_clusters.py index e360d13422..321afe9164 100755 --- 
a/delphi/umap_narrative/800_report_topic_clusters.py +++ b/delphi/umap_narrative/800_report_topic_clusters.py @@ -66,7 +66,7 @@ def __init__(self, table_name="Delphi_NarrativeReports", disable_cache=False): self.dynamodb = boto3.resource( 'dynamodb', endpoint_url=os.environ.get('DYNAMODB_ENDPOINT'), - region_name=os.environ.get('AWS_DEFAULT_REGION', 'us-west-2'), + region_name=os.environ.get('AWS_DEFAULT_REGION', 'us-east-1'), aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', 'fakeMyKeyId'), aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey') ) @@ -654,7 +654,7 @@ def load_comment_clusters_from_dynamodb(self, conversation_id): dynamodb = boto3.resource( 'dynamodb', endpoint_url=os.environ.get('DYNAMODB_ENDPOINT', 'http://host.docker.internal:8000'), - region_name=os.environ.get('AWS_REGION', 'us-west-2'), + region_name=os.environ.get('AWS_REGION', 'us-east-1'), aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', 'fakeMyKeyId'), aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', 'fakeSecretAccessKey') ) diff --git a/delphi/umap_narrative/polismath_commentgraph/DEPLOYMENT.md b/delphi/umap_narrative/polismath_commentgraph/DEPLOYMENT.md index 0a685a93a8..a6af337caf 100644 --- a/delphi/umap_narrative/polismath_commentgraph/DEPLOYMENT.md +++ b/delphi/umap_narrative/polismath_commentgraph/DEPLOYMENT.md @@ -101,13 +101,13 @@ docker build -t polis-comment-graph-lambda . ```bash # Get the ECR login -aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 123456789012.dkr.ecr.us-west-2.amazonaws.com +aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 123456789012.dkr.ecr.us-east-1.amazonaws.com # Tag the image -docker tag polis-comment-graph-lambda:latest 123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lambda:latest +docker tag polis-comment-graph-lambda:latest 123456789012.dkr.ecr.us-east-1.amazonaws.com/polis-comment-graph-lambda:latest # Push the image -docker push 123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lambda:latest +docker push 123456789012.dkr.ecr.us-east-1.amazonaws.com/polis-comment-graph-lambda:latest ``` ## Creating the Lambda Function @@ -118,9 +118,9 @@ docker push 123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lam aws lambda create-function \ --function-name polis-comment-graph-lambda \ --package-type Image \ - --code ImageUri=123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lambda:latest \ + --code ImageUri=123456789012.dkr.ecr.us-east-1.amazonaws.com/polis-comment-graph-lambda:latest \ --role arn:aws:iam::123456789012:role/lambda-execution-role \ - --environment "Variables={DATABASE_HOST=polis-db.cluster-xyz.us-west-2.rds.amazonaws.com,DATABASE_NAME=polis,DATABASE_USER=polis}" \ + --environment "Variables={DATABASE_HOST=polis-db.cluster-xyz.us-east-1.rds.amazonaws.com,DATABASE_NAME=polis,DATABASE_USER=polis}" \ --timeout 300 \ --memory-size 1024 ``` @@ -141,9 +141,9 @@ aws sns create-topic --name polis-new-comment-topic # Create a subscription for the Lambda function aws sns subscribe \ - --topic-arn arn:aws:sns:us-west-2:123456789012:polis-new-comment-topic \ + --topic-arn arn:aws:sns:us-east-1:123456789012:polis-new-comment-topic \ --protocol lambda \ - --notification-endpoint arn:aws:lambda:us-west-2:123456789012:function:polis-comment-graph-lambda + --notification-endpoint arn:aws:lambda:us-east-1:123456789012:function:polis-comment-graph-lambda # Grant permission for SNS to 
invoke the Lambda aws lambda add-permission \ @@ -151,7 +151,7 @@ aws lambda add-permission \ --statement-id sns-new-comment \ --action lambda:InvokeFunction \ --principal sns.amazonaws.com \ - --source-arn arn:aws:sns:us-west-2:123456789012:polis-new-comment-topic + --source-arn arn:aws:sns:us-east-1:123456789012:polis-new-comment-topic ``` ### 2. CloudWatch Scheduled Event for Batch Processing @@ -165,7 +165,7 @@ aws events put-rule \ # Add the Lambda function as a target aws events put-targets \ --rule polis-daily-processing \ - --targets "Id"="1","Arn"="arn:aws:lambda:us-west-2:123456789012:function:polis-comment-graph-lambda","Input"="{\"event_type\":\"process_conversation\",\"conversation_id\":\"all\"}" + --targets "Id"="1","Arn"="arn:aws:lambda:us-east-1:123456789012:function:polis-comment-graph-lambda","Input"="{\"event_type\":\"process_conversation\",\"conversation_id\":\"all\"}" # Grant permission for CloudWatch Events to invoke the Lambda aws lambda add-permission \ @@ -173,7 +173,7 @@ aws lambda add-permission \ --statement-id cloudwatch-daily \ --action lambda:InvokeFunction \ --principal events.amazonaws.com \ - --source-arn arn:aws:events:us-west-2:123456789012:rule/polis-daily-processing + --source-arn arn:aws:events:us-east-1:123456789012:rule/polis-daily-processing ``` ## Testing the Deployment @@ -207,7 +207,7 @@ When you need to update the Lambda function: ```bash aws lambda update-function-code \ --function-name polis-comment-graph-lambda \ - --image-uri 123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lambda:latest + --image-uri 123456789012.dkr.ecr.us-east-1.amazonaws.com/polis-comment-graph-lambda:latest ``` ## Technical Notes diff --git a/delphi/umap_narrative/polismath_commentgraph/README.md b/delphi/umap_narrative/polismath_commentgraph/README.md index 8e301a78e8..1f2a14a50a 100644 --- a/delphi/umap_narrative/polismath_commentgraph/README.md +++ b/delphi/umap_narrative/polismath_commentgraph/README.md @@ -96,9 +96,9 @@ The service follows a serverless architecture: 2. Push to ECR: ```bash - aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 123456789012.dkr.ecr.us-west-2.amazonaws.com - docker tag polis-comment-graph-lambda:latest 123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lambda:latest - docker push 123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lambda:latest + aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 123456789012.dkr.ecr.us-east-1.amazonaws.com + docker tag polis-comment-graph-lambda:latest 123456789012.dkr.ecr.us-east-1.amazonaws.com/polis-comment-graph-lambda:latest + docker push 123456789012.dkr.ecr.us-east-1.amazonaws.com/polis-comment-graph-lambda:latest ``` 3. 
Create Lambda function using the AWS CLI: @@ -106,9 +106,9 @@ The service follows a serverless architecture: aws lambda create-function \ --function-name polis-comment-graph-lambda \ --package-type Image \ - --code ImageUri=123456789012.dkr.ecr.us-west-2.amazonaws.com/polis-comment-graph-lambda:latest \ + --code ImageUri=123456789012.dkr.ecr.us-east-1.amazonaws.com/polis-comment-graph-lambda:latest \ --role arn:aws:iam::123456789012:role/lambda-execution-role \ - --environment "Variables={DATABASE_HOST=polis-db.cluster-xyz.us-west-2.rds.amazonaws.com,DATABASE_NAME=polis,DATABASE_USER=polis}" \ + --environment "Variables={DATABASE_HOST=polis-db.cluster-xyz.us-east-1.rds.amazonaws.com,DATABASE_NAME=polis,DATABASE_USER=polis}" \ --timeout 300 \ --memory-size 1024 ``` @@ -121,7 +121,7 @@ The service follows a serverless architecture: - `DATABASE_USER`: PostgreSQL username - `DATABASE_PASSWORD`: PostgreSQL password - `DYNAMODB_ENDPOINT`: Optional DynamoDB endpoint for local development -- `AWS_REGION`: AWS region for DynamoDB and other services (default: us-west-2) +- `AWS_REGION`: AWS region for DynamoDB and other services (default: us-east-1) - `MODEL_CACHE_DIR`: Directory to cache SentenceTransformer models (default: /tmp/model_cache) - `LOG_LEVEL`: Logging level (default: INFO) @@ -178,7 +178,7 @@ Then create the required tables: python -c " import boto3 dynamodb = boto3.resource('dynamodb', endpoint_url='http://localhost:8000', - region_name='us-west-2', + region_name='us-east-1', aws_access_key_id='fakeMyKeyId', aws_secret_access_key='fakeSecretAccessKey') diff --git a/delphi/umap_narrative/polismath_commentgraph/cli.py b/delphi/umap_narrative/polismath_commentgraph/cli.py index 763119fa7c..3b4679cbaa 100644 --- a/delphi/umap_narrative/polismath_commentgraph/cli.py +++ b/delphi/umap_narrative/polismath_commentgraph/cli.py @@ -289,7 +289,7 @@ def lambda_local(args): context = type('obj', (object,), { 'function_name': 'lambda_local', 'aws_request_id': '12345', - 'invoked_function_arn': 'arn:aws:lambda:us-west-2:123456789012:function:lambda_local' + 'invoked_function_arn': 'arn:aws:lambda:us-east-1:123456789012:function:lambda_local' }) # Override environment variables if provided @@ -305,21 +305,18 @@ def lambda_local(args): os.environ['DATABASE_PASSWORD'] = args.pg_password # Set up DynamoDB environment variables for local testing - # Only set if not already in environment - if not os.environ.get('DYNAMODB_ENDPOINT'): - os.environ['DYNAMODB_ENDPOINT'] = 'http://localhost:8000' # Log the endpoint being used logger.info(f"Using DynamoDB endpoint: {os.environ.get('DYNAMODB_ENDPOINT')}") os.environ['AWS_ACCESS_KEY_ID'] = 'fakeMyKeyId' os.environ['AWS_SECRET_ACCESS_KEY'] = 'fakeSecretAccessKey' - os.environ['AWS_DEFAULT_REGION'] = 'us-west-2' + os.environ['AWS_DEFAULT_REGION'] = 'us-east-1' # Reinitialize the DynamoDB storage with direct credentials from .utils.storage import DynamoDBStorage global dynamo_storage dynamo_storage = DynamoDBStorage( - region_name='us-west-2', + region_name='us-east-1', endpoint_url=os.environ.get('DYNAMODB_ENDPOINT') ) diff --git a/delphi/umap_narrative/polismath_commentgraph/utils/storage.py b/delphi/umap_narrative/polismath_commentgraph/utils/storage.py index 237f44b11e..ad8c5d1cd1 100644 --- a/delphi/umap_narrative/polismath_commentgraph/utils/storage.py +++ b/delphi/umap_narrative/polismath_commentgraph/utils/storage.py @@ -111,11 +111,7 @@ def get_uri(self) -> str: password_str = f":{self.password}" if self.password else "" # Build URI - uri = 
f"postgresql://{self.user}{password_str}@{self.host}:{self.port}/{self.database}" - - # Add SSL mode if needed - if self.ssl_mode and self.ssl_mode != 'prefer': - uri += f"?sslmode={self.ssl_mode}" + uri = f"postgresql://{self.user}{password_str}@{self.host}:{self.port}/{self.database}?sslmode=require" return uri @@ -379,7 +375,7 @@ def __init__(self, region_name: str = None, endpoint_url: str = None): endpoint_url: Optional endpoint URL for local DynamoDB """ # Get settings from environment variables with fallbacks - self.region_name = region_name or os.environ.get('AWS_REGION', 'us-west-2') + self.region_name = region_name or os.environ.get('AWS_REGION', 'us-east-1') self.endpoint_url = endpoint_url or os.environ.get('DYNAMODB_ENDPOINT') # Get AWS credentials from environment variables diff --git a/delphi/umap_narrative/run_pipeline.py b/delphi/umap_narrative/run_pipeline.py index 86b2e405d1..c43c54fdd2 100755 --- a/delphi/umap_narrative/run_pipeline.py +++ b/delphi/umap_narrative/run_pipeline.py @@ -73,11 +73,6 @@ def setup_environment(db_host=None, db_port=None, db_name=None, db_user=None, db # DynamoDB settings (for local DynamoDB) # Don't override if already set in environment dynamo_endpoint = os.environ.get('DYNAMODB_ENDPOINT') - if not dynamo_endpoint: - os.environ['DYNAMODB_ENDPOINT'] = 'http://localhost:8000' - logger.info("Setting default DynamoDB endpoint: http://localhost:8000") - else: - logger.info(f"Using existing DynamoDB endpoint: {dynamo_endpoint}") # Always set these credentials for local development if not already set if not os.environ.get('AWS_ACCESS_KEY_ID'): @@ -87,7 +82,7 @@ def setup_environment(db_host=None, db_port=None, db_name=None, db_user=None, db os.environ['AWS_SECRET_ACCESS_KEY'] = 'fakeSecretAccessKey' if not os.environ.get('AWS_DEFAULT_REGION') and not os.environ.get('AWS_REGION'): - os.environ['AWS_DEFAULT_REGION'] = 'us-west-2' + os.environ['AWS_DEFAULT_REGION'] = 'us-east-1' def fetch_conversation_data(zid): """ @@ -1155,7 +1150,7 @@ def process_conversation(zid, export_dynamo=True, use_ollama=False): logger.info(f"Using DynamoDB endpoint from environment: {endpoint_url}") dynamo_storage = DynamoDBStorage( - region_name='us-west-2', + region_name='us-east-1', endpoint_url=endpoint_url ) diff --git a/docker-compose.yml b/docker-compose.yml index dfd31dbb22..d0204aaf62 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -49,6 +49,7 @@ services: volumes: # Persist logs to a volume, so they can be accessed after the container is stopped. 
- server-logs:/app/logs + restart: unless-stopped math: image: 050917022930.dkr.ecr.us-east-1.amazonaws.com/polis/math:latest @@ -64,6 +65,7 @@ services: - WEBSERVER_PASS=${WEBSERVER_PASS} networks: - "polis-net" + restart: unless-stopped extra_hosts: - "host.docker.internal:host-gateway" @@ -78,17 +80,17 @@ services: - LOG_LEVEL=${DELPHI_LOG_LEVEL:-INFO} - DELPHI_DEV_OR_PROD=${DELPHI_DEV_OR_PROD:-prod} # DynamoDB connection settings for local mode (will be overridden in prod) - - DYNAMODB_ENDPOINT=${DYNAMODB_ENDPOINT:-http://dynamodb:8000} + - DYNAMODB_ENDPOINT=${DYNAMODB_ENDPOINT} - POLL_INTERVAL=${POLL_INTERVAL:-2} # Ollama connection - - OLLAMA_HOST=http://ollama:11434 + - OLLAMA_HOST=${OLLAMA_HOST:-http://ollama:11434} - OLLAMA_MODEL=${OLLAMA_MODEL:-llama3.1:8b} # AWS environment variables (will be provided in prod) - AWS_REGION=${AWS_REGION:-us-east-1} - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-} - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-} # S3 storage for visualization files - - AWS_S3_ENDPOINT=${AWS_S3_ENDPOINT:-http://minio:9000} + - AWS_S3_ENDPOINT=${AWS_S3_ENDPOINT} - AWS_S3_ACCESS_KEY_ID=${AWS_S3_ACCESS_KEY_ID:-minioadmin} - AWS_S3_SECRET_ACCESS_KEY=${AWS_S3_SECRET_ACCESS_KEY:-minioadmin} - AWS_S3_BUCKET_NAME=${AWS_S3_BUCKET_NAME:-delphi} @@ -101,9 +103,6 @@ services: - DATABASE_USER=${POSTGRES_USER:-christian} - DATABASE_PASSWORD=${POSTGRES_PASSWORD:-polis123} - DATABASE_SSL_MODE=${DATABASE_SSL_MODE:-disable} - depends_on: - - dynamodb - - ollama networks: - "polis-net" extra_hosts: @@ -113,6 +112,7 @@ services: limits: memory: ${DELPHI_CONTAINER_MEMORY:-4g} cpus: ${DELPHI_CONTAINER_CPUS:-2} + restart: unless-stopped postgres: restart: always @@ -181,6 +181,8 @@ services: user: root labels: polis_tag: ${TAG:-dev} + profiles: + - local-services # Ollama for LLM processing ollama: @@ -215,6 +217,8 @@ services: command: server /data --console-address ":9001" networks: - "polis-net" + profiles: + - local-services networks: polis-net: diff --git a/example.env b/example.env index 62d3307a3c..a53875d002 100644 --- a/example.env +++ b/example.env @@ -127,8 +127,7 @@ MAX_REPORT_CACHE_DURATION= ###### DYNAMODB ###### # When using local DynamoDB, this should be http://dynamodb:8000. -# In production, set DYNAMODB_ENDPOINT to the cloud endpoint (e.g. https://dynamodb.us-west-2.amazonaws.com), -# or simply leave it blank. +# In production leave it blank. DYNAMODB_ENDPOINT=http://dynamodb:8000 ###### S3 STORAGE ###### diff --git a/scripts/after_install.sh b/scripts/after_install.sh index 30e76a63c2..9fa64d0348 100644 --- a/scripts/after_install.sh +++ b/scripts/after_install.sh @@ -2,6 +2,9 @@ set -e set -x +# MINIMAL CHANGE: Ensure parent directory exists before trying to cd into it +sudo mkdir -p /opt/polis + cd /opt/polis sudo yum install -y git GIT_REPO_URL="https://github.com/compdemocracy/polis.git" @@ -9,9 +12,11 @@ GIT_BRANCH="stable" if [ ! 
-d "polis" ]; then echo "Cloning public repository from $GIT_REPO_URL, branch: $GIT_BRANCH (HTTPS - Public Repo)" - git clone -b "$GIT_BRANCH" "$GIT_REPO_URL" polis + # MINIMAL CHANGE: Add sudo to the clone command + sudo git clone --depth 1 -b "$GIT_BRANCH" "$GIT_REPO_URL" polis else echo "Polis directory already exists, skipping cloning, pulling instead" + # No change needed here if 'else' block is entered, as subsequent commands already use sudo fi cd polis @@ -22,7 +27,7 @@ sudo git reset --hard origin/$GIT_BRANCH && sudo git pull # --- Fetch pre-configured .env from SSM Parameter Store --- PRE_CONFIGURED_ENV=$(aws secretsmanager get-secret-value --secret-id polis-web-app-env-vars --query SecretString --output text --region us-east-1) - +# Original check if [ -z "$PRE_CONFIGURED_ENV" ]; then echo "Error: Could not retrieve pre-configured .env from SSM Parameter polis-web-app-env-vars" exit 1 @@ -32,10 +37,11 @@ echo "Retrieved pre-configured .env from SSM Parameter" # --- Create/Overwrite .env file with pre-configured content --- echo "Creating/Overwriting .env file with pre-configured content from SSM" -echo "$PRE_CONFIGURED_ENV" > .env +echo "$PRE_CONFIGURED_ENV" | sudo tee .env > /dev/null echo ".env file created/overwritten with pre-configured content." # --- Database Configuration and Environment Variables from Secrets Manager --- +# Original logic and commands preserved # 1. Get Secret ARN from SSM Parameter SECRET_ARN=$(aws ssm get-parameter --name /polis/db-secret-arn --query 'Parameter.Value' --output text --region us-east-1) @@ -65,45 +71,54 @@ DB_PORT=$(aws ssm get-parameter --name "/polis/db-port" --query 'Parameter.Value # --- Construct DATABASE_URL using values from Secrets Manager AND SSM Parameters --- -DATABASE_URL="postgres://${DB_USERNAME}:${DB_PASSWORD}@${DB_HOST}:${DB_PORT}/${DB_NAME}" +DATABASE_URL="postgres://${DB_USERNAME}:${DB_PASSWORD}@${DB_HOST}:${DB_PORT}/${DB_NAME}?sslmode=require" -echo "Constructed DATABASE_URL: $DATABASE_URL" +echo "Constructed DATABASE_URL: $DATABASE_URL" # Original logging # --- Append DATABASE_URL to the end of .env --- echo "Appending DATABASE_URL to .env" -echo "DATABASE_URL=$DATABASE_URL" >> .env - -echo "--- Final .env file content (Appended DATABASE_URL) ---" -cat .env +printf "\nDATABASE_URL=%s\n" "$DATABASE_URL" | sudo tee -a .env > /dev/null +# Original service detection SERVICE_FROM_FILE=$(cat /tmp/service_type.txt) echo "DEBUG: Service type read from /tmp/service_type.txt: [$SERVICE_FROM_FILE]" +# Original Docker cleanup/start logic echo "Stopping and removing existing Docker containers..." -/usr/local/bin/docker-compose down || true # Stop all services, ignore errors if none running -docker rm -f $(docker ps -aq) || true # Forcefully remove all containers, ignore errors +sudo /usr/local/bin/docker-compose down || true +sudo docker rm -f $(docker ps -aq) || true echo "Docker containers stopped and removed." 
-yes | docker system prune -a --filter "until=72h" +yes | sudo docker system prune -a --filter "until=72h" echo "Docker cache cleared" -/usr/local/bin/docker-compose config +sudo /usr/local/bin/docker-compose config if [ "$SERVICE_FROM_FILE" == "server" ]; then echo "Starting docker-compose up for 'server' and 'nginx-proxy' services" - /usr/local/bin/docker-compose up -d server nginx-proxy --build --force-recreate + sudo /usr/local/bin/docker-compose up -d server nginx-proxy --build --force-recreate elif [ "$SERVICE_FROM_FILE" == "math" ]; then echo "Starting docker-compose up for 'math' service" - /usr/local/bin/docker-compose up -d math --build --force-recreate + sudo /usr/local/bin/docker-compose up -d math --build --force-recreate elif [ "$SERVICE_FROM_FILE" == "delphi" ]; then echo "Starting docker-compose up for 'delphi' service" - - # Check if instance size file exists + echo "Fetching Ollama Service URL for Delphi..." + OLLAMA_URL=$(aws secretsmanager get-secret-value --secret-id /polis/ollama-service-url --query SecretString --output text --region us-east-1) + + if [ -z "$OLLAMA_URL" ]; then + echo "Error: Could not retrieve Ollama Service URL from Secrets Manager: /polis/ollama-service-url" + exit 1 + fi + echo "Retrieved Ollama Service URL." + + echo "Appending OLLAMA_HOST to .env for Delphi" + printf "\nOLLAMA_HOST=%s\n" "$OLLAMA_URL" | sudo tee -a .env > /dev/null + echo "OLLAMA_HOST appended." + if [ -f "/tmp/instance_size.txt" ]; then INSTANCE_SIZE=$(cat /tmp/instance_size.txt) echo "Instance size detected: $INSTANCE_SIZE" - - # Set environment variables based on instance size + if [ "$INSTANCE_SIZE" == "small" ]; then echo "Configuring delphi for small instance" export DELPHI_INSTANCE_TYPE="small" @@ -126,13 +141,12 @@ elif [ "$SERVICE_FROM_FILE" == "delphi" ]; then export DELPHI_CONTAINER_MEMORY="4g" export DELPHI_CONTAINER_CPUS="1" fi - - # Add environment variables to .env file - echo "DELPHI_INSTANCE_TYPE=$DELPHI_INSTANCE_TYPE" >> .env - echo "DELPHI_MAX_WORKERS=$DELPHI_MAX_WORKERS" >> .env - echo "DELPHI_WORKER_MEMORY=$DELPHI_WORKER_MEMORY" >> .env - echo "DELPHI_CONTAINER_MEMORY=$DELPHI_CONTAINER_MEMORY" >> .env - echo "DELPHI_CONTAINER_CPUS=$DELPHI_CONTAINER_CPUS" >> .env + + printf "\nDELPHI_INSTANCE_TYPE=%s\n" "$DELPHI_INSTANCE_TYPE" | sudo tee -a .env > /dev/null + printf "DELPHI_MAX_WORKERS=%s\n" "$DELPHI_MAX_WORKERS" | sudo tee -a .env > /dev/null + printf "DELPHI_WORKER_MEMORY=%s\n" "$DELPHI_WORKER_MEMORY" | sudo tee -a .env > /dev/null + printf "DELPHI_CONTAINER_MEMORY=%s\n" "$DELPHI_CONTAINER_MEMORY" | sudo tee -a .env > /dev/null + printf "DELPHI_CONTAINER_CPUS=%s\n" "$DELPHI_CONTAINER_CPUS" | sudo tee -a .env > /dev/null else echo "Instance size file not found, using default configuration" export DELPHI_INSTANCE_TYPE="default" @@ -140,17 +154,16 @@ elif [ "$SERVICE_FROM_FILE" == "delphi" ]; then export DELPHI_WORKER_MEMORY="1g" export DELPHI_CONTAINER_MEMORY="4g" export DELPHI_CONTAINER_CPUS="1" - - echo "DELPHI_INSTANCE_TYPE=$DELPHI_INSTANCE_TYPE" >> .env - echo "DELPHI_MAX_WORKERS=$DELPHI_MAX_WORKERS" >> .env - echo "DELPHI_WORKER_MEMORY=$DELPHI_WORKER_MEMORY" >> .env - echo "DELPHI_CONTAINER_MEMORY=$DELPHI_CONTAINER_MEMORY" >> .env - echo "DELPHI_CONTAINER_CPUS=$DELPHI_CONTAINER_CPUS" >> .env + + printf "\nDELPHI_INSTANCE_TYPE=%s\n" "$DELPHI_INSTANCE_TYPE" | sudo tee -a .env > /dev/null + printf "DELPHI_MAX_WORKERS=%s\n" "$DELPHI_MAX_WORKERS" | sudo tee -a .env > /dev/null + printf "DELPHI_WORKER_MEMORY=%s\n" "$DELPHI_WORKER_MEMORY" | sudo tee -a .env > 
/dev/null + printf "DELPHI_CONTAINER_MEMORY=%s\n" "$DELPHI_CONTAINER_MEMORY" | sudo tee -a .env > /dev/null + printf "DELPHI_CONTAINER_CPUS=%s\n" "$DELPHI_CONTAINER_CPUS" | sudo tee -a .env > /dev/null fi - - # Start delphi service - /usr/local/bin/docker-compose up -d delphi --build --force-recreate + + sudo /usr/local/bin/docker-compose up -d delphi --build --force-recreate else echo "Error: Unknown service type: [$SERVICE_FROM_FILE]. Starting all services (default docker-compose up -d)" - /usr/local/bin/docker-compose up -d --build --force-recreate # Fallback + sudo /usr/local/bin/docker-compose up -d --build --force-recreate fi \ No newline at end of file diff --git a/scripts/application_stop.sh b/scripts/application_stop.sh index bbfacddf64..e107dd744e 100644 --- a/scripts/application_stop.sh +++ b/scripts/application_stop.sh @@ -1,10 +1,70 @@ #!/bin/bash -set -e -set -x - - cd /opt/polis -if [ "$SERVICE" = "server" ]; then - /usr/local/bin/docker-compose stop server -elif [ "$SERVICE" = "math" ]; then - /usr/local/bin/docker-compose stop math -fi \ No newline at end of file +# This script runs during the ApplicationStop lifecycle event in CodeDeploy. +# It stops the relevant Docker containers based on the instance's role. + +set -e # Exit immediately if a command exits with a non-zero status. +set -x # Print commands and their arguments as they are executed. + +echo "Executing ApplicationStop hook..." + +# --- Configuration --- +# Directory where the docker-compose.yml file for the *current* deployment resides +# Adjust this path if your deployment process places files elsewhere +DEPLOY_DIR="/opt/polis/polis" +# File indicating the role of this instance (created by UserData/AfterInstall) +SERVICE_TYPE_FILE="/tmp/service_type.txt" + +# --- Determine Service Type --- +if [ -f "$SERVICE_TYPE_FILE" ]; then + SERVICE_TYPE=$(cat "$SERVICE_TYPE_FILE") + echo "Detected service type: $SERVICE_TYPE" +else + echo "Warning: Service type file not found at $SERVICE_TYPE_FILE. Assuming nothing specific needs to be stopped by this script." + # Exit cleanly as we don't know what to stop, or maybe the instance role changed. + # CodeDeploy will likely proceed, and the AfterInstall script handles cleanup anyway. + exit 0 +fi + +# --- Stop Services based on Type --- + +# Check if the deployment directory exists (where docker-compose.yml should be) +if [ -d "$DEPLOY_DIR" ]; then + cd "$DEPLOY_DIR" + echo "Changed directory to $DEPLOY_DIR" + + # Check if docker-compose command exists + if ! command -v /usr/local/bin/docker-compose &> /dev/null; then + echo "Error: docker-compose command not found at /usr/local/bin/docker-compose. Cannot stop services." + # Exit with error because compose is expected if the directory exists and type isn't ollama + if [ "$SERVICE_TYPE" != "ollama" ]; then + exit 1 + fi + fi + + if [ "$SERVICE_TYPE" == "server" ]; then + echo "Stopping server-related services (server, nginx-proxy, file-server)..." + # Stop services related to the 'server' type instance (as started in AfterInstall) + /usr/local/bin/docker-compose stop server nginx-proxy file-server || echo "Warning: Failed to stop server component(s), might already be stopped." + # Optional: Use 'down' if you want to remove networks etc. during stop, but 'stop' is usually sufficient here. + # /usr/local/bin/docker-compose down --remove-orphans server nginx-proxy file-server || echo "Warning..." + + elif [ "$SERVICE_TYPE" == "math" ]; then + echo "Stopping math service..." 
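The stop commands here tolerate already-stopped services via `|| echo`; a stricter variant can first confirm that a matching container is actually running, reusing the `docker ps --filter` pattern these scripts use elsewhere. A rough sketch (the container name is illustrative and follows compose's `<project>-<service>-1` convention):

```sh
# grep -q . succeeds only if docker ps printed at least one container ID,
# so the stop is attempted only when a math container is really running.
if docker ps -q --filter "name=polis-math" | grep -q .; then
  /usr/local/bin/docker-compose stop math
else
  echo "math container not running; nothing to stop."
fi
```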
+ /usr/local/bin/docker-compose stop math || echo "Warning: Failed to stop math service, might already be stopped." + + elif [ "$SERVICE_TYPE" == "delphi" ]; then + echo "Stopping delphi service..." + /usr/local/bin/docker-compose stop delphi || echo "Warning: Failed to stop delphi service, might already be stopped." + + else + echo "Warning: Unknown service type '$SERVICE_TYPE' found in $SERVICE_TYPE_FILE. No specific services stopped." + # Avoid running a generic 'down' as it might affect unrelated containers if any exist + fi + +else + echo "Warning: Deployment directory $DEPLOY_DIR not found. Assuming no services need stopping." + # Exit cleanly if the directory isn't there, as nothing from this app could be running + exit 0 +fi + +echo "ApplicationStop hook finished successfully for service type: $SERVICE_TYPE." \ No newline at end of file diff --git a/scripts/before_install.sh b/scripts/before_install.sh index 26a04b7849..d471403864 100644 --- a/scripts/before_install.sh +++ b/scripts/before_install.sh @@ -8,4 +8,7 @@ if docker ps -q --filter "name=polis-server" | grep -q .; then fi if docker ps -q --filter "name=polis-math" | grep -q .; then docker stop polis-math-1 +fi +if docker ps -q --filter "name=polis-delphi" | grep -q .; then + docker stop polis-delphi-1 fi \ No newline at end of file diff --git a/server/app.ts b/server/app.ts index 9a6612b32b..a2cad9db23 100644 --- a/server/app.ts +++ b/server/app.ts @@ -791,7 +791,6 @@ helpersInitialized.then( try { handle_POST_delphi_jobs(req, res); } catch (err) { - console.error("Error in delphi jobs creation route:", err); res.json({ status: "error", message: "Internal server error in job creation endpoint", @@ -805,7 +804,6 @@ helpersInitialized.then( try { handle_GET_delphi_reports(req, res); } catch (err) { - console.error("Error in delphi reports route:", err); res.json({ status: "error", message: "Internal server error in reports endpoint", @@ -821,7 +819,6 @@ helpersInitialized.then( try { handle_GET_delphi_visualizations(req, res); } catch (err) { - console.error("Error in delphi visualizations route:", err); res.json({ status: "error", message: "Internal server error in visualizations endpoint", @@ -835,7 +832,6 @@ helpersInitialized.then( try { handle_POST_delphi_batch_reports(req, res); } catch (err) { - console.error("Error in delphi batch reports route:", err); res.json({ status: "error", message: "Internal server error in batch reports endpoint", @@ -1586,7 +1582,6 @@ helpersInitialized.then( app.get( /^\/commentsReport\/r?[0-9][0-9A-Za-z]+(\/.*)?/, function(req, res, next) { - console.log("ROUTE DEBUG: CommentsReport route matched!"); return fetchIndexForReportPage(req, res, next); } ); diff --git a/server/src/config.ts b/server/src/config.ts index 96c7182c46..b9804641b2 100644 --- a/server/src/config.ts +++ b/server/src/config.ts @@ -115,6 +115,14 @@ export default { useNetworkHost: isTrue(process.env.USE_NETWORK_HOST), webserverPass: process.env.WEBSERVER_PASS as string, webserverUsername: process.env.WEBSERVER_USERNAME as string, + DYNAMODB_ENDPOINT: process.env.DYNAMODB_ENDPOINT, + AWS_REGION: process.env.AWS_REGION, + AWS_ACCESS_KEY_ID: process.env.AWS_ACCESS_KEY_ID, + AWS_SECRET_ACCESS_KEY: process.env.AWS_SECRET_ACCESS_KEY, + AWS_S3_ENDPOINT: process.env.AWS_S3_ENDPOINT, + AWS_S3_ACCESS_KEY_ID: process.env.AWS_S3_ACCESS_KEY_ID, + AWS_S3_SECRET_ACCESS_KEY: process.env.AWS_S3_SECRET_ACCESS_KEY, + AWS_S3_BUCKET_NAME: process.env.AWS_S3_BUCKET_NAME, whitelistItems: [ process.env.DOMAIN_WHITELIST_ITEM_01 || null, diff 
--git a/server/src/routes/delphi.ts b/server/src/routes/delphi.ts index 8f61b32ac8..480bbd760f 100644 --- a/server/src/routes/delphi.ts +++ b/server/src/routes/delphi.ts @@ -3,61 +3,64 @@ import logger from "../utils/logger"; import { DynamoDBClient, ListTablesCommand } from "@aws-sdk/client-dynamodb"; import { DynamoDBDocumentClient, QueryCommand } from "@aws-sdk/lib-dynamodb"; import { getZidFromReport } from "../utils/parameter"; +import Config from "../config"; /** * Handler for Delphi API route that retrieves LLM topic names from DynamoDB */ export function handle_GET_delphi(req: Request, res: Response) { logger.info("Delphi API request received"); - + // Get report_id from request const report_id = req.query.report_id as string; - + if (!report_id) { - return res.json({ - status: "error", - message: "report_id is required" + return res.json({ + status: "error", + message: "report_id is required", }); } // Extract zid from report_id - we need this to query DynamoDB getZidFromReport(report_id) - .then(zid => { + .then((zid) => { if (!zid) { return res.json({ status: "error", message: "Could not find conversation for report_id", - report_id: report_id + report_id: report_id, }); } const conversation_id = zid.toString(); - logger.info(`Fetching Delphi LLM topics for conversation_id: ${conversation_id}`); + logger.info( + `Fetching Delphi LLM topics for conversation_id: ${conversation_id}` + ); // Force using local DynamoDB by hardcoding the endpoint const dynamoDBConfig: any = { - region: process.env.AWS_REGION || "us-east-1", - // Force to use the local DynamoDB endpoint - endpoint: "http://dynamodb:8000" + region: Config.AWS_REGION || "us-east-1", }; - + // Log what we're using logger.info(`Forcing local DynamoDB connection: Endpoint: ${dynamoDBConfig.endpoint} Region: ${dynamoDBConfig.region}`); - + // For local DynamoDB, use dummy credentials dynamoDBConfig.credentials = { - accessKeyId: 'DUMMYIDEXAMPLE', - secretAccessKey: 'DUMMYEXAMPLEKEY' + accessKeyId: Config.AWS_ACCESS_KEY_ID, + secretAccessKey: Config.AWS_SECRET_ACCESS_KEY, }; // Log connection config for debugging logger.info(`DynamoDB Config: Region: ${dynamoDBConfig.region} Endpoint: ${dynamoDBConfig.endpoint || "Default AWS endpoint"} - AWS_ACCESS_KEY_ID: ${process.env.AWS_ACCESS_KEY_ID ? "Set" : "Not set"} - AWS_SECRET_ACCESS_KEY: ${process.env.AWS_SECRET_ACCESS_KEY ? "Set" : "Not set"} + AWS_ACCESS_KEY_ID: ${Config.AWS_ACCESS_KEY_ID ? "Set" : "Not set"} + AWS_SECRET_ACCESS_KEY: ${ + Config.AWS_SECRET_ACCESS_KEY ? 
"Set" : "Not set" + } `); // Create DynamoDB clients @@ -66,7 +69,7 @@ export function handle_GET_delphi(req: Request, res: Response) { marshallOptions: { convertEmptyValues: true, removeUndefinedValues: true, - } + }, }); // Table name for LLM topic names @@ -76,50 +79,64 @@ export function handle_GET_delphi(req: Request, res: Response) { try { // Create a command to list all tables const listTablesCommand = new ListTablesCommand({}); - + // Log that we're checking tables logger.info(`Checking DynamoDB tables...`); - + // Execute the command and handle results - client.send(listTablesCommand) - .then(tableData => { + client + .send(listTablesCommand) + .then((tableData) => { // Make sure TableNames is defined const tableNames = tableData.TableNames || []; - logger.info(`Found ${tableNames.length} DynamoDB tables: ${JSON.stringify(tableNames)}`); - + logger.info( + `Found ${tableNames.length} DynamoDB tables: ${JSON.stringify( + tableNames + )}` + ); + // Check if our table exists const tableExists = tableNames.includes(tableName); logger.info(`Table ${tableName} exists: ${tableExists}`); - + if (!tableExists) { // If table doesn't exist, return a helpful message // Also provide info on how to create the table return res.json({ status: "success", message: `Table ${tableName} not found in DynamoDB.`, - hint: "The table may need to be created by running the Delphi pipeline", + hint: + "The table may need to be created by running the Delphi pipeline", report_id: report_id, conversation_id: conversation_id, available_tables: tableNames, - topics: {} + topics: {}, }); } - + // If we get here, the table exists, proceed with query proceedWithQuery(); }) - .catch(err => { + .catch((err) => { logger.error(`Error listing DynamoDB tables: ${err.message}`); logger.error(`Error type: ${err.name}`); if (err.code === "UnrecognizedClientException") { - logger.error("This error usually indicates an authentication issue with DynamoDB"); + logger.error( + "This error usually indicates an authentication issue with DynamoDB" + ); logger.error("Check AWS credentials and region settings"); } else if (err.name === "NetworkingError") { - logger.error(`Cannot connect to DynamoDB endpoint: ${dynamoDBConfig.endpoint}`); - logger.error("Check if the DynamoDB service is running and accessible from the server container"); - logger.error("Consider testing with: curl " + dynamoDBConfig.endpoint); + logger.error( + `Cannot connect to DynamoDB endpoint: ${dynamoDBConfig.endpoint}` + ); + logger.error( + "Check if the DynamoDB service is running and accessible from the server container" + ); + logger.error( + "Consider testing with: curl " + dynamoDBConfig.endpoint + ); } - + // If we can't list tables, we should still try the query // It might be a permissions issue where we can query but not list logger.info("Proceeding with query anyway..."); @@ -133,7 +150,7 @@ export function handle_GET_delphi(req: Request, res: Response) { logger.info("Proceeding with query anyway..."); proceedWithQuery(); } - + // Function to execute the actual query function proceedWithQuery() { // Query parameters to get LLM topic names for the conversation @@ -141,181 +158,197 @@ export function handle_GET_delphi(req: Request, res: Response) { TableName: tableName, KeyConditionExpression: "conversation_id = :cid", ExpressionAttributeValues: { - ":cid": conversation_id - } + ":cid": conversation_id, + }, }; - + // Log that we're executing the query logger.info(`Executing DynamoDB query: ${JSON.stringify(params)}`); // Query DynamoDB - 
docClient.send(new QueryCommand(params)) - .then(data => { - // Early return if no items found - if (!data.Items || data.Items.length === 0) { + docClient + .send(new QueryCommand(params)) + .then((data) => { + // Early return if no items found + if (!data.Items || data.Items.length === 0) { + return res.json({ + status: "success", + message: "No LLM topics found for this conversation", + report_id: report_id, + conversation_id: conversation_id, + topics: {}, + }); + } + + // Process results - organize topics by run, then by layer, then by cluster + // Group by creation timestamp and model to identify different runs + const items = data.Items; + + // First group by model and creation date (truncate to day for grouping) + const runGroups: Record = {}; + + items.forEach((item) => { + const modelName = item.model_name || "unknown"; + const createdAt = item.created_at || ""; + const createdDate = createdAt.substring(0, 10); // Take just the date part YYYY-MM-DD + + // Create a run key based on model and creation date + const runKey = `${modelName}_${createdDate}`; + + if (!runGroups[runKey]) { + runGroups[runKey] = []; + } + + runGroups[runKey].push(item); + }); + + // Now organize each run into layers and clusters + const allRuns: Record = {}; + + Object.entries(runGroups).forEach(([runKey, runItems]) => { + const topicsByLayer: Record> = {}; + + // Process each item in this run + runItems.forEach((item) => { + const layerId = item.layer_id; + const clusterId = item.cluster_id; + + // Initialize layer if it doesn't exist + if (!topicsByLayer[layerId]) { + topicsByLayer[layerId] = {}; + } + + // Add topic to its layer + topicsByLayer[layerId][clusterId] = { + topic_name: item.topic_name, + model_name: item.model_name, + created_at: item.created_at, + topic_key: item.topic_key, + }; + }); + + // Get sample data to represent the run + const sampleItem = runItems[0]; + + // Add run with metadata + allRuns[runKey] = { + model_name: sampleItem.model_name, + created_date: sampleItem.created_at?.substring(0, 10), + topics_by_layer: topicsByLayer, + item_count: runItems.length, + }; + }); + + // Return all runs, with the most recent runs first + const sortedRuns = Object.entries(allRuns) + .sort(([keyA, runA], [keyB, runB]) => { + // Sort by created_date in descending order (newest first) + const dateA = runA.created_date || ""; + const dateB = runB.created_date || ""; + return dateB.localeCompare(dateA); + }) + .reduce((acc, [key, value]) => { + acc[key] = value; + return acc; + }, {} as Record); + + // Return the results return res.json({ status: "success", - message: "No LLM topics found for this conversation", + message: "LLM topics retrieved successfully", report_id: report_id, conversation_id: conversation_id, - topics: {} + runs: sortedRuns, }); - } - - // Process results - organize topics by run, then by layer, then by cluster - // Group by creation timestamp and model to identify different runs - const items = data.Items; - - // First group by model and creation date (truncate to day for grouping) - const runGroups: Record = {}; - - items.forEach(item => { - const modelName = item.model_name || 'unknown'; - const createdAt = item.created_at || ''; - const createdDate = createdAt.substring(0, 10); // Take just the date part YYYY-MM-DD - - // Create a run key based on model and creation date - const runKey = `${modelName}_${createdDate}`; - - if (!runGroups[runKey]) { - runGroups[runKey] = []; + }) + .catch((err) => { + // Check if this is a "table not found" error + if (err.name === 
"ResourceNotFoundException") { + logger.warn( + `DynamoDB table not found: Delphi_CommentClustersLLMTopicNames` + ); + return res.json({ + status: "success", + message: "Delphi topic service not available yet", + hint: + "The table may need to be created by running the Delphi pipeline", + report_id: report_id, + conversation_id: conversation_id, + topics: {}, + }); } - - runGroups[runKey].push(item); - }); - // Now organize each run into layers and clusters - const allRuns: Record = {}; - - Object.entries(runGroups).forEach(([runKey, runItems]) => { - const topicsByLayer: Record> = {}; - - // Process each item in this run - runItems.forEach(item => { - const layerId = item.layer_id; - const clusterId = item.cluster_id; - - // Initialize layer if it doesn't exist - if (!topicsByLayer[layerId]) { - topicsByLayer[layerId] = {}; - } - - // Add topic to its layer - topicsByLayer[layerId][clusterId] = { - topic_name: item.topic_name, - model_name: item.model_name, - created_at: item.created_at, - topic_key: item.topic_key - }; - }); - - // Get sample data to represent the run - const sampleItem = runItems[0]; - - // Add run with metadata - allRuns[runKey] = { - model_name: sampleItem.model_name, - created_date: sampleItem.created_at?.substring(0, 10), - topics_by_layer: topicsByLayer, - item_count: runItems.length - }; - }); + // Log detailed error information + logger.error(`Error querying DynamoDB: ${err.message}`); + logger.error(`Error type: ${err.name}`); + logger.error(`Error code: ${err.$metadata?.httpStatusCode}`); + + // Format a helpful message based on the error type + let helpMessage = ""; + + // Check credentials error + if (err.name === "CredentialsProviderError") { + logger.error( + "AWS credential issue - check environment variables" + ); + helpMessage = + "AWS credential issue - check AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables"; + } + + // Check connection error + if (err.name === "NetworkingError") { + logger.error( + `Network error connecting to DynamoDB endpoint: ${ + dynamoDBConfig.endpoint || "default" + }` + ); + helpMessage = `Network error connecting to DynamoDB at ${ + dynamoDBConfig.endpoint || "default" + } - check if DynamoDB service is running and accessible`; + } + + // Check permissions error + if (err.name === "AccessDeniedException") { + logger.error( + "AWS permissions issue - credentials do not have access to this DynamoDB table" + ); + helpMessage = + "AWS permissions issue - credentials do not have access to this DynamoDB table"; + } + + // If validation error + if (err.name === "ValidationException") { + logger.error(`DynamoDB validation error: ${err.message}`); + helpMessage = `DynamoDB validation error: ${err.message} - check table schema or partition key`; + } + + // Try to log more details if available + try { + logger.error(JSON.stringify(err, null, 2)); + } catch (e) { + logger.error("Could not stringify error object"); + } - // Return all runs, with the most recent runs first - const sortedRuns = Object.entries(allRuns) - .sort(([keyA, runA], [keyB, runB]) => { - // Sort by created_date in descending order (newest first) - const dateA = runA.created_date || ''; - const dateB = runB.created_date || ''; - return dateB.localeCompare(dateA); - }) - .reduce((acc, [key, value]) => { - acc[key] = value; - return acc; - }, {} as Record); - - // Return the results - return res.json({ - status: "success", - message: "LLM topics retrieved successfully", - report_id: report_id, - conversation_id: conversation_id, - runs: sortedRuns - }); - 
}) - .catch(err => { - // Check if this is a "table not found" error - if (err.name === "ResourceNotFoundException") { - logger.warn(`DynamoDB table not found: Delphi_CommentClustersLLMTopicNames`); return res.json({ - status: "success", - message: "Delphi topic service not available yet", - hint: "The table may need to be created by running the Delphi pipeline", + status: "success", // Use success to avoid frontend errors + message: "Error querying DynamoDB", + error: err.message, + error_type: err.name, + help: helpMessage, report_id: report_id, conversation_id: conversation_id, - topics: {} + topics: {}, // Return empty topics to avoid client-side errors }); - } - - // Log detailed error information - logger.error(`Error querying DynamoDB: ${err.message}`); - logger.error(`Error type: ${err.name}`); - logger.error(`Error code: ${err.$metadata?.httpStatusCode}`); - - // Format a helpful message based on the error type - let helpMessage = ""; - - // Check credentials error - if (err.name === "CredentialsProviderError") { - logger.error("AWS credential issue - check environment variables"); - helpMessage = "AWS credential issue - check AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables"; - } - - // Check connection error - if (err.name === "NetworkingError") { - logger.error(`Network error connecting to DynamoDB endpoint: ${dynamoDBConfig.endpoint || "default"}`); - helpMessage = `Network error connecting to DynamoDB at ${dynamoDBConfig.endpoint || "default"} - check if DynamoDB service is running and accessible`; - } - - // Check permissions error - if (err.name === "AccessDeniedException") { - logger.error("AWS permissions issue - credentials do not have access to this DynamoDB table"); - helpMessage = "AWS permissions issue - credentials do not have access to this DynamoDB table"; - } - - // If validation error - if (err.name === "ValidationException") { - logger.error(`DynamoDB validation error: ${err.message}`); - helpMessage = `DynamoDB validation error: ${err.message} - check table schema or partition key`; - } - - // Try to log more details if available - try { - logger.error(JSON.stringify(err, null, 2)); - } catch (e) { - logger.error("Could not stringify error object"); - } - - return res.json({ - status: "success", // Use success to avoid frontend errors - message: "Error querying DynamoDB", - error: err.message, - error_type: err.name, - help: helpMessage, - report_id: report_id, - conversation_id: conversation_id, - topics: {} // Return empty topics to avoid client-side errors }); - }); } }) - .catch(err => { + .catch((err) => { logger.error(`Error in delphi endpoint: ${err}`); return res.json({ status: "error", message: "Error processing request", error: err.message, - report_id: report_id + report_id: report_id, }); }); -} \ No newline at end of file +} diff --git a/server/src/routes/delphi/batchReports.ts b/server/src/routes/delphi/batchReports.ts index e5bb3d92ea..6de77c9271 100644 --- a/server/src/routes/delphi/batchReports.ts +++ b/server/src/routes/delphi/batchReports.ts @@ -4,14 +4,15 @@ import { DynamoDB } from "@aws-sdk/client-dynamodb"; import { DynamoDBDocument } from "@aws-sdk/lib-dynamodb"; import logger from "../../utils/logger"; import { getZidFromReport } from "../../utils/parameter"; +import Config from "../../config"; // Initialize DynamoDB client const dynamoDbClient = new DynamoDB({ - endpoint: process.env.DYNAMODB_ENDPOINT || "http://dynamodb:8000", - region: process.env.AWS_REGION || "us-west-2", + endpoint: Config.DYNAMODB_ENDPOINT as 
string, + region: Config.AWS_REGION as string, credentials: { - accessKeyId: process.env.AWS_ACCESS_KEY_ID || "DUMMYIDEXAMPLE", - secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || "DUMMYEXAMPLEKEY", + accessKeyId: Config.AWS_ACCESS_KEY_ID as string, + secretAccessKey: Config.AWS_SECRET_ACCESS_KEY as string, }, }); diff --git a/server/src/routes/delphi/jobs.ts b/server/src/routes/delphi/jobs.ts index ade3883e5a..73cc2b9e62 100644 --- a/server/src/routes/delphi/jobs.ts +++ b/server/src/routes/delphi/jobs.ts @@ -1,19 +1,20 @@ -import { v4 as uuidv4 } from 'uuid'; -import { Request, Response } from 'express'; -import { DynamoDB } from '@aws-sdk/client-dynamodb'; -import { DynamoDBDocument } from '@aws-sdk/lib-dynamodb'; -import logger from '../../utils/logger'; -import { getZidFromReport } from '../../utils/parameter'; +import { v4 as uuidv4 } from "uuid"; +import { Request, Response } from "express"; +import { DynamoDB } from "@aws-sdk/client-dynamodb"; +import { DynamoDBDocument } from "@aws-sdk/lib-dynamodb"; +import logger from "../../utils/logger"; +import { getZidFromReport } from "../../utils/parameter"; +import Config from "../../config"; // Initialize DynamoDB client const dynamoDbClient = new DynamoDB({ // Use environment variables for endpoint and region, or the docker service name - endpoint: process.env.DYNAMODB_ENDPOINT || 'http://dynamodb:8000', - region: process.env.AWS_REGION || 'us-west-2', + endpoint: Config.DYNAMODB_ENDPOINT, + region: Config.AWS_REGION || "us-east-1", // For local development or Docker container credentials: { - accessKeyId: process.env.AWS_ACCESS_KEY_ID || 'DUMMYIDEXAMPLE', - secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || 'DUMMYEXAMPLEKEY', + accessKeyId: Config.AWS_ACCESS_KEY_ID || "DUMMYIDEXAMPLE", + secretAccessKey: Config.AWS_SECRET_ACCESS_KEY || "DUMMYEXAMPLEKEY", }, }); @@ -21,56 +22,64 @@ const dynamoDbClient = new DynamoDB({ const docClient = DynamoDBDocument.from(dynamoDbClient); // Handler for POST /api/v3/delphi/jobs - Create a new Delphi job -export async function handle_POST_delphi_jobs(req: Request, res: Response): Promise { +export async function handle_POST_delphi_jobs( + req: Request, + res: Response +): Promise { try { - logger.info(`Creating Delphi job with parameters: ${JSON.stringify(req.body)}`); - + logger.info( + `Creating Delphi job with parameters: ${JSON.stringify(req.body)}` + ); + // Extract parameters from request body - const { + const { report_id, conversation_id, - job_type = 'FULL_PIPELINE', + job_type = "FULL_PIPELINE", priority = 50, max_votes, batch_size, - model = 'claude-3-7-sonnet-20250219', - include_topics = true + model = "claude-3-7-sonnet-20250219", + include_topics = true, } = req.body; - + // Validate required parameters - if ((!report_id && !conversation_id)) { + if (!report_id && !conversation_id) { res.status(400).json({ - status: 'error', - error: 'Missing required parameter: either report_id or conversation_id must be provided', + status: "error", + error: + "Missing required parameter: either report_id or conversation_id must be provided", }); return; } - + // Convert report_id to conversation_id if needed // Assuming there's a mapping function or service to do this - const zid = conversation_id || (report_id ? await getConversationIdFromReportId(report_id) : null); - + const zid = + conversation_id || + (report_id ? 
await getConversationIdFromReportId(report_id) : null); + if (!zid) { res.status(400).json({ - status: 'error', - error: 'Could not determine conversation ID', + status: "error", + error: "Could not determine conversation ID", }); return; } - + // Generate a unique job ID const job_id = uuidv4(); - + // Current timestamp in ISO format const now = new Date().toISOString(); - + // Build job configuration based on the Python CLI implementation const jobConfig: any = {}; - - if (job_type === 'FULL_PIPELINE') { + + if (job_type === "FULL_PIPELINE") { // Full pipeline configs const stages = []; - + // PCA stage const pcaConfig: any = {}; if (max_votes) { @@ -80,120 +89,138 @@ export async function handle_POST_delphi_jobs(req: Request, res: Response): Prom pcaConfig.batch_size = parseInt(batch_size, 10); } stages.push({ stage: "PCA", config: pcaConfig }); - + // UMAP stage stages.push({ stage: "UMAP", config: { n_neighbors: 15, - min_dist: 0.1 - } + min_dist: 0.1, + }, }); - + // Report stage stages.push({ stage: "REPORT", config: { model: model, - include_topics: include_topics - } + include_topics: include_topics, + }, }); - + // Add stages and visualizations to job config jobConfig.stages = stages; jobConfig.visualizations = ["basic", "enhanced", "multilayer"]; } - + // Create job item with version number for optimistic locking const jobItem = { - job_id: job_id, // Primary key - status: 'PENDING', // Secondary index key - created_at: now, // Secondary index key + job_id: job_id, // Primary key + status: "PENDING", // Secondary index key + created_at: now, // Secondary index key updated_at: now, - version: 1, // Version for optimistic locking - started_at: "", // Using empty strings for nullable fields + version: 1, // Version for optimistic locking + started_at: "", // Using empty strings for nullable fields completed_at: "", - worker_id: "none", // Non-empty placeholder for index + worker_id: "none", // Non-empty placeholder for index job_type: job_type, priority: parseInt(String(priority), 10), - conversation_id: String(zid), // Using conversation_id + conversation_id: String(zid), // Using conversation_id retry_count: 0, max_retries: 3, - timeout_seconds: 7200, // 2 hours default timeout + timeout_seconds: 7200, // 2 hours default timeout job_config: JSON.stringify(jobConfig), job_results: JSON.stringify({}), logs: JSON.stringify({ entries: [ { timestamp: now, - level: 'INFO', - message: `Job created for conversation ${zid}` - } + level: "INFO", + message: `Job created for conversation ${zid}`, + }, ], - log_location: "" + log_location: "", }), - created_by: 'api' + created_by: "api", }; - + // Put item in DynamoDB try { - logger.info(`Putting job item in DynamoDB: ${JSON.stringify({ - TableName: 'Delphi_JobQueue', - Item: { job_id: jobItem.job_id, conversation_id: jobItem.conversation_id } - })}`); - + logger.info( + `Putting job item in DynamoDB: ${JSON.stringify({ + TableName: "Delphi_JobQueue", + Item: { + job_id: jobItem.job_id, + conversation_id: jobItem.conversation_id, + }, + })}` + ); + await docClient.put({ - TableName: 'Delphi_JobQueue', - Item: jobItem + TableName: "Delphi_JobQueue", + Item: jobItem, }); - + // Return success with job ID res.json({ - status: 'success', + status: "success", job_id: job_id, - conversation_id: zid + conversation_id: zid, }); } catch (dbError) { - logger.error(`Error writing to DynamoDB: ${dbError instanceof Error ? dbError.message : dbError}`); + logger.error( + `Error writing to DynamoDB: ${ + dbError instanceof Error ? 
dbError.message : dbError + }` + ); throw dbError; // Let the outer catch handle it } - } catch (error) { - logger.error(`Error creating Delphi job: ${error instanceof Error ? error.message : error}`); + logger.error( + `Error creating Delphi job: ${ + error instanceof Error ? error.message : error + }` + ); // Log more details for better debugging if (error instanceof Error) { logger.error(`Error name: ${error.name}`); logger.error(`Error stack: ${error.stack}`); } - + // Return detailed error for debugging res.status(500).json({ - status: 'error', - error: error instanceof Error ? error.message : 'Unknown error', - code: error instanceof Error && 'code' in error ? (error as any).code : undefined, - details: process.env.NODE_ENV === 'development' ? String(error) : undefined + status: "error", + error: error instanceof Error ? error.message : "Unknown error", + code: + error instanceof Error && "code" in error + ? (error as any).code + : undefined, + details: + Config.nodeEnv === "development" ? String(error) : undefined, }); } } // Helper function to get conversation_id from report_id -async function getConversationIdFromReportId(report_id: string): Promise<string | null> { +async function getConversationIdFromReportId( + report_id: string +): Promise<string | null> { try { logger.info(`Getting conversation_id for report_id: ${report_id}`); - + // Use the existing util function if available, otherwise implement here - if (typeof getZidFromReport === 'function') { + if (typeof getZidFromReport === "function") { const zid = await getZidFromReport(report_id); // Ensure we return a string or null to match the function signature return zid !== null ? zid.toString() : null; } - + // Strip the 'r' prefix if it exists (e.g., r123abc -> 123abc) let normalized_report_id = report_id; - if (report_id.startsWith('r') && report_id.length > 1) { + if (report_id.startsWith("r") && report_id.length > 1) { normalized_report_id = report_id.substring(1); } - + // In this case, we need to query the zid from the zinvites table // The report_id is the same as the zinvite const query = ` @@ -201,25 +228,29 @@ async function getConversationIdFromReportId(report_id: string): Promise<string | null> {
diff --git a/server/src/routes/delphi/visualizations.ts b/server/src/routes/delphi/visualizations.ts index 854da926db..949998fa7f 100644 --- a/server/src/routes/delphi/visualizations.ts +++ b/server/src/routes/delphi/visualizations.ts @@ -2,32 +2,40 @@ import { Request, Response } from "express"; import logger from "../../utils/logger"; import { getZidFromReport } from "../../utils/parameter"; import { DynamoDBClient } from "@aws-sdk/client-dynamodb"; -import { DynamoDBDocumentClient, QueryCommand, ScanCommand } from "@aws-sdk/lib-dynamodb"; -import { - S3Client, - 
diff --git a/server/src/routes/delphi/visualizations.ts b/server/src/routes/delphi/visualizations.ts
index 854da926db..949998fa7f 100644
--- a/server/src/routes/delphi/visualizations.ts
+++ b/server/src/routes/delphi/visualizations.ts
@@ -2,32 +2,40 @@ import { Request, Response } from "express";
 import logger from "../../utils/logger";
 import { getZidFromReport } from "../../utils/parameter";
 import { DynamoDBClient } from "@aws-sdk/client-dynamodb";
-import { DynamoDBDocumentClient, QueryCommand, ScanCommand } from "@aws-sdk/lib-dynamodb";
-import {
-  S3Client,
-  ListObjectsV2Command,
-  GetObjectCommand
+import {
+  DynamoDBDocumentClient,
+  QueryCommand,
+  ScanCommand,
+} from "@aws-sdk/lib-dynamodb";
+import {
+  S3Client,
+  ListObjectsV2Command,
+  GetObjectCommand,
 } from "@aws-sdk/client-s3";
 import { getSignedUrl } from "@aws-sdk/s3-request-presigner";
+import Config from "../../config";

 /**
  * Handler for Delphi API route that retrieves visualization information
  */
-export async function handle_GET_delphi_visualizations(req: Request, res: Response) {
+export async function handle_GET_delphi_visualizations(
+  req: Request,
+  res: Response
+) {
   logger.info("Delphi visualizations API request received");
-
+
   try {
     // Get report_id from request
     const report_id = req.query.report_id as string;
     const jobId = req.query.job_id as string;
-
+
     if (!report_id) {
-      return res.json({
-        status: "error",
-        message: "report_id is required"
+      return res.json({
+        status: "error",
+        message: "report_id is required",
       });
     }
-
+
     // Extract zid from report_id
     let zid;
     try {
@@ -37,37 +45,39 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon
       return res.json({
         status: "error",
         message: "Could not find conversation for report_id",
-        report_id
+        report_id,
       });
     }
-
+
     if (!zid) {
       return res.json({
         status: "error",
         message: "Could not find conversation for report_id",
-        report_id
+        report_id,
       });
     }

     const conversation_id = zid.toString();
-    logger.info(`Fetching visualizations for report_id: ${report_id}, conversation_id: ${conversation_id}`);
+    logger.info(
+      `Fetching visualizations for report_id: ${report_id}, conversation_id: ${conversation_id}`
+    );

     // Configure S3 client
     const s3Config: any = {
-      region: process.env.AWS_REGION || "us-east-1",
-      endpoint: process.env.AWS_S3_ENDPOINT || "http://minio:9000",
+      region: Config.AWS_REGION || "us-east-1",
+      endpoint: Config.AWS_S3_ENDPOINT || "http://minio:9000",
       credentials: {
-        accessKeyId: process.env.AWS_S3_ACCESS_KEY_ID || "minioadmin",
-        secretAccessKey: process.env.AWS_S3_SECRET_ACCESS_KEY || "minioadmin"
+        accessKeyId: Config.AWS_S3_ACCESS_KEY_ID || "minioadmin",
+        secretAccessKey: Config.AWS_S3_SECRET_ACCESS_KEY || "minioadmin",
       },
-      forcePathStyle: true // Required for MinIO
+      forcePathStyle: true, // Required for MinIO
     };

     // Log S3 connection info
     logger.info(`S3 Config:
       Endpoint: ${s3Config.endpoint}
       Region: ${s3Config.region}
-      Bucket: ${process.env.AWS_S3_BUCKET_NAME || "delphi"}
+      Bucket: ${Config.AWS_S3_BUCKET_NAME || "polis-delphi"}
     `);

     // Create S3 client
@@ -80,16 +90,16 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon
         status: "error",
         message: "Failed to initialize S3 client",
         error: err.message || String(err),
-        report_id
+        report_id,
       });
     }
-
-    const bucketName = process.env.AWS_S3_BUCKET_NAME || "delphi";
+
+    const bucketName = Config.AWS_S3_BUCKET_NAME || "polis-delphi";

     // Define S3 path prefix to search
     // Use conversation_id instead of report_id since files are stored by conversation_id
-    const prefix = jobId
-      ? `visualizations/${conversation_id}/${jobId}/`
+    const prefix = jobId
+      ? `visualizations/${conversation_id}/${jobId}/`
       : `visualizations/${conversation_id}/`;

     // Get job metadata from DynamoDB if available
@@ -107,17 +117,25 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon
     const listObjectsParams = {
       Bucket: bucketName,
       Prefix: prefix,
-      MaxKeys: 1000 // Increase if you expect more than 1000 objects
+      MaxKeys: 1000, // Increase if you expect more than 1000 objects
     };
-
+
     // Enhanced logging for debugging
-    logger.info(`Listing S3 objects with params: ${JSON.stringify(listObjectsParams)}`);
+    logger.info(
+      `Listing S3 objects with params: ${JSON.stringify(listObjectsParams)}`
+    );

     try {
-      s3Response = await s3Client.send(new ListObjectsV2Command(listObjectsParams));
-
+      s3Response = await s3Client.send(
+        new ListObjectsV2Command(listObjectsParams)
+      );
+
       // Log successful response
-      logger.info(`S3 listing successful. Found ${s3Response.Contents?.length || 0} objects.`);
+      logger.info(
+        `S3 listing successful. Found ${
+          s3Response.Contents?.length || 0
+        } objects.`
+      );

       if (s3Response.Contents && s3Response.Contents.length > 0) {
         // Log first few keys for debugging
         const keys = s3Response.Contents.slice(0, 3).map(obj => obj.Key);
@@ -136,10 +154,10 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon
         message: "Error listing visualizations",
         error: err.message || String(err),
         report_id,
-        conversation_id
+        conversation_id,
       });
     }
-
+
     // Check if files were found
     if (!s3Response.Contents || s3Response.Contents.length === 0) {
       return res.json({
@@ -148,53 +166,59 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon
         report_id,
         conversation_id,
         visualizations: [],
-        jobs: jobMetadata
+        jobs: jobMetadata,
       });
     }

     // Group visualizations by job
     const visualizationsByJob: Record<string, any[]> = {};
-
+
     // Process each object
     for (const obj of s3Response.Contents) {
       const key = obj.Key || "";
-
+
       // Parse job ID from the key
       // Expected format: visualizations/{report_id}/{job_id}/layer_{layer_id}_datamapplot.html
-      const keyParts = key.split('/');
-
+      const keyParts = key.split("/");
+
       if (keyParts.length < 4) continue; // Skip if doesn't match expected format
-
+
       const currentJobId = keyParts[2];
       const fileName = keyParts[3];
-
+
       // Skip if not an HTML file
-      if (!fileName.endsWith('.html') && !fileName.endsWith('.png') && !fileName.endsWith('.svg')) {
+      if (
+        !fileName.endsWith(".html") &&
+        !fileName.endsWith(".png") &&
+        !fileName.endsWith(".svg")
+      ) {
         continue;
       }
-
+
       // Parse layer ID
       const layerMatch = fileName.match(/layer_(\d+)/);
       const layerId = layerMatch ? parseInt(layerMatch[1]) : null;
-
+
       if (layerId === null) continue; // Skip if can't determine layer
-
+
       // Generate a signed URL for this object
       let url;
       try {
         const getObjectParams = {
           Bucket: bucketName,
-          Key: key
+          Key: key,
         };
-
+
         // Instead of using presigned URLs that don't work across network boundaries,
         // just return a direct URL to the object that can be accessed from the browser
         url = `http://localhost:9000/${bucketName}/${key}`;
       } catch (err: any) {
-        logger.error(`Error generating signed URL for ${key}: ${err.message || err}`);
+        logger.error(
+          `Error generating signed URL for ${key}: ${err.message || err}`
+        );
         continue; // Skip this file and continue
       }
-
+
       // Determine visualization type
       let type = "unknown";
       if (fileName.includes("datamapplot.html")) {
@@ -206,12 +230,12 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon
       } else if (fileName.includes("static.svg")) {
         type = "static_svg";
       }
-
+
       // Initialize job array if needed
       if (!visualizationsByJob[currentJobId]) {
         visualizationsByJob[currentJobId] = [];
       }
-
+
       // Add to the job's visualizations
       visualizationsByJob[currentJobId].push({
         key,
@@ -219,29 +243,31 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon
         layerId,
         type,
         lastModified: obj.LastModified,
-        size: obj.Size
+        size: obj.Size,
       });
     }
-
+
     // Sort visualizations by layer ID
-    Object.values(visualizationsByJob).forEach(visArray => {
+    Object.values(visualizationsByJob).forEach((visArray) => {
       visArray.sort((a, b) => (a.layerId || 0) - (b.layerId || 0));
     });
-
+
     // Combine job metadata with visualizations
-    const jobsWithVisualizations = Object.keys(visualizationsByJob).map(jobId => {
-      const jobInfo = jobMetadata[jobId] || {
-        jobId,
-        status: "unknown",
-        createdAt: null
-      };
-
-      return {
-        ...jobInfo,
-        visualizations: visualizationsByJob[jobId]
-      };
-    });
-
+    const jobsWithVisualizations = Object.keys(visualizationsByJob).map(
+      (jobId) => {
+        const jobInfo = jobMetadata[jobId] || {
+          jobId,
+          status: "unknown",
+          createdAt: null,
+        };
+
+        return {
+          ...jobInfo,
+          visualizations: visualizationsByJob[jobId],
+        };
+      }
+    );
+
     // Sort jobs by createdAt (newest first)
     jobsWithVisualizations.sort((a, b) => {
       const dateA = a.createdAt ? new Date(a.createdAt).getTime() : 0;
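Earlier in this file's diff, the handler keeps `getSignedUrl` imported but deliberately returns a plain `http://localhost:9000/...` URL: a URL presigned against the in-container endpoint (`http://minio:9000`) carries that hostname in its signature and would not resolve from a browser on the host. For comparison only, a minimal sketch of the presigned variant using the already-imported AWS SDK v3 helpers; the `presignedVisualizationUrl` wrapper is a hypothetical name, not part of this change:

```ts
// Hypothetical sketch, not part of the diff.
import { S3Client, GetObjectCommand } from "@aws-sdk/client-s3";
import { getSignedUrl } from "@aws-sdk/s3-request-presigner";

async function presignedVisualizationUrl(
  s3Client: S3Client,
  bucket: string,
  key: string
): Promise<string> {
  // The signature embeds the endpoint the client was configured with
  // (http://minio:9000 inside the Docker network), which is why the
  // handler falls back to a direct URL reachable from the browser.
  return getSignedUrl(
    s3Client,
    new GetObjectCommand({ Bucket: bucket, Key: key }),
    { expiresIn: 3600 } // seconds
  );
}
```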
@@ -255,9 +281,8 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon
       message: "Visualizations retrieved successfully",
       report_id,
       conversation_id,
-      jobs: jobsWithVisualizations
+      jobs: jobsWithVisualizations,
     });
-
   } catch (err: any) {
     logger.error(`Error in delphi visualizations endpoint: ${err.message}`);
     logger.error(err.stack);
@@ -265,7 +290,7 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon
       status: "error",
       message: "Error processing request",
       error: err.message,
-      report_id: req.query.report_id as string
+      report_id: req.query.report_id as string,
     });
   }
 }
@@ -273,16 +298,18 @@ export async function handle_GET_delphi_visualizations(req: Request, res: Respon
 /**
  * Fetch job metadata from DynamoDB
  */
-async function fetchJobMetadata(report_id: string, conversation_id: string): Promise<Record<string, any>> {
+async function fetchJobMetadata(
+  report_id: string,
+  conversation_id: string
+): Promise<Record<string, any>> {
   try {
     // Configure DynamoDB client
     const dynamoDBConfig: any = {
-      region: process.env.AWS_REGION || "us-east-1",
-      endpoint: "http://dynamodb:8000", // Local DynamoDB
+      region: Config.AWS_REGION || "us-east-1",
       credentials: {
-        accessKeyId: 'DUMMYIDEXAMPLE',
-        secretAccessKey: 'DUMMYEXAMPLEKEY'
-      }
+        accessKeyId: "DUMMYIDEXAMPLE",
+        secretAccessKey: "DUMMYEXAMPLEKEY",
+      },
     };

     // Create DynamoDB clients
@@ -291,7 +318,7 @@ async function fetchJobMetadata(report_id: string, conversation_id: string): Pro
       marshallOptions: {
         convertEmptyValues: true,
         removeUndefinedValues: true,
-      }
+      },
     });

     // Scan for jobs by conversation ID (using scan instead of query since the index may not exist)
@@ -300,19 +327,19 @@ async function fetchJobMetadata(report_id: string, conversation_id: string): Pro
       TableName: "Delphi_JobQueue",
       FilterExpression: "conversation_id = :cid",
       ExpressionAttributeValues: {
-        ":cid": conversation_id
-      }
+        ":cid": conversation_id,
+      },
     };

     try {
       logger.info(`Scanning for jobs with conversation_id: ${conversation_id}`);
       const scanResponse = await docClient.send(new ScanCommand(scanParams));
-
+
       if (!scanResponse.Items || scanResponse.Items.length === 0) {
         logger.info(`No jobs found for conversation ${conversation_id}`);
         return {};
       }
-
+
       // Process jobs from scan
       return processJobItems(scanResponse.Items);
     } catch (err: any) {
@@ -330,19 +357,19 @@
  */
 function processJobItems(items: any[]): Record<string, any> {
   const jobMap: Record<string, any> = {};
-
+
   for (const item of items) {
     const job_id = item.job_id;
-
+
     jobMap[job_id] = {
       jobId: job_id,
       status: item.status || "unknown",
       createdAt: item.created_at || null,
       startedAt: item.started_at || null,
       completedAt: item.completed_at || null,
-      results: item.job_results ? JSON.parse(item.job_results) : null
+      results: item.job_results ? JSON.parse(item.job_results) : null,
     };
   }
-
+
   return jobMap;
-}
\ No newline at end of file
+}
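As the comment in `fetchJobMetadata` notes, the handler scans `Delphi_JobQueue` with a `FilterExpression` because an index on `conversation_id` may not exist; a Scan reads the whole table and filters afterwards. If such a global secondary index were added, the same lookup could become a targeted Query. A minimal sketch under that assumption; the index name `conversation_id-index` and the `queryJobsByConversation` helper are hypothetical, not part of this change:

```ts
// Hypothetical sketch, not part of the diff: Query via an assumed GSI.
import { DynamoDBDocumentClient, QueryCommand } from "@aws-sdk/lib-dynamodb";

async function queryJobsByConversation(
  docClient: DynamoDBDocumentClient,
  conversationId: string
) {
  const resp = await docClient.send(
    new QueryCommand({
      TableName: "Delphi_JobQueue",
      IndexName: "conversation_id-index", // hypothetical GSI on conversation_id
      KeyConditionExpression: "conversation_id = :cid",
      ExpressionAttributeValues: { ":cid": conversationId },
    })
  );
  // Only items matching the key condition are read, unlike a full-table Scan.
  return resp.Items ?? [];
}
```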