From 6164de97f332d167c5cecf08c773dda241438331 Mon Sep 17 00:00:00 2001 From: lucasmelogithub Date: Fri, 4 Oct 2024 08:33:05 -0500 Subject: [PATCH 1/3] Add OPEA ChatQna example --- examples/gen-ai-xeon-opea-chatqna/README.md | 66 +++++++++++++++++++ .../gen-ai-xeon-opea-chatqna/cloud_init.yml | 18 +++++ examples/gen-ai-xeon-opea-chatqna/main.tf | 38 +++++++++++ .../gen-ai-xeon-opea-chatqna/variables.tf | 12 ++++ main.tf | 4 +- variables.tf | 4 +- versions.tf | 2 +- 7 files changed, 140 insertions(+), 4 deletions(-) create mode 100644 examples/gen-ai-xeon-opea-chatqna/README.md create mode 100644 examples/gen-ai-xeon-opea-chatqna/cloud_init.yml create mode 100644 examples/gen-ai-xeon-opea-chatqna/main.tf create mode 100644 examples/gen-ai-xeon-opea-chatqna/variables.tf diff --git a/examples/gen-ai-xeon-opea-chatqna/README.md b/examples/gen-ai-xeon-opea-chatqna/README.md new file mode 100644 index 0000000..acfb368 --- /dev/null +++ b/examples/gen-ai-xeon-opea-chatqna/README.md @@ -0,0 +1,66 @@ +

+ Intel Logo +

+
+# Intel® Optimized Cloud Modules for Terraform
+
+© Copyright 2024, Intel Corporation
+
+## GCP C4 Instance with 5th Generation Intel® Xeon® Scalable Processor (Emerald Rapids) & Open Platform for Enterprise AI (OPEA) ChatQnA Example
+
+This demo will showcase Retrieval Augmented Generation (RAG) CPU inference using 5th Gen Xeon Scalable Processors on GCP using the OPEA ChatQnA Example. For more information about OPEA, go [here](https://opea.dev/). For more information on this specific example, go [here](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA).
+
+## Usage
+
+## After cloning the repo, modify /examples/gen-ai-xeon-opea-chatqna/variables.tf to add your Huggingface Token
+
+Some models require a token. Modify the Huggingface Token variable to your specific Huggingface Token; for information on creating a Huggingface token, go [here](https://huggingface.co/docs/hub/en/security-tokens).
+
+```hcl
+variable "huggingface_token" {
+  description = "Huggingface Token"
+  default     = " "
+  type        = string
+}
+```
+
+**If needed, also modify values on /examples/gen-ai-xeon-opea-chatqna/main.tf to match your needs**
+
+## GCP Cloud Shell Usage
+
+1. Log on to GCP Portal
+2. Enter the GCP Cloud Shell (terminal button on top right of page)
+3. Run the following commands in order:
+
+
+```bash
+git clone https://github.com/intel/terraform-intel-gcp-vm.git
+cd terraform-intel-gcp-vm/examples/gen-ai-xeon-opea-chatqna
+# ADD TOKEN TO variables.tf (SEE ABOVE)
+terraform init
+terraform plan
+terraform apply
+
+# (enter your GCP project ID and "yes" to confirm)
+
+```
+
+After the Terraform module successfully creates the GCP VM instance, **wait ~15 minutes** for the module to launch the containers and download the LLMs before continuing.
+
+## Accessing the Demo
+
+You can access the demos using the following:
+
+- OPEA ChatQnA: `http://yourpublicip:5174`
+
+
+## Deleting the Demo
+
+To delete the demo, run `terraform destroy` to delete all resources created. 
+
+## Pre-requisites for running on a local Workstation (disregard if using GCP Cloud Shell)
+
+1. Google Cloud CLI: https://cloud.google.com/sdk/docs/install
+2. GCP account access configured: https://registry.terraform.io/providers/hashicorp/google/latest/docs/guides/provider_reference.html#running-terraform-on-your-workstation
+3. Terraform: https://learn.hashicorp.com/tutorials/terraform/install-cli
+4. Git: https://git-scm.com/book/en/v2/Getting-Started-Installing-Git
diff --git a/examples/gen-ai-xeon-opea-chatqna/cloud_init.yml b/examples/gen-ai-xeon-opea-chatqna/cloud_init.yml
new file mode 100644
index 0000000..47b0621
--- /dev/null
+++ b/examples/gen-ai-xeon-opea-chatqna/cloud_init.yml
@@ -0,0 +1,18 @@
+#cloud-config
+package_update: true
+package_upgrade: true
+
+packages:
+  - git
+
+runcmd:
+  - apt install ansible -y
+  - git clone https://github.com/intel/optimized-cloud-recipes.git /tmp/optimized-cloud-recipes
+  - cd /tmp/optimized-cloud-recipes/recipes/ai-opea-chatqna-xeon
+  - cp opea.sh /etc/profile.d/opea.sh
+  - echo 'export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}' | sudo tee -a /etc/profile.d/opea.sh
+  - chmod +x /etc/profile.d/opea.sh
+  - source /etc/profile.d/opea.sh
+  - ansible-playbook recipe.yml
+
+
diff --git a/examples/gen-ai-xeon-opea-chatqna/main.tf b/examples/gen-ai-xeon-opea-chatqna/main.tf
new file mode 100644
index 0000000..f5158ad
--- /dev/null
+++ b/examples/gen-ai-xeon-opea-chatqna/main.tf
@@ -0,0 +1,38 @@
+#Random ID to minimize the chances of name conflicts
+resource "random_id" "rid" {
+  byte_length = 3
+}
+
+#GCP Linux VM with Intel OPEA ChatQnA
+module "linux_vm" {
+  source             = "../.." 
#intel/gcp-vm/intel
+  project            = var.project
+  boot_image_project = "ubuntu-os-cloud"
+  boot_image_family  = "ubuntu-2204-lts"
+  name               = "lmelo-ai-opea-chatqna-${random_id.rid.dec}"
+  zone               = "us-east4-a" #"us-central1-a"
+  machine_type       = "c4-highcpu-48"
+  allow_stopping_for_update = true
+  tags               = ["lmelo-ai-opea-chatqna-${random_id.rid.dec}"]
+  user_data          = templatefile("./cloud_init.yml", { HUGGINGFACEHUB_API_TOKEN = var.huggingface_token })
+  access_config = [{
+    nat_ip                 = null
+    public_ptr_domain_name = null
+    network_tier           = "PREMIUM"
+  }, ]
+}
+
+#Required firewall rules
+resource "google_compute_firewall" "rules" {
+  project     = var.project
+  name        = "lmelo-ai-opea-chatqna-${random_id.rid.dec}"
+  network     = "default"
+  description = "Allows access to OPEA AI ChatQnA"
+
+  allow {
+    protocol = "tcp"
+    ports    = ["22","80", "443", "6379", "8001", "6006", "6007", "6000", "7000", "8808", "8000", "8888", "5173", "5174", "9009", "9000"]
+  }
+  source_ranges = ["0.0.0.0/0"]
+  target_tags   = ["lmelo-ai-opea-chatqna-${random_id.rid.dec}"]
+}
diff --git a/examples/gen-ai-xeon-opea-chatqna/variables.tf b/examples/gen-ai-xeon-opea-chatqna/variables.tf
new file mode 100644
index 0000000..ab1f16c
--- /dev/null
+++ b/examples/gen-ai-xeon-opea-chatqna/variables.tf
@@ -0,0 +1,12 @@
+variable "project" {
+  type        = string
+  default     = "551221341017"
+  description = "Enter GCP Project ID"
+}
+
+# Variable for Huggingface Token
+variable "huggingface_token" {
+  description = "Enter a Huggingface Token to be used to download the models"
+  default     = "hf_REDACTED"
+  type        = string
+}
\ No newline at end of file
diff --git a/main.tf b/main.tf
index 81a52fd..d238501 100644
--- a/main.tf
+++ b/main.tf
@@ -7,12 +7,14 @@
 # Intel CPU, we are not populating the min CPU platform. 
We are using the default CPU platform that GCP will provide for these older generation of instances locals { - machine_type_regex = "^([cemn][123u])" + machine_type_regex = "^([cemn][1234u])" machine_types = { "n2": "Intel Ice Lake", "c3": "Intel Sapphire Rapids", "m3": "Intel Ice Lake", "c2": "Intel Cascade Lake" + "c4": null + "n4": null "n1": null "m1": null "m2": null diff --git a/variables.tf b/variables.tf index 9cba265..b62ab82 100644 --- a/variables.tf +++ b/variables.tf @@ -103,13 +103,13 @@ variable "boot_image_family" { variable "boot_disk_size" { type = number description = "Size of the OS disk" - default = 100 + default = 500 } variable "boot_disk_type" { type = string description = "Disk type associated with the OS disk. Values can be either pd-ssd, local-ssd, or pd-standard" - default = "pd-ssd" + default = null } variable "boot_image_project" { diff --git a/versions.tf b/versions.tf index 74804cc..2743115 100644 --- a/versions.tf +++ b/versions.tf @@ -4,7 +4,7 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 5.11" + version = "~> 6.5" } } } From 76feccc3a413f81ee47028cbbc7caf2823d1d57e Mon Sep 17 00:00:00 2001 From: lucasmelogithub Date: Fri, 4 Oct 2024 08:36:01 -0500 Subject: [PATCH 2/3] Clean main.tf and variables.tf --- examples/gen-ai-xeon-opea-chatqna/main.tf | 12 ++++++------ examples/gen-ai-xeon-opea-chatqna/variables.tf | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/examples/gen-ai-xeon-opea-chatqna/main.tf b/examples/gen-ai-xeon-opea-chatqna/main.tf index f5158ad..f7c2a54 100644 --- a/examples/gen-ai-xeon-opea-chatqna/main.tf +++ b/examples/gen-ai-xeon-opea-chatqna/main.tf @@ -5,15 +5,15 @@ resource "random_id" "rid" { #GCP Linux VM with Intel OPEA ChatQnA module "linux_vm" { - source = "../.." 
#intel/gcp-vm/intel
+  source             = "intel/gcp-vm/intel"
   project            = var.project
   boot_image_project = "ubuntu-os-cloud"
   boot_image_family  = "ubuntu-2204-lts"
-  name               = "lmelo-ai-opea-chatqna-${random_id.rid.dec}"
-  zone               = "us-east4-a" #"us-central1-a"
+  name               = "ai-opea-chatqna-${random_id.rid.dec}"
+  zone               = "us-east4-a"
   machine_type       = "c4-highcpu-48"
   allow_stopping_for_update = true
-  tags               = ["lmelo-ai-opea-chatqna-${random_id.rid.dec}"]
+  tags               = ["ai-opea-chatqna-${random_id.rid.dec}"]
   user_data          = templatefile("./cloud_init.yml", { HUGGINGFACEHUB_API_TOKEN = var.huggingface_token })
   access_config = [{
     nat_ip                 = null
@@ -25,7 +25,7 @@ module "linux_vm" {
 #Required firewall rules
 resource "google_compute_firewall" "rules" {
   project     = var.project
-  name        = "lmelo-ai-opea-chatqna-${random_id.rid.dec}"
+  name        = "ai-opea-chatqna-${random_id.rid.dec}"
   network     = "default"
   description = "Allows access to OPEA AI ChatQnA"
 
@@ -34,5 +34,5 @@ resource "google_compute_firewall" "rules" {
     ports    = ["22","80", "443", "6379", "8001", "6006", "6007", "6000", "7000", "8808", "8000", "8888", "5173", "5174", "9009", "9000"]
   }
   source_ranges = ["0.0.0.0/0"]
-  target_tags   = ["lmelo-ai-opea-chatqna-${random_id.rid.dec}"]
+  target_tags   = ["ai-opea-chatqna-${random_id.rid.dec}"]
 }
diff --git a/examples/gen-ai-xeon-opea-chatqna/variables.tf b/examples/gen-ai-xeon-opea-chatqna/variables.tf
index ab1f16c..9c0608f 100644
--- a/examples/gen-ai-xeon-opea-chatqna/variables.tf
+++ b/examples/gen-ai-xeon-opea-chatqna/variables.tf
@@ -1,12 +1,11 @@
 variable "project" {
   type        = string
-  default     = "551221341017"
   description = "Enter GCP Project ID"
 }
 
 # Variable for Huggingface Token
 variable "huggingface_token" {
   description = "Enter a Huggingface Token to be used to download the models"
-  default     = "hf_REDACTED"
+  default     = " "
   type        = string
 }
\ No newline at end of file
From c578649f631f5a174bbd56a6bbfc942e2b64d321 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Fri, 4 Oct 
2024 13:38:24 +0000 Subject: [PATCH 3/3] terraform-docs: automated action --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 8d3fa45..67578c9 100644 --- a/README.md +++ b/README.md @@ -182,13 +182,13 @@ Note that this example may create resources. Run `terraform destroy` when you do | Name | Version | |------|---------| | [terraform](#requirement\_terraform) | >= 1.6 | -| [google](#requirement\_google) | ~> 5.11 | +| [google](#requirement\_google) | ~> 6.5 | ## Providers | Name | Version | |------|---------| -| [google](#provider\_google) | ~> 5.11 | +| [google](#provider\_google) | ~> 6.5 | ## Modules @@ -205,16 +205,16 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [access\_config](#input\_access\_config) | Access configurations, i.e. IPs via which this instance can be accessed via the Internet. Omit to ensure that the instance is not accessible from the Internet. If omitted, ssh provisioners will not work unless Terraform can send traffic to the instance's network. This can be represented as multiple maps |
list(object({
nat_ip = optional(string, null)
public_ptr_domain_name = optional(string)
network_tier = optional(string)
}))
| `[]` | no | +| [access\_config](#input\_access\_config) | Access configurations, i.e. IPs via which this instance can be accessed via the Internet. Omit to ensure that the instance is not accessible from the Internet. If omitted, ssh provisioners will not work unless Terraform can send traffic to the instance's network. This can be represented as multiple maps |
list(object({
nat_ip = optional(string, null)
public_ptr_domain_name = optional(string)
network_tier = optional(string)
}))
| `[]` | no | | [allow\_stopping\_for\_update](#input\_allow\_stopping\_for\_update) | If true, allows Terraform to stop the instance to update its properties | `bool` | `null` | no | | [automatic\_restart](#input\_automatic\_restart) | Specifies if the instance should be restarted if it was terminated by Compute Engine (not a user). | `bool` | `true` | no | | [boot\_disk\_auto\_delete](#input\_boot\_disk\_auto\_delete) | Whether the disk will be auto-deleted when the instance is deleted. | `bool` | `true` | no | | [boot\_disk\_byo\_encryption\_key](#input\_boot\_disk\_byo\_encryption\_key) | A 256-bit [customer-supplied encryption key] (https://cloud.google.com/compute/docs/disks/customer-supplied-encryption), encoded in RFC 4648 base64 to encrypt this disk. | `string` | `null` | no | | [boot\_disk\_labels](#input\_boot\_disk\_labels) | A set of key/value label pairs assigned to the disk. This field is only applicable for persistent disks. | `map(string)` | `{}` | no | | [boot\_disk\_mode](#input\_boot\_disk\_mode) | The mode in which to attach this disk, either READ\_WRITE or READ\_ONLY. | `string` | `"READ_WRITE"` | no | -| [boot\_disk\_size](#input\_boot\_disk\_size) | Size of the OS disk | `number` | `100` | no | +| [boot\_disk\_size](#input\_boot\_disk\_size) | Size of the OS disk | `number` | `500` | no | | [boot\_disk\_source](#input\_boot\_disk\_source) | The name or self\_link of the existing disk (such as those managed by google\_compute\_disk) or disk image. | `string` | `null` | no | -| [boot\_disk\_type](#input\_boot\_disk\_type) | Disk type associated with the OS disk. Values can be either pd-ssd, local-ssd, or pd-standard | `string` | `"pd-ssd"` | no | +| [boot\_disk\_type](#input\_boot\_disk\_type) | Disk type associated with the OS disk. 
Values can be either pd-ssd, local-ssd, or pd-standard | `string` | `null` | no | | [boot\_image\_family](#input\_boot\_image\_family) | The image from which to initialize this disk | `string` | `"ubuntu-2204-lts"` | no | | [boot\_image\_project](#input\_boot\_image\_project) | The ID of the project in which the source image resides. | `string` | `"ubuntu-os-cloud"` | no | | [can\_ip\_forward](#input\_can\_ip\_forward) | Conditional that allows sending and receiving of packets with non-matching source or destination IPs. | `bool` | `false` | no | @@ -226,7 +226,7 @@ No modules. | [enable\_secure\_boot](#input\_enable\_secure\_boot) | Verify the digital signature of all boot components, and halt the boot process if signature verification fails. | `bool` | `false` | no | | [enable\_vtpm](#input\_enable\_vtpm) | Use a virtualized trusted platform module, which is a specialized computer chip you can use to encrypt objects like keys and certificates. | `bool` | `true` | no | | [hostname](#input\_hostname) | A custom hostname for the instance. Must be a fully qualified DNS name and RFC-1035-valid | `string` | `null` | no | -| [ipv6\_access\_config](#input\_ipv6\_access\_config) | Access configurations, i.e. IPs via which this instance can be accessed via the Internet. Omit to ensure that the instance is not accessible from the Internet. If omitted, ssh provisioners will not work unless Terraform can send traffic to the instance's network. This can be represented as multiple maps |
list(object({
public_ptr_domain_name = optional(string, null)
network_tier = optional(string, null)
}))
| `[]` | no | +| [ipv6\_access\_config](#input\_ipv6\_access\_config) | Access configurations, i.e. IPs via which this instance can be accessed via the Internet. Omit to ensure that the instance is not accessible from the Internet. If omitted, ssh provisioners will not work unless Terraform can send traffic to the instance's network. This can be represented as multiple maps |
list(object({
public_ptr_domain_name = optional(string, null)
network_tier = optional(string, null)
}))
| `[]` | no | | [machine\_type](#input\_machine\_type) | The machine type to create | `string` | `"c3-standard-4"` | no | | [name](#input\_name) | A unique name for the resource, required by GCE. Changing this forces a new resource to be created. | `string` | n/a | yes | | [network](#input\_network) | The name or self\_link of the network to attach this interface to. | `string` | `"default"` | no | @@ -236,7 +236,7 @@ No modules. | [preemptible](#input\_preemptible) | Specifies if the instance is preemptible. If this field is set to true, then automatic\_restart must be set to false. | `bool` | `false` | no | | [project](#input\_project) | The ID of the project in which the resource resides. | `string` | `""` | no | | [provisioning\_model](#input\_provisioning\_model) | Describe the type of preemptible VM. This field accepts the value STANDARD or SPOT | `string` | `"STANDARD"` | no | -| [service\_account](#input\_service\_account) | Service account and scopes that will be associated with the GCE instance. |
object({
service_email = optional(string, null)
scopes = optional(set(string), [])
})
| `{}` | no | +| [service\_account](#input\_service\_account) | Service account and scopes that will be associated with the GCE instance. |
object({
service_email = optional(string, null)
scopes = optional(set(string), [])
})
| `{}` | no | | [stack\_type](#input\_stack\_type) | he stack type for this network interface to identify whether the IPv6 feature is enabled or not. | `string` | `"IPV4_ONLY"` | no | | [subnetwork](#input\_subnetwork) | The name or self\_link of the subnetwork to attach this interface to. Either network or subnetwork must be provided. | `string` | `null` | no | | [subnetwork\_project](#input\_subnetwork\_project) | The project in which the subnetwork belongs. If the subnetwork is a name and this field is not provided, the provider project is used. | `string` | `null` | no |