# Windows Integration Tests #235
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow intended to run containerd integration tests on Windows.
name: Windows Integration Tests

# Triggers: a manual dispatch, or a call from another workflow. A calling
# workflow has to pass all four secrets below (each is marked required).
on:
  workflow_dispatch:
  workflow_call:
    secrets:
      AZURE_SUB_ID:
        required: true
      AZURE_CREDS:
        required: true
      GCP_SERVICE_ACCOUNT:
        required: true
      GCP_WORKLOAD_IDENTITY_PROVIDER:
        required: true
# Workflow-level environment, referenced by the steps as `${{ env.NAME }}`.
env:
  # Azure placement and sizing of the test VM.
  AZURE_DEFAULT_LOCATION: westeurope
  AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUB_ID }}
  AZURE_DEFAULT_VM_SIZE: Standard_D2s_v3

  # Credentials and ssh settings used to reach the test VM.
  PASSWORD: Passw0rdAdmin  # temp for testing, will be generated
  DEFAULT_ADMIN_USERNAME: azureuser
  SSH_OPTS: '-o ServerAliveInterval=20 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
  REMOTE_VM_BIN_PATH: 'c:\containerd\bin'

  # Image references written into the test image lists by PrepareRepoList.
  BUSYBOX_TESTING_IMAGE_REF: 'registry.k8s.io/e2e-test-images/busybox:1.29-2'
  RESOURCE_CONSUMER_TESTING_IMAGE_REF: 'registry.k8s.io/e2e-test-images/resource-consumer:1.10'
  WEBSERVER_TESTING_IMAGE_REF: 'registry.k8s.io/e2e-test-images/nginx:1.14-2'
# Workflow-wide token permissions.
# added using https://github.com/step-security/secure-workflows
permissions:
  contents: read
jobs:
  # One job per Windows Server release listed in the matrix below.
  winIntegration:
    runs-on: ubuntu-latest
    timeout-minutes: 90

    # NOTE: the following permissions are required by `google-github-actions/auth`:
    permissions:
      contents: read
      id-token: write

    strategy:
      # NOTE(aznashwan): this will permit all other jobs from the matrix to finish and
      # upload their results even if one has a failing non-test-task:
      # (e.g. hitting resource limits in the `AZTestVMCreate` task)
      fail-fast: false
      matrix:
        win_ver:
          - ltsc2019
          - ltsc2022
        # Per-release VM image, resource group name and log bucket prefix.
        include:
          - win_ver: ltsc2019
            AZURE_IMG: 'MicrosoftWindowsServer:WindowsServer:2019-Datacenter:17763.4377.230505'
            AZURE_RESOURCE_GROUP: ctrd-integration-ltsc2019-${{ github.run_id }}
            GOOGLE_BUCKET: 'containerd-integration/logs/windows-ltsc2019/'
          - win_ver: ltsc2022
            AZURE_IMG: 'MicrosoftWindowsServer:WindowsServer:2022-datacenter-smalldisk-g2:20348.1607.230310'
            AZURE_RESOURCE_GROUP: ctrd-integration-ltsc2022-${{ github.run_id }}
            GOOGLE_BUCKET: 'containerd-integration/logs/windows-ltsc2022/'

    steps:
# Check out the repository; later steps read scripts from
# $GITHUB_WORKSPACE/script/setup/.
- uses: actions/checkout@v3
# Installs xmlstarlet, which PullLogsFromWinNode uses to post-process the
# JUnit reports.
- name: Install required packages
  run: |
    # Refresh the package index first so the install does not fail on a
    # stale index that points at package versions no longer on the mirror.
    sudo apt-get update
    sudo apt-get install xmlstarlet -y
# Creates the per-run logs directory (named after the start timestamp),
# exports the paths later steps need, and writes started.json.
- name: PrepareArtifacts
  run: |
    STARTED_TIME=$(date +%s)
    LOGS_DIR=$HOME/$STARTED_TIME
    # Export everything later steps read through `env.*` in one redirect.
    {
      echo "STARTED_TIME=$STARTED_TIME"
      echo "LOGS_DIR=$LOGS_DIR"
      echo "VM_INTEGRATION_LOGFILE=/c/Logs/integration.log"
      echo "VM_CRI_INTEGRATION_LOGFILE=/c/Logs/cri-integration.log"
    } >> $GITHUB_ENV
    mkdir -p $LOGS_DIR/artifacts
    # started.json holds the start timestamp (as a number) and a node name.
    jq -n --arg node temp --arg timestamp $STARTED_TIME '$timestamp|tonumber|{timestamp:.,$node}' > $LOGS_DIR/started.json
# Generates a passphrase-less RSA key pair for this run and exports the public
# half as SSH_PUB_KEY (consumed by EnableAZVMSSH).
- name: Generate ssh key pair
  run: |
    mkdir -p $HOME/.ssh/
    ssh-keygen -t rsa -b 4096 -C "ci@containerd.com" -f $HOME/.ssh/id_rsa -q -N ""
    echo "SSH_PUB_KEY=$(cat ~/.ssh/id_rsa.pub)" >> $GITHUB_ENV
# Logs in to Azure with the AZURE_CREDS secret, ahead of the azure/CLI steps.
- name: AZLogin
  uses: azure/login@v1
  with:
    creds: ${{ secrets.AZURE_CREDS }}
# Creates the per-run resource group for this matrix entry, tagged with its
# UTC creation time.
- name: AZResourceGroupCreate
  uses: azure/CLI@v1
  with:
    inlinescript: |
      az group create -n ${{ matrix.AZURE_RESOURCE_GROUP }} -l ${{ env.AZURE_DEFAULT_LOCATION }} --tags creationTimestamp=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
# Creates the Windows test VM and exports its public IP as VM_PUB_IP.
# Fails the step if no public IP can be obtained.
- name: AZTestVMCreate
  uses: azure/CLI@v1
  with:
    inlinescript: |
      DETAILS=$(az vm create -n winTestVM --admin-username ${{ env.DEFAULT_ADMIN_USERNAME }} --admin-password ${{ env.PASSWORD }} --image ${{ matrix.AZURE_IMG }} -g ${{ matrix.AZURE_RESOURCE_GROUP }} --nsg-rule SSH --size ${{ env.AZURE_DEFAULT_VM_SIZE }} --public-ip-sku Standard -o json)
      PUB_IP=$(echo "$DETAILS" | jq -r .publicIpAddress)

      # True while no usable address is known: jq prints "null" for a missing
      # field, and an unassigned address may also come back as an empty string.
      ip_missing() { [ -z "$PUB_IP" ] || [ "$PUB_IP" = "null" ]; }

      # Poll for the address a bounded number of times. Both conditions must
      # hold to keep looping: stop as soon as an IP shows up, and give up once
      # the retry budget is spent instead of spinning forever.
      RETRY=0
      while ip_missing && [ "$RETRY" -le 5 ]
      do
        sleep 5
        PUB_IP=$(az vm show -d -g ${{ matrix.AZURE_RESOURCE_GROUP }} -n winTestVM -o json --query publicIps | jq -r .)
        RETRY=$(( RETRY + 1 ))
      done

      if ip_missing
      then
        echo "failed to fetch public IP"
        exit 1
      fi
      echo "VM_PUB_IP=$PUB_IP" >> $GITHUB_ENV
# Runs script/setup/enable_ssh_windows.ps1 on the VM through the Azure
# run-command API, passing the generated public key as SSHPublicKey.
- name: EnableAZVMSSH
  uses: azure/CLI@v1
  with:
    inlinescript: |
      az vm run-command invoke --command-id RunPowerShellScript -n winTestVM -g ${{ matrix.AZURE_RESOURCE_GROUP }} --scripts @$GITHUB_WORKSPACE/script/setup/enable_ssh_windows.ps1 --parameters 'SSHPublicKey=${{ env.SSH_PUB_KEY }}'
# Smoke-tests key-based ssh access by running `hostname` on the VM; the step
# exits with status 1 if the command fails.
- name: TestSSHConnection
  run: |
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "hostname" || exit 1
# Installs the Windows 'Containers' feature with -Restart; the following step
# waits for the VM to come back.
- name: InstallContainerFeature
  run: |
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Install-WindowsFeature -Name 'Containers' -Restart }"
# Blocks until the VM accepts ssh connections again after the restart
# requested by the previous step. Bounded by the 5-minute step timeout.
- name: WaitForVMToRestart
  timeout-minutes: 5
  run: |
    # give the vm 30 seconds to actually stop. SSH server might actually respond while server is shutting down.
    sleep 30
    # Poll on ssh's exit status (not on its captured output), cap each attempt
    # with ConnectTimeout, and pause between attempts so a VM that is still
    # rebooting is not hit with back-to-back connection attempts.
    until ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} -o ConnectTimeout=10 azureuser@${{ env.VM_PUB_IP }} "hostname"
    do
      echo "Unable to connect to azurevm"
      sleep 5
    done
    echo "Connection reestablished. VM restarted succesfully."
# Downloads the hns.psm1 module onto the VM, then uses it to create an HNS
# network of type NAT named "nat".
- name: CreateNatNetwork
  run: |
    # Fetch the module into the ssh session's working directory.
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { curl.exe -L 'https://raw.githubusercontent.com/microsoft/SDN/master/Kubernetes/windows/hns.psm1' -o hns.psm1 }"
    # Import it and create the network.
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Import-Module .\hns.psm1 ; New-HnsNetwork -Type NAT -Name nat -AddressPrefix 172.19.208.0/20 -Gateway 172.19.208.1 }"
# Copies script/setup/prepare_env_windows.ps1 to the VM and runs it there.
- name: PrepareTestingEnv
  run: |
    scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} $GITHUB_WORKSPACE/script/setup/prepare_env_windows.ps1 azureuser@${{ env.VM_PUB_IP }}:/prepare_env_windows.ps1
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "c:\\prepare_env_windows.ps1"
# Clones containerd onto the VM at c:\containerd and builds its binaries.
# NOTE(review): this clones the repository's default branch rather than the
# ref that triggered the workflow; confirm that is intended.
- name: MakeContainerDBins
  run: |
    # Clone over https so the source is not fetched over plaintext http.
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "git clone https://github.com/containerd/containerd c:\\containerd "
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "cd c:\containerd ; make binaries"
# Runs `make integration` on the VM, teeing its output to the integration
# logfile. SUCCEEDED=1 is written as the last command of the script and is
# read by the final check step.
- name: RunIntegrationTests
  id: RunIntegrationTests
  # NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
  # the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
  continue-on-error: true
  run: |
    # The here-document is fed to sh.exe on the VM as its script.
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" << REMOTE_SCRIPT
    cd /c/containerd
    export EXTRA_TESTFLAGS="-timeout=20m"
    set -o pipefail
    make integration | tee ${{ env.VM_INTEGRATION_LOGFILE }}
    REMOTE_SCRIPT
    echo 'SUCCEEDED=1' >> $GITHUB_OUTPUT
# Writes the two test-image list files from the *_TESTING_IMAGE_REF values
# and copies them to c:/ on the VM.
- name: PrepareRepoList
  run: |
    # Image list exported as TEST_IMAGE_LIST by RunCRIIntegrationTests.
    cat > repolist.toml << TOML
    busybox = "${{ env.BUSYBOX_TESTING_IMAGE_REF }}"
    ResourceConsumer = "${{ env.RESOURCE_CONSUMER_TESTING_IMAGE_REF }}"
    TOML
    # Image list passed to critest via --test-images-file.
    cat > cri-test-images.yaml << YAML
    defaultTestContainerImage: ${{ env.BUSYBOX_TESTING_IMAGE_REF }}
    webServerTestImage: ${{ env.WEBSERVER_TESTING_IMAGE_REF }}
    YAML
    scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} repolist.toml azureuser@${{ env.VM_PUB_IP }}:c:/repolist.toml
    scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} cri-test-images.yaml azureuser@${{ env.VM_PUB_IP }}:c:/cri-test-images.yaml
# Installs the CNI setup on the VM, prints the resulting CNI config, then runs
# `make cri-integration`, teeing output to the CRI integration logfile.
# SUCCEEDED=1 is written as the last command of the script and is read by the
# final check step.
- name: RunCRIIntegrationTests
  id: RunCRIIntegrationTests
  # NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
  # the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
  continue-on-error: true
  run: |
    # The here-document is fed to sh.exe on the VM as its script.
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" << REMOTE_SCRIPT
    cd c:/containerd
    ./script/setup/install-cni-windows
    ls '/c/Program Files/containerd/cni/conf/'
    cat '/c/Program Files/containerd/cni/conf/'*
    export TEST_IMAGE_LIST=c:/repolist.toml
    set -o pipefail
    make cri-integration | tee ${{ env.VM_CRI_INTEGRATION_LOGFILE }}
    REMOTE_SCRIPT
    echo 'SUCCEEDED=1' >> $GITHUB_OUTPUT
# Clones kubernetes-sigs/cri-tools onto the VM at c:/cri-tools.
- name: GetCritestRepo
  run: |
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "git clone https://github.com/kubernetes-sigs/cri-tools c:/cri-tools"
# Builds the critest binary inside the cri-tools checkout on the VM.
- name: BuildCritest
  run: |
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'cd /c/cri-tools && make critest'"
# Registers and starts containerd as a Windows service, then runs critest
# against it, writing a JUnit report and a log under c:/Logs. SUCCEEDED=1 is
# written as the last command of the script and is read by the final check step.
- name: RunCritest
  id: RunCritest
  # NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
  # the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
  continue-on-error: true
  run: |
    # This test is exceedingly flaky only on ws2022 so skip for now to keep CI happy.
    # Info: https://github.com/containerd/containerd/issues/6652
    case '${{ matrix.win_ver }}' in
      ltsc2022)
        GINKGO_SKIP='-ginkgo.skip="runtime should support exec with tty=true and stdin=true"'
        ;;
      *)
        GINKGO_SKIP=""
        ;;
    esac
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { C:\containerd\bin\containerd.exe --log-level=debug --log-file=C:/logs/containerd.log --service-name containerd --register-service ; Set-Service containerd -StartupType Automatic; Start-Service containerd }"
    # The here-document is expanded locally (so the skip flag is substituted
    # here) and then fed to sh.exe on the VM as its script.
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" << REMOTE_SCRIPT
    sleep 5
    set -o pipefail
    C:/cri-tools/build/bin/windows/amd64/critest.exe $GINKGO_SKIP --runtime-endpoint='npipe://./pipe/containerd-containerd' --test-images-file='c:/cri-test-images.yaml' --report-dir='c:/Logs' -ginkgo.junit-report="C:\Logs\junit_critest.xml" | tee c:/Logs/critest.log
    REMOTE_SCRIPT
    echo 'SUCCEEDED=1' >> $GITHUB_OUTPUT
# Converts the test logs on the VM into JUnit XML, copies every XML report
# into the local artifacts directory, and strips the <properties> element
# from each testsuite.
- name: PullLogsFromWinNode
  run: |
    # Generate JUnit reports from the stdouts of the tests:
    # (`touch` first so a missing logfile still yields a report file.)
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'touch ${{ env.VM_INTEGRATION_LOGFILE }}; cat ${{ env.VM_INTEGRATION_LOGFILE }} | go-junit-report.exe > /c/Logs/junit_integration.xml'"
    ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'touch ${{ env.VM_CRI_INTEGRATION_LOGFILE }}; cat ${{ env.VM_CRI_INTEGRATION_LOGFILE }} | go-junit-report.exe > /c/Logs/junit_cri_integration.xml'"

    # Copy over all the JUnit reports:
    scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }}:c:/Logs/*.xml ${{ env.LOGS_DIR }}/artifacts/

    # Rewrite each report via a temporary file next to the artifacts
    # directory, then move it back over the original.
    for report in $(ls ${{ env.LOGS_DIR }}/artifacts/*.xml); do
      xmlstarlet ed -d "/testsuites/testsuite/properties" $report > ${{ env.LOGS_DIR }}/$(basename $report)
      mv ${{ env.LOGS_DIR }}/$(basename $report) $report
    done
# Writes finished.json (end timestamp plus overall result) into the logs
# directory and records this run's start timestamp in latest-build.txt.
- name: FinishJob
  run: |
    # The three test steps run with continue-on-error and write SUCCEEDED=1 as
    # the last command of their scripts. Derive the recorded result from those
    # outputs so finished.json does not claim SUCCESS for a run whose tests failed.
    RESULT=SUCCESS
    for succeeded in \
      "${{ steps.RunIntegrationTests.outputs.SUCCEEDED }}" \
      "${{ steps.RunCRIIntegrationTests.outputs.SUCCEEDED }}" \
      "${{ steps.RunCritest.outputs.SUCCEEDED }}"
    do
      if [ "$succeeded" != "1" ]; then
        RESULT=FAILURE
      fi
    done
    jq -n --arg result "$RESULT" --arg timestamp $(date +%s) '$timestamp|tonumber|{timestamp:.,$result}' > ${{ env.LOGS_DIR }}/finished.json
    echo "${{ env.STARTED_TIME }}" > ${{ github.workspace }}/latest-build.txt
# Re-exports the two GCP secrets as step outputs; the `if:` conditions of the
# GCP auth/upload steps test these outputs to decide whether to run.
- name: AssignGcpCreds
  id: AssignGcpCreds
  run: |
    {
      echo 'GCP_SERVICE_ACCOUNT=${{ secrets.GCP_SERVICE_ACCOUNT }}'
      echo 'GCP_WORKLOAD_IDENTITY_PROVIDER=${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}'
    } >> $GITHUB_OUTPUT
# Authenticates to GCP; runs only when both GCP values were provided.
- name: AuthGcp
  if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
  uses: google-github-actions/auth@v0
  with:
    service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}
    workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
# Uploads latest-build.txt to this matrix entry's bucket prefix; runs only
# when both GCP values were provided.
- name: UploadJobReport
  if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
  uses: google-github-actions/upload-cloud-storage@v0.10.4
  with:
    path: ${{ github.workspace }}/latest-build.txt
    destination: ${{ matrix.GOOGLE_BUCKET }}
    parent: false
# Uploads the logs directory to <bucket prefix><start timestamp>; runs only
# when both GCP values were provided.
- name: UploadLogsDir
  if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
  uses: google-github-actions/upload-cloud-storage@v0.10.4
  with:
    path: ${{ env.LOGS_DIR }}
    destination: ${{ matrix.GOOGLE_BUCKET }}${{ env.STARTED_TIME}}
    parent: false
# Fails the job if any of the three continue-on-error test steps did not
# record SUCCEEDED=1 in its outputs.
- name: Check all CI stages succeeded
  uses: actions/github-script@v6
  with:
    script: |
      // Each test step writes SUCCEEDED=1 as the last command of its script,
      // so any value other than "1" means that step did not run to completion.
      const stepResults = {
        RunIntegrationTests: "${{ steps.RunIntegrationTests.outputs.SUCCEEDED }}",
        RunCRIIntegrationTests: "${{ steps.RunCRIIntegrationTests.outputs.SUCCEEDED }}",
        RunCritest: "${{ steps.RunCritest.outputs.SUCCEEDED }}",
      };

      // Declare the loop bindings explicitly so they are block-scoped rather
      // than implicit globals.
      const failedTasks = [];
      for (const [step, result] of Object.entries(stepResults)) {
        if (result !== "1") {
          failedTasks.push(step);
        }
      }

      if (failedTasks.length !== 0) {
        core.setFailed(`One or more CI stages have failed. Please review the outputs of the following stepts: ${failedTasks}.`);
      }
# Deletes this run's resource group; `always()` makes the step run whatever
# the outcome of the earlier steps.
- name: ResourceCleanup
  if: always()
  uses: azure/CLI@v1
  with:
    inlinescript: |
      az group delete -g ${{ matrix.AZURE_RESOURCE_GROUP }} --yes