Skip to content

Windows Integration Tests #235

Windows Integration Tests

Windows Integration Tests #235

# Workflow intended to run containerd integration tests on Windows.
name: Windows Integration Tests
on:
workflow_dispatch:
workflow_call:
secrets:
AZURE_SUB_ID:
required: true
AZURE_CREDS:
required: true
GCP_SERVICE_ACCOUNT:
required: true
GCP_WORKLOAD_IDENTITY_PROVIDER:
required: true
env:
AZURE_DEFAULT_LOCATION: westeurope
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUB_ID }}
AZURE_DEFAULT_VM_SIZE: Standard_D2s_v3
PASSWORD: Passw0rdAdmin # temp for testing, will be generated
DEFAULT_ADMIN_USERNAME: azureuser
SSH_OPTS: "-o ServerAliveInterval=20 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
REMOTE_VM_BIN_PATH: "c:\\containerd\\bin"
BUSYBOX_TESTING_IMAGE_REF: "registry.k8s.io/e2e-test-images/busybox:1.29-2"
RESOURCE_CONSUMER_TESTING_IMAGE_REF: "registry.k8s.io/e2e-test-images/resource-consumer:1.10"
WEBSERVER_TESTING_IMAGE_REF: "registry.k8s.io/e2e-test-images/nginx:1.14-2"
permissions: # added using https://github.com/step-security/secure-workflows
contents: read
jobs:
winIntegration:
# NOTE: the following permissions are required by `google-github-actions/auth`:
permissions:
contents: 'read'
id-token: 'write'
strategy:
# NOTE(aznashwan): this will permit all other jobs from the matrix to finish and
# upload their results even if one has a failing non-test-task:
# (e.g. hitting resource limits in the `AZTestVMCreate` task)
fail-fast: false
matrix:
win_ver: [ltsc2019, ltsc2022]
include:
- win_ver: ltsc2019
AZURE_IMG: "MicrosoftWindowsServer:WindowsServer:2019-Datacenter:17763.4377.230505"
AZURE_RESOURCE_GROUP: ctrd-integration-ltsc2019-${{ github.run_id }}
GOOGLE_BUCKET: "containerd-integration/logs/windows-ltsc2019/"
- win_ver: ltsc2022
AZURE_IMG: "MicrosoftWindowsServer:WindowsServer:2022-datacenter-smalldisk-g2:20348.1607.230310"
AZURE_RESOURCE_GROUP: ctrd-integration-ltsc2022-${{ github.run_id }}
GOOGLE_BUCKET: "containerd-integration/logs/windows-ltsc2022/"
runs-on: ubuntu-latest
timeout-minutes: 90
steps:
- uses: actions/checkout@v3
- name: Install required packages
run: |
sudo apt-get install xmlstarlet -y
- name: PrepareArtifacts
run: |
STARTED_TIME=$(date +%s)
LOGS_DIR=$HOME/$STARTED_TIME
echo "STARTED_TIME=$STARTED_TIME" >> $GITHUB_ENV
echo "LOGS_DIR=$LOGS_DIR" >> $GITHUB_ENV
echo "VM_INTEGRATION_LOGFILE=/c/Logs/integration.log" >> $GITHUB_ENV
echo "VM_CRI_INTEGRATION_LOGFILE=/c/Logs/cri-integration.log" >> $GITHUB_ENV
mkdir -p $LOGS_DIR/artifacts
jq -n --arg node temp --arg timestamp $STARTED_TIME '$timestamp|tonumber|{timestamp:.,$node}' > $LOGS_DIR/started.json
- name: Generate ssh key pair
run: |
mkdir -p $HOME/.ssh/
ssh-keygen -t rsa -b 4096 -C "ci@containerd.com" -f $HOME/.ssh/id_rsa -q -N ""
echo "SSH_PUB_KEY=$(cat ~/.ssh/id_rsa.pub)" >> $GITHUB_ENV
- name: AZLogin
uses: azure/login@v1
with:
creds: ${{ secrets.AZURE_CREDS }}
- name: AZResourceGroupCreate
uses: azure/CLI@v1
with:
inlinescript: |
az group create -n ${{ matrix.AZURE_RESOURCE_GROUP }} -l ${{ env.AZURE_DEFAULT_LOCATION }} --tags creationTimestamp=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
- name: AZTestVMCreate
uses: azure/CLI@v1
with:
inlinescript: |
DETAILS=$(az vm create -n winTestVM --admin-username ${{ env.DEFAULT_ADMIN_USERNAME }} --admin-password ${{ env.PASSWORD }} --image ${{ matrix.AZURE_IMG }} -g ${{ matrix.AZURE_RESOURCE_GROUP }} --nsg-rule SSH --size ${{ env.AZURE_DEFAULT_VM_SIZE }} --public-ip-sku Standard -o json)
PUB_IP=$(echo $DETAILS | jq -r .publicIpAddress)
if [ "$PUB_IP" == "null" ]
then
RETRY=0
while [ "$PUB_IP" == "null" ] || [ $RETRY -le 5 ]
do
sleep 5
PUB_IP=$(az vm show -d -g ${{ matrix.AZURE_RESOURCE_GROUP }} -n winTestVM -o json --query publicIps | jq -r)
RETRY=$(( $RETRY + 1 ))
done
fi
if [ "$PUB_IP" == "null" ]
then
echo "failed to fetch public IP"
exit 1
fi
echo "VM_PUB_IP=$PUB_IP" >> $GITHUB_ENV
- name: EnableAZVMSSH
uses: azure/CLI@v1
with:
inlinescript: |
az vm run-command invoke --command-id RunPowerShellScript -n winTestVM -g ${{ matrix.AZURE_RESOURCE_GROUP }} --scripts @$GITHUB_WORKSPACE/script/setup/enable_ssh_windows.ps1 --parameters 'SSHPublicKey=${{ env.SSH_PUB_KEY }}'
- name: TestSSHConnection
run: |
if ! ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "hostname";
then
exit 1
fi
- name: InstallContainerFeature
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Install-WindowsFeature -Name 'Containers' -Restart }"
- name: WaitForVMToRestart
timeout-minutes: 5
run: |
# give the vm 30 seconds to actually stop. SSH server might actually respond while server is shutting down.
sleep 30
while [ ! $( ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "hostname") ];
do
echo "Unable to connect to azurevm"
done
echo "Connection reestablished. VM restarted succesfully."
- name: CreateNatNetwork
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { curl.exe -L 'https://raw.githubusercontent.com/microsoft/SDN/master/Kubernetes/windows/hns.psm1' -o hns.psm1 }"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Import-Module .\hns.psm1 ; New-HnsNetwork -Type NAT -Name nat -AddressPrefix 172.19.208.0/20 -Gateway 172.19.208.1 }"
- name: PrepareTestingEnv
run: |
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} $GITHUB_WORKSPACE/script/setup/prepare_env_windows.ps1 azureuser@${{ env.VM_PUB_IP }}:/prepare_env_windows.ps1
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "c:\\prepare_env_windows.ps1"
- name: MakeContainerDBins
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "git clone http://github.com/containerd/containerd c:\\containerd "
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "cd c:\containerd ; make binaries"
- name: RunIntegrationTests
id: RunIntegrationTests
# NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
# the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
continue-on-error: true
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" << EOF
cd /c/containerd
export EXTRA_TESTFLAGS="-timeout=20m"
set -o pipefail
make integration | tee ${{ env.VM_INTEGRATION_LOGFILE }}
EOF
echo 'SUCCEEDED=1' >> $GITHUB_OUTPUT
- name: PrepareRepoList
run: |
cat > repolist.toml << EOF
busybox = "${{ env.BUSYBOX_TESTING_IMAGE_REF }}"
ResourceConsumer = "${{ env.RESOURCE_CONSUMER_TESTING_IMAGE_REF }}"
EOF
cat > cri-test-images.yaml << EOF
defaultTestContainerImage: ${{ env.BUSYBOX_TESTING_IMAGE_REF }}
webServerTestImage: ${{ env.WEBSERVER_TESTING_IMAGE_REF }}
EOF
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} repolist.toml azureuser@${{ env.VM_PUB_IP }}:c:/repolist.toml
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} cri-test-images.yaml azureuser@${{ env.VM_PUB_IP }}:c:/cri-test-images.yaml
- name: RunCRIIntegrationTests
id: RunCRIIntegrationTests
# NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
# the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
continue-on-error: true
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" <<EOF
cd c:/containerd
./script/setup/install-cni-windows
ls '/c/Program Files/containerd/cni/conf/'
cat '/c/Program Files/containerd/cni/conf/'*
export TEST_IMAGE_LIST=c:/repolist.toml
set -o pipefail
make cri-integration | tee ${{ env.VM_CRI_INTEGRATION_LOGFILE }}
EOF
echo 'SUCCEEDED=1' >> $GITHUB_OUTPUT
- name: GetCritestRepo
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "git clone https://github.com/kubernetes-sigs/cri-tools c:/cri-tools"
- name: BuildCritest
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'cd /c/cri-tools && make critest'"
- name: RunCritest
id: RunCritest
# NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
# the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
continue-on-error: true
run: |
# This test is exceedingly flaky only on ws2022 so skip for now to keep CI happy.
# Info: https://github.com/containerd/containerd/issues/6652
SKIP=""
if [ '${{ matrix.win_ver }}' == 'ltsc2022' ];then
SKIP='-ginkgo.skip="runtime should support exec with tty=true and stdin=true"'
fi
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { C:\containerd\bin\containerd.exe --log-level=debug --log-file=C:/logs/containerd.log --service-name containerd --register-service ; Set-Service containerd -StartupType Automatic; Start-Service containerd }"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" <<EOF
sleep 5
set -o pipefail
C:/cri-tools/build/bin/windows/amd64/critest.exe $SKIP --runtime-endpoint='npipe://./pipe/containerd-containerd' --test-images-file='c:/cri-test-images.yaml' --report-dir='c:/Logs' -ginkgo.junit-report="C:\Logs\junit_critest.xml" | tee c:/Logs/critest.log
EOF
echo 'SUCCEEDED=1' >> $GITHUB_OUTPUT
- name: PullLogsFromWinNode
run: |
# Generate JUnit reports from the stdouts of the tests:
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'touch ${{ env.VM_INTEGRATION_LOGFILE }}; cat ${{ env.VM_INTEGRATION_LOGFILE }} | go-junit-report.exe > /c/Logs/junit_integration.xml'"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'touch ${{ env.VM_CRI_INTEGRATION_LOGFILE }}; cat ${{ env.VM_CRI_INTEGRATION_LOGFILE }} | go-junit-report.exe > /c/Logs/junit_cri_integration.xml'"
# Copy over all the JUnit reports:
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }}:c:/Logs/*.xml ${{ env.LOGS_DIR }}/artifacts/
for f in $(ls ${{ env.LOGS_DIR }}/artifacts/*.xml); do
xmlstarlet ed -d "/testsuites/testsuite/properties" $f > ${{ env.LOGS_DIR }}/$(basename $f)
mv ${{ env.LOGS_DIR }}/$(basename $f) $f
done
- name: FinishJob
run: |
jq -n --arg result SUCCESS --arg timestamp $(date +%s) '$timestamp|tonumber|{timestamp:.,$result}' > ${{ env.LOGS_DIR }}/finished.json
echo "${{ env.STARTED_TIME }}" > ${{ github.workspace }}/latest-build.txt
- name: AssignGcpCreds
id: AssignGcpCreds
run: |
echo 'GCP_SERVICE_ACCOUNT=${{ secrets.GCP_SERVICE_ACCOUNT }}' >> $GITHUB_OUTPUT
echo 'GCP_WORKLOAD_IDENTITY_PROVIDER=${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}' >> $GITHUB_OUTPUT
- name: AuthGcp
uses: google-github-actions/auth@v0
if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
with:
service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}
workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
- name: UploadJobReport
uses: google-github-actions/upload-cloud-storage@v0.10.4
if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
with:
path: ${{ github.workspace }}/latest-build.txt
destination: ${{ matrix.GOOGLE_BUCKET }}
parent: false
- name: UploadLogsDir
uses: google-github-actions/upload-cloud-storage@v0.10.4
if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
with:
path: ${{ env.LOGS_DIR }}
destination: ${{ matrix.GOOGLE_BUCKET }}${{ env.STARTED_TIME}}
parent: false
- name: Check all CI stages succeeded
uses: actions/github-script@v6
with:
script: |
const stepResults = {
RunIntegrationTests: "${{ steps.RunIntegrationTests.outputs.SUCCEEDED }}",
RunCRIIntegrationTests: "${{ steps.RunCRIIntegrationTests.outputs.SUCCEEDED }}",
RunCritest: "${{ steps.RunCritest.outputs.SUCCEEDED }}",
};
let failedTasks = [];
for( [step, result] of Object.entries(stepResults) ) {
if (result != "1") {
failedTasks.push(step);
}
};
if (failedTasks.length != 0) {
core.setFailed(`One or more CI stages have failed. Please review the outputs of the following stepts: ${failedTasks}.`);
};
- name: ResourceCleanup
if: always()
uses: azure/CLI@v1
with:
inlinescript: |
az group delete -g ${{ matrix.AZURE_RESOURCE_GROUP }} --yes