Tag Usage Analysis in CIFMW
===========================

In [1]:
import pandas as pd
import json

pd.set_option("display.max_rows", None, "display.max_columns", None, "display.max_colwidth", None)

with open('tags_data.json', 'r') as f:
    data = json.load(f)

flattened_data = []
for file, tags_dict in data.items():
    for tag, line_numbers in tags_dict.items():
        for line_num in line_numbers:
            flattened_data.append({'file': file, 'tag': tag, 'line_number': line_num})

df = pd.DataFrame(flattened_data)

## Display the DataFrame

In [3]:
df

Unnamed: 0,file,tag,line_number
0,ci/playbooks/edpm_build_images/edpm_image_builder.yml,edpm_build_img,23
1,ci/playbooks/edpm_build_images/edpm_image_builder.yml,edpm_build_img,30
2,ci/playbooks/edpm_build_images/edpm_image_builder.yml,edpm_build_img,36
3,ci/playbooks/architecture/validate-architecture.yml,edpm_deploy,199
4,ci/playbooks/pre-doc.yml,golang,23
5,ci/playbooks/kuttl/deploy-deps.yaml,infra,40
6,ci/playbooks/kuttl/deploy-deps.yaml,infra,54
7,ci/playbooks/tcib/tcib.yml,container_img_build,17
8,ci/playbooks/tcib/tcib.yml,container_img_build,30
9,ci/playbooks/tcib/tcib.yml,container_img_build,51


## Count the total number of unique tags

In [3]:
unique_tags_count = df['tag'].nunique()
print(f"Total number of unique tags: {unique_tags_count}")

Total number of unique tags: 58


## List all unique tags

In [4]:
unique_tags_list = df['tag'].unique().tolist()
print("List of unique tags:")
unique_tags_list

List of unique tags:


['edpm_build_img',
 'edpm_deploy',
 'golang',
 'infra',
 'container_img_build',
 'build_openstack_packages',
 'packages',
 'build-packages',
 'build-containers',
 'build-operators',
 'edpm',
 'always',
 'tests',
 'bootstrap_libvirt',
 'edpm_bootstrap',
 'storage',
 'operator',
 'update_containers',
 'set_openstack_containers',
 'openstack_ca',
 'edpm_post',
 'nft',
 'bootstrap',
 'keypair',
 'admin',
 'block',
 'spec',
 'cephadm',
 'client',
 'update',
 'ceph_update',
 'admin-setup',
 'run-tests',
 'compliance',
 'pre-end',
 'logs',
 'deepscrub',
 'never',
 'k8s',
 'control-plane',
 'deploy_architecture_stage_',
 'cephadm_mon_dump',
 'cephadm_extract_keys',
 'cephadm_bootstrap',
 'systemd',
 '{{ overcloud_tags }}',
 'undercloud',
 'devscripts_deploy',
 'cleanup',
 'bootstrap_layout',
 'deploy_architecture',
 'bootstrap_env',
 'libvirt_layout',
 'bootstrap_nat64',
 'crc_layout',
 'devscripts_layout',
 'ocp_layout',
 'bootstrap_repositories']

Note: One tag in the above list is variable "{{ overcloud_tags }}" whose value is set to "overcloud_{{ _stack.stackname }}" in [roles/adoption_osp_deploy/tasks/main.yml#L147](https://github.com/openstack-k8s-operators/ci-framework/blob/main/roles/adoption_osp_deploy/tasks/main.yml#L147)

## Count occurrences of each tag

In [5]:
tag_counts = df['tag'].value_counts()
print("Occurrences of each tag:")
tag_counts

Occurrences of each tag:


tag
bootstrap                     112
always                         80
packages                       38
bootstrap_layout               29
deepscrub                      13
control-plane                  11
bootstrap_env                  10
undercloud                      8
edpm_bootstrap                  8
{{ overcloud_tags }}            7
never                           7
deploy_architecture             6
infra                           5
container_img_build             5
edpm_deploy                     5
bootstrap_libvirt               5
cephadm                         4
k8s                             3
edpm_build_img                  3
update                          3
edpm                            3
tests                           3
logs                            2
ocp_layout                      2
devscripts_layout               2
crc_layout                      2
bootstrap_nat64                 2
libvirt_layout                  2
build_openstack_packages        2
cleanup   

## Find files associated with particular tags

In [6]:
files_by_tag = df.groupby('tag')['file'].apply(lambda x: list(set(x))).reset_index()
print("Files associated with particular tags:")
files_by_tag

Files associated with particular tags:


Unnamed: 0,tag,file
0,admin,"[playbooks/ceph.yml, roles/cifmw_ceph/tasks/main.yml]"
1,admin-setup,[post-deployment.yml]
2,always,"[roles/cifmw_cephadm/tasks/pre.yml, roles/edpm_deploy/tasks/main.yml, roles/openshift_login/tasks/main.yml, roles/ci_setup/tasks/directories.yml, roles/reproducer/tasks/main.yml, roles/ci_setup/tasks/load_vars.yml, roles/openshift_setup/tasks/main.yml, roles/devscripts/tasks/110_check_ocp.yml, roles/cifmw_setup/tasks/bootstrap.yml, roles/reproducer/tasks/reuse_main.yaml, roles/ci_setup/tasks/main.yml, roles/artifacts/tasks/cleanup.yml, hooks/playbooks/kustomize_cr.yml, roles/devscripts/tasks/main.yml, roles/reproducer/tasks/ci_job.yml, playbooks/02-infra.yml, roles/operator_build/tasks/main.yml, roles/rhol_crc/tasks/main.yml, roles/devscripts/tasks/100_pre.yml, playbooks/01-bootstrap.yml, roles/update/tasks/main.yml, roles/devscripts/tasks/112_verify_golden_image.yml, roles/reproducer/tasks/configure_cleanup.yaml, roles/artifacts/tasks/environment.yml, roles/edpm_deploy_baremetal/tasks/main.yml, roles/edpm_prepare/tasks/main.yml, roles/reproducer/tasks/configure_post_deployment.yml, playbooks/nfs.yml, roles/libvirt_manager/tasks/main.yml, roles/devscripts/tasks/111_verify_cluster.yml, roles/reproducer/tasks/configure_architecture.yml, roles/devscripts/tasks/300_post.yml, roles/test_deps/tasks/main.yml, roles/cifmw_nfs/tasks/main.yml, roles/install_yamls/tasks/main.yml, roles/pkg_build/tasks/main.yml, roles/cifmw_setup/tasks/infra.yml, playbooks/06-deploy-architecture.yml, roles/artifacts/tasks/main.yml]"
3,block,"[playbooks/ceph.yml, roles/cifmw_ceph/tasks/main.yml]"
4,bootstrap,"[roles/rhol_crc/tasks/dependencies.yml, roles/edpm_build_images/tasks/add_cert.yml, roles/artifacts/tasks/packages.yml, roles/reproducer/tasks/main.yml, roles/devscripts/tasks/130_prep_host.yml, roles/copy_container/tasks/main.yml, roles/openshift_provisioner_node/tasks/install_packages.yml, roles/edpm_build_images/tasks/install.yml, roles/libvirt_manager/tasks/deploy_layout.yml, roles/cifmw_setup/tasks/bootstrap.yml, roles/reproducer/tasks/reuse_main.yaml, roles/repo_setup/tasks/install.yml, roles/registry_deploy/tasks/main.yml, roles/devscripts/tasks/main.yml, roles/reproducer/tasks/ci_job.yml, roles/openshift_provisioner_node/tasks/add_bridges.yml, roles/devscripts/tasks/100_pre.yml, roles/ci_setup/tasks/repos.yml, playbooks/01-bootstrap.yml, roles/reproducer/tasks/configure_controller.yml, roles/ci_setup/tasks/packages.yml, roles/polarion/tasks/main.yml, roles/libvirt_manager/tasks/main.yml, roles/openshift_provisioner_node/tasks/main.yml, roles/test_deps/tasks/main.yml, roles/reproducer/tasks/libvirt_layout.yml, roles/install_yamls/tasks/main.yml, roles/libvirt_manager/tasks/packages.yml, roles/build_openstack_packages/tasks/install_dlrn.yml, roles/openshift_provisioner_node/tasks/add_user.yml, roles/openshift_provisioner_node/tasks/add_virtual_network.yml]"
5,bootstrap_env,"[roles/reproducer/tasks/reuse_main.yaml, roles/reproducer/tasks/main.yml, roles/reproducer/tasks/libvirt_layout.yml, roles/reproducer/tasks/configure_controller.yml]"
6,bootstrap_layout,"[roles/reproducer/tasks/reuse_main.yaml, roles/reproducer/tasks/main.yml, roles/reproducer/tasks/libvirt_layout.yml, roles/reproducer/tasks/configure_controller.yml, roles/libvirt_manager/tasks/deploy_layout.yml, roles/libvirt_manager/tasks/main.yml]"
7,bootstrap_libvirt,"[roles/libvirt_manager/tasks/main.yml, playbooks/adoption/infra.yml]"
8,bootstrap_nat64,[roles/reproducer/tasks/main.yml]
9,bootstrap_repositories,[roles/reproducer/tasks/main.yml]


## Get the total number of tag applications

In [7]:
total_tag_applications = len(df)
print(f"Total number of tag applications: {total_tag_applications}")

Total number of tag applications: 425


## List all tags used in each file

In [8]:
tags_by_file = df.groupby('file')['tag'].apply(lambda x: list(set(x))).reset_index()
print("Tags used in each file:")
tags_by_file

Tags used in each file:


Unnamed: 0,file,tag
0,ci/playbooks/architecture/validate-architecture.yml,[edpm_deploy]
1,ci/playbooks/edpm_build_images/edpm_image_builder.yml,[edpm_build_img]
2,ci/playbooks/kuttl/deploy-deps.yaml,[infra]
3,ci/playbooks/pre-doc.yml,[golang]
4,ci/playbooks/tcib/tcib.yml,"[build_openstack_packages, container_img_build]"
5,deploy-edpm-reuse.yaml,[packages]
6,deploy-edpm.yml,"[build-operators, infra, build-packages, edpm, build-containers]"
7,hooks/playbooks/kustomize_cr.yml,[always]
8,playbooks/01-bootstrap.yml,"[always, packages, bootstrap]"
9,playbooks/02-infra.yml,[always]


## Check for presence of a tag in a file
Example: Check if 'ci/playbooks/pre-doc.yml' uses the 'golang' tag

In [14]:
file_to_check = 'playbooks/06-deploy-architecture.yml'
tag_to_check = 'always'
is_tag_present = ((df['file'] == file_to_check) & (df['tag'] == tag_to_check)).any()
print(f"Is '{tag_to_check}' tag present in '{file_to_check}'?\n{is_tag_present}")

Is 'always' tag present in 'playbooks/06-deploy-architecture.yml'?
True
