In [1]:
from dotenv import load_dotenv
from flatten_dict import flatten
from functools import cached_property
from kubernetes import client
from kubernetes.dynamic.resource import ResourceField, ResourceInstance
from openshift.dynamic import DynamicClient
from openshift.helper.userpassauth import OCPLoginConfiguration
from pathlib import Path
from urllib.parse import urljoin, urlparse
from uuid import UUID
import json
import os
import pandas as pd
import re
import requests
import urllib3

In [2]:
# python-dotenv: load variables from a .env file (if one is found) into the
# process environment, so the lookups below can be satisfied from that file.
load_dotenv() 

# disable pesky urllib3 ssl warnings.
# (certificate verification is switched off further down in this cell)
urllib3.disable_warnings()

# Required settings: a missing variable raises KeyError right here.
# OCP_* are used below to log in to the OpenShift API.
OCP_APIHOST = os.environ["OCP_APIHOST"]
OCP_USERNAME = os.environ["OCP_USERNAME"]
OCP_PASSWORD = os.environ["OCP_PASSWORD"]
# QPC_* identify the Quipucords server, its credentials and the report that
# later cells download.
QPC_BASE_URL = os.environ["QPC_BASE_URL"]
QPC_USERNAME = os.environ["QPC_USERNAME"]
QPC_PASSWORD = os.environ["QPC_PASSWORD"]
QPC_REPORT_ID = os.environ["QPC_REPORT_ID"]
# Directory that later cells use for cached API responses and exported tables.
OUTPUT_DIR = os.environ["OUTPUT_DIR"]

# Username/password based login configuration for the cluster.
kubeConfig = OCPLoginConfiguration(ocp_username=OCP_USERNAME, ocp_password=OCP_PASSWORD)
kubeConfig.host = OCP_APIHOST
# NOTE: certificate verification is disabled for the cluster connection.
kubeConfig.verify_ssl = False

# Presumably exchanges the credentials for an API token that is stored on the
# configuration object; host and verify_ssl are set beforehand on purpose.
# TODO confirm against the openshift helper documentation.
kubeConfig.get_token()

k8s_client = client.ApiClient(kubeConfig)

# Dynamic client that later cells use to look resources up by apiVersion/kind.
dyn_client = DynamicClient(k8s_client)

In [3]:
class JsonEncoder(json.JSONEncoder):
    """JSON encoder that also accepts kubernetes dynamic-client objects."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        A ``ResourceInstance`` is replaced by its ``items`` attribute and a
        ``ResourceField`` by its attribute dictionary; any other object is
        handed to the base encoder.
        """
        if isinstance(obj, ResourceInstance):
            replacement = obj.items
        elif isinstance(obj, ResourceField):
            replacement = obj.__dict__
        else:
            replacement = super().default(obj)
        return replacement

In [4]:
class BaseUrlClient(requests.Session):
    """A ``requests.Session`` that resolves every request url against a base url."""

    def __init__(self, *, base_url=None, auth=None, verify=False, **kwargs):
        """
        Create the session.

        base_url: base that request urls are joined to (see ``request``)
        auth: Auth class (as specified on requests documentation)
        verify: SSL verify (default set to False)
        """
        super().__init__(**kwargs)
        # These override the defaults the parent initializer has just set.
        self.base_url = base_url
        self.auth = auth
        self.verify = verify

    def request(self, method, url, *args, **kwargs):
        """Join ``url`` onto ``base_url`` with ``urljoin`` and send the request."""
        return super().request(method, urljoin(self.base_url, url), *args, **kwargs)


class QPCAuth(requests.auth.AuthBase):
    """Token authentication for a Quipucords server."""

    def __init__(self, *, username, password):
        """Store the credentials; the token itself is requested on first use."""
        self._username = username
        self._password = password
        # Separate session used only to call the token endpoint.
        self._qpc_client = BaseUrlClient(base_url=None)

    @cached_property
    def auth_token(self):
        """QPC auth token, requested once from ``api/v1/token/`` and then cached."""
        credentials = {"username": self._username, "password": self._password}
        token_response = self._qpc_client.post("api/v1/token/", credentials)
        assert token_response.ok, token_response.text
        return token_response.json()["token"]

    def __call__(self, r: requests.PreparedRequest):
        """Set the ``Authorization`` header of ``r`` to the QPC token and return it."""
        if not self._qpc_client.base_url:
            # Reuse scheme and host of the outgoing request for the token call.
            parsed_url = urlparse(r.url)
            self._qpc_client.base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
        r.headers["Authorization"] = f"Token {self.auth_token}"
        return r


In [5]:
# Cached API responses and every exported table are written below this directory.
outdir = Path(OUTPUT_DIR)
outdir.mkdir(parents=True, exist_ok=True)

def get_data(api_version, kind, file_name, output_dir=None):
    """
    Return the resources of ``kind`` as plain JSON data, using a file as cache.

    On the first call the resources are fetched through ``dyn_client`` and
    written to ``<output_dir>/<file_name>.json``; afterwards the file is read
    and the cluster is not contacted again.

    api_version: apiVersion of the resource (e.g. "v1")
    kind: kind of the resource (e.g. "Node")
    file_name: name of the cache file, without the ".json" extension
    output_dir: directory holding the cache file (defaults to ``outdir``)

    Returns whatever ``json.loads`` produces for the cached document.
    """
    cache_dir = outdir if output_dir is None else Path(output_dir)
    file_json = cache_dir / f"{file_name}.json"
    if not file_json.exists():
        resources = dyn_client.resources.get(api_version=api_version, kind=kind).get()
        file_json.write_text(json.dumps(resources, cls=JsonEncoder))

    # read_text() opens and closes the file itself, so no handle is left open.
    return json.loads(file_json.read_text())

In [6]:
# Node resources of the cluster (read from the JSON cache once it exists).
nodes_info = get_data(api_version="v1", kind="Node", file_name="nodes")

In [7]:
# One flat dict per node: nested keys become tuples, list positions included.
flattened_nodes = [
    flatten(node, enumerate_types=(list,))
    for node in nodes_info
]

In [8]:
# Kubernetes binary quantity: digits followed by a power-of-two suffix ("16Gi").
# Raw string, so that "\d" is not treated as a (deprecated) string escape.
binary_unit = re.compile(r"^\d+([KMGTPE]i)$")

# Exponent of 1024 that each binary suffix stands for.
_BINARY_POWERS = {"Ki": 1, "Mi": 2, "Gi": 3, "Ti": 4, "Pi": 5, "Ei": 6}


def convert_to_bytes(res):
    """
    Convert a binary quantity string such as "16Gi" to a number of bytes.

    res: any value; only strings made of digits plus one of the suffixes
         Ki, Mi, Gi, Ti, Pi or Ei are converted

    Returns the amount as ``int``, or ``res`` unchanged when it is not a
    string or does not look like a binary quantity.
    """
    if not isinstance(res, str):
        # Numbers, None, dicts, ... cannot be matched against the pattern.
        return res
    m = binary_unit.match(res)
    if m is None:
        return res
    # Everything in front of the suffix is the numeric part.
    return int(res[: m.start(1)]) * 1024 ** _BINARY_POWERS[m.group(1)]

def convert_uuid(val):
    """
    Turn a UUID string into a ``UUID`` object so that differently formatted
    spellings of the same UUID compare equal.

    val: any value

    Returns ``UUID(val)`` when ``val`` is a string that ``UUID`` accepts,
    otherwise ``val`` unchanged.
    """
    if not isinstance(val, str):
        return val
    try:
        return UUID(val)
    except ValueError:
        # Not a UUID: leave the string alone.
        return val

# One row per (node, flattened key): the key path is joined with "->".
ocp_data = [
    {"hostname": node[("metadata", "name")], "fact_value": v, "ocp_key": "->".join(map(str, k))}
    for node in flattened_nodes
    for k, v in node.items()
]
df_ocp = pd.DataFrame(ocp_data)
# Keep only the part of the node name in front of the first dot.
df_ocp["hostname"] = df_ocp["hostname"].apply(lambda x: x.split(".")[0])
# Normalise the values. The result is assigned back to the column instead of
# calling an inplace method on ``df_ocp.fact_value``: such a chained inplace
# call does not update the frame when pandas Copy-on-Write is active.
df_ocp["fact_value"] = (
    df_ocp["fact_value"]
    .apply(convert_to_bytes)
    .apply(convert_uuid)
    .replace("amd64", "x86_64")
)
df_ocp.head()

Unnamed: 0,hostname,fact_value,ocp_key
0,master-0,master-0.bciconelocp411.lab.upshift.rdu2.redha...,metadata->name
1,master-0,0d300d31-82b5-4dc7-8ba1-754173be3e1f,metadata->uid
2,master-0,468950,metadata->resourceVersion
3,master-0,2022-12-11T16:55:58Z,metadata->creationTimestamp
4,master-0,x86_64,metadata->labels->beta.kubernetes.io/arch


In [9]:
# Session for the Quipucords API, authenticated through QPCAuth.
qpc_client = BaseUrlClient(
    base_url=QPC_BASE_URL,
    auth=QPCAuth(username=QPC_USERNAME, password=QPC_PASSWORD),
)
# Details of the report selected through QPC_REPORT_ID.
qpc_response = qpc_client.get(f"api/v1/reports/{QPC_REPORT_ID}/details")

In [10]:
# Flatten the facts of every entry of every source in the report.
flattened_hosts = [
    flatten(host_facts, enumerate_types=(list,))
    for scan_source in qpc_response.json()["sources"]
    for host_facts in scan_source["facts"]
]

# One row per (host, flattened key); facts without a value are left out.
net_data = [
    {
        "hostname": flat_host[("connection_host",)],
        "fact_value": value,
        "net_key": "->".join(str(part) for part in key),
    }
    for flat_host in flattened_hosts
    for key, value in flat_host.items()
    if value is not None
]
df_net = pd.DataFrame(net_data)
# Keep only the part of the host name in front of the first dot.
df_net["hostname"] = df_net["hostname"].apply(lambda name: name.split(".")[0])
df_net["fact_value"] = df_net["fact_value"].apply(convert_uuid)
df_net.head()

Unnamed: 0,hostname,fact_value,net_key
0,master-0,10.0.94.160,ifconfig_ip_addresses->0
1,master-0,10.128.0.1,ifconfig_ip_addresses->1
2,master-0,4.11,ansible_distribution_version
3,master-0,4,ansible_distribution_major_version
4,master-0,/etc/redhat-release,ansible_distribution_file_path


In [11]:
def lower(x):
    """
    Return a normalised, lower-case string form of ``x`` for comparisons.

    Values that ``round`` accepts are rounded to an integer first; everything
    else is used as is. The result is ``str(...)`` of that value, lower-cased.

    NOTE(review): bools are numbers for ``round``, so ``True`` becomes "1"
    rather than "true" -- confirm that this is intended.
    """
    try:
        x = round(x)
    except (TypeError, ValueError, OverflowError):
        # TypeError: value is not a number (str, None, UUID, ...).
        # ValueError / OverflowError: NaN and infinity cannot be rounded.
        pass
    return str(x).lower()

# make outputs more comparable
# (normalised copies; df_ocp and df_net themselves are left untouched)
df_ocp_comparable = df_ocp.assign(fact_value=df_ocp.fact_value.apply(lower))
df_net_comparable = df_net.assign(fact_value=df_net.fact_value.apply(lower))

# Pair every network fact with every OCP fact of the same host and same value.
df_merged = pd.merge(df_net_comparable, df_ocp_comparable, on=["fact_value", "hostname"])

In [12]:
# Write the three tables as CSV files, without the index column.
for frame, csv_name in (
    (df_net, "network.csv"),
    (df_ocp, "ocp.csv"),
    (df_merged, "merged.csv"),
):
    frame.to_csv(outdir / csv_name, index=None)

In [13]:
def non_noise_value(x):
    """Return False when ``x`` is one of the noise values, True otherwise."""
    noise_values = ("0", "", "true", "false")
    return x not in noise_values

# Drop the matches whose value is a noise value and export the remainder.
is_signal = df_merged.fact_value.apply(non_noise_value)
df_merged_non_noise = df_merged[is_signal]
df_merged_non_noise.to_csv(outdir / "merged_non_noise.csv", index=None)

In [14]:
# Preview of the first matches that survived the noise filter.
df_merged_non_noise.head(n=5)

Unnamed: 0,hostname,fact_value,net_key,ocp_key
0,master-0,10.0.94.160,ifconfig_ip_addresses->0,status->addresses->0->address
1,master-0,10.0.94.160,ansible_ens3->ipv4->address,status->addresses->0->address
2,master-0,10.0.94.160,ansible_default_ipv4->address,status->addresses->0->address
3,master-0,10.0.94.160,ansible_all_ipv4_addresses->1,status->addresses->0->address
4,master-0,4,ansible_distribution_major_version,status->capacity->cpu


In [15]:
# Network facts that carry a system UUID, for every host that reports one.
df_net[df_net.net_key.isin(["subman_virt_uuid", "dmi_system_uuid"])]

Unnamed: 0,hostname,fact_value,net_key
3797,upi-0,7dbf8100-65c7-45b8-8f48-a695c794ff8c,dmi_system_uuid
3844,upi-0,7dbf8100-65c7-45b8-8f48-a695c794ff8c,subman_virt_uuid
7401,worker-0,7dbf8100-65c7-45b8-8f48-a695c794ff8c,subman_virt_uuid
11845,worker-2,7dbf8100-65c7-45b8-8f48-a695c794ff8c,subman_virt_uuid
16811,worker-1,7dbf8100-65c7-45b8-8f48-a695c794ff8c,subman_virt_uuid
22560,master-2,7dbf8100-65c7-45b8-8f48-a695c794ff8c,subman_virt_uuid
29570,master-1,7dbf8100-65c7-45b8-8f48-a695c794ff8c,subman_virt_uuid


In [16]:
# Number of matches per (net_key, ocp_key) pair, ordered by key.
# to_frame(0) pins the name of the count column to 0: without it the column is
# called 0 on pandas < 2 but "count" on pandas >= 2, where the lookups below
# would raise KeyError.
df_key_counts = (
    df_merged_non_noise.loc[:, ["net_key", "ocp_key"]]
    .value_counts()
    .to_frame(0)
    .sort_index()
)
# Key pairs that reach the highest match count seen for any pair.
consistent_match_mask = df_key_counts[0] == df_key_counts[0].max()
df_key_counts[consistent_match_mask]

Unnamed: 0_level_0,Unnamed: 1_level_0,0
net_key,ocp_key,Unnamed: 2_level_1
ansible_architecture,metadata->labels->beta.kubernetes.io/arch,6
ansible_architecture,metadata->labels->kubernetes.io/arch,6
ansible_architecture,status->nodeInfo->architecture,6
ansible_default_ipv4->address,status->addresses->0->address,6
ansible_distribution_major_version,status->capacity->cpu,6
ansible_ens3->ipv4->address,status->addresses->0->address,6
ansible_fqdn,metadata->labels->kubernetes.io/hostname,6
ansible_fqdn,metadata->name,6
ansible_fqdn,status->addresses->1->address,6
ansible_kernel,status->nodeInfo->kernelVersion,6


In [17]:
# Flat table of key pairs with their match count; exported, then previewed.
df_matches = df_key_counts.reset_index().rename(columns={0: "matches"})
df_matches.to_csv(outdir / "matches.csv", index=None)
df_matches.tail()

Unnamed: 0,net_key,ocp_key,matches
55,uname_os,metadata->labels->kubernetes.io/os,6
56,uname_os,status->nodeInfo->operatingSystem,6
57,uname_processor,metadata->labels->beta.kubernetes.io/arch,6
58,uname_processor,metadata->labels->kubernetes.io/arch,6
59,uname_processor,status->nodeInfo->architecture,6


In [18]:
# Markdown versions of the tables; the network table leaves out every fact
# whose key contains "ansible".
for frame, md_name in (
    (df_net[~df_net.net_key.str.contains("ansible")], "network.md"),
    (df_ocp, "ocp.md"),
    (df_merged, "merged.md"),
    (df_merged_non_noise, "merged_non_noise.md"),
):
    frame.to_markdown(outdir / md_name, index=None)
# NOTE(review): unlike the exports above, this one keeps the index column --
# confirm that this is intended.
df_matches.to_markdown(outdir / "matches.md")

In [19]:
# ClusterVersion resources; being the cell's only expression, the result is displayed.
get_data(api_version="config.openshift.io/v1", kind="ClusterVersion", file_name="cluster-version")
# Further resources that can be fetched the same way (currently disabled):
# get_data("config.openshift.io/v1", "Node", "ocp-node")
# get_data("config.openshift.io/v1", "Infrastructure", "infrastructure")
# get_data("machineconfiguration.openshift.io/v1", "MachineConfigPool", "machine_config_pool")

[{'apiVersion': 'config.openshift.io/v1',
  'kind': 'ClusterVersion',
  'metadata': {'creationTimestamp': '2022-12-11T16:22:58Z',
   'generation': 2,
   'managedFields': [{'apiVersion': 'config.openshift.io/v1',
     'fieldsType': 'FieldsV1',
     'fieldsV1': {'f:spec': {'.': {}, 'f:channel': {}, 'f:clusterID': {}}},
     'manager': 'cluster-bootstrap',
     'operation': 'Update',
     'time': '2022-12-11T16:22:58Z'},
    {'apiVersion': 'config.openshift.io/v1',
     'fieldsType': 'FieldsV1',
     'fieldsV1': {'f:status': {'.': {},
       'f:availableUpdates': {},
       'f:capabilities': {'.': {},
        'f:enabledCapabilities': {},
        'f:knownCapabilities': {}},
       'f:conditions': {},
       'f:desired': {'.': {},
        'f:channels': {},
        'f:image': {},
        'f:url': {},
        'f:version': {}},
       'f:history': {},
       'f:observedGeneration': {},
       'f:versionHash': {}}},
     'manager': 'cluster-version-operator',
     'operation': 'Update',
     's