## Red Hat Product Documentation Ingestion

Using this notebook and its dependencies, you can extract the documentation of specific Red Hat products and ingest it into a Milvus database using Nomic AI embeddings.

### Needed packages

In [None]:
!pip install -q einops==0.7.0 langchain==0.1.12 pypdf==4.0.2 pymilvus==2.3.6 sentence-transformers==2.4.0 beautifulsoup4==4.12.2 html2text==2024.2.26 lxml==5.1.0 tqdm==4.66.2 

In [None]:
import os
import rh_documentation_ingestion as rhdi

## Product information

In [None]:
class product_info:
    """One documentation set to ingest: a product at a given version and language.

    Attributes:
        product: Product identifier string, e.g. 'red_hat_enterprise_linux'.
            Presumably the slug the ingestion module uses to locate the
            documentation -- confirm in rh_documentation_ingestion.
        product_full_name: Human-readable product name, e.g.
            'Red Hat Enterprise Linux'; shown in the progress messages.
        version: Documentation version as a string, e.g. '4.15' or '1-latest'.
        language: Language/locale code as a string, e.g. 'en-US'.
    """

    def __init__(self, product, product_full_name, version, language):
        # What is being documented: identifier plus display name.
        self.product, self.product_full_name = product, product_full_name
        # Which edition of that documentation: version plus language.
        self.version, self.language = version, language

In [None]:
# Documentation sets to ingest, in processing order. Each entry is
# (product identifier, full display name, version, language).
#
# Each product/version pair must appear only once: the list previously held
# 'red_hat_advanced_cluster_security_for_kubernetes' 4.4 twice, which made the
# ingestion loop process that documentation set a second time.
products = [
    product_info('red_hat_openshift_ai_self-managed',
                 'Red Hat OpenShift AI Self-Managed', '2.8', 'en-US'),
    product_info('red_hat_openshift_ai_self-managed',
                 'Red Hat OpenShift AI Self-Managed', '2.7', 'en-US'),
    product_info('red_hat_openshift_ai_self-managed',
                 'Red Hat OpenShift AI Self-Managed', '2.6', 'en-US'),
    product_info('red_hat_3scale_api_management',
                 'Red Hat 3scale API Management', '2.14', 'en-US'),
    product_info('red_hat_advanced_cluster_management_for_kubernetes',
                 'Red Hat Advanced Cluster Management for Kubernetes', '2.10', 'en-US'),
    product_info('red_hat_advanced_cluster_security_for_kubernetes',
                 'Red Hat Advanced Cluster Security for Kubernetes', '4.4', 'en-US'),
    product_info('red_hat_amq_streams',
                 'Red Hat AMQ Streams', '2.6', 'en-US'),
    product_info('red_hat_ansible_automation_platform',
                 'Red Hat Ansible Automation Platform', '2.4', 'en-US'),
    product_info('red_hat_ansible_lightspeed_with_ibm_watsonx_code_assistant',
                 'Red Hat Ansible Lightspeed with IBM watsonx Code Assistant', '2.x_latest', 'en-US'),
    product_info('red_hat_data_grid',
                 'Red Hat Data Grid', '8.4', 'en-US'),
    product_info('red_hat_developer_hub',
                 'Red Hat Developer Hub', '1.1', 'en-US'),
    product_info('red_hat_enterprise_linux',
                 'Red Hat Enterprise Linux', '9', 'en-US'),
    product_info('red_hat_enterprise_linux',
                 'Red Hat Enterprise Linux', '8', 'en-US'),
    product_info('red_hat_build_of_microshift',
                 'Red Hat build of MicroShift', '4.15', 'en-US'),
    product_info('red_hat_openshift_data_foundation',
                 'Red Hat OpenShift Data Foundation', '4.15', 'en-US'),
    product_info('red_hat_satellite',
                 'Red Hat Satellite', '6.14', 'en-US'),
    product_info('red_hat_single_sign-on',
                 'Red Hat Single Sign-On', '7.6', 'en-US'),
    product_info('red_hat_enterprise_linux',
                 'Red Hat Enterprise Linux', '7', 'en-US'),
    product_info('openshift_container_platform',
                 'Red Hat OpenShift Container Platform', '4.15', 'en-US'),
    product_info('openshift_container_platform',
                 'Red Hat OpenShift Container Platform', '4.14', 'en-US'),
    product_info('openshift_container_platform',
                 'Red Hat OpenShift Container Platform', '4.13', 'en-US'),
    product_info('openshift_container_platform',
                 'Red Hat OpenShift Container Platform', '4.12', 'en-US'),
    product_info('red_hat_openshift_serverless',
                 'Red Hat OpenShift Serverless', '1.32', 'en-US'),
    product_info('red_hat_hybrid_cloud_console',
                 'Red Hat Hybrid Cloud Console', '1-latest', 'en-US'),
    product_info('red_hat_insights',
                 'Red Hat Insights', '1-latest', 'en-US'),
]

## Ingestion

In [None]:
# Milvus connection settings passed to the ingestion call. Host and port are
# fixed here; the credentials are read from the environment (None when unset).
milvus = {
    "MILVUS_HOST": "vectordb-milvus.milvus.svc.cluster.local",
    "MILVUS_PORT": 19530,
    "MILVUS_USERNAME": os.getenv('MILVUS_USERNAME'),
    "MILVUS_PASSWORD": os.getenv('MILVUS_PASSWORD'),
}

In [None]:
# Ingest every configured product in turn. A failure on one product is
# reported and the loop continues, so one bad entry does not abort the run.
for product in products:
    print('-----------------------------------')
    print(f'Processing "{product.product_full_name}" at version {product.version}, language {product.language}')
    try:
        rhdi.ingest_documentation(product, milvus)
    except Exception as err:
        print(f'Error processing "{product.product_full_name}" at version {product.version}, language {product.language}')
        # Only the exception message is shown, not the traceback.
        print(err)