In [1]:
import ergodic
import os
from dotenv import load_dotenv
import logging

# Show INFO-level log records in the cell output.
logging.basicConfig(level=logging.INFO)

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Credentials are read from the environment so they never appear in the notebook.
username = os.getenv("ERGODIC_API_USERNAME")
password = os.getenv("ERGODIC_API_PASSWORD")
if not username or not password:
    # os.getenv returns None for unset variables; stop here with a clear message
    # instead of handing None to the API client.
    raise RuntimeError(
        "ERGODIC_API_USERNAME and ERGODIC_API_PASSWORD must be set "
        "(in the environment or in a .env file)."
    )

# Base URL of the backend that the client talks to.
api_url = "https://ergbackendv3.azurewebsites.net"

In [2]:
from ergodic.client import ErgodicClient

# Build the API client from the URL and credentials configured above;
# every later cell goes through this one `client` object.
client = ErgodicClient(
    api_url,
    username,
    password,
)

INFO:root:Sending request to https://ergbackendv3.azurewebsites.net/token
INFO:root:Login successful


In [3]:
# List the assets visible to the logged-in user; the result is displayed as the cell output.
client.assets.list()

INFO:ergodic.assets.utils:Listing assets


[{'id': '2351a349-c488-46d2-9ddc-6f746dc9c164',
  'name': 'Diabetes Dataset',
  'userid': 'c1a48216-e60b-45a7-bae5-f0fdf5a6be4d',
  'object_type': 'PickleContext',
  'ref': 'c1a48216-e60b-45a7-bae5-f0fdf5a6be4d/data/2351a349-c488-46d2-9ddc-6f746dc9c164.pkl',
  'metadata': '{"database_spec":{"description":"The diabetes database contains medical data related to diabetes patients. It includes various health metrics such as age, sex, body mass index (BMI), blood pressure (BP), and several serum measurements. This database is used for analyzing and predicting diabetes-related health outcomes.","table_names":["diabetes"],"tables":[{"description":"This table contains medical data for diabetes patients, including various health metrics and serum measurements.","column_names":["age","sex","bmi","bp","s1","s2","s3","s4","s5","s6","target"],"columns":[{"description":"Age of the patient in years, represented as a normalized double value.","data_type":"DOUBLE","is_nullable":true,"unique_sample":[0.

In [4]:
# Upload the PDFs found in the WNBU data folder and keep the returned list in `pdfs`.
pdfs = client.assets.upload_pdfs_from_folder("../data/WNBU/")

INFO:ergodic.assets.utils:Uploading PDFs from folder: ../data/WNBU/
INFO:ergodic.assets.utils:Uploading PDF: ../data/WNBU/OUTPUT58.pdf
INFO:ergodic.assets.utils:Uploading PDF: ../data/WNBU/OUTPUT64.pdf
INFO:ergodic.assets.utils:Uploading PDF: ../data/WNBU/OUTPUT70.pdf
INFO:ergodic.assets.utils:Uploading PDF: ../data/WNBU/OUTPUT118.pdf
INFO:ergodic.assets.utils:Uploading PDF: ../data/WNBU/OUTPUT119.pdf
INFO:ergodic.assets.utils:Uploading PDF: ../data/WNBU/OUTPUT71.pdf
INFO:ergodic.assets.utils:Uploading PDF: ../data/WNBU/OUTPUT65.pdf
INFO:ergodic.assets.utils:Uploading PDF: ../data/WNBU/OUTPUT59.pdf
INFO:ergodic.assets.utils:Uploading PDF: ../data/WNBU/OUTPUT98.pdf
INFO:ergodic.assets.utils:Uploading PDF: ../data/WNBU/OUTPUT73.pdf
INFO:ergodic.assets.utils:Uploading PDF: ../data/WNBU/OUTPUT67.pdf
INFO:ergodic.assets.utils:Uploading PDF: ../data/WNBU/datasheet-c78-741709.pdf
INFO:ergodic.assets.utils:Uploading PDF: ../data/WNBU/OUTPUT66.pdf
INFO:ergodic.assets.utils:Uploading PDF: ../dat

In [6]:
# Upload the PDFs found in the sheets folder.
# NOTE(review): this rebinds `pdfs`, replacing the result of the earlier WNBU upload.
pdfs = client.assets.upload_pdfs_from_folder("../data/sheets/")

INFO:ergodic.assets.utils:Uploading PDFs from folder: ../data/sheets/
INFO:ergodic.assets.utils:Uploading PDF: ../data/sheets/OUTPUT1.pdf
INFO:ergodic.assets.utils:Finished uploading PDFs from folder


In [10]:
# Inspect the metadata stored under "context" for the first uploaded PDF.
pdfs[0]["context"]["metadata"]

{'title': 'Cisco Catalyst 9163E Series Access Points Data sheet - Cisco',
 'author': '',
 'summary': "The document is a data sheet for Cisco's Catalyst 9163E Series Access Points, updated as of January 26, 2024. It highlights the device's capabilities in extending Wi-Fi 6E networks outdoors, suitable for environments like campuses and stadiums. The access point offers flexibility with on-premises or cloud management options and features such as high-density coverage, built-in GPS, and advanced interference management. It supports Cisco DNA Software and Meraki cloud management for enhanced network analytics and security. The document also covers licensing, warranty, environmental sustainability, and Cisco's financial solutions.",
 'markdown': "# Cisco Catalyst 9163E Series Access Points Data sheet\n\n**Updated: January 26, 2024**\nBias-Free Language\n\n\n-----\n\np g g p y\n\nSeries, the 9163E offers the flexibility to choose between on-premises and cloud management with the ability to 

In [11]:
# Collect the uuid of every entry in `pdfs`, then display the list.
pdfs_uuids = [pdf["context"]["uuid"] for pdf in pdfs]
pdfs_uuids

['6894042d-1105-4b3e-8c17-4f5b2ea370f8']

In [11]:
# Create a collection from the uploaded PDFs: name, description, then the member asset uuids.
# NOTE(review): the response is only displayed, not stored in a variable.
client.collections.create_collection(
    "Cisco Wireless Product Sheets",
    "Product Sheets for Cisco Wireless Products",
    pdfs_uuids,
)

INFO:ergodic.collections.utils:Creating collection with name: Cisco Wireless Product Sheets


{'message': 'Collection created successfully',
 'collection_context': {'name': 'Cisco Wireless Product Sheets',
  'uuid': 'e71a6e60-2952-499f-a87b-626171284524',
  'userId': 'c1a48216-e60b-45a7-bae5-f0fdf5a6be4d',
  'object_type': 'CollectionContext',
  'ref': '',
  'metadata': {'user_defined_attributes': [], 'candidate_features': None},
  'description': 'Product Sheets for Cisco Wireless Products',
  'user_context': {},
  'inputs': ['735076bc-059f-46d6-b058-74e3bafcaede',
   '22f3e3d3-5d00-4e19-98f9-3eef98259a64',
   'a7824d86-47fd-4ad7-b2ee-1330d0a4ad03',
   '21dee5b4-712d-4756-bdfa-dbb0c95fe643',
   '2ff9ddea-67b5-4697-a9a5-31f3ff247638',
   '26923a7a-0083-4f52-9c27-6093198294c5',
   '70f8c0e5-ce95-49cd-919b-03315eb2f958',
   '98c3195c-c9b3-45b9-bc73-40ef6f3469d0',
   'fdbf5b63-3717-481e-bf03-247d4555273b',
   '481af5dd-62fc-4904-8560-73acb7e5caec',
   '027c3449-0189-4fb5-a4a1-f77af9379013',
   'cca62129-c66b-4cb5-aa44-c5d139c4e460',
   'be2e9bd3-b2c5-4009-ab1c-5ad7ae1eca0a',
   '20

In [12]:
# uuid of the "Cisco Wireless Product Sheets" collection, as shown in the create_collection response.
collection_id = "e71a6e60-2952-499f-a87b-626171284524"

In [13]:
# Fetch the collection asset through `collection_id` rather than repeating the uuid literal,
# so the id only has to be changed in one place.
asset = client.assets.get(collection_id)
# The collection's "inputs" are the asset ids of its member PDFs.
# NOTE: from here on `pdfs` holds a list of id strings, not upload responses.
pdfs = asset["inputs"]
# Fetch the data of the first member PDF.
pdf_data = client.assets.get_data(pdfs[0])

INFO:ergodic.assets.utils:Getting asset with ID: e71a6e60-2952-499f-a87b-626171284524


INFO:ergodic.assets.utils:Getting data from asset 735076bc-059f-46d6-b058-74e3bafcaede


In [15]:
from ergodic.display import markdown

# Optional preview, left disabled: uncomment to display `pdf_data` with the `markdown` helper.
# markdown(pdf_data)

In [16]:
from ergodic.actions.match import KeywordMatcher

# Keyword matcher in fuzzy mode; 0.95 is the fuzzy-match threshold.
matcher = KeywordMatcher(
    client,
    match_type="fuzzy",
    fuzzy_match_threshold=0.95,
)
# Load the data for every asset id in `pdfs` into the matcher.
matcher.load(pdfs)

  from .autonotebook import tqdm as notebook_tqdm
Loading data:   0%|          | 0/128 [00:00<?, ?it/s]INFO:ergodic.assets.utils:Getting data from asset 735076bc-059f-46d6-b058-74e3bafcaede
Loading data:   1%|          | 1/128 [00:00<00:51,  2.48it/s]INFO:ergodic.assets.utils:Getting data from asset 22f3e3d3-5d00-4e19-98f9-3eef98259a64
Loading data:   2%|▏         | 2/128 [00:00<00:44,  2.81it/s]INFO:ergodic.assets.utils:Getting data from asset a7824d86-47fd-4ad7-b2ee-1330d0a4ad03
Loading data:   2%|▏         | 3/128 [00:02<01:51,  1.13it/s]INFO:ergodic.assets.utils:Getting data from asset 21dee5b4-712d-4756-bdfa-dbb0c95fe643
Loading data:   3%|▎         | 4/128 [00:02<01:19,  1.55it/s]INFO:ergodic.assets.utils:Getting data from asset 2ff9ddea-67b5-4697-a9a5-31f3ff247638
Loading data:   4%|▍         | 5/128 [00:02<01:03,  1.94it/s]INFO:ergodic.assets.utils:Getting data from asset 26923a7a-0083-4f52-9c27-6093198294c5
Loading data:   5%|▍         | 6/128 [00:03<00:58,  2.09it/s]INFO:ergo

In [17]:
# Product IDs to search for in the loaded PDFs.
pids = ["C9120AXI", "C9124AXE", "C9120AXE", "C9120AXP", "C9124AXD", "C9124AXI"]
# One list of matching documents per PID, in the same order as `pids`.
list_of_docs = [matcher.match(pid) for pid in pids]

# Flatten the per-PID matches into one list of asset ids (duplicates are kept here).
small_collection_pdfs = [
    matched["id"] for matches in list_of_docs for matched in matches
]

# Display the distinct asset ids.
list(set(small_collection_pdfs))

Matching keywords: 100%|██████████| 128/128 [00:00<00:00, 46277.99it/s]
Getting asset info:   0%|          | 0/128 [00:00<?, ?it/s]INFO:ergodic.assets.utils:Getting asset with ID: 1798b6cf-682a-40d9-b28e-0c6378c8b9d1
Getting asset info:  24%|██▍       | 31/128 [00:00<00:02, 42.55it/s]INFO:ergodic.assets.utils:Getting asset with ID: b63c7f58-16bd-435a-b74a-fa591f1812c2
Getting asset info: 100%|██████████| 128/128 [00:01<00:00, 91.09it/s]
Matching keywords: 100%|██████████| 128/128 [00:00<00:00, 33435.32it/s]
Getting asset info: 100%|██████████| 128/128 [00:00<00:00, 375171.85it/s]
Matching keywords: 100%|██████████| 128/128 [00:00<00:00, 65304.82it/s]
Getting asset info:   0%|          | 0/128 [00:00<?, ?it/s]INFO:ergodic.assets.utils:Getting asset with ID: 1798b6cf-682a-40d9-b28e-0c6378c8b9d1
Getting asset info:  24%|██▍       | 31/128 [00:00<00:01, 50.74it/s]INFO:ergodic.assets.utils:Getting asset with ID: b63c7f58-16bd-435a-b74a-fa591f1812c2
Getting asset info: 100%|██████████| 128/1

['b63c7f58-16bd-435a-b74a-fa591f1812c2',
 '1798b6cf-682a-40d9-b28e-0c6378c8b9d1']

In [18]:
# Inspect the raw match results: one list of matched asset records per PID.
list_of_docs

[[{'id': '1798b6cf-682a-40d9-b28e-0c6378c8b9d1',
   'name': 'Cisco Embedded Wireless Controller on Catalyst Access Points Data Sheet - Cisco',
   'userid': 'c1a48216-e60b-45a7-bae5-f0fdf5a6be4d',
   'object_type': 'PdfContext',
   'ref': 'c1a48216-e60b-45a7-bae5-f0fdf5a6be4d/pdf/1798b6cf-682a-40d9-b28e-0c6378c8b9d1.pdf',
   'metadata': '{"title":"Cisco Embedded Wireless Controller on Catalyst Access Points Data Sheet - Cisco","author":"","summary":"The document is a data sheet for the Cisco Embedded Wireless Controller on Catalyst Access Points (EWC-AP). It details the features, benefits, and specifications of the EWC-AP, which integrates the Cisco Catalyst 9800 Series Wireless Controllers with the Cisco Catalyst 9100 Access Points to provide advanced Wi-Fi 6 capabilities. The document highlights key metrics, deployment modes, licensing options, software requirements, and environmental sustainability initiatives. It also includes information on Cisco\'s services, warranty, and flexible

In [19]:
# De-duplicate the matched asset ids while keeping first-seen order.
# `list(set(...))` also removes duplicates, but the iteration order of a set of strings
# can change between kernel sessions (string hashing is randomized), which would make
# the order of the collection's inputs non-reproducible.
pdfs = list(dict.fromkeys(small_collection_pdfs))

In [20]:
# Create the smaller collection from the de-duplicated matched asset ids in `pdfs`:
# name, description, then the member asset ids.
# NOTE(review): the response is only displayed, not stored in a variable.
client.collections.create_collection(
    "Cisco Wireless Product Sheets Small",
    "Small Collection of Cisco Wireless Product Sheets",
    pdfs,
)

INFO:ergodic.collections.utils:Creating collection with name: Cisco Wireless Product Sheets Small


{'message': 'Collection created successfully',
 'collection_context': {'name': 'Cisco Wireless Product Sheets Small',
  'uuid': 'bb3a0f00-0b92-4bdb-8bb5-a628ecd5cba1',
  'userId': 'c1a48216-e60b-45a7-bae5-f0fdf5a6be4d',
  'object_type': 'CollectionContext',
  'ref': '',
  'metadata': {'user_defined_attributes': [], 'candidate_features': None},
  'description': 'Small Collection of Cisco Wireless Product Sheets',
  'user_context': {},
  'inputs': ['b63c7f58-16bd-435a-b74a-fa591f1812c2',
   '1798b6cf-682a-40d9-b28e-0c6378c8b9d1'],
  'version': '0.0.1',
  'last_updated': '2024-10-22T17:12:24.891774',
  'logger': {'history': []}}}

In [28]:
# Inspect the collection-data context returned by create_collection_data.
# NOTE(review): in the visible notebook `response` is only assigned by a cell that appears
# below this one, so this cell raises NameError on a fresh top-to-bottom run.
response["collection_data_context"]

{'name': 'Cisco Wireless Product Sheets Small Features',
 'uuid': '77819e48-9944-4774-b4f0-aa25e62ab2e3',
 'userId': 'c1a48216-e60b-45a7-bae5-f0fdf5a6be4d',
 'object_type': 'CollectionDataContext',
 'ref': '',
 'metadata': {'collection_uuid': 'bb3a0f00-0b92-4bdb-8bb5-a628ecd5cba1',
  'selected_features': ['Product name',
   'Parts Included or Product IDs (PIDs)'],
  'data_file_name': '77819e48-9944-4774-b4f0-aa25e62ab2e3.pkl'},
 'description': 'Small Collection of Cisco Wireless Product Sheets Features',
 'user_context': None,
 'inputs': ['bb3a0f00-0b92-4bdb-8bb5-a628ecd5cba1'],
 'version': '0.0.1',
 'last_updated': '2024-10-22T17:43:52.833415',
 'logger': {'history': []},
 'data': [{'asset_uuid': 'b63c7f58-16bd-435a-b74a-fa591f1812c2',
   'Product name': 'Catalyst 9120',
   'Parts Included or Product IDs (PIDs)': ['Cisco Catalyst 9120i',
    'Cisco Catalyst 9120e',
    'Cisco Catalyst 9100 Series',
    'AIR-BRACKET-1',
    'AIR-BRACKET-2',
    'AIR-AP-BRACKET-1',
    'AIR-AP-BRACKET-2

In [21]:
# uuid of the "Cisco Wireless Product Sheets Small" collection, as shown in the create_collection response.
small_collection_id = "bb3a0f00-0b92-4bdb-8bb5-a628ecd5cba1"

In [23]:
# Features to extract for each PDF in the small collection.
selected_features = ["Product name", "Parts Included or Product IDs (PIDs)"]

# Build the collection data: collection id, feature list, then name and description.
response = client.collections.create_collection_data(
    small_collection_id,
    selected_features,
    "Cisco Wireless Product Sheets Small Features",
    "Small Collection of Cisco Wireless Product Sheets Features",
)

INFO:ergodic.collections.utils:Creating collection data with name: Cisco Wireless Product Sheets Small Features


In [31]:
import pandas

# Tabulate the extracted feature records, one row per PDF asset.
feature_rows = response["collection_data_context"]["data"]
features_df = pandas.DataFrame(feature_rows)
features_df

Unnamed: 0,asset_uuid,Product name,Parts Included or Product IDs (PIDs)
0,b63c7f58-16bd-435a-b74a-fa591f1812c2,Catalyst 9120,"[Cisco Catalyst 9120i, Cisco Catalyst 9120e, C..."
1,1798b6cf-682a-40d9-b28e-0c6378c8b9d1,Cisco Embedded Wireless Controller on Catalyst...,"[Cisco Catalyst 9100 Access Points, Cisco Cata..."


In [32]:
# Name of the list-valued column that holds each PDF's parts / product IDs.
pids_column = "Parts Included or Product IDs (PIDs)"

# Expand the dataframe so that every item in the PIDs list occupies its own row,
# then reset the index to get a clean, sequential one.
# (The original cell also began with a bare `features_df[...]` expression; it was neither
# displayed nor assigned, so it has been removed.)
expanded_df = features_df.explode(pids_column).reset_index(drop=True)

expanded_df

Unnamed: 0,asset_uuid,Product name,Parts Included or Product IDs (PIDs)
0,b63c7f58-16bd-435a-b74a-fa591f1812c2,Catalyst 9120,Cisco Catalyst 9120i
1,b63c7f58-16bd-435a-b74a-fa591f1812c2,Catalyst 9120,Cisco Catalyst 9120e
2,b63c7f58-16bd-435a-b74a-fa591f1812c2,Catalyst 9120,Cisco Catalyst 9100 Series
3,b63c7f58-16bd-435a-b74a-fa591f1812c2,Catalyst 9120,AIR-BRACKET-1
4,b63c7f58-16bd-435a-b74a-fa591f1812c2,Catalyst 9120,AIR-BRACKET-2
5,b63c7f58-16bd-435a-b74a-fa591f1812c2,Catalyst 9120,AIR-AP-BRACKET-1
6,b63c7f58-16bd-435a-b74a-fa591f1812c2,Catalyst 9120,AIR-AP-BRACKET-2
7,b63c7f58-16bd-435a-b74a-fa591f1812c2,Catalyst 9120,AIR-CHNL-ADAPTER
8,b63c7f58-16bd-435a-b74a-fa591f1812c2,Catalyst 9120,Erico Caddy 512a
9,b63c7f58-16bd-435a-b74a-fa591f1812c2,Catalyst 9120,Cooper B-Line BA50a


In [37]:
from ergodic.actions import suggest

In [38]:
# Request suggested feature names for a single asset; this id is the
# "Cisco Embedded Wireless Controller" data sheet from the match results.
suggest.features(client, "1798b6cf-682a-40d9-b28e-0c6378c8b9d1")

['Product Type',
 'Regulatory Domain',
 'Release Date',
 'Wi-Fi Technology',
 'CSR Report Section',
 'Compliance verification link',
 'Service Benefits',
 'Product Material Content Laws',
 'Smart Accounts',
 'Service Components',
 'Payment Solutions',
 'Antenna Type',
 'Update Date',
 'Bias-Free Language',
 'Maximum Access Points',
 'Supported Features',
 'Cisco DNA Support',
 'Deployment Modes',
 'Geographic Availability',
 'License Duration',
 'Product model',
 'Cisco DNA Center Integration',
 'Recommended Products',
 'Contact Information for Sustainability Inquiries',
 'Smart Licensing',
 'Electronic Waste Laws',
 'RF Features',
 'WEEE Compliance',
 'Access Point Model',
 'Packaging Data Disclaimer',
 'Service Offered',
 'Reference Links',
 'Product Description',
 'Benefits of Service',
 'Maximum WLANs',
 'Subscription Types',
 'Licensing Requirements',
 'Warranty details',
 'Software warranty terms',
 'Related Links',
 'Product Name',
 'Environmental Sustainability Policies',
 'Pol

In [39]:
# Same asset as the previous call, with an extra free-text argument;
# presumably a hint that focuses the suggestions (confirm against suggest.features).
suggest.features(
    client,
    "1798b6cf-682a-40d9-b28e-0c6378c8b9d1",
    "Features related to compliance certification",
)

['Subscription term options',
 'Compliance certifications',
 'Product material content laws compliance',
 'Compliance certification details',
 'Installation environment',
 'Smart Licensing requirements',
 'Installation environments',
 'Country-specific compliance',
 'Warranty details',
 'Wi-Fi Standards',
 'Product models',
 'Certification standards met',
 'Packaging data accuracy',
 'Feature expiration details',
 'Centralized management capabilities',
 'Supported Access Points',
 'Compliance verification link',
 'Product compliance documentation links',
 'Licensing type',
 'WEEE compliance',
 'License Type',
 'Compliance Standards',
 'Regulatory domains',
 'Energy Efficiency Features',
 'License management',
 'Date of compliance updates',
 'Security Features',
 'Regulatory approval status',
 'Electronic waste laws compliance',
 'Policy Engine',
 'Operating System',
 'Sustainability inquiries contact information',
 'Compliance with Smart Accounts',
 'Software version requirements',
 'R