# Chart Descriptions

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/andrewm4894/netdata-gpt-notebooks/blob/main/notebooks/chart_descriptions/chart_descriptions.ipynb)

In [271]:
# If running in Colab, uncomment the following line and run it to install the required packages.
#!pip install python-dotenv netdata-pandas openai

In [272]:
import os
from dotenv import load_dotenv
import pandas as pd
import numpy as np
from netdata_pandas.data_cloud import get_data_cloud
import openai
import pprint as pp
from urllib.parse import urlparse
import requests
import json
from datetime import datetime

# read the .env file so the tokens below can be looked up from the environment
load_dotenv()

# each lookup yields None when the variable is not set
NETDATA_API_TOKEN = os.environ.get('NETDATA_API_TOKEN')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

# hand the key to the openai module for the API calls made further down
openai.api_key = OPENAI_API_KEY

In [273]:
# inputs
# Netdata Cloud space and room whose charts will be described
space_id = 'ea93d7b8-0df6-45c0-b13d-1560996c89eb'
room_id = 'd8a4e0c5-7c79-4145-900e-83a9f06fcb6a'
# leave as None to have a chart picked at random from the filtered list
chart = None
# only charts whose id starts with this prefix are processed
chart_startswith = 'netdata.'
# results are written under a directory named after the prefix
output_dir = 'output/' + chart_startswith
# timestamp of this run, used in the output file names
now = f'{datetime.now():%Y%m%d_%H%M%S}'

In [274]:
def get_charts_cloud(space_id, room_id, api_token=None, base_url='https://app.netdata.cloud', node_ids=None):
    """Get charts from netdata cloud api.

    POSTs to ``{base_url}/api/v2/spaces/{space_id}/rooms/{room_id}/charts``
    with a node filter and returns the ``results`` entry of the JSON response.

    Args:
        space_id: id of the space, used in the request path.
        room_id: id of the room, used in the request path.
        api_token: bearer token for the request. When None, the
            ``NETDATA_API_TOKEN`` environment variable is used.
        base_url: scheme and host of the API; a trailing slash is tolerated.
        node_ids: node ids sent as the ``nodeIDs`` filter. None (the default)
            sends an empty list.

    Returns:
        The ``results`` value of the response on HTTP 200, otherwise None
        (after printing the status code and response text).
    """

    if api_token is None:
        api_token = os.getenv('NETDATA_API_TOKEN')

    # None stands in for "no filter" so a single default list is never shared
    # (and possibly mutated) across calls.
    node_ids = [] if node_ids is None else list(node_ids)

    # honour the caller-supplied base_url instead of overwriting it
    url = f'{base_url.rstrip("/")}/api/v2/spaces/{space_id}/rooms/{room_id}/charts'
    headers = {'Accept': '*/*', 'Content-Type': 'application/json', 'Authorization': f'Bearer {api_token}'}
    data = {
        'filter': {
            'nodeIDs': node_ids,
        }
    }
    r = requests.post(url, headers=headers, data=json.dumps(data))

    if r.status_code != 200:
        print(f'Error: {r.status_code, r.text}')
        return None

    return r.json()['results']


def make_prompt(chart, chart_json):
    """Build the prompt asking the model to describe a chart and its dimensions.

    Args:
        chart: the chart id; shown in the prompt and pre-filled as ``chart_id``
            in the requested output format.
        chart_json: the chart metadata. A dict (or other JSON-serializable
            object) is rendered as JSON; a string is inserted unchanged.

    Returns:
        The prompt text as a single string.
    """
    if isinstance(chart_json, str):
        chart_json_text = chart_json
    else:
        # Render real JSON (double quotes) rather than the Python repr, since
        # the value is shown inside a ```json fence. default=str keeps values
        # json cannot encode as text, which is what str() of the dict did before.
        chart_json_text = json.dumps(chart_json, default=str)

    prompt = f"""
    You are an experienced SRE and sysadmin.

    You are monitoring your infrastructure using Netdata Cloud.

    You are documenting individual charts and their dimensions to help other users.

    The chart_json object is available to you and follows the format below:

    ```json
    {{
        "id": "the chart id",
        "title": "the chart title",
        "dimensions": "a list of the dimensions",
        "units": "the units of the chart",
        "family": "the menu family of the chart",
        "context": "context of the chart. it follows structure like <type>.<name> where <type> also impacts where in the menu the chart appears and so can give hints of what it might relate to",
        "chart_type": "usually line or stacked or area - this is the type of chart",
        "chart_labels": "some optional labels or tags for the chart that can also sometimes be useful to help understand the chart"
    }}
    ```

    Here is the individual chart_json object describing the `{chart}` chart you are documenting:

    ```json
    {chart_json_text}
    ```

    Can you write short, useful and educational description of the chart and its dimensions? 

    Please follow a json format like this (the output needs to be valid json):

    ```json
    {{
        "chart_id": "{chart}",
        "chart_description": "<add description here>",
        "dimension_descriptions": [
            {{
                "<dimension name>": "<add dimension description here>",
                ...
            }}]
    }}
    ```
    """

    return prompt


# fetch the chart metadata for the room; get_charts_cloud returns None on a non-200 response
charts = get_charts_cloud(space_id, room_id, api_token=NETDATA_API_TOKEN)
if charts is None:
    # stop here with a clear message instead of failing later on charts.keys()
    raise RuntimeError('Could not fetch charts from Netdata Cloud - check space_id, room_id and NETDATA_API_TOKEN')

In [275]:
# Always build the filtered, sorted list of chart ids: the later cells iterate
# over it even when `chart` was set by hand in the inputs.
charts_list = sorted(c for c in charts if c.startswith(chart_startswith))
# alternative filters:
#charts_list = [c for c in charts_list if c.startswith('system.')]
#charts_list = [c for c in charts_list if 'prometheus.' not in c]
#charts_list = [c for c in charts_list if 'netdata.' not in c]

if chart is None:
    if not charts_list:
        # np.random.choice also raises ValueError on an empty list, but with a less helpful message
        raise ValueError(f'No charts found with ids starting with {chart_startswith!r}')
    # no chart requested: pick one at random from the filtered list
    chart = np.random.choice(charts_list)

print(chart)
print(charts_list)

netdata.heartbeat
['netdata.aclk_cloud_req', 'netdata.aclk_cloud_req_http_type', 'netdata.aclk_mqtt_tx_perc', 'netdata.aclk_mqtt_tx_queue', 'netdata.aclk_mqtt_wss_time', 'netdata.aclk_openssl_bytes', 'netdata.aclk_processed_query_type', 'netdata.aclk_protobuf_rx_types', 'netdata.aclk_query_per_second', 'netdata.aclk_query_threads', 'netdata.aclk_query_time', 'netdata.aclk_status', 'netdata.apps_children_fix', 'netdata.apps_cpu', 'netdata.apps_fix', 'netdata.apps_sizes', 'netdata.clients', 'netdata.compression_ratio', 'netdata.db_points', 'netdata.db_points_read', 'netdata.db_points_results', 'netdata.db_points_stored', 'netdata.dbengine_buffers', 'netdata.dbengine_cache_hit_ratio', 'netdata.dbengine_compression_ratio', 'netdata.dbengine_events', 'netdata.dbengine_extent_cache_events', 'netdata.dbengine_extent_cache_hit_ratio', 'netdata.dbengine_extent_cache_memory', 'netdata.dbengine_extent_cache_memory_changes', 'netdata.dbengine_extent_cache_memory_migrations', 'netdata.dbengine_exte

In [276]:
# exist_ok avoids the separate existence check and the race between check and create
os.makedirs(output_dir, exist_ok=True)

# For every chart in charts_list: build its metadata, ask the model for a
# description, and save both the prompt and the parsed reply to output_dir.
for chart in charts_list:

    # the data is fetched only to learn the chart's dimension names (the dataframe columns)
    try:
        df = get_data_cloud(space_id, room_id, chart)
    except Exception as e:
        # skip this chart rather than carrying on with the previous chart's dataframe
        print(f'Skipping {chart}: could not fetch data ({e})')
        continue

    dimensions = df.columns

    chart_json = {}
    chart_json['id'] = chart
    chart_json['title'] = charts[chart]['title']
    chart_json['dimensions'] = list(dimensions)
    chart_json['units'] = charts[chart]['units']
    chart_json['family'] = charts[chart]['family']
    chart_json['context'] = charts[chart]['context']
    chart_json['chart_type'] = charts[chart]['chartType']
    chart_json['chart_labels'] = charts[chart]['chartLabels']

    prompt = make_prompt(chart, chart_json)

    # build messages list to pass to openai
    messages = [
        {"role": "user", "content": prompt}
    ]

    # call openai api
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )

    reply_content = completion.choices[0].message.content
    # strip a trailing comma before the closing of the dimension list, which would make the reply invalid json
    reply_content = reply_content.replace('",\n        }]','"\n        }]').replace('",\n        }\n    ]','"\n        }\n    ]')

    try:
        chart_description_json = json.loads(reply_content)
    except json.JSONDecodeError as e:
        # skip this chart so the previous chart's description is not written under this chart's name
        print(f'Skipping {chart}: reply is not valid json ({e})')
        continue
    print(chart_description_json)

    file_name_prompt = f'{chart}__{now}_PROMPT.txt'
    with open(f'{output_dir}/{file_name_prompt}', 'w', encoding='utf-8') as f:
        f.write(prompt)

    file_name_result = f'{chart}__{now}_RESULT.json'
    with open(f'{output_dir}/{file_name_result}', 'w', encoding='utf-8') as f:
        json.dump(chart_description_json, f, indent=4)

{'chart_id': 'netdata.aclk_cloud_req', 'chart_description': 'This chart shows the number of requests per second received from the Netdata Cloud.', 'dimension_descriptions': [{'received': 'This dimension measures the number of requests received by the Netdata Cloud per second.'}]}
{'chart_id': 'netdata.aclk_cloud_req_http_type', 'chart_description': 'This chart displays the number of requests received from Netdata Cloud via HTTP by their type.', 'dimension_descriptions': [{'data': 'The type of request being made (e.g. GET, POST, PUT, DELETE).'}]}
{'chart_id': 'netdata.aclk_mqtt_tx_perc', 'chart_description': 'This chart shows the actively used percentage of the MQTT Transmission Buffer. It helps monitor the efficiency of sending messages using the MQTT protocol.', 'dimension_descriptions': [{'used': 'This dimension represents the percentage of the MQTT Transmission Buffer that is currently in use.'}]}
{'chart_id': 'netdata.aclk_mqtt_tx_queue', 'chart_description': 'The chart shows the s