# Iguazio's Time Series Database

In [8]:
import os

import v3io_frames as frames

# Read the platform access key from the environment instead of embedding it in
# the notebook, where it would be saved with the file and exposed to every
# reader.
# NOTE(review): the access key that used to be hardcoded in this cell should be
# treated as leaked and revoked on the platform.
access_key = os.environ.get("V3IO_ACCESS_KEY")
if not access_key:
    raise RuntimeError(
        "Set the V3IO_ACCESS_KEY environment variable to a valid platform "
        "access key before running this notebook")

# Frames client bound to the "bigdata" data container
client = frames.Client('https://framesd.default-tenant.app.usss-sales-341.iguazio-cd1.com', container='bigdata', token=access_key)

# Create the TSDB table used by the rest of the notebook; `rate` is the
# expected ingestion rate of the metric samples (one sample per hour).
client.create(backend="tsdb", table='tsdb_table', rate="1/h")



In [3]:
import numpy as np
from datetime import datetime, timedelta


# Generate a DataFrame with TSDB metric samples and a "time" index column
def gen_df_w_tsdb_data(num_items=24, freq="1H", end=None, start=None,
                       start_delta=None, tz=None, normalize=False, zero=False,
                       attrs=None, seed=None):
    """Generate a DataFrame of random metric samples indexed by "time".

    Parameters
    ----------
    num_items : int
        Number of rows (passed to `pd.date_range()` as `periods`).
    freq : str or None
        Sampling frequency passed to `pd.date_range()`. Ignored when `start`,
        `end`, and `num_items` are all set, because `date_range()` does not
        accept all four arguments together.
        NOTE(review): recent pandas releases deprecate the uppercase "H"
        alias used by the default - confirm against the pandas version in use.
    end, start : datetime or None
        Bounds of the generated time index.
    start_delta : number or None
        When `start` is not given and `end` is, `start` is set to
        `end - timedelta(days=start_delta)`. Has no effect otherwise.
    tz, normalize
        Passed through to `pd.date_range()`.
    zero : bool
        When true, truncate the minutes, seconds, and microseconds of `start`
        and `end` to zero.
    attrs : sequence of str or None
        Metric (column) names; defaults to ["cpu", "mem", "disk"].
    seed : int or None
        When set, the samples are drawn from a dedicated
        `np.random.RandomState(seed)` so that the output is reproducible.
        When None, NumPy's global random state is used.

    Returns
    -------
    pandas.DataFrame
        `num_items` rows with one column per name in `attrs`, holding uniform
        random values in [0, 100), and an index named "time".
    """
    # Use a None sentinel instead of a mutable list default, so that the
    # default column list is never shared between calls.
    if attrs is None:
        attrs = ["cpu", "mem", "disk"]
    if start is None and start_delta is not None and end is not None:
        start = end - timedelta(days=start_delta)
    if zero:
        if end is not None:
            end = end.replace(minute=0, second=0, microsecond=0)
        if start is not None:
            start = start.replace(minute=0, second=0, microsecond=0)
    # If `start`, `end`, `num_items` (date_range() `periods`), and `freq`
    # are set, ignore `freq`
    if (freq is not None and start is not None and end is not None and
            num_items is not None):
        freq = None
    times = pd.date_range(periods=num_items, freq=freq, start=start, end=end,
                          tz=tz, normalize=normalize)
    if seed is None:
        samples = np.random.rand(num_items, len(attrs))
    else:
        samples = np.random.RandomState(seed).rand(num_items, len(attrs))
    df = pd.DataFrame(samples * 100, index=times, columns=attrs)
    df.index.name = "time"
    return df

In [5]:
import pandas as pd

# Prepare the metric labels to write to the TSDB table - one label set per
# DataFrame
labels = [
    {"node": "11", "os": "linux"},
    {"node": "2", "os": "windows"},
    {"node": "11", "os": "windows"},
    {"node": "2", "os": "linux"}
]

# Prepare DataFrames with randomly generated metric samples
end_t = datetime.now()
start_delta = 7  # start time = end_t - 7 days
# Generate one DataFrame of TSDB metrics per label set, so that `dfs` and
# `labels` always have the same length.
dfs = [gen_df_w_tsdb_data(end=end_t, start_delta=start_delta, zero=True)
       for _ in labels]
# To inspect a generated DataFrame (optional - for testing), run, for example,
# dfs[0].info() and dfs[0].head() in a separate cell.

In [6]:
# Send each prepared DataFrame to the TSDB table in its own write command; the
# label set at the same position in `labels` is attached through the `labels`
# parameter, so every DataFrame is written with different label values.
num_dfs = len(dfs)  # kept in case cells outside this section reference it
for idx, frame in enumerate(dfs):
    client.write(backend="tsdb", table='tsdb_table', dfs=frame,
                 labels=labels[idx])

casting datetime64[ns] values to int64 with .astype(...) is deprecated and will raise in a future version. Use .view(...) instead.
casting datetime64[ns] values to int64 with .astype(...) is deprecated and will raise in a future version. Use .view(...) instead.
casting datetime64[ns] values to int64 with .astype(...) is deprecated and will raise in a future version. Use .view(...) instead.
casting datetime64[ns] values to int64 with .astype(...) is deprecated and will raise in a future version. Use .view(...) instead.


In [7]:
# Read every metric sample in the TSDB table into one DataFrame: start="0"
# selects from the earliest time, the `end` time is left at its default
# ("now"), and `iterator` is left at its default (False). multi_index=True
# returns the metric labels as index columns. Print the first 8 rows.
df = client.read(
    backend="tsdb",
    table='tsdb_table',
    start="0",
    multi_index=True,
)
print(df.head(8))

                                                   cpu       disk        mem
time                             node os                                    
2022-06-28 08:00:00+00:00        2    linux   3.789494  77.587174  73.207230
2022-06-28 15:18:15.652000+00:00 2    linux  57.550241   8.884240  33.728375
2022-06-28 22:36:31.304000+00:00 2    linux  31.188500  10.223689  30.056445
2022-06-29 05:54:46.956000+00:00 2    linux  63.820421   8.543283  53.678887
2022-06-29 13:13:02.608000+00:00 2    linux  96.506226  72.899027  90.498496
2022-06-29 20:31:18.260000+00:00 2    linux  94.945842  25.973721  73.745772
2022-06-30 03:49:33.913000+00:00 2    linux  83.466036  34.248145  78.203917
2022-06-30 11:07:49.565000+00:00 2    linux  61.987083  15.210972  48.690961




In [9]:
# Read over-time "count" and "sum" aggregates, computed in 1-day steps, for
# the metric samples whose `os` label is "linux" and whose `node` label is
# "11"; the metric labels are returned as index columns.
df = client.read(
    backend="tsdb",
    table='tsdb_table',
    aggregators="count,sum",
    step="1d",
    start="0",
    filter="os=='linux' and node=='11'",
    multi_index=True,
)
print(df)

                                      count(cpu)  count(disk)  count(mem)  \
time                      node os                                           
2022-06-28 00:00:00+00:00 11   linux         3.0          3.0         3.0   
2022-06-29 00:00:00+00:00 11   linux         3.0          3.0         3.0   
2022-06-30 00:00:00+00:00 11   linux         3.0          3.0         3.0   
2022-07-01 00:00:00+00:00 11   linux         4.0          4.0         4.0   
2022-07-02 00:00:00+00:00 11   linux         3.0          3.0         3.0   
2022-07-03 00:00:00+00:00 11   linux         3.0          3.0         3.0   
2022-07-04 00:00:00+00:00 11   linux         3.0          3.0         3.0   
2022-07-05 00:00:00+00:00 11   linux         2.0          2.0         2.0   

                                        sum(cpu)   sum(disk)    sum(mem)  
time                      node os                                         
2022-06-28 00:00:00+00:00 11   linux  137.933411  101.389720  187.707498  
2022

