# Time Series Database
https://www.iguazio.com/docs/latest-release/tutorials/tsdb/

In [5]:
import os

In [10]:
import v3io_frames as v3f

In [11]:
# Create a Frames client that works against the "users" data container
client = v3f.Client(
    "framesd:8081",
    container="users",
)

In [12]:
# Path to the TSDB table, relative to the parent platform data container:
# <value of the V3IO_USERNAME environment variable>/examples/tsdb_tab
tsdb_table = os.path.join(
    os.getenv("V3IO_USERNAME"),
    "examples",
    "tsdb_tab",
)

In [13]:
# Create a new TSDB table at the `tsdb_table` path.
# rate="1/h" sets the ingestion rate to one sample per hour.
client.create(
    backend="tsdb",
    table=tsdb_table,
    rate="1/h",
)

In [14]:
from datetime import datetime, timedelta

import numpy as np
import pandas as pd


# Generate a DataFrame with TSDB metric samples and a "time" index column
def gen_df_w_tsdb_data(num_items=24, freq="1H", end=None, start=None,
                       start_delta=None, tz=None, normalize=False, zero=False,
                       attrs=("cpu", "mem", "disk")):
    """Generate a DataFrame of random metric samples indexed by time.

    Args:
        num_items: Number of samples; passed to `pd.date_range` as `periods`.
            May be None when `start`, `end`, and `freq` define the range.
        freq: Sample frequency passed to `pd.date_range`. Ignored when
            `start`, `end`, and `num_items` are all set.
        end: End of the sample time range (datetime), or None.
        start: Start of the sample time range (datetime), or None.
        start_delta: Number of days before `end` at which the range starts;
            used only when `start` is None and `end` is set.
        tz: Time zone passed to `pd.date_range`.
        normalize: `normalize` flag passed to `pd.date_range`.
        zero: When True, truncate `start` and `end` to the whole hour
            (minutes, seconds, and microseconds are zeroed).
        attrs: Names of the metric columns to generate.

    Returns:
        A DataFrame with one column per `attrs` entry, filled with random
        values in the range [0, 100), and an index named "time".
    """
    # Copy into a list so that any iterable of names is accepted and the
    # (immutable) default is never shared with the returned DataFrame
    columns = list(attrs)

    if start is None and start_delta is not None and end is not None:
        start = end - timedelta(days=start_delta)
    if zero:
        if end is not None:
            end = end.replace(minute=0, second=0, microsecond=0)
        if start is not None:
            start = start.replace(minute=0, second=0, microsecond=0)

    # date_range() accepts only three of `start`, `end`, `periods`, and
    # `freq`; when all four are set, drop `freq` so that the samples are
    # spaced evenly between `start` and `end`
    if all(arg is not None for arg in (freq, start, end, num_items)):
        freq = None
    times = pd.date_range(periods=num_items, freq=freq, start=start, end=end,
                          tz=tz, normalize=normalize)

    # Size the data by the generated index (rather than by `num_items`) so
    # that num_items=None with `start`, `end`, and `freq` also works
    data = np.random.rand(len(times), len(columns)) * 100
    df = pd.DataFrame(data, index=times, columns=columns)
    df.index.name = "time"
    return df

In [15]:
# Prepare DataFrames with randomly generated metric samples.
# All the DataFrames cover the same time range (the last `start_delta` days,
# truncated to the whole hour).
end_t = datetime.now()
start_delta = 7  # start time = end_t - 7 days
dfs = []
for i in range(4):
    # Generate a new DataFrame with TSDB metrics
    dfs.append(gen_df_w_tsdb_data(end=end_t, start_delta=start_delta,
                                  zero=True))
    # Display DataFrame info & head (optional - for testing)
    # print("\n** dfs[" + str(i) + "] **")
    # display(dfs[i].info(), dfs[i].head())

In [16]:
# Write to a TSDB table

# Metric labels to write; the i-th label set is written together with the
# i-th DataFrame in `dfs`
labels = [
    {"node": "11", "os": "linux"},
    {"node": "2", "os": "windows"},
    {"node": "11", "os": "windows"},
    {"node": "2", "os": "linux"}
]

# Write the contents of each prepared DataFrame to the TSDB table with a
# separate write command, so that every command can set different label
# values through its `labels` parameter.
num_dfs = len(dfs)
for i, df_metrics in enumerate(dfs):
    client.write("tsdb", table=tsdb_table, dfs=df_metrics, labels=labels[i])

<a id="frames-tsdb-read"></a>
### Read from the TSDB Table

- [Overview and Basic Examples](#frames-tsdb-read-basic)
- [Conditional Read](#frames-tsdb-read-conditional)

<a id="frames-tsdb-read-basic"></a>
#### Overview and Basic Examples

Use the `read` method of the Frames client with the `tsdb` backend to read data from your TSDB table (i.e., query the database).<br>
Note that you cannot mix raw sample-data queries and aggregation queries.

You must set the `table` parameter to the path to the TSDB table.<br>
You can optionally set additional method parameters to configure the query:

- `columns` defines the query metrics (default = all).
- `aggregators` defines aggregation functions ("aggregators") to execute for all the configured metrics.
- `filter` restricts the query by using a platform [filter expression](https://www.iguazio.com/docs/latest-release/reference/expressions/condition-expression/#filter-expression).
- `start` and `end` define the query's time range &mdash; the metric-sample timestamps to which to apply the query.
   The default `end` time is `"now"` and the default `start` time is 1 hour before the end time (`<end> - 1h`).
- `step` defines the interval for aggregation or raw-data downsampling (default = the query's time range).
- `multi_index` can be set to `True` to return labels as index columns, as demonstrated in the following examples.
  By default, only the metric sample-time primary-key attribute is returned as an index column.

See the [Frames API reference](https://www.iguazio.com/docs/latest-release/reference/api-reference/frames/tsdb/read/) for more information about the `read` parameters that are supported for the `tsdb` backend.

In [17]:
# Read all metrics from the TSDB table (start="0"; default `end` time = "now")
# into a single DataFrame (default `iterator`=False) and display the first 8
# items; show metric labels as index columns (multi_index=True)
df = client.read(
    backend="tsdb",
    table=tsdb_table,
    start="0",
    multi_index=True,
)
display(df.head(8))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cpu,disk,mem
time,node,os,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-06-30 20:00:00+00:00,11,linux,33.558443,40.09426,56.346097
2020-07-01 03:18:15.652000+00:00,11,linux,98.888037,3.515425,43.113603
2020-07-01 10:36:31.304000+00:00,11,linux,12.730153,30.178336,42.144769
2020-07-01 17:54:46.956000+00:00,11,linux,56.176174,33.406265,76.411339
2020-07-02 01:13:02.608000+00:00,11,linux,17.137622,75.141189,88.772401
2020-07-02 08:31:18.260000+00:00,11,linux,74.012713,58.211154,7.822386
2020-07-02 15:49:33.913000+00:00,11,linux,6.775565,15.148528,61.897653
2020-07-02 23:07:49.565000+00:00,11,linux,78.837399,45.045562,79.365142


<a id="frames-tsdb-read-conditional"></a>
#### Conditional Read

The following example demonstrates how to use a query filter to conditionally read only a subset of the data from a TSDB table.
This is done by setting the value of the `filter` parameter to a [platform filter expression](https://www.iguazio.com/docs/latest-release/reference/expressions/condition-expression/#filter-expression).

In [18]:
# Query the "count" and "sum" over-time aggregates, using a 1-day aggregation
# step, for only those metric samples in the table whose `os` label is
# "linux" and whose `node` label is "11"; show labels as index columns.
df = client.read(
    backend="tsdb",
    table=tsdb_table,
    aggregators="count,sum",
    step="1d",
    start="0",
    filter="os=='linux' and node=='11'",
    multi_index=True,
)
display(df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count(cpu),count(disk),count(mem),sum(cpu),sum(disk),sum(mem)
time,node,os,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-06-30 00:00:00+00:00,11,linux,1.0,1.0,1.0,33.558443,40.09426,56.346097
2020-07-01 00:00:00+00:00,11,linux,3.0,3.0,3.0,167.794365,67.100026,161.669711
2020-07-02 00:00:00+00:00,11,linux,4.0,4.0,4.0,176.7633,193.546433,237.857582
2020-07-03 00:00:00+00:00,11,linux,3.0,3.0,3.0,141.95509,235.766405,160.15608
2020-07-04 00:00:00+00:00,11,linux,3.0,3.0,3.0,203.714962,109.089064,137.007011
2020-07-05 00:00:00+00:00,11,linux,3.0,3.0,3.0,121.987851,148.014711,183.294034
2020-07-06 00:00:00+00:00,11,linux,4.0,4.0,4.0,225.031656,104.302928,221.771689
2020-07-07 00:00:00+00:00,11,linux,3.0,3.0,3.0,93.777317,114.027732,120.367749


<a id="frames-tsdb-delete"></a>
### Delete the TSDB Table

Use the `delete` method of the Frames client with the `tsdb` backend to delete the TSDB table that was used in the previous steps.

In [16]:
# Delete the TSDB table at the `tsdb_table` path
client.delete(backend="tsdb", table=tsdb_table)

<a id="frames-cleanup"></a>
## Cleanup

You can optionally delete any of the directories or files that you created.
See the instructions in the [Creating and Deleting Container Directories](https://www.iguazio.com/docs/latest-release/tutorials/getting-started/containers/#create-delete-container-dirs) tutorial.
For example, the following code uses a local file-system command to delete the entire **&lt;running user&gt;/examples/** directory in the "users" container.
Edit the path, as needed, then remove the comment mark (`#`) and run the code.

In [17]:
#!rm -rf /User/examples/