<a href="https://colab.research.google.com/github/antonum/Timescale-Workshops/blob/main/Management/timescale-metadata.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Query Timescale Metadata

A collection of queries for analyzing Timescale-specific entities such as hypertables and continuous aggregates.

# Setup Timescale Connection

By default, this notebook installs Timescale directly inside the Colab runtime, reachable at the endpoint `"postgres://postgres:password@localhost/postgres"`. You can optionally point it at your own Timescale Cloud instance endpoint instead.

Try Timescale Cloud for free at: https://console.cloud.timescale.com/signup

In [2]:
import os
### Default connection for in-notebook Timescale ###
TS_CONNECTION = "postgres://postgres:password@localhost/postgres"

### Use environment variable ###
#TS_CONNECTION = os.getenv("TS_CONNECTION", "postgres://postgres:password@localhost/postgres")

### Use your own Timescale Cloud instance ###
#TS_CONNECTION = "postgres://tsdbadmin:<password>@<service-host>.tsdb.cloud.timescale.com:39966/tsdb?sslmode=require"

### Use colab secret (optional) ###
# A Colab secret named TS_CONNECTION, when it exists and is shared with this
# notebook, overrides the default above. Outside Colab, or when the secret is
# missing, not shared, or empty, the default is kept instead of failing the cell.
try:
    from google.colab import userdata
except ImportError:
    userdata = None  # not running inside Colab

if userdata is not None:
    try:
        TS_CONNECTION = userdata.get('TS_CONNECTION') or TS_CONNECTION
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        pass  # no usable secret: fall back to the in-notebook default

### Set environment variable to be used in psql CLI ###
os.environ["TS_CONNECTION"] = TS_CONNECTION

In [None]:
#@title Install Timescale
%%bash
# Installs PostgreSQL with TimescaleDB, pgvector, the Timescale toolkit and
# pgvectorscale into the current machine, then sets the postgres password and
# creates the extensions in the default "postgres" database.
set -e # Exit immediately if a command exits with a non-zero status.

# --- Configuration ---
PG_VERSION="17"
PGVECTORSCALE_VERSION="0.7.0"
# Must match the password embedded in the notebook's default TS_CONNECTION URL.
PG_PASSWORD="password" # Consider using a more secure password

echo "--- 1. Installing Prerequisites & Adding Repositories ---"
# Install essential packages quietly
apt-get -qq -y install gnupg postgresql-common apt-transport-https lsb-release wget > /dev/null 2>&1

# Add the official PostgreSQL repository
# The 'yes |' answers confirmation prompts automatically. Output redirected.
yes | /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh > /dev/null 2>&1

# Add the TimescaleDB repository
echo "deb https://packagecloud.io/timescale/timescaledb/ubuntu/ $(lsb_release -c -s) main" | sudo tee /etc/apt/sources.list.d/timescaledb.list > /dev/null
# Add the TimescaleDB GPG key using the recommended method (avoids apt-key add).
# --yes lets gpg overwrite a keyring left by a previous run, so re-running this
# cell does not abort here under 'set -e'.
wget --quiet -O - https://packagecloud.io/timescale/timescaledb/gpgkey | sudo gpg --yes --dearmor -o /etc/apt/trusted.gpg.d/timescaledb.gpg

echo "--- 2. Updating Package List & Installing PostgreSQL + Extensions ---"
# Update package list quietly (should suppress apt-key warnings too)
apt-get -qq update > /dev/null 2>&1

# Install PostgreSQL, TimescaleDB, pgvector, toolkit, and client
apt-get -qq -y install \
  "timescaledb-2-postgresql-${PG_VERSION}" \
  "postgresql-client-${PG_VERSION}" \
  "postgresql-${PG_VERSION}-pgvector" \
  "timescaledb-toolkit-postgresql-${PG_VERSION}" > /dev/null 2>&1

echo "--- 3. Installing pgvectorscale ---"
# Download and install pgvectorscale
wget --quiet "https://github.com/timescale/pgvectorscale/releases/download/${PGVECTORSCALE_VERSION}/pgvectorscale-${PGVECTORSCALE_VERSION}-pg${PG_VERSION}-amd64.zip" -O pgvectorscale.zip
# -q keeps unzip quiet; -o overwrites files left behind by an interrupted earlier
# run instead of waiting on an overwrite prompt that nobody can answer here.
unzip -o -q pgvectorscale.zip
# Install the .deb package quietly
apt-get -qq -y install "./pgvectorscale-postgresql-${PG_VERSION}_${PGVECTORSCALE_VERSION}-Linux_amd64.deb" > /dev/null 2>&1

# Clean up downloaded files
rm pgvectorscale.zip "./pgvectorscale-postgresql-${PG_VERSION}_${PGVECTORSCALE_VERSION}-Linux_amd64.deb"

echo "--- 4. Configuring PostgreSQL & TimescaleDB ---"
# Tune PostgreSQL for TimescaleDB
timescaledb-tune --quiet --yes  > /dev/null 2>&1

# Restart PostgreSQL service to apply changes
service postgresql restart
sleep 2 # Give the service a moment to restart fully

echo "--- 5. Setting Up Database User and Extensions ---"
# Set the password for the default postgres user
sudo -u postgres psql -c "ALTER USER postgres PASSWORD '${PG_PASSWORD}'" > /dev/null

# Connect as the postgres user and create extensions quietly
psql -d "postgres://postgres:${PG_PASSWORD}@localhost/postgres" > /dev/null <<EOF
CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;
CREATE EXTENSION IF NOT EXISTS timescaledb_toolkit CASCADE;
CREATE EXTENSION IF NOT EXISTS vector CASCADE;
CREATE EXTENSION IF NOT EXISTS vectorscale CASCADE;
EOF

echo "--- Installation and Setup Complete ---"



In [None]:
# Optional: Verify extensions are installed
#!psql -d $TS_CONNECTION -c '\dx'

In [3]:
#@title Init psycopg2 connection to Timescale
import pandas as pd
import psycopg2

# establish connection to Timescale
conn = psycopg2.connect(TS_CONNECTION)
cursor = conn.cursor()


# helper function to convert SQL Results to the dataframe
def execute_sql(query, cursor=cursor, params=None):
    """Run one SQL statement and return its result in a notebook-friendly form.

    Args:
        query: SQL text. May contain psycopg2 placeholders (``%s`` or
            ``%(name)s``) when ``params`` is supplied.
        cursor: psycopg2 cursor to execute on. Defaults to the notebook-wide
            cursor created above.
        params: Optional sequence or mapping of values for the placeholders.
            The driver binds them, so values never need to be formatted into
            the SQL string by hand.

    Returns:
        A ``pandas.DataFrame`` when the statement produced a result set, the
        string ``"Rows affected: N"`` when it did not, or ``None`` when the
        database reported an error (the error is printed and the transaction
        is rolled back).
    """
    # Commit/roll back on the connection that owns this cursor, so a cursor
    # passed in from a different connection is finalized correctly as well.
    owning_conn = cursor.connection
    try:
        cursor.execute(query, params)
        owning_conn.commit()
        # Check if query returns data (SELECT)
        if cursor.description:  # If description is not None, query returned data
            columns = [desc[0] for desc in cursor.description]
            data = cursor.fetchall()
            return pd.DataFrame(data, columns=columns)
        # Query was likely INSERT, CREATE TABLE, UPDATE, DELETE, etc.
        return f"Rows affected: {cursor.rowcount}"  # Return the number of rows affected

    except psycopg2.Error as e:
        print(f"Error executing SQL query: {e}")
        owning_conn.rollback()  # Leave the connection usable for the next cell
        return None  # Or raise the exception if you prefer

## Hypertables

In [34]:
# Fetch the full hypertable catalogue; the picker further down is built from df_ht.
query = """
SELECT * from timescaledb_information.hypertables
"""
df_ht = execute_sql(query)
df_ht

Unnamed: 0,hypertable_schema,hypertable_name,owner,num_dimensions,num_chunks,compression_enabled,tablespaces
0,public,test_table,tsdbadmin,1,3,False,
1,public,alter_test,tsdbadmin,1,5,True,
2,v1,vector_ht,tsdbadmin,1,2,True,
3,public,ticks,tsdbadmin,1,0,False,
4,public,cpu,tsdbadmin,1,15,True,
5,weather,observations,tsdbadmin,1,13,True,
6,public,cpu1,tsdbadmin,1,0,True,
7,public,sensor_data,tsdbadmin,1,1,False,
8,public,transactions,tsdbadmin,1,1,True,


In [50]:
#@title Select Hypertable
# prompt: dropdown box allowing select 	hypertable_schema.hypertable_name from the df

import ipywidgets as widgets

# One "schema.table" label per row of df_ht (the hypertable listing queried earlier).
hypertable_options = (df_ht['hypertable_schema'] + "." + df_ht['hypertable_name']).tolist()

# Preselect the first hypertable; leave the picker empty when there are none.
initial_hypertable = next(iter(hypertable_options), None)
hypertable_dropdown = widgets.Dropdown(
    description='Hypertable:',
    options=hypertable_options,
    value=initial_hypertable,
    disabled=False,
)
display(hypertable_dropdown)

# Snapshot of the selection at the moment this cell runs; it does not follow
# later changes made in the widget. The query cells below read
# hypertable_dropdown.value directly for that reason.
selected_hypertable = hypertable_dropdown.value


Dropdown(description='Hypertable:', options=('public.test_table', 'public.alter_test', 'v1.vector_ht', 'public…

### Hypertable size before/after compression

In [66]:
# Read the widget once so both placeholders below carry the same hypertable name.
target_hypertable = hypertable_dropdown.value
query = f"""
SELECT
    '{target_hypertable}' AS hypertable,
    pg_size_pretty(before_compression_total_bytes) AS before_compression,
    pg_size_pretty(after_compression_total_bytes) AS after_compression
FROM hypertable_compression_stats('{target_hypertable}');
"""
execute_sql(query)


Unnamed: 0,hypertable,before_compression,after_compression
0,public.cpu,928 MB,291 MB


### Policies for the hypertable

In [71]:
# The dropdown value has the form "schema.table"; split it once and reuse the parts.
jobs_target_parts = hypertable_dropdown.value.split(".")
jobs_schema, jobs_table = jobs_target_parts[0], jobs_target_parts[1]
query = f"""
SELECT * FROM timescaledb_information.jobs
WHERE
  hypertable_name = '{jobs_table}'
  AND hypertable_schema = '{jobs_schema}';
"""
execute_sql(query)

Unnamed: 0,job_id,application_name,schedule_interval,max_runtime,max_retries,retry_period,proc_schema,proc_name,owner,scheduled,fixed_schedule,config,next_start,initial_start,hypertable_schema,hypertable_name,check_schema,check_name
0,1005,Compression Policy [1005],0 days 12:00:00,0 days,-1,0 days 01:00:00,_timescaledb_functions,policy_compression,tsdbadmin,True,False,"{'hypertable_id': 15, 'compress_after': '1 mon'}",2025-04-23 16:38:55.578276+00:00,,weather,observations,_timescaledb_functions,policy_compression_check


### Hypertable Chunks

In [63]:
# The dropdown value has the form "schema.table"; split it once and reuse the parts.
chunks_target_parts = hypertable_dropdown.value.split(".")
chunks_schema, chunks_table = chunks_target_parts[0], chunks_target_parts[1]
query = f"""
SELECT * FROM timescaledb_information.chunks
WHERE
hypertable_name = '{chunks_table}'
AND hypertable_schema = '{chunks_schema}';
"""
execute_sql(query)

Unnamed: 0,hypertable_schema,hypertable_name,chunk_schema,chunk_name,primary_dimension,primary_dimension_type,range_start,range_end,range_start_integer,range_end_integer,is_compressed,chunk_tablespace,chunk_creation_time
0,weather,observations,_timescaledb_internal,_hyper_15_92_chunk,date,date,2024-05-17 00:00:00+00:00,2024-06-16 00:00:00+00:00,,,True,,2025-02-11 16:01:07.062051+00:00
1,weather,observations,_timescaledb_internal,_hyper_15_93_chunk,date,date,2024-06-16 00:00:00+00:00,2024-07-16 00:00:00+00:00,,,True,,2025-02-11 16:01:07.068429+00:00
2,weather,observations,_timescaledb_internal,_hyper_15_94_chunk,date,date,2024-01-18 00:00:00+00:00,2024-02-17 00:00:00+00:00,,,True,,2025-02-11 16:01:07.070860+00:00
3,weather,observations,_timescaledb_internal,_hyper_15_95_chunk,date,date,2024-11-13 00:00:00+00:00,2024-12-13 00:00:00+00:00,,,True,,2025-02-11 16:01:07.072299+00:00
4,weather,observations,_timescaledb_internal,_hyper_15_96_chunk,date,date,2024-08-15 00:00:00+00:00,2024-09-14 00:00:00+00:00,,,True,,2025-02-11 16:01:07.074522+00:00
5,weather,observations,_timescaledb_internal,_hyper_15_97_chunk,date,date,2024-12-13 00:00:00+00:00,2025-01-12 00:00:00+00:00,,,True,,2025-02-11 16:01:07.075679+00:00
6,weather,observations,_timescaledb_internal,_hyper_15_98_chunk,date,date,2024-02-17 00:00:00+00:00,2024-03-18 00:00:00+00:00,,,True,,2025-02-11 16:01:07.077175+00:00
7,weather,observations,_timescaledb_internal,_hyper_15_99_chunk,date,date,2023-12-19 00:00:00+00:00,2024-01-18 00:00:00+00:00,,,True,,2025-02-11 16:01:07.078772+00:00
8,weather,observations,_timescaledb_internal,_hyper_15_100_chunk,date,date,2024-04-17 00:00:00+00:00,2024-05-17 00:00:00+00:00,,,True,,2025-02-11 16:01:07.080361+00:00
9,weather,observations,_timescaledb_internal,_hyper_15_101_chunk,date,date,2024-09-14 00:00:00+00:00,2024-10-14 00:00:00+00:00,,,True,,2025-02-11 16:01:07.081741+00:00


## Continuous Aggregates

In [61]:
# Fetch every continuous aggregate; the picker in the next cell is built from df_cagg.
# Plain string: the statement has no placeholders to interpolate.
query = """
SELECT * FROM timescaledb_information.continuous_aggregates;
"""
df_cagg = execute_sql(query)
df_cagg

Unnamed: 0,hypertable_schema,hypertable_name,view_schema,view_name,view_owner,materialized_only,compression_enabled,materialization_hypertable_schema,materialization_hypertable_name,view_definition,finalized
0,public,cpu,public,cpu_1h,tsdbadmin,True,False,_timescaledb_internal,_materialized_hypertable_3,"SELECT time_bucket('01:00:00'::interval, ""tim...",True
1,public,test_table,public,test_table_daily,tsdbadmin,True,False,_timescaledb_internal,_materialized_hypertable_27,"SELECT time_bucket('1 day'::interval, time_ts...",True


In [62]:
#@title Select Continuous Aggregate
# One "schema.view" label per row of df_cagg (the continuous-aggregate listing above).
cagg_options = (df_cagg['view_schema'] + "." + df_cagg['view_name']).tolist()

# Create a dropdown widget
cagg_dropdown = widgets.Dropdown(
    options=cagg_options,
    value=cagg_options[0] if cagg_options else None,  # Set the first option as default
    description='Continuous Aggregate:',
    # The default description column is too narrow for this label and clips it;
    # 'initial' lets the label take the width it needs.
    style={'description_width': 'initial'},
    disabled=False,
)

# Display the dropdown
display(cagg_dropdown)

# Snapshot of the selection at the moment this cell runs; read
# cagg_dropdown.value directly to pick up later changes made in the widget.
selected_cagg = cagg_dropdown.value

Dropdown(description='Continuos Aggregate:', options=('public.cpu_1h', 'public.test_table_daily'), value='publ…