<a href="https://colab.research.google.com/github/antonum/Timescale-Workshops/blob/main/Management/timescale-metadata.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Query Timescale Metadata

A collection of queries to analyze Timescale-specific entities such as hypertables and continuous aggregates.

Documentation for APIs used in this notebook: https://docs.timescale.com/api/latest/

# Setup Timescale Connection

By default, this notebook installs Timescale directly inside the Colab runtime and connects to it at `"postgres://postgres:password@localhost/postgres"`. You can optionally point it at your own Timescale Cloud instance instead.

Try Timescale Cloud for free at: https://console.cloud.timescale.com/signup

In [21]:
import os

# Resolve the connection string used by the rest of the notebook.
# Order of precedence: Colab secret (if present) > in-notebook default.

### Default connection for in-notebook Timescale ###
TS_CONNECTION = "postgres://postgres:password@localhost/postgres"

### Use environment variable ###
#TS_CONNECTION = os.getenv("TS_CONNECTION", "postgres://postgres:password@localhost/postgres")

### Use your own Timescale Cloud instance ###
#TS_CONNECTION="postgres://tsdbadmin:xxxxxxx@yyyyy.tsdb.cloud.timescale.com:39966/tsdb?sslmode=require"

### Use colab secret (optional) ###
# The secret only overrides the default when it can actually be read, so the
# notebook still works outside Colab and in Colab sessions without the secret.
try:
    from google.colab import userdata
except ImportError:
    userdata = None  # not running inside Colab

if userdata is not None:
    try:
        # Change the secret name here if yours is stored under a different key (e.g. 'TS_DEMO').
        _colab_secret = userdata.get('TS_CONNECTION_DEMO')
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        # Secret is not defined, or this notebook has not been granted access to it.
        _colab_secret = None
    if _colab_secret:
        TS_CONNECTION = _colab_secret

### Set environment variable to be used in psql CLI ###
# os.environ only accepts strings; TS_CONNECTION is guaranteed to be one here.
os.environ["TS_CONNECTION"] = TS_CONNECTION

In [None]:
%%bash
#@title Install Timescale
# NOTE: the %%bash cell magic has to be the very first line of the cell;
# IPython only recognises a cell magic at the start of the cell.
#
# Installs PostgreSQL with TimescaleDB, pgvector, the Timescale toolkit and
# pgvectorscale into the current runtime, then creates the extensions in the
# default "postgres" database.
set -e # Exit immediately if a command exits with a non-zero status.
# (pipefail is intentionally not enabled: `yes | ...` below ends with SIGPIPE on `yes`.)

# --- Configuration ---
PG_VERSION="17"
PGVECTORSCALE_VERSION="0.7.0"
PG_PASSWORD="password" # Consider using a more secure password

echo "--- 1. Installing Prerequisites & Adding Repositories ---"
# Install essential packages quietly
apt-get -qq -y install gnupg postgresql-common apt-transport-https lsb-release wget > /dev/null 2>&1

# Add the official PostgreSQL repository
# The 'yes |' answers confirmation prompts automatically. Output redirected.
yes | /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh > /dev/null 2>&1

# Add the TimescaleDB repository
echo "deb https://packagecloud.io/timescale/timescaledb/ubuntu/ $(lsb_release -c -s) main" | sudo tee /etc/apt/sources.list.d/timescaledb.list > /dev/null
# Add the TimescaleDB GPG key using the recommended method (avoids apt-key add).
# --yes lets gpg overwrite an existing key file, so re-running this cell does not fail.
wget --quiet -O - https://packagecloud.io/timescale/timescaledb/gpgkey | sudo gpg --yes --dearmor -o /etc/apt/trusted.gpg.d/timescaledb.gpg

echo "--- 2. Updating Package List & Installing PostgreSQL + Extensions ---"
# Update package list quietly (should suppress apt-key warnings too)
apt-get -qq update > /dev/null 2>&1

# Install PostgreSQL, TimescaleDB, pgvector, toolkit, and client
apt-get -qq -y install \
  "timescaledb-2-postgresql-${PG_VERSION}" \
  "postgresql-client-${PG_VERSION}" \
  "postgresql-${PG_VERSION}-pgvector" \
  "timescaledb-toolkit-postgresql-${PG_VERSION}" > /dev/null 2>&1

echo "--- 3. Installing pgvectorscale ---"
# Download and install pgvectorscale
wget --quiet "https://github.com/timescale/pgvectorscale/releases/download/${PGVECTORSCALE_VERSION}/pgvectorscale-${PGVECTORSCALE_VERSION}-pg${PG_VERSION}-amd64.zip" -O pgvectorscale.zip
unzip -q pgvectorscale.zip # Use -q for quiet unzip
# Install the .deb package quietly
apt-get -qq -y install "./pgvectorscale-postgresql-${PG_VERSION}_${PGVECTORSCALE_VERSION}-Linux_amd64.deb" > /dev/null 2>&1

# Clean up downloaded files
rm pgvectorscale.zip "./pgvectorscale-postgresql-${PG_VERSION}_${PGVECTORSCALE_VERSION}-Linux_amd64.deb"

echo "--- 4. Configuring PostgreSQL & TimescaleDB ---"
# Tune PostgreSQL for TimescaleDB
timescaledb-tune --quiet --yes  > /dev/null 2>&1

# Restart PostgreSQL service to apply changes
service postgresql restart
sleep 2 # Give the service a moment to restart fully

echo "--- 5. Setting Up Database User and Extensions ---"
# Set the password for the default postgres user
sudo -u postgres psql -c "ALTER USER postgres PASSWORD '${PG_PASSWORD}'" > /dev/null

# Connect as the postgres user and create extensions quietly.
# ON_ERROR_STOP makes psql return a non-zero status when a statement fails, so
# `set -e` aborts the cell instead of reporting success with a missing extension.
psql -v ON_ERROR_STOP=1 -d "postgres://postgres:${PG_PASSWORD}@localhost/postgres" > /dev/null <<EOF
CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;
CREATE EXTENSION IF NOT EXISTS timescaledb_toolkit CASCADE;
CREATE EXTENSION IF NOT EXISTS vector CASCADE;
CREATE EXTENSION IF NOT EXISTS vectorscale CASCADE;
EOF

echo "--- Installation and Setup Complete ---"



In [None]:
# Optional: Verify extensions are installed
#!psql -d $TS_CONNECTION -c '\dx'

In [22]:
#@title Init psycopg2 connection to Timescale
import pandas as pd
import psycopg2

# establish connection to Timescale
conn = psycopg2.connect(TS_CONNECTION)
cursor = conn.cursor()

# helper function to convert SQL Results to the dataframe
def execute_sql(query, cursor=cursor, params=None):
    """Execute a SQL statement and return its result in a notebook-friendly form.

    Args:
        query: SQL text to run. May contain psycopg2 placeholders (``%s`` or
            ``%(name)s``) when ``params`` is supplied.
        cursor: psycopg2 cursor to execute on. The default is the module-level
            cursor as it existed when this function was defined.
        params: Optional sequence or mapping of values bound to the
            placeholders by psycopg2. When ``None`` the query is sent as-is,
            exactly as if no parameters had been given.

    Returns:
        pandas.DataFrame: for statements that produce a result set.
        str: ``"Rows affected: N"`` for statements without a result set.
        None: when psycopg2 raises an error; the error is printed and the
            transaction on ``conn`` is rolled back.

    Note:
        Commit and rollback are always issued on the module-level ``conn``,
        regardless of which cursor is passed in.
    """
    try:
        cursor.execute(query, params)
        conn.commit()
        # A non-None description means the statement returned a result set (e.g. SELECT).
        if cursor.description:
            columns = [desc[0] for desc in cursor.description]
            data = cursor.fetchall()
            return pd.DataFrame(data, columns=columns)
        # No result set: INSERT, CREATE TABLE, UPDATE, DELETE, etc.
        return f"Rows affected: {cursor.rowcount}"

    except psycopg2.Error as e:
        print(f"Error executing SQL query: {e}")
        # Roll back so the connection is usable for the next statement.
        conn.rollback()
        return None  # Or raise the exception if you prefer

## Hypertables

In [23]:
# List every hypertable known to Timescale; df_ht is reused below to populate
# the hypertable selection dropdown.
query = (
    "\n"
    "SELECT * from timescaledb_information.hypertables\n"
)
df_ht = execute_sql(query)
df_ht

Unnamed: 0,hypertable_schema,hypertable_name,owner,num_dimensions,num_chunks,compression_enabled,tablespaces
0,public,test_table,tsdbadmin,1,3,False,
1,public,crypto_transactions,tsdbadmin,1,1,True,
2,public,ev_charger_telemetry_demo,tsdbadmin,1,0,True,
3,public,vector_th,tsdbadmin,1,2,False,
4,public,ev_charger_telemetry,tsdbadmin,1,579,True,
5,public,og_production_data,tsdbadmin,1,49,True,
6,public,ticks,tsdbadmin,1,0,False,
7,public,fin_stocks_real_time,tsdbadmin,1,5,True,
8,public,nyc_rides,tsdbadmin,3,22,True,
9,public,transactions,tsdbadmin,1,1,False,


In [24]:
#@title Select Hypertable
import ipywidgets as widgets

# Build one "schema.table" label per row of the hypertable listing in df_ht.
hypertable_options = (
    df_ht['hypertable_schema']
    .str.cat(df_ht['hypertable_name'], sep=".")
    .tolist()
)

# Dropdown pre-selected on the first hypertable (or nothing when the list is empty).
hypertable_dropdown = widgets.Dropdown(
    description='Hypertable:',
    options=hypertable_options,
    value=next(iter(hypertable_options), None),
    disabled=False,
)
display(hypertable_dropdown)

# Snapshot of the selection at the moment this cell runs; the query cells
# below read hypertable_dropdown.value directly to pick up later changes.
selected_hypertable = hypertable_dropdown.value



Dropdown(description='Hypertable:', options=('public.test_table', 'public.crypto_transactions', 'public.ev_cha…

### Hypertable size before/after compression

In [25]:
# Read the dropdown once so the label column and the function argument
# are guaranteed to refer to the same hypertable.
target_hypertable = hypertable_dropdown.value
query = f"""
SELECT
    '{target_hypertable}' AS hypertable,
    pg_size_pretty(before_compression_total_bytes) AS before_compression,
    pg_size_pretty(after_compression_total_bytes) AS after_compression
FROM hypertable_compression_stats('{target_hypertable}');
"""
execute_sql(query)


Unnamed: 0,hypertable,before_compression,after_compression
0,public.ev_charger_telemetry,230 GB,60 GB


### Policies for the hypertable

In [26]:
# The dropdown value has the form "schema.table"; index 0 is the schema and
# index 1 the table name, each matched against its own column.
ht_parts = hypertable_dropdown.value.split(".")
query = f"""
SELECT * FROM timescaledb_information.jobs
WHERE
  hypertable_name = '{ht_parts[1]}'
  AND hypertable_schema = '{ht_parts[0]}';
"""
execute_sql(query)

Unnamed: 0,job_id,application_name,schedule_interval,max_runtime,max_retries,retry_period,proc_schema,proc_name,owner,scheduled,fixed_schedule,config,next_start,initial_start,hypertable_schema,hypertable_name,check_schema,check_name
0,1055,Compression Policy [1055],0 days 12:00:00,0 days,-1,0 days 01:00:00,_timescaledb_functions,policy_compression,tsdbadmin,True,False,"{'hypertable_id': 128, 'compress_after': '441 ...",2025-04-24 02:30:52.227331+00:00,NaT,public,ev_charger_telemetry,_timescaledb_functions,policy_compression_check
1,1056,User-Defined Action [1056],0 days 01:00:00,0 days,-1,0 days 00:05:00,public,policy_movechunk_to_s3,tsdbadmin,True,True,"{'move_after': '630 days', 'hypertable_id': 128}",2025-04-23 22:22:27.131223+00:00,2025-03-19 05:22:27.131223+00:00,public,ev_charger_telemetry,,


### Hypertable Chunks

In [27]:
# Split the "schema.table" selection once; index 0 is the schema, index 1 the table.
chunk_filter_parts = hypertable_dropdown.value.split(".")
query = f"""
SELECT * FROM timescaledb_information.chunks
WHERE
hypertable_name = '{chunk_filter_parts[1]}'
AND hypertable_schema = '{chunk_filter_parts[0]}'
  ORDER BY chunk_name;
"""
execute_sql(query)

Unnamed: 0,hypertable_schema,hypertable_name,chunk_schema,chunk_name,primary_dimension,primary_dimension_type,range_start,range_end,range_start_integer,range_end_integer,is_compressed,chunk_tablespace,chunk_creation_time
0,public,ev_charger_telemetry,_timescaledb_internal,_hyper_128_23885_chunk,measurement_timestamp,timestamp with time zone,2023-08-02 00:00:00+00:00,2023-08-03 00:00:00+00:00,,,True,,2025-03-12 20:53:51.305932+00:00
1,public,ev_charger_telemetry,_timescaledb_internal,_hyper_128_23887_chunk,measurement_timestamp,timestamp with time zone,2023-08-03 00:00:00+00:00,2023-08-04 00:00:00+00:00,,,True,,2025-03-12 20:53:53.081590+00:00
2,public,ev_charger_telemetry,_timescaledb_internal,_hyper_128_23888_chunk,measurement_timestamp,timestamp with time zone,2023-08-04 00:00:00+00:00,2023-08-05 00:00:00+00:00,,,True,,2025-03-12 20:53:53.482059+00:00
3,public,ev_charger_telemetry,_timescaledb_internal,_hyper_128_23890_chunk,measurement_timestamp,timestamp with time zone,2023-08-05 00:00:00+00:00,2023-08-06 00:00:00+00:00,,,True,,2025-03-12 20:53:55.387776+00:00
4,public,ev_charger_telemetry,_timescaledb_internal,_hyper_128_23891_chunk,measurement_timestamp,timestamp with time zone,2023-08-06 00:00:00+00:00,2023-08-07 00:00:00+00:00,,,True,,2025-03-12 20:54:12.030313+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
574,public,ev_charger_telemetry,_timescaledb_internal,_hyper_128_24468_chunk,measurement_timestamp,timestamp with time zone,2025-02-24 00:00:00+00:00,2025-02-25 00:00:00+00:00,,,False,,2025-03-14 00:13:20.562011+00:00
575,public,ev_charger_telemetry,_timescaledb_internal,_hyper_128_24469_chunk,measurement_timestamp,timestamp with time zone,2025-02-25 00:00:00+00:00,2025-02-26 00:00:00+00:00,,,False,,2025-03-14 00:13:21.226618+00:00
576,public,ev_charger_telemetry,_timescaledb_internal,_hyper_128_24470_chunk,measurement_timestamp,timestamp with time zone,2025-02-28 00:00:00+00:00,2025-03-01 00:00:00+00:00,,,False,,2025-03-14 00:13:21.960517+00:00
577,public,ev_charger_telemetry,_timescaledb_internal,_hyper_128_24471_chunk,measurement_timestamp,timestamp with time zone,2025-02-27 00:00:00+00:00,2025-02-28 00:00:00+00:00,,,False,,2025-03-14 00:13:22.019490+00:00


In [28]:
# Per-chunk size breakdown for the hypertable currently selected in the dropdown.
CHUNK_SIZE_SQL = """
SELECT * FROM chunks_detailed_size('{hypertable}')
  ORDER BY chunk_name;
"""
query = CHUNK_SIZE_SQL.format(hypertable=hypertable_dropdown.value)
execute_sql(query)


Unnamed: 0,chunk_schema,chunk_name,table_bytes,index_bytes,toast_bytes,total_bytes,node_name
0,_timescaledb_internal,_hyper_128_23885_chunk,712704,147456,180666368,181526528,
1,_timescaledb_internal,_hyper_128_23887_chunk,720896,147456,185114624,185982976,
2,_timescaledb_internal,_hyper_128_23888_chunk,696320,147456,177651712,178495488,
3,_timescaledb_internal,_hyper_128_23890_chunk,737280,155648,186744832,187637760,
4,_timescaledb_internal,_hyper_128_23891_chunk,770048,155648,196575232,197500928,
...,...,...,...,...,...,...,...
574,_timescaledb_internal,_hyper_128_24468_chunk,338329600,365420544,8192,703758336,
575,_timescaledb_internal,_hyper_128_24469_chunk,346365952,374022144,8192,720396288,
576,_timescaledb_internal,_hyper_128_24470_chunk,364085248,399876096,8192,763969536,
577,_timescaledb_internal,_hyper_128_24471_chunk,364560384,390815744,8192,755384320,


## Continuous Aggregates

In [29]:
# List every continuous aggregate; df_cagg is reused below to populate the
# continuous-aggregate selection dropdown.
query = (
    "\n"
    "SELECT * FROM timescaledb_information.continuous_aggregates;\n"
)
df_cagg = execute_sql(query)
df_cagg

Unnamed: 0,hypertable_schema,hypertable_name,view_schema,view_name,view_owner,materialized_only,compression_enabled,materialization_hypertable_schema,materialization_hypertable_name,view_definition,finalized
0,public,nyc_rides,public,nyc_ride_stats_by_hour,tsdbadmin,True,False,_timescaledb_internal,_materialized_hypertable_3,"SELECT time_bucket('01:00:00'::interval, pick...",True
1,public,fin_stocks_real_time,public,fin_one_day_candle,tsdbadmin,True,False,_timescaledb_internal,_materialized_hypertable_5,"SELECT time_bucket('1 day'::interval, ""time"")...",True
2,public,energy_metrics,public,energy_kwh_day_by_day,tsdbadmin,True,False,_timescaledb_internal,_materialized_hypertable_8,"SELECT time_bucket('1 day'::interval, created...",True
3,public,energy_metrics,public,energy_kwh_hour_by_hour,tsdbadmin,True,False,_timescaledb_internal,_materialized_hypertable_9,"SELECT time_bucket('01:00:00'::interval, crea...",True
4,public,crypto_transactions,public,crypto_one_hour_transactions,tsdbadmin,True,False,_timescaledb_internal,_materialized_hypertable_13,"SELECT time_bucket('01:00:00'::interval, ""tim...",True
5,public,crypto_transactions,public,crypto_one_hour_blocks,tsdbadmin,True,False,_timescaledb_internal,_materialized_hypertable_14,"SELECT time_bucket('01:00:00'::interval, ""tim...",True
6,public,crypto_transactions,public,crypto_one_hour_coinbase,tsdbadmin,True,False,_timescaledb_internal,_materialized_hypertable_15,"SELECT time_bucket('01:00:00'::interval, ""tim...",True
7,public,og_production_data,public,og_production_data_monthly,tsdbadmin,True,False,_timescaledb_internal,_materialized_hypertable_42,"SELECT time_bucket('1 mon'::interval, ""timest...",True
8,public,nyc_rides,public,nyc_payment_type_ride_stats_by_hour,tsdbadmin,True,True,_timescaledb_internal,_materialized_hypertable_55,"SELECT time_bucket('01:00:00'::interval, pick...",True
9,public,ev_charger_telemetry,public,charging_summary_daily,tsdbadmin,False,False,_timescaledb_internal,_materialized_hypertable_131,"SELECT time_bucket('1 day'::interval, measure...",True


In [30]:
#@title Select Continuous Aggregate
# Build one "schema.view" label per row of the continuous-aggregate listing in df_cagg.
cagg_options = (df_cagg['view_schema'] + "." + df_cagg['view_name']).tolist()

# Create a dropdown widget
cagg_dropdown = widgets.Dropdown(
    options=cagg_options,
    value=cagg_options[0] if cagg_options else None,  # Set the first option as default
    description='Continuous Aggregate:',
    # Let the label use its natural width so the long description is not cut off.
    style={'description_width': 'initial'},
    disabled=False,
)

# Display the dropdown
display(cagg_dropdown)

# Snapshot of the selection at the moment this cell runs; the policy query
# below reads cagg_dropdown.value directly to pick up later changes.
selected_cagg = cagg_dropdown.value

Dropdown(description='Continuos Aggregate:', options=('public.nyc_ride_stats_by_hour', 'public.fin_one_day_can…

### Show CAGG Policies

In [31]:
# Show the policies attached to the continuous aggregate selected in the dropdown.
SHOW_POLICIES_SQL = """
SELECT timescaledb_experimental.show_policies('{cagg}');
"""
query = SHOW_POLICIES_SQL.format(cagg=cagg_dropdown.value)
execute_sql(query)


Unnamed: 0,show_policies
0,{'policy_name': 'policy_refresh_continuous_agg...
