# Experiments with timestamps

We will run several experiments:

1. duckdb to duckdb

### 1. duckdb to duckdb

We will start by creating a duckdb table using the TIMESTAMPTZ type, with the session time zone set to America/Los_Angeles.

In [18]:
import duckdb
import pandas as pd

# Build the source table used by every experiment below.
# The session TimeZone is set to America/Los_Angeles before inserting, so the
# two literals (offsets +00:00 and +02:00) are displayed with a -07:00 offset
# when they are read back.
# The `with` block closes the connection even if one of the statements fails.
with duckdb.connect('source.db') as conn:
    # Create a table and insert data.
    # DELETE keeps the cell re-runnable: the table always ends up with the
    # same two rows regardless of earlier runs.
    conn.execute('''
SET TimeZone = 'America/Los_Angeles';
CREATE TABLE IF NOT EXISTS events (
    event_id INTEGER,
    event_tstamp TIMESTAMPTZ
);
DELETE FROM events;
INSERT INTO events (event_id, event_tstamp) VALUES
  (1, '2024-07-30 10:00:00.123+00:00'),
  (2, '2024-07-30 10:00:00.123456+02:00');
''')

    # Fetch the results and load into a Pandas DataFrame
    results = conn.execute('SELECT * FROM events;').fetchdf()
    print(results)

   event_id                     event_tstamp
0         1 2024-07-30 03:00:00.123000-07:00
1         2 2024-07-30 01:00:00.123456-07:00


In [24]:
# Convert the DataFrame into a list of per-row dicts (one dict per event).
# The same conversion is applied to the source DataFrame before it is handed
# to pipeline.run in the cells that follow.
results.to_dict(orient="records")

[{'event_id': 1,
  'event_tstamp': Timestamp('2024-07-30 03:00:00.123000-0700', tz='America/Los_Angeles')},
 {'event_id': 2,
  'event_tstamp': Timestamp('2024-07-30 01:00:00.123456-0700', tz='America/Los_Angeles')}]

#### 1.1 Timezone flag set to NONE

Now we are going to use dlt to load data from the duckdb file source.db into another duckdb instance.

In [54]:
import dlt
import duckdb

# Read the source rows into a DataFrame. The `with` block closes source.db as
# soon as the data is in memory instead of leaving the connection open.
with duckdb.connect('source.db') as conn:
    source_df = conn.execute('SELECT * FROM events;').fetchdf()

pipeline = dlt.pipeline(
    pipeline_name='ducktoduck',
    destination='duckdb',
)

# No column hints are passed here, so the timestamp column type is left to dlt.
# NOTE: no write disposition is given; dlt's documented default is "append",
# so re-running this cell loads the same rows again (the query output further
# down shows each event more than once for that reason).
load_info = pipeline.run(
    source_df.to_dict(orient="records"),
    table_name='events',
)

We will now query the database created in the pipeline `ducktoduck.duckdb`

DuckDB seems to always store timestamps in UTC and only uses the session TimeZone setting to convert timestamps when reading from or writing to the database.

In [55]:
import duckdb
import pandas as pd

# Inspect what the pipeline wrote. The `with` block guarantees the connection
# is closed even if one of the queries raises.
with duckdb.connect('ducktoduck.duckdb') as conn:
    # Read with the session TimeZone set to UTC so the displayed offsets are
    # independent of the zone used when the source table was written.
    results = conn.execute('''
SET TimeZone = 'UTC';
SELECT event_id,event_tstamp FROM ducktoduck_dataset.events;
''').fetchdf()
    print(results)

    # Show the column types of the loaded table.
    describe = conn.execute('DESCRIBE ducktoduck_dataset.events').fetchdf()
    print(describe)

   event_id                     event_tstamp
0         1 2024-07-30 10:00:00.123000+00:00
1         2 2024-07-30 08:00:00.123456+00:00
2         1 2024-07-30 10:00:00.123000+00:00
3         2 2024-07-30 08:00:00.123456+00:00
    column_name               column_type null   key default extra
0      event_id                    BIGINT  YES  None    None  None
1  event_tstamp  TIMESTAMP WITH TIME ZONE  YES  None    None  None
2  _dlt_load_id                   VARCHAR   NO  None    None  None
3       _dlt_id                   VARCHAR   NO  None    None  None


In [56]:
#### 1.2 Timezone flag set to FALSE

In [3]:
import dlt
import duckdb

# Read the source rows into a DataFrame. The `with` block closes source.db as
# soon as the data is in memory instead of leaving the connection open.
with duckdb.connect('source.db') as conn:
    source_df = conn.execute('SELECT * FROM events;').fetchdf()

pipeline = dlt.pipeline(
    pipeline_name='ducktoducktimezoneoff',
    destination='duckdb',
)

# Same load as the previous experiment, but with an explicit column hint:
# event_tstamp is declared as a timestamp with the timezone flag turned off.
# NOTE: no write disposition is given; dlt's documented default is "append",
# so every re-run of this cell loads the same rows again.
load_info = pipeline.run(
    source_df.to_dict(orient="records"),
    table_name='events',
    columns=[
        {"name": "event_tstamp", "data_type": "timestamp", "timezone": False},
    ],
)

In [4]:
import duckdb
import pandas as pd

# Inspect what the timezone-off pipeline wrote. The `with` block guarantees
# the connection is closed even if one of the queries raises.
with duckdb.connect('ducktoducktimezoneoff.duckdb') as conn:
    # Same query as in the previous experiment, again with TimeZone = UTC.
    results = conn.execute('''
SET TimeZone = 'UTC';
SELECT event_id,event_tstamp FROM ducktoducktimezoneoff_dataset.events;
''').fetchdf()
    print(results)

    # Show the column types of the loaded table, to compare against the
    # previous experiment.
    describe = conn.execute('DESCRIBE ducktoducktimezoneoff_dataset.events').fetchdf()
    print(describe)

   event_id            event_tstamp
0         1 2024-07-30 10:00:00.123
1         2 2024-07-30 08:00:00.123
2         1 2024-07-30 10:00:00.123
3         2 2024-07-30 08:00:00.123
4         1 2024-07-30 10:00:00.123
5         2 2024-07-30 08:00:00.123
6         1 2024-07-30 10:00:00.123
7         2 2024-07-30 08:00:00.123
    column_name   column_type null   key default extra
0  event_tstamp  TIMESTAMP_MS  YES  None    None  None
1      event_id        BIGINT  YES  None    None  None
2  _dlt_load_id       VARCHAR   NO  None    None  None
3       _dlt_id       VARCHAR   NO  None    None  None


In [None]:
import dlt

# source
# (nothing is defined here yet in this draft cell)


# destination: Postgres, addressed through an explicit connection string
postgres = dlt.destinations.postgres(
    "postgresql://loader:loader@localhost/dlt_data"
)

# pipeline bound to the Postgres destination created above
pipeline = dlt.pipeline(
    pipeline_name="chess",
    destination=postgres,
)