# Personio demo

## Installation
Install dlt using pip:

In [1]:
! pip install -q "dlt[duckdb]"
! dlt --version

dlt 0.3.25


Install additional dependencies for this demo:

In [15]:
!pip install -q pandas

## Initialize the verified source
This command will initialize the pipeline example with Personio as the source and `duckdb` as the destination:

In [2]:
# Initialize the `personio` verified source with `duckdb` as the destination.
# NOTE(review): `--non-interactive` presumably suppresses confirmation prompts
# so the cell can run unattended — confirm against the dlt CLI docs.
! dlt --non-interactive init personio duckdb

Looking up the init scripts in https://github.com/dlt-hub/verified-sources.git...
No files to update, exiting


## Add credentials
Set your credentials in the `.dlt/secrets.toml` file:

```toml
[sources.personio]
client_id = "client_id" # please set me up!
client_secret = "client_secret" # please set me up!
```

## Create the pipeline

Define the pipeline with `dlt.pipeline()` and look at which resources are available for `personio_source`:

In [3]:
"""Pipeline to load personio data into Duckdb."""
import dlt
from personio import personio_source

# Configure a pipeline named "personio" that loads into DuckDB and stores its
# tables in the "personio_data" dataset.
pipeline = dlt.pipeline(
    pipeline_name="personio",
    destination="duckdb",
    dataset_name="personio_data",
)

# Instantiate the Personio source with its default arguments.
personio_data = personio_source()

# Last expression of the cell: show the names of the resources the source exposes.
personio_data.resources.keys()

dict_keys(['employees', 'absence_types', 'absences', 'attendances', 'projects', 'document_categories', 'employees_absences_balance', 'custom_reports_list', 'custom_reports'])

Let's load only "employees", "absences" and "attendances" resources:

In [4]:
# Names of the resources to load in this run.
resources = ["employees", "absences", "attendances"]

# Narrow the source down to the selected resources before running the pipeline.
selected_source = personio_data.with_resources(*resources)

# Run the load and print the resulting load summary.
load_info = pipeline.run(selected_source)
print(load_info)

Pipeline personio completed in 38.59 seconds
1 load package(s) were loaded to destination duckdb and into dataset personio_data
The duckdb destination used duckdb:////home/alenaastrakhantseva/dlthub/dlt_demos/personio.duckdb location to store data
Load package 1702567952.01384 is LOADED and contains no failed jobs


In [5]:
import duckdb

# The pipeline run created a DuckDB file named after the pipeline
# ("<pipeline_name>.duckdb") in the working directory; connect to it directly.
database_file = f"{pipeline.pipeline_name}.duckdb"
conn = duckdb.connect(database_file)

# Put the pipeline's dataset on the search path so tables can be queried
# without a schema prefix.
search_path_statement = f"SET search_path = '{pipeline.dataset_name}'"
conn.sql(search_path_statement)

# List all tables.
tables_overview = conn.sql("DESCRIBE")
display(tables_overview)

┌──────────┬──────────────────────┬──────────────────────┬──────────────────────┬──────────────────────────┬───────────┐
│ database │        schema        │         name         │     column_names     │       column_types       │ temporary │
│ varchar  │       varchar        │       varchar        │      varchar[]       │        varchar[]         │  boolean  │
├──────────┼──────────────────────┼──────────────────────┼──────────────────────┼──────────────────────────┼───────────┤
│ personio │ personio_data        │ _dlt_loads           │ [load_id, schema_n…  │ [VARCHAR, VARCHAR, BIG…  │ false     │
│ personio │ personio_data        │ _dlt_pipeline_state  │ [version, engine_v…  │ [BIGINT, BIGINT, VARCH…  │ false     │
│ personio │ personio_data        │ _dlt_version         │ [version, engine_v…  │ [BIGINT, BIGINT, TIMES…  │ false     │
│ personio │ personio_data        │ absences             │ [id, status, comme…  │ [BIGINT, VARCHAR, VARC…  │ false     │
│ personio │ personio_data      

In [9]:
# Query every row of the `absences` table, convert the result to a DataFrame
# and render it.
absences_relation = conn.sql("SELECT * FROM absences")
stats_table = absences_relation.df()
display(stats_table)

Unnamed: 0,id,status,comment,start_date,end_date,days_count,half_day_start,half_day_end,time_off_type__type,time_off_type__attributes__id,...,employee__attributes__email__value,employee__attributes__email__type,employee__attributes__email__universal_id,created_by,certificate__status,created_at,updated_at,_dlt_load_id,_dlt_id,days_count__v_double
0,590798986,approved,,2020-05-07 00:00:00+02:00,2020-05-14 00:00:00+02:00,6.0,0,0,TimeOffType,2526184,...,anibal.rubi@demo-sample.com,standard,email,,not-required,2020-05-19 13:59:43+02:00,2022-11-09 10:34:43+01:00,1702567952.01384,IZHxe+tnBt8BVg,
1,590799031,approved,,2020-10-05 00:00:00+02:00,2020-10-09 00:00:00+02:00,5.0,0,0,TimeOffType,2526184,...,leonard.ennis@demo-sample.com,standard,email,,not-required,2020-05-19 13:59:53+02:00,2022-11-09 10:34:43+01:00,1702567952.01384,Rcbm+wz67uDM2w,
2,590799032,approved,,2020-12-15 00:00:00+01:00,2020-12-18 00:00:00+01:00,4.0,0,0,TimeOffType,2526184,...,leonard.ennis@demo-sample.com,standard,email,,not-required,2020-05-19 13:59:53+02:00,2022-11-09 10:34:43+01:00,1702567952.01384,tecsUhu6MxC75A,
3,590799042,approved,,2020-03-02 00:00:00+01:00,2020-03-15 00:00:00+01:00,10.0,0,0,TimeOffType,2526184,...,max.schmiedel@demo-sample.com,standard,email,,not-required,2020-05-19 13:59:56+02:00,2022-11-09 10:34:43+01:00,1702567952.01384,gOZxs6IwQy90GA,
4,590799052,approved,,2020-04-06 00:00:00+02:00,2020-04-08 00:00:00+02:00,3.0,0,0,TimeOffType,2526184,...,rene.storch@demo-sample.com,standard,email,,not-required,2020-05-19 13:59:58+02:00,2022-11-09 10:34:43+01:00,1702567952.01384,zH79HlUndH3tJw,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1097,590800092,approved,,2023-02-03 00:00:00+01:00,2023-02-03 00:00:00+01:00,1.0,0,0,TimeOffType,2526183,...,susan.neu@demo-sample.com,standard,email,,not-required,2023-03-30 19:36:54+02:00,2023-03-30 19:36:54+02:00,1702567952.01384,c/vFwZr9s+z4Lw,
1098,590800100,approved,,2023-02-03 00:00:00+01:00,2023-02-03 00:00:00+01:00,1.0,0,0,TimeOffType,2526182,...,cristi.widmer@demo-sample.com,standard,email,,not-required,2023-03-30 19:36:55+02:00,2023-03-30 19:36:55+02:00,1702567952.01384,yyX1GC30W0ztAQ,
1099,590800113,approved,,2023-05-16 00:00:00+02:00,2023-05-16 00:00:00+02:00,1.0,0,0,TimeOffType,2526183,...,adrien.soza@demo-sample.com,standard,email,,not-required,2023-07-03 13:29:10+02:00,2023-07-03 13:29:10+02:00,1702567952.01384,cyCuZAu/N9GErw,
1100,590800132,approved,,2023-04-11 00:00:00+02:00,2023-04-11 00:00:00+02:00,1.0,0,0,TimeOffType,2526183,...,laura.erdmann@demo-sample.com,standard,email,,not-required,2023-07-03 13:29:15+02:00,2023-07-03 13:29:15+02:00,1702567952.01384,P+pLBR+6BMJksQ,
