In [1]:
import dlt
from dlt.sources.helpers.rest_client import RESTClient
from dlt.sources.helpers.rest_client.auth import BearerTokenAuth
from dlt.sources.helpers.rest_client.paginators import HeaderLinkPaginator

# Module-level accumulator: the loop further down in this cell appends every
# page yielded by paginated_getter() to this list.
events_data = []

@dlt.resource(name="events")
def paginated_getter(access_token=dlt.secrets.value):
    """Yield pages from the ``events`` endpoint of the zoomcamp GitHub repository.

    Args:
        access_token: Token passed to ``BearerTokenAuth``. Defaults to
            ``dlt.secrets.value``.

    Yields:
        Each page produced by ``RESTClient.paginate("events")``, unchanged.
    """
    github_client = RESTClient(
        base_url="https://api.github.com/repos/DataTalksClub/data-engineering-zoomcamp/",
        auth=BearerTokenAuth(token=access_token),
        # Pagination is delegated to the header-link paginator, keyed on "next".
        paginator=HeaderLinkPaginator(links_next_key="next"),
    )

    # Hand every page straight through to the caller.
    yield from github_client.paginate("events")


# Iterate the resource directly (outside of a pipeline run): keep each yielded
# page in events_data and echo it to the cell output for inspection.
for page_data in paginated_getter():
    events_data.append(page_data)
    print(page_data)

  import pkg_resources


{'id': '51755277396', 'type': 'WatchEvent', 'actor': {'id': 57673634, 'login': 'mohamed33389iojoj', 'display_login': 'mohamed33389iojoj', 'gravatar_id': '', 'url': 'https://api.github.com/users/mohamed33389iojoj', 'avatar_url': 'https://avatars.githubusercontent.com/u/57673634?'}, 'repo': {'id': 419661684, 'name': 'DataTalksClub/data-engineering-zoomcamp', 'url': 'https://api.github.com/repos/DataTalksClub/data-engineering-zoomcamp'}, 'payload': {'action': 'started'}, 'public': True, 'created_at': '2025-07-07T03:41:13Z', 'org': {'id': 72699292, 'login': 'DataTalksClub', 'gravatar_id': '', 'url': 'https://api.github.com/orgs/DataTalksClub', 'avatar_url': 'https://avatars.githubusercontent.com/u/72699292?'}}
{'id': '51749081250', 'type': 'WatchEvent', 'actor': {'id': 76108764, 'login': 'Khushi-vyas', 'display_login': 'Khushi-vyas', 'gravatar_id': '', 'url': 'https://api.github.com/users/Khushi-vyas', 'avatar_url': 'https://avatars.githubusercontent.com/u/76108764?'}, 'repo': {'id': 41966

In [2]:
import dlt

# Pipeline "github_data" targets DuckDB and writes into the "events" dataset.
pipeline = dlt.pipeline(pipeline_name="github_data", destination="duckdb", dataset_name="events")

# Run the resource into the "events" table using the "replace" write disposition;
# the object returned by run() is kept in `info`.
info = pipeline.run(
    paginated_getter,
    table_name="events",
    write_disposition="replace",
)

In [4]:
# Resolve the default dataset's schema once and reuse it, instead of rebuilding
# the dataset accessor for every print. Both lines below then describe the
# same schema object.
default_schema = pipeline.dataset(dataset_type="default").schema

# Names of the data tables present in the schema.
print(default_schema.data_table_names())
# Plain repr of the schema object itself.
print(default_schema)

['events', 'events__payload__pull_request__base__repo__topics', 'events__payload__commits']
Schema github_data at 4471272784


In [8]:
import numpy as np
import pandas as pd
# Read the "events" table from the default dataset via .df() and display it as
# the cell's output.
pipeline.dataset(dataset_type="default").events.df()

Unnamed: 0,id,type,actor__id,actor__login,actor__display_login,actor__gravatar_id,actor__url,actor__avatar_url,repo__id,repo__name,...,payload__pull_request__additions,payload__pull_request__deletions,payload__pull_request__changed_files,payload__repository_id,payload__push_id,payload__size,payload__distinct_size,payload__ref,payload__head,payload__before
0,51755277396,WatchEvent,57673634,mohamed33389iojoj,mohamed33389iojoj,,https://api.github.com/users/mohamed33389iojoj,https://avatars.githubusercontent.com/u/57673634?,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,,,,
1,51749081250,WatchEvent,76108764,Khushi-vyas,Khushi-vyas,,https://api.github.com/users/Khushi-vyas,https://avatars.githubusercontent.com/u/76108764?,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,,,,
2,51746240597,WatchEvent,161612079,zenooodk,zenooodk,,https://api.github.com/users/zenooodk,https://avatars.githubusercontent.com/u/161612...,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,,,,
3,51745236353,ForkEvent,86552976,sridhar1962,sridhar1962,,https://api.github.com/users/sridhar1962,https://avatars.githubusercontent.com/u/86552976?,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,,,,
4,51745228527,WatchEvent,86552976,sridhar1962,sridhar1962,,https://api.github.com/users/sridhar1962,https://avatars.githubusercontent.com/u/86552976?,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
291,51255055115,WatchEvent,44954796,michaelxzh,michaelxzh,,https://api.github.com/users/michaelxzh,https://avatars.githubusercontent.com/u/44954796?,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,,,,
292,51253598547,WatchEvent,157688381,Thecoderwithavibe,Thecoderwithavibe,,https://api.github.com/users/Thecoderwithavibe,https://avatars.githubusercontent.com/u/157688...,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,,,,
293,51253388468,WatchEvent,1024712,philster,philster,,https://api.github.com/users/philster,https://avatars.githubusercontent.com/u/1024712?,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,,,,
294,51251212625,WatchEvent,78709093,Erick-Rom,Erick-Rom,,https://api.github.com/users/Erick-Rom,https://avatars.githubusercontent.com/u/78709093?,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,,,,


In [9]:
# Read the events__payload__pull_request__base__repo__topics table via .df()
# and display it; its rows carry a _dlt_parent_id column alongside each value.
pipeline.dataset(dataset_type="default").events__payload__pull_request__base__repo__topics.df()

Unnamed: 0,value,_dlt_parent_id,_dlt_list_idx,_dlt_id
0,data-engineering,JBF6FEKxnMKC2g,0,LpgS5jrvZ/W4EQ
1,dbt,JBF6FEKxnMKC2g,1,EwgdhkpqlXcRwQ
2,docker,JBF6FEKxnMKC2g,2,Ul7SuJn2BWmKJg
3,kafka,JBF6FEKxnMKC2g,3,m0kw1Yb8q/IZrA
4,kestra,JBF6FEKxnMKC2g,4,1Tzpioj1GXnacQ
5,spark,JBF6FEKxnMKC2g,5,O3z+2ddZrWCYSw


In [11]:
# Join every event row to its topic rows: the topics table's _dlt_parent_id
# is matched against the events table's _dlt_id.
sql = """
select * 
from events e 
join events__payload__pull_request__base__repo__topics c
on e._dlt_id = c._dlt_parent_id
"""

# Open the SQL client and the query cursor in a single `with` statement; the
# cursor is closed first, then the client, when the block exits.
with pipeline.sql_client() as client, client.execute_query(sql) as cursor:
    data = cursor.df()

# Display the joined result as the cell's output.
data

Unnamed: 0,id,type,actor__id,actor__login,actor__display_login,actor__gravatar_id,actor__url,actor__avatar_url,repo__id,repo__name,...,payload__push_id,payload__size,payload__distinct_size,payload__ref,payload__head,payload__before,value,_dlt_parent_id,_dlt_list_idx,_dlt_id_1
0,51586324900,PullRequestEvent,206166622,chaiandmushrooms,chaiandmushrooms,,https://api.github.com/users/chaiandmushrooms,https://avatars.githubusercontent.com/u/206166...,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,spark,JBF6FEKxnMKC2g,5,O3z+2ddZrWCYSw
1,51586324900,PullRequestEvent,206166622,chaiandmushrooms,chaiandmushrooms,,https://api.github.com/users/chaiandmushrooms,https://avatars.githubusercontent.com/u/206166...,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,kestra,JBF6FEKxnMKC2g,4,1Tzpioj1GXnacQ
2,51586324900,PullRequestEvent,206166622,chaiandmushrooms,chaiandmushrooms,,https://api.github.com/users/chaiandmushrooms,https://avatars.githubusercontent.com/u/206166...,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,kafka,JBF6FEKxnMKC2g,3,m0kw1Yb8q/IZrA
3,51586324900,PullRequestEvent,206166622,chaiandmushrooms,chaiandmushrooms,,https://api.github.com/users/chaiandmushrooms,https://avatars.githubusercontent.com/u/206166...,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,docker,JBF6FEKxnMKC2g,2,Ul7SuJn2BWmKJg
4,51586324900,PullRequestEvent,206166622,chaiandmushrooms,chaiandmushrooms,,https://api.github.com/users/chaiandmushrooms,https://avatars.githubusercontent.com/u/206166...,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,dbt,JBF6FEKxnMKC2g,1,EwgdhkpqlXcRwQ
5,51586324900,PullRequestEvent,206166622,chaiandmushrooms,chaiandmushrooms,,https://api.github.com/users/chaiandmushrooms,https://avatars.githubusercontent.com/u/206166...,419661684,DataTalksClub/data-engineering-zoomcamp,...,,,,,,,data-engineering,JBF6FEKxnMKC2g,0,LpgS5jrvZ/W4EQ


In [12]:
# Print the pipeline's default schema, rendered by to_pretty_yaml().
print(pipeline.default_schema.to_pretty_yaml())

version: 2
version_hash: MU117qR+DOYaB0UF3x1IdKtNR7WKJawmf3jI2hw9Me8=
engine_version: 11
name: github_data
tables:
  _dlt_version:
    columns:
      version:
        data_type: bigint
        nullable: false
      engine_version:
        data_type: bigint
        nullable: false
      inserted_at:
        data_type: timestamp
        nullable: false
      schema_name:
        data_type: text
        nullable: false
      version_hash:
        data_type: text
        nullable: false
      schema:
        data_type: text
        nullable: false
    write_disposition: skip
    resource: _dlt_version
    description: Created by DLT. Tracks schema updates
  _dlt_loads:
    columns:
      load_id:
        data_type: text
        nullable: false
      schema_name:
        data_type: text
        nullable: true
      status:
        data_type: bigint
        nullable: false
      inserted_at:
        data_type: timestamp
        nullable: false
      schema_version_hash:
        data_type: te