-
Notifications
You must be signed in to change notification settings - Fork 36
/
matomo_pipeline.py
91 lines (78 loc) · 2.43 KB
/
matomo_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
"""Contains functions that run the matomo pipeline."""
from matomo import matomo_reports, matomo_visits
import dlt
def run_full_load() -> None:
    """
    Loads both Matomo sources in a single pipeline run.

    Creates the "matomo" pipeline with postgres as destination, the
    "matomo_full_load" dataset and "schemas/export" as schema export path,
    then runs `matomo_reports()` and `matomo_visits()` (both called with
    their default arguments) together and prints the info object returned
    by the run.
    """
    pipeline = dlt.pipeline(
        pipeline_name="matomo",
        destination="postgres",
        dataset_name="matomo_full_load",
        export_schema_path="schemas/export",
        full_refresh=False,
    )
    # Both sources go into one run() call: reports first, then visits.
    load_info = pipeline.run([matomo_reports(), matomo_visits()])
    print(load_info)
def run_custom_reports() -> None:
    """
    Example run that loads two Matomo custom reports.

    Builds one query per custom report (each calling
    "CustomReports.getCustomReport" with period "day" and date "2020-01-01"),
    passes them to `matomo_reports` together with an explicit site id, runs
    the result on the "matomo" pipeline (postgres destination, dataset
    "matomo_custom_reports") and prints the info object returned by the run.
    """
    # (resource name, idCustomReport) pairs; extend this to load more reports.
    custom_reports = (
        ("custom_report_name", 1),
        ("custom_report_name2", 2),
    )
    # The queries differ only in resource name and custom report id.
    queries = [
        {
            "resource_name": resource_name,
            "methods": ["CustomReports.getCustomReport"],
            "date": "2020-01-01",
            "period": "day",
            "extra_params": {"idCustomReport": report_id},
        }
        for resource_name, report_id in custom_reports
    ]
    site_id = 3

    pipeline = dlt.pipeline(
        pipeline_name="matomo",
        destination="postgres",
        dataset_name="matomo_custom_reports",
        full_refresh=False,
    )
    load_info = pipeline.run(matomo_reports(queries=queries, site_id=site_id))
    print(load_info)
def run_reports() -> None:
    """
    Loads only the Matomo reports.

    Runs `matomo_reports` for an explicitly given site id on the "matomo"
    pipeline (postgres destination, dataset "matomo_reports") and prints the
    info object returned by the run.
    """
    # The site id is passed explicitly here; when it is omitted the default
    # is to read it from config.toml.
    site_id = 3

    pipeline = dlt.pipeline(
        pipeline_name="matomo",
        destination="postgres",
        dataset_name="matomo_reports",
        full_refresh=False,
    )
    load_info = pipeline.run(matomo_reports(site_id=site_id))
    print(load_info)
def run_live_events() -> None:
    """
    Loads live visits and visitors data, getting only today's data.

    Runs `matomo_visits` with `initial_load_past_days=1` and
    `get_live_event_visitors=True` on the "matomo" pipeline (duckdb
    destination, dataset "matomo_events") and prints the info object
    returned by the run.
    """
    pipeline = dlt.pipeline(
        pipeline_name="matomo",
        destination="duckdb",
        dataset_name="matomo_events",
        full_refresh=False,
    )
    visits = matomo_visits(
        initial_load_past_days=1,
        get_live_event_visitors=True,
    )
    load_info = pipeline.run(visits)
    print(load_info)
if __name__ == "__main__":
    # Script entry point: only the live events example runs by default;
    # swap in one of the other run_* functions to try a different load.
    run_live_events()