-
Notifications
You must be signed in to change notification settings - Fork 8
/
landing_pages_validator.py
40 lines (31 loc) · 1.34 KB
/
landing_pages_validator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pydatajson.custom_exceptions as ce
from pydatajson import threading_helper
from pydatajson.validators.url_validator import UrlValidator
class LandingPagesValidator(UrlValidator):
    """Validator that reports datasets whose ``landingPage`` URL is broken.

    Relies on ``self.catalog``, ``self.is_working_url`` and
    ``self.threads_count``, which are not defined in this class
    (presumably provided by ``UrlValidator`` -- confirm there).
    """

    def validate(self):
        """Check each dataset's landing page and yield an error per failure.

        Datasets without a truthy ``landingPage`` are skipped. The remaining
        URLs are checked through ``threading_helper.apply_threading`` using
        ``self.is_working_url``; each result is unpacked as a
        ``(valid, status_code)`` pair.

        Yields:
            ce.BrokenLandingPageError: one per landing page whose check
                returned a falsy ``valid``, built from the dataset index,
                dataset title, landing page URL and status code.
        """
        # A catalog without a 'dataset' key has nothing to validate; fall
        # back to an empty list rather than iterating over None.
        datasets = self.catalog.get('dataset') or []

        metadata = []
        urls = []
        # Enumerate BEFORE filtering so dataset_idx is the dataset's position
        # in the catalog's 'dataset' list, not its position among the subset
        # of datasets that happen to declare a landing page.
        for dataset_idx, dataset in enumerate(datasets):
            landing_page = dataset.get('landingPage')
            if not landing_page:
                continue
            metadata.append({
                "dataset_idx": dataset_idx,
                "dataset_title": dataset.get('title'),
                "landing_page": landing_page,
            })
            urls.append(landing_page)

        sync_res = threading_helper.apply_threading(
            urls, self.is_working_url, self.threads_count)

        # Results are paired positionally with the URLs that were submitted.
        for act_metadata, (valid, status_code) in zip(metadata, sync_res):
            if not valid:
                yield ce.BrokenLandingPageError(
                    act_metadata["dataset_idx"],
                    act_metadata["dataset_title"],
                    act_metadata["landing_page"],
                    status_code)