-
Notifications
You must be signed in to change notification settings - Fork 8
/
url_validator.py
37 lines (30 loc) · 1.23 KB
/
url_validator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import requests
from requests import RequestException, Timeout
from pydatajson.constants import EXCEPTION_STATUS_CODES, \
INVALID_STATUS_CODES_REGEX
from pydatajson.validators.simple_validator import SimpleValidator
class UrlValidator(SimpleValidator):
    """Validator base that knows how to probe a URL with an HTTP HEAD request.

    Holds the SSL-verification flag and the timeout that ``is_working_url``
    forwards to ``requests.head``. ``validate`` is not implemented here and
    raises ``NotImplementedError``.
    """

    def __init__(self, catalog, verify_ssl, url_check_timeout):
        """Store the request options and hand the catalog to the parent.

        Args:
            catalog: passed unchanged to ``SimpleValidator.__init__``.
            verify_ssl: forwarded as ``verify`` to ``requests.head``.
            url_check_timeout: forwarded as ``timeout`` to ``requests.head``.
        """
        super(UrlValidator, self).__init__(catalog)
        self.verify_ssl = verify_ssl
        self.url_check_timeout = url_check_timeout

    def validate(self):
        """Placeholder; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def is_working_url(self, url):
        """Issue a HEAD request to ``url`` and classify the answer.

        Args:
            url: address to probe.

        Returns:
            A ``(is_working, status_code)`` tuple:

            * status code listed in ``EXCEPTION_STATUS_CODES``:
              ``(True, status_code)``.
            * otherwise ``is_working`` is ``False`` when any pattern in
              ``INVALID_STATUS_CODES_REGEX`` matches the status code (as
              text, anchored at its start) and ``True`` when none does.
            * the request timed out: ``(False, 408)``.
            * any other exception was raised: ``(False, None)``.
        """
        try:
            status = requests.head(url,
                                   timeout=self.url_check_timeout,
                                   verify=self.verify_ssl).status_code

            # Exempt codes are accepted without consulting the patterns.
            if status in EXCEPTION_STATUS_CODES:
                return True, status

            status_text = str(status)
            # Build the full list (no short-circuit) so every pattern is
            # evaluated, exactly as many times as there are patterns.
            rejected = [re.match(regex, status_text) is not None
                        for regex in INVALID_STATUS_CODES_REGEX]
            return not any(rejected), status
        except Timeout:
            # Must stay ahead of the generic handler so timeouts report 408.
            return False, 408
        except Exception:
            # Best-effort check: any other failure just means "not working".
            return False, None