-
Notifications
You must be signed in to change notification settings - Fork 2
/
models.py
92 lines (78 loc) · 2.24 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Packages
import flask
import user_agents
def get_search_results(
    session,
    api_key,
    query,
    search_engine_id,
    site_restricted_search,
    start=None,
    num=None,
    siteSearch=None,
):
    """
    Query the Google Custom Search API for search results
    https://developers.google.com/custom-search/v1/site_restricted_api

    The User-Agent of the current Flask request is inspected, so this
    function has to be called while a request is being handled.

    Parameters:
    - session: object whose `get(url, params=...)` performs the HTTP
      request (presumably a `requests.Session` - confirm against callers)
    - api_key: sent as the "key" query parameter
    - query: the search terms, sent as the "q" query parameter
    - search_engine_id: sent as the "cx" query parameter
    - site_restricted_search: when truthy, the ".../siterestrict"
      endpoint is queried instead of the standard one
    - start, num, siteSearch: optional values forwarded unchanged as the
      query parameters of the same name

    Returns the decoded JSON body of the response. If it contains an
    "items" key, that key is renamed to "entries" and every "<br>\n" is
    stripped from each entry's "htmlSnippet".

    Aborts the request with a 403 (through `flask.abort`) when the query
    contains an illegal character or the client looks like a web crawler.
    Any error raised by `response.raise_for_status()` is propagated.
    """

    # Block weird characters
    illegal_characters = ("【", "】")
    if any(char in query for char in illegal_characters):
        flask.abort(403, "Search query contains an illegal character")

    # Block web crawlers
    # NOTE: str.startswith is case-sensitive, so every capitalisation that
    # should be blocked needs its own entry.
    bot_prefixes = (
        "python",  # python-requests/, python-urllib3/ etc.
        "Python",  # Python/, Python-urllib/ etc.
        "Go-http-client",
        "kube-probe",
        "Prometheus",
        "curl",
        "urlwatch",
        "GuzzleHttp",
        "Feedly",
        "github-camo",
        "Site24x7",
        "check_http",
        "Tiny Tiny RSS",
        "RSS Discovery Engine",
        "NetNewsWire",
        "ALittle Client",
        "gh",
    )
    bot_contains = (
        "HeadlessChrome/",
        "Assetnote/",
        "PetalBot",
    )

    agent = user_agents.parse(str(flask.request.user_agent))

    if (
        agent.is_bot
        or agent.ua_string.startswith(bot_prefixes)
        or any(substr in agent.ua_string for substr in bot_contains)
    ):
        flask.abort(403, "Web crawlers may not perform searches")

    url_endpoint = "https://www.googleapis.com/customsearch/v1"

    if site_restricted_search:
        url_endpoint = (
            "https://www.googleapis.com/customsearch/v1/siterestrict"
        )

    # Optional parameters left at None are passed through as-is; with a
    # `requests` session such entries are omitted from the query string.
    response = session.get(
        url_endpoint,
        params={
            "key": api_key,
            "cx": search_engine_id,
            "q": query,
            "start": start,
            "num": num,
            "siteSearch": siteSearch,
        },
    )
    response.raise_for_status()

    results = response.json()

    if "items" in results:
        # Move "items" to "entries" as "items" is a method name for dicts
        results["entries"] = results.pop("items")

        # Remove newlines from the snippet
        for item in results["entries"]:
            if "htmlSnippet" in item:
                item["htmlSnippet"] = item["htmlSnippet"].replace(
                    "<br>\n", ""
                )

    return results