/
POIScanner.py
143 lines (114 loc) · 4.64 KB
/
POIScanner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# coding: utf-8
import re
from .keywords import DEFAULT_KEYWORDS, DEFAULT_CATEGORIES
# NOTE: This is a work in progress. Wishlist:
#
# - Allow overwriting of keywords (not just appending)
# - Aggregate results & deduplicate findings
# - Generate minimal repros for findings
# - Search descriptions for keywords (only field names are searched right now)
# (this is tricky because descriptions can be in multiple languages, and
# some keywords might generate too many false positives)
# - Include field/type descriptions in results
class POIScanner(object):
    """Scan a GQLSchema for points of interest (POIs).

    This is a scanner that looks for keywords and patterns that might be of
    interest to a security researcher:

      * Sensitive information
      * Potential PII disclosure
      * Authentication endpoints
      * Custom scalar types
      * File upload
      * Database operations
      * Debug endpoints
      * Deprecated fields

    Only field *names* are matched against keywords; descriptions are merely
    copied into the findings.
    """

    def __init__(self, schema, categories=None, keywords=None):
        """Prepare a scanner for ``schema``.

        :param schema: schema object exposing ``query`` and ``mutation`` root
            types (each with ``name`` and ``fields``).
        :param categories: container of category ids to enable. Any falsy
            value (``None``, empty list, ...) selects ``DEFAULT_CATEGORIES``.
        :param keywords: optional list or tuple of extra regex fragments.
            They are OR-ed together into a "Custom" category. Values of any
            other type are ignored, and falsy entries are dropped.
        """
        self.schema = schema
        self.examined_types = set()
        self.categories = categories or DEFAULT_CATEGORIES

        # Pre-compile one case-insensitive regex per enabled category by
        # joining all of its keywords into a single alternation.
        self.compiled_keywords = {}
        for keyword in DEFAULT_KEYWORDS:
            cat = keyword["id"]
            if cat not in self.categories:
                continue
            self.compiled_keywords[cat] = {
                "name": keyword["name"],
                "regex": re.compile("|".join(keyword["keywords"]), re.IGNORECASE),
            }

        # Add custom keywords; anything that is not a list/tuple is ignored.
        if keywords and isinstance(keywords, (list, tuple)):
            # Drop empty entries: an empty alternative would match every name.
            keywords = [k for k in keywords if k]
            if keywords:
                self.compiled_keywords["custom"] = {
                    "name": "Custom",
                    "regex": re.compile("|".join(keywords), re.IGNORECASE),
                }

    def scan(self, depth=4):
        """Scan the schema for points of interest.

        Walks every field of the query type and, if present, the mutation
        type, recursing into sub-fields.

        :param depth: number of additional levels to descend below the root
            fields (the root fields themselves are always examined).
        :returns: dict mapping a category display name (e.g. ``"Deprecated"``)
            to a list of ``{"path": ..., "description": ...}`` findings.
        """
        # Start every scan from a clean slate. Without this reset a second
        # call on the same instance would skip every object type seen by the
        # first call and silently return fewer findings.
        self.examined_types = set()

        # Scan both the query and mutation types (if present)
        initial_types = [self.schema.query]
        if self.schema.mutation:
            initial_types.append(self.schema.mutation)

        # Scan each field in each type
        results = []
        for gql_type in initial_types:
            for field in gql_type.fields:
                if field.name == "__typename":
                    continue
                results.extend(self._scan_field(field, depth, path=gql_type.name))

        # Group findings by category; the "type" key becomes the dict key.
        categorized = {}
        for result in results:
            category = result.pop("type")
            categorized.setdefault(category, []).append(result)
        return categorized

    @staticmethod
    def _make_result(category, path, field):
        """Build one finding dict for ``field`` located at ``path``."""
        return {
            "type": category,
            "path": path,
            # Fall back to the type's description when the field has none.
            "description": field.description or field.type.description,
        }

    def _scan_field(self, field, depth, path):
        """Scan a GQLField (and its sub-fields) for points of interest.

        :param field: field to examine.
        :param depth: remaining recursion budget; sub-fields are visited only
            while ``depth - 1`` is still non-negative.
        :param path: dotted path of the parent; ``field.name`` is appended.
        :returns: flat list of finding dicts, each carrying a ``"type"`` key.
        """
        results = []
        path = "{}.{}".format(path, field.name)

        # 1. Process current field

        # Check deprecated status
        if "deprecated" in self.categories and field.is_deprecated:
            results.append(self._make_result("Deprecated", path, field))

        # Check if the field is custom scalar type
        if "custom_scalars" in self.categories:
            type_kind = field.type.kind
            if type_kind.kind == "SCALAR" and not type_kind.is_builtin_scalar:
                results.append(self._make_result("Custom Scalar", path, field))

        # Check keyword categories; a field is reported under the first
        # matching category only.
        for keyword in self.compiled_keywords.values():
            if keyword["regex"].search(field.name):
                results.append(self._make_result(keyword["name"], path, field))
                break

        # 2. Recurse into subfields if not at max depth
        depth -= 1
        if depth < 0:
            return results

        for f in field.type.fields:
            if f.name == "__typename":
                continue
            # Deduplicate results: descend into each object type only once
            # per scan (this also guards against cyclic type references).
            if f.type.kind.kind == "OBJECT" and f.type.name in self.examined_types:
                continue
            self.examined_types.add(f.type.name)
            results.extend(self._scan_field(f, depth, path=path))

        return results