Skip to content

Commit

Permalink
GCS scanner fix, custom scopes support and more
Browse files Browse the repository at this point in the history
1. Fixing an issue with the GCS file scanner that could result
in an infinite loop.
2. Fixing GCP VM metadata scope pulling issue.
3. Adding support for a list of files with access tokens.
4. Adding support for user-defined custom scopes for access tokens (AT) and refresh tokens (RT).
5. Project results are now saved in individual files.
6. The list of GCS files is now saved in a separate file rather than
in the project result.
  • Loading branch information
mshudrak committed Dec 5, 2022
1 parent 6c6614f commit 6e2ff4b
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 49 deletions.
29 changes: 16 additions & 13 deletions src/gcp_scanner/crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
"""

import collections
import json
import logging
import io
import sys
from typing import List, Dict, Any, Tuple

Expand Down Expand Up @@ -325,18 +327,19 @@ def get_firewall_rules(


def get_bucket_names(project_name: str, credentials: Credentials,
enum_files: bool) -> Dict[str, Tuple[Any, List[Any]]]:
dump_fd: io.TextIOWrapper
) -> Dict[str, Tuple[Any, List[Any]]]:
"""Retrieve a list of buckets available in the project.
Args:
project_name: A name of a project to query info about.
credentials: An google.oauth2.credentials.Credentials object.
enum_files: If true, the function will enumerate files stored in buckets.
dump_fd: If set, the function will enumerate files stored in buckets and
save them in a file corresponding to provided file descriptor.
This is a very slow, noisy operation and should be used with caution.
Returns:
A dictionary where key is bucket name and value is a tuple of
a bucket Object and list of file objects associated with bucket.
A dictionary where key is bucket name and value is a bucket Object.
"""

logging.info("Retrieving GCS Buckets")
Expand All @@ -352,28 +355,28 @@ def get_bucket_names(project_name: str, credentials: Credentials,
logging.info("Failed to list buckets in the %s", project_name)
logging.info(sys.exc_info())
break

for bucket in response.get("items", []):
buckets_dict[bucket["name"]] = (bucket, None)
if enum_files is True:
if dump_fd is not None:
ret_fields = "nextPageToken,items(name,size,contentType,timeCreated)"

req = service.objects().list(bucket=bucket["name"], fields=ret_fields)

all_objects = []
while req:
try:
resp = req.execute()
all_objects.extend(resp.get("items", []))
for item in resp.get("items", []):
dump_fd.write(json.dumps(item, indent=2, sort_keys=False))

req = service.objects().list_next(req, resp)
except googleapiclient.errors.HttpError:
logging.info("Failed to read the bucket %s", bucket["name"])
logging.info(sys.exc_info())
continue
req = service.objects().list_next(req, resp)
break

buckets_dict[bucket["name"]] = (bucket, all_objects)

request = service.buckets().list_next(
previous_request=request, previous_response=response)
request = service.buckets().list_next(
previous_request=request, previous_response=response)

return buckets_dict

Expand Down
65 changes: 58 additions & 7 deletions src/gcp_scanner/credsdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,6 @@
from httplib2 import Credentials
import requests

# Permissions to request for Access Token
scopes = "https://www.googleapis.com/auth/cloud-platform"

expires_in = 3600 # Expires in 1 hour

credentials_db_search_places = ["/home/", "/root/"]

Expand Down Expand Up @@ -125,7 +121,7 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]:

print("Successfully retrieved instance metadata")
print("Access token length: %d" % len(token), "Instance email: %s" % email,
"Instance scopes: %s" % scopes)
"Instance scopes: %s" % instance_scopes)
return email, credentials_from_token(token, None, None, None, None,
instance_scopes)

Expand Down Expand Up @@ -313,24 +309,79 @@ def impersonate_sa(iam_client: IAMCredentialsClient,
None, None, None, scopes_sa)


def creds_from_access_token(access_token_file):
  """Build credentials from an access token stored in a JSON file.

  Args:
    access_token_file: path to a UTF-8 JSON file that holds the token under
      the "access_token" key and, optionally, a list of OAuth scopes under
      the "scopes" key. Example:
      {
        "access_token": "<token>",
        "scopes": [
          "https://www.googleapis.com/auth/devstorage.read_only",
          "https://www.googleapis.com/auth/logging.write",
          "https://www.googleapis.com/auth/monitoring.write",
          "https://www.googleapis.com/auth/servicecontrol",
          "https://www.googleapis.com/auth/service.management.readonly",
          "https://www.googleapis.com/auth/trace.append"
        ]
      }

  Returns:
    Whatever credentials_from_token builds for the token and the scopes.

  Raises:
    KeyError: if the file has no "access_token" entry.
  """
  with open(access_token_file, encoding="utf-8") as token_fd:
    token_info = json.load(token_fd)

  # Only a missing (or null) "scopes" entry falls back to the default scope;
  # an explicitly provided list is passed through untouched.
  requested_scopes = token_info.get("scopes")
  if requested_scopes is None:
    effective_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
  else:
    effective_scopes = requested_scopes

  token = token_info["access_token"]
  return credentials_from_token(token, None, None, None, None,
                                effective_scopes)


def creds_from_refresh_token(refresh_token_file):
  """Build credentials from a refresh token stored in a JSON file.

  Args:
    refresh_token_file: path to a UTF-8 JSON file with the "refresh_token",
      "client_id", "client_secret" and "token_uri" entries, plus an optional
      "scopes" list. Example:
      {
        "refresh_token": "<token>",
        "client_id": "id",
        "client_secret": "secret",
        "token_uri": "https://oauth2.googleapis.com/token",
        "scopes": [
          "https://www.googleapis.com/auth/devstorage.read_only",
          "https://www.googleapis.com/auth/logging.write",
          "https://www.googleapis.com/auth/monitoring.write",
          "https://www.googleapis.com/auth/servicecontrol",
          "https://www.googleapis.com/auth/service.management.readonly",
          "https://www.googleapis.com/auth/trace.append"
        ]
      }

  Returns:
    The object constructed by credentials.Credentials from the file content.

  Raises:
    KeyError: if "refresh_token", "token_uri", "client_id" or "client_secret"
      is missing from the file.
  """

  with open(refresh_token_file, encoding="utf-8") as f:
    creds_dict = json.load(f)

  # A missing (or null) "scopes" entry falls back to the broad cloud-platform
  # scope; a user-provided list is used as is.
  user_scopes = creds_dict.get("scopes", None)
  if user_scopes is None:
    user_scopes = ["https://www.googleapis.com/auth/cloud-platform"]

  # The first positional argument is None: no access token is supplied here,
  # only the refresh material read from the file.
  return credentials.Credentials(
      None,
      refresh_token=creds_dict["refresh_token"],
      token_uri=creds_dict["token_uri"],
      client_id=creds_dict["client_id"],
      client_secret=creds_dict["client_secret"],
      scopes=user_scopes)
63 changes: 34 additions & 29 deletions src/gcp_scanner/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,9 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]],
if res:
project_list.append(res)
else:
# force object creation
project_list.append({'projectId': force_project_id, 'projectNumber': 'N/A'})
# force object creation anyway
project_list.append({'projectId': force_project_id,
'projectNumber': "N/A"})

# Enumerate projects accessible by SA
for project in project_list:
Expand Down Expand Up @@ -147,16 +148,17 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]],

# Get storage buckets
if is_set(scan_config, 'storage_buckets'):
fetch_bucket_names = False
dump_file_names = None
if scan_config is not None:
obj = scan_config.get('storage_buckets', None)
if obj is not None:
fetch_bucket_names = obj.get('fetch_file_names', False)
if obj is not None and obj.get('fetch_file_names', False) is True:
dump_file_names = open(out_dir + '/%s.gcs' % project_id, 'w',
encoding='utf-8')
project_result['storage_buckets'] = crawl.get_bucket_names(project_id,
credentials, fetch_bucket_names)
credentials, dump_file_names)

# Get DNS managed zones
if is_set(scan_config, 'storage_buckets'):
if is_set(scan_config, 'managed_zones'):
project_result['managed_zones'] = crawl.get_managed_zones(project_id,
credentials)

Expand Down Expand Up @@ -230,7 +232,7 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]],
iam_policy)

for candidate_service_account in project_service_accounts:
print('Trying %s' % candidate_service_account)
logging.info('Trying %s' % candidate_service_account)
if not candidate_service_account.startswith('serviceAccount'):
continue
try:
Expand All @@ -240,20 +242,24 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]],
(candidate_service_account, creds_impersonated, updated_chain))
project_result['service_account_edges'].append(
candidate_service_account)
print('Successfully impersonated %s using %s ' %
logging.info('Successfully impersonated %s using %s ' %
(candidate_service_account, sa_name))
except Exception:
logging.info('Failed to get token for %s',
candidate_service_account)
logging.info(sys.exc_info()[1])

# Write out results to json DB
logging.info('Saving results into the file')
# Write out results to json DB
logging.info('Saving results for {project_id} into the file')

sa_results_data = json.dumps(sa_results, indent=2, sort_keys=False)

sa_results_data = json.dumps(sa_results, indent=2, sort_keys=False)
with open(out_dir + '/%s.json' % sa_name, 'w',
encoding='utf-8') as outfile:
outfile.write(sa_results_data)

with open(out_dir + '/%s.json' % sa_name, 'w', encoding='utf-8') as outfile:
outfile.write(sa_results_data)
# Clean memory to avoid leak for large amount projects.
sa_results.clear()


def iam_client_for_credentials(
Expand Down Expand Up @@ -313,14 +319,15 @@ def main():
parser.add_argument(
'-at',
default=None,
dest='access_token',
help='Use access token directly to scan GCP resources. Limited by TTL')
dest='access_token_files',
help='A list of comma separated files with access token and OAuth scopes.\
TTL limited. A token and scopes should be stored in JSON format.')
parser.add_argument(
'-rt',
default=None,
dest='refresh_token_files',
help='A list of comma separated files with refresh_token, client_id,\
token_uri and client_secret'
token_uri and client_secret stored in JSON format.'
)
parser.add_argument(
'-s', default=None, dest='key_name', help='Name of individual SA to scan')
Expand Down Expand Up @@ -406,18 +413,16 @@ def main():
continue

sa_tuples.append((account_name, credentials, []))
if args.access_token:
credentials = credsdb.credentials_from_token(
args.access_token,
None,
None,
None,
None,
scopes_user='https://www.googleapis.com/auth/cloud-platform')
if credentials is None:
logging.info('Failed to retrieve credentials using token provided')
else:
sa_tuples.append(('access_token_user_provided', credentials, []))

if args.access_token_files:
for access_token_file in args.access_token_files.split(','):
credentials = credsdb.creds_from_access_token(access_token_file)

if credentials is None:
logging.info('Failed to retrieve credentials using token provided')
else:
token_file_name = os.path.basename(refresh_token_file)
sa_tuples.append(('token_file_name', credentials, []))

if args.refresh_token_files:
for refresh_token_file in args.refresh_token_files.split(','):
Expand Down

0 comments on commit 6e2ff4b

Please sign in to comment.