Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Gh 998] Maintenance window #1236

Merged
merged 40 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
a176949
Maintenance Window Frontend Changes - 1
TejasRGitHub Apr 9, 2024
dc1eb1a
Frontend views + few backend files comments for code
TejasRGitHub Apr 16, 2024
7febedf
Alembic Script to upgrade db
TejasRGitHub Apr 17, 2024
c2eedda
Event-bridge base functions and maintenance modules
TejasRGitHub Apr 18, 2024
482db02
Changes for making graphQL Work
TejasRGitHub Apr 18, 2024
485129e
Mutation graphQl corrections
TejasRGitHub Apr 18, 2024
89e9944
Resolver changs
TejasRGitHub Apr 19, 2024
2c400b7
Struture for event bridge graphql calls
TejasRGitHub Apr 22, 2024
3f5ace1
Adding few thigns
TejasRGitHub Apr 23, 2024
9dd0890
All GraphQL Endpoint working
TejasRGitHub Apr 23, 2024
24a2493
Complete graphQL endpoint + api_handler logic to block calls
TejasRGitHub Apr 23, 2024
be31c18
Correcting enums for maintenance status
TejasRGitHub Apr 24, 2024
174ce68
Finalizing changes for maintenance window
TejasRGitHub Apr 25, 2024
1fe784b
Comments and make lint plus yarn lint fix
TejasRGitHub Apr 25, 2024
627d31c
Making container id for ssm param to sched task id
Apr 25, 2024
95c63f5
Merge branch 'main' into GH-998-Maintenance-Window
Apr 30, 2024
093fab2
Changes from aws deployed data.all after testing
TejasRGitHub Apr 30, 2024
fed1609
Linting fixes
TejasRGitHub Apr 30, 2024
11deb7b
Removing custom maintenance text
TejasRGitHub Apr 30, 2024
f996471
Alembic script upgrade fix
TejasRGitHub Apr 30, 2024
16a7dc1
Fixing integration tests
TejasRGitHub Apr 30, 2024
672ee82
Few final changes
TejasRGitHub Apr 30, 2024
cdf7eee
Linting
TejasRGitHub Apr 30, 2024
5b83e74
Adding changes after code review
TejasRGitHub May 7, 2024
2ae74ce
Code changes after review comments
TejasRGitHub May 8, 2024
3cd7d15
Merge branch 'main' into GH-998-Maintenance-Window
May 8, 2024
f3eab6f
Resolving review comments - reusing code
TejasRGitHub May 8, 2024
02abe68
Resolving issue with alembic
TejasRGitHub May 8, 2024
70dbc24
Merge branch 'main' into GH-998-Maintenance-Window
May 13, 2024
ea896f8
Addressing comments after 2nd review
TejasRGitHub May 13, 2024
f7fb577
Making enums conform to graphQLMapper
TejasRGitHub May 13, 2024
e7eba0c
trying few things
TejasRGitHub May 14, 2024
85803fb
resolving last of code review steps
TejasRGitHub May 14, 2024
713605f
fixing linting
TejasRGitHub May 14, 2024
6b383ce
Minor Changes
TejasRGitHub May 14, 2024
8180382
Minor Changes - 1
TejasRGitHub May 14, 2024
c027fa1
Merge branch 'main' into GH-998-Maintenance-Window
May 23, 2024
c5d6be9
Moving code and addressing code review comments
TejasRGitHub May 23, 2024
e5728a5
Resolving migration tasks
TejasRGitHub May 23, 2024
9088ea5
Removing unneccsary comments
TejasRGitHub May 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 19 additions & 91 deletions backend/api_handler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import logging
import os
import datetime
from argparse import Namespace
from time import perf_counter

Expand All @@ -11,16 +10,19 @@
)

from dataall.base.api import bootstrap as bootstrap_schema, get_executable_schema
from dataall.base.services.service_provider_factory import ServiceProviderFactory
from dataall.base.utils.api_handler_utils import (
extract_groups,
attach_tenant_policy_for_groups,
check_reauth,
validate_and_block_if_maintenance_window,
)
from dataall.core.tasks.service_handlers import Worker
from dataall.base.aws.sqs import SqsQueue
from dataall.base.aws.parameter_store import ParameterStoreManager
from dataall.base.context import set_context, dispose_context, RequestContext
from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService
from dataall.base.db import get_engine
from dataall.core.permissions.services.tenant_permissions import TENANT_ALL
from dataall.base.loader import load_modules, ImportMode


logger = logging.getLogger()
logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
log = logging.getLogger(__name__)
Expand All @@ -32,7 +34,6 @@
load_modules(modes={ImportMode.API})
SCHEMA = bootstrap_schema()
TYPE_DEFS = gql(SCHEMA.gql(with_directives=False))
REAUTH_TTL = int(os.environ.get('REAUTH_TTL', '5'))
ENVNAME = os.getenv('envname', 'local')
ENGINE = get_engine(envname=ENVNAME)
Worker.queue = SqsQueue.send
Expand Down Expand Up @@ -60,27 +61,6 @@ def adapted(obj, info, **kwargs):
print(f'Lambda Context ' f'Initialization took: {end - start:.3f} sec')


def get_cognito_groups(claims):
if not claims:
raise ValueError(
'Received empty claims. ' 'Please verify authorizer configuration',
claims,
)
groups = list()
saml_groups = claims.get('custom:saml.groups', '')
if len(saml_groups):
groups: list = saml_groups.replace('[', '').replace(']', '').replace(', ', ',').split(',')
cognito_groups = claims.get('cognito:groups', '')
if len(cognito_groups):
groups.extend(cognito_groups.split(','))
return groups


def get_custom_groups(user_id):
service_provider = ServiceProviderFactory.get_service_provider_instance()
return service_provider.get_groups_for_user(user_id)


def handler(event, context):
"""Sample pure Lambda function
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The handler function looks very messy; it is trying to do many things:

  • setting the API context
  • adding tenant permissions
  • checking maintenance
  • checking reauth

Can we divide it into smaller functions?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Updated in the next commit

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd move those methods into separate files and reuse them across different handlers

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I can do that. I see the value of reusing them. Thanks @petrkalos!


Expand Down Expand Up @@ -131,82 +111,30 @@ def handler(event, context):
if 'user_id' in event['requestContext']['authorizer']:
user_id = event['requestContext']['authorizer']['user_id']
log.debug('username is %s', username)
try:
groups = []
if os.environ.get('custom_auth', None):
groups.extend(get_custom_groups(user_id))
else:
groups.extend(get_cognito_groups(claims))
log.debug('groups are %s', ','.join(groups))
with ENGINE.scoped_session() as session:
for group in groups:
policy = TenantPolicyService.find_tenant_policy(session, group, TenantPolicyService.TENANT_NAME)
if not policy:
print(f'No policy found for Team {group}. Attaching TENANT_ALL permissions')
TenantPolicyService.attach_group_tenant_policy(
session=session,
group=group,
permissions=TENANT_ALL,
tenant_name=TenantPolicyService.TENANT_NAME,
)

except Exception as e:
print(f'Error managing groups due to: {e}')
groups = []

set_context(RequestContext(ENGINE, username, groups, user_id))
groups: list = extract_groups(user_id=user_id, claims=claims)
attach_tenant_policy_for_groups(groups=groups)

set_context(RequestContext(ENGINE, username, groups, user_id))
app_context = {
'engine': ENGINE,
'username': username,
'groups': groups,
'schema': SCHEMA,
}

# Determine if there are any Operations that Require ReAuth From SSM Parameter
try:
reauth_apis = ParameterStoreManager.get_parameter_value(
region=os.getenv('AWS_REGION', 'eu-west-1'), parameter_path=f'/dataall/{ENVNAME}/reauth/apis'
).split(',')
except Exception:
log.info('No ReAuth APIs Found in SSM')
reauth_apis = None
query = json.loads(event.get('body'))

maintenance_window_validation_response = validate_and_block_if_maintenance_window(query=query, groups=groups)
if maintenance_window_validation_response is not None:
return maintenance_window_validation_response
reauth_validation_response = check_reauth(query=query, auth_time=claims['auth_time'], username=username)
if reauth_validation_response is not None:
return reauth_validation_response

else:
raise Exception(f'Could not initialize user context from event {event}')

query = json.loads(event.get('body'))

# If The Operation is a ReAuth Operation - Ensure A Non-Expired Session or Return Error
if reauth_apis and query.get('operationName', None) in reauth_apis:
now = datetime.datetime.now(datetime.timezone.utc)
try:
auth_time_datetime = datetime.datetime.fromtimestamp(int(claims['auth_time']), tz=datetime.timezone.utc)
if auth_time_datetime + datetime.timedelta(minutes=REAUTH_TTL) < now:
raise Exception('ReAuth')
except Exception as e:
log.info(f'ReAuth Required for User {username} on Operation {query.get("operationName", "")}, Error: {e}')
response = {
'data': {query.get('operationName', 'operation'): None},
'errors': [
{
'message': f"ReAuth Required To Perform This Action {query.get('operationName', '')}",
'locations': None,
'path': [query.get('operationName', '')],
'extensions': {'code': 'REAUTH'},
}
],
}
return {
'statusCode': 401,
'headers': {
'content-type': 'application/json',
'Access-Control-Allow-Origin': '*',
'Access-Control-Allow-Headers': '*',
'Access-Control-Allow-Methods': '*',
},
'body': json.dumps(response),
}

success, response = graphql_sync(schema=executable_schema, data=query, context_value=app_context)

dispose_context()
Expand Down
12 changes: 12 additions & 0 deletions backend/dataall/base/aws/parameter_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,18 @@ def get_parameter_value(AwsAccountId=None, region=None, parameter_path=None):
raise Exception(e)
return parameter_value

@staticmethod
def get_parameters_by_path(AwsAccountId=None, region=None, parameter_path=None):
    """Return every SSM parameter stored under ``parameter_path``.

    The SSM ``GetParametersByPath`` API is paginated, so a single call only
    returns the first page of results. All pages are collected here so that
    callers always receive the complete list.

    :param AwsAccountId: account passed through to ``ParameterStoreManager.client``
    :param region: region passed through to ``ParameterStoreManager.client``
    :param parameter_path: SSM path whose parameters are listed
    :return: list of the ``Parameters`` entries returned by SSM
    :raises Exception: if ``parameter_path`` is empty or SSM returns a ClientError
    """
    if not parameter_path:
        raise Exception('Parameter name is None')
    try:
        # NOTE(review): assumes ParameterStoreManager.client returns a boto3 SSM client
        # (the original code called get_parameters_by_path on it directly).
        paginator = ParameterStoreManager.client(AwsAccountId, region).get_paginator('get_parameters_by_path')
        parameter_values = []
        for page in paginator.paginate(Path=parameter_path):
            parameter_values.extend(page.get('Parameters', []))
    except ClientError as e:
        # Keep the generic Exception type callers already expect, but preserve the cause.
        raise Exception(e) from e
    return parameter_values

@staticmethod
def update_parameter(AwsAccountId, region, parameter_name, parameter_value):
if not parameter_name:
Expand Down
181 changes: 181 additions & 0 deletions backend/dataall/base/utils/api_handler_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
import datetime
import json
import os
import logging

from graphql import parse, utilities, OperationType, GraphQLSyntaxError
from dataall.base.aws.parameter_store import ParameterStoreManager
from dataall.base.db import get_engine
from dataall.base.services.service_provider_factory import ServiceProviderFactory
from dataall.core.permissions.services.tenant_permissions import TENANT_ALL
from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService
from dataall.modules.maintenance.api.enums import MaintenanceModes, MaintenanceStatus
from dataall.modules.maintenance.services.maintenance_service import MaintenanceService
from dataall.base.config import config
from dataall.core.permissions.services.tenant_policy_service import TenantPolicyValidationService

# Configure the root logger (getLogger() with no name) so its level follows the
# LOG_LEVEL environment variable, defaulting to INFO when the variable is unset.
logger = logging.getLogger()
logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this used for? We are using "log" everywhere else.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean why we are using the setLevel call?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the setLevel method is used mostly at the top of logger initialization. I am adding it here to set the log level to whatever is set for each lambda in their environment variable - which is available via the os.environ.get('LOG_LEVEL', 'INFO').

# Module-scoped logger used by the helpers in this file.
log = logging.getLogger(__name__)

# Name of the deployed environment; falls back to 'local' when 'envname' is unset.
ENVNAME = os.getenv('envname', 'local')
# Number of minutes after authentication during which re-auth protected operations are allowed.
REAUTH_TTL = int(os.getenv('REAUTH_TTL', '5'))
# Operations still allowed for non-admin users while NO-ACCESS maintenance mode is selected.
# Stored case-folded so lookups can be done case-insensitively.
MAINTENANCE_ALLOWED_OPERATIONS_WHEN_NO_ACCESS = list(
    map(str.casefold, ('getGroupsForUser', 'getMaintenanceWindowStatus'))
)
# Database engine shared by the helpers in this module.
ENGINE = get_engine(envname=ENVNAME)


def get_cognito_groups(claims):
    """Collect the user's group names from the authorizer claims.

    Two optional claims are read:
      - ``custom:saml.groups``: a bracketed list rendered as a string
        (for example ``"[groupA, groupB]"``); brackets and the space after
        each comma are stripped before splitting on commas.
      - ``cognito:groups``: a comma-separated string.

    :param claims: mapping of claim name to claim value
    :return: list of group names (SAML groups first, then Cognito groups)
    :raises ValueError: if ``claims`` is empty or None
    """
    if not claims:
        raise ValueError(
            'Received empty claims. Please verify authorizer configuration',
            claims,
        )
    groups = []
    saml_groups = claims.get('custom:saml.groups', '')
    if saml_groups:
        # str.maketrans only accepts single-character keys, so the two-character
        # ', ' separator cannot go through a translation table (it raises
        # ValueError). Chained replace() handles all three substitutions.
        groups = saml_groups.replace('[', '').replace(']', '').replace(', ', ',').split(',')
    cognito_groups = claims.get('cognito:groups', '')
    if cognito_groups:
        groups.extend(cognito_groups.split(','))
    return groups


def get_custom_groups(user_id):
    """Return the groups of ``user_id`` as reported by the configured service provider."""
    return ServiceProviderFactory.get_service_provider_instance().get_groups_for_user(user_id)


def send_unauthorized_response(operation='', message='', extension=None):
    """Build a 401 response whose body is a GraphQL-style error payload.

    :param operation: operation name used as the ``data`` key and the error ``path``
    :param message: human-readable error message
    :param extension: optional value stored under the error's ``extensions`` key
    :return: dict with ``statusCode``, CORS ``headers`` and a JSON-encoded ``body``
    """
    error_entry = {
        'message': message,
        'locations': None,
        'path': [operation],
    }
    # 'extensions' is only emitted when the caller supplies one.
    if extension is not None:
        error_entry['extensions'] = extension

    payload = {'data': {operation: None}, 'errors': [error_entry]}
    cors_headers = {
        'content-type': 'application/json',
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Headers': '*',
        'Access-Control-Allow-Methods': '*',
    }
    return {'statusCode': 401, 'headers': cors_headers, 'body': json.dumps(payload)}


def extract_groups(user_id, claims):
    """Resolve the caller's groups, returning whatever was collected if resolution fails.

    When the ``custom_auth`` environment variable is set the groups come from
    ``get_custom_groups``; otherwise they are read from ``claims`` via
    ``get_cognito_groups``. Any exception is logged and not propagated.

    :param user_id: identifier passed to the custom group lookup
    :param claims: authorizer claims passed to the Cognito group lookup
    :return: list of group names (empty if the lookup raised)
    """
    groups = []
    try:
        uses_custom_auth = os.environ.get('custom_auth', None)
        resolved = get_custom_groups(user_id) if uses_custom_auth else get_cognito_groups(claims)
        groups.extend(resolved)
        log.debug('groups are %s', ','.join(groups))
    except Exception as e:
        log.exception(f'Error managing groups due to: {e}')
    return groups


def attach_tenant_policy_for_groups(groups=None):
    """Attach the TENANT_ALL permissions to every group that has no tenant policy yet.

    :param groups: iterable of group names; ``None`` is treated as no groups
    """
    target_groups = [] if groups is None else groups
    with ENGINE.scoped_session() as session:
        for group in target_groups:
            existing_policy = TenantPolicyService.find_tenant_policy(
                session, group, TenantPolicyService.TENANT_NAME
            )
            if existing_policy:
                # Group already has a tenant policy; nothing to attach.
                continue
            log.info(f'No policy found for Team {group}. Attaching TENANT_ALL permissions')
            TenantPolicyService.attach_group_tenant_policy(
                session=session,
                group=group,
                permissions=TENANT_ALL,
                tenant_name=TenantPolicyService.TENANT_NAME,
            )


def check_reauth(query, auth_time, username):
    """Return a 401 response when the operation needs re-authentication and the session is too old.

    The comma-separated list of protected operations is read from the SSM
    parameter ``/dataall/{ENVNAME}/reauth/apis``. If that lookup fails, no
    operation is treated as protected.

    :param query: GraphQL request dict; its ``operationName`` is checked
    :param auth_time: authentication time as a Unix timestamp (int or numeric string)
    :param username: user name, used for logging only
    :return: unauthorized response dict if re-auth is required, otherwise None
    """
    # Determine if there are any Operations that Require ReAuth From SSM Parameter
    try:
        raw_reauth_apis = ParameterStoreManager.get_parameter_value(
            region=os.getenv('AWS_REGION', 'eu-west-1'), parameter_path=f'/dataall/{ENVNAME}/reauth/apis'
        )
        reauth_apis = raw_reauth_apis.split(',')
    except Exception:
        log.info('No ReAuth APIs Found in SSM')
        reauth_apis = None

    if not reauth_apis:
        return None
    if query.get('operationName', None) not in reauth_apis:
        return None

    # The Operation is a ReAuth Operation - Ensure A Non-Expired Session or Return Error
    current_time = datetime.datetime.now(datetime.timezone.utc)
    try:
        authenticated_at = datetime.datetime.fromtimestamp(int(auth_time), tz=datetime.timezone.utc)
        session_expiry = authenticated_at + datetime.timedelta(minutes=REAUTH_TTL)
        if session_expiry < current_time:
            raise Exception('ReAuth')
    except Exception as e:
        # An unparsable auth_time is treated the same as an expired session.
        log.info(f'ReAuth Required for User {username} on Operation {query.get("operationName", "")}, Error: {e}')
        return send_unauthorized_response(
            operation=query.get('operationName', 'operation'),
            message=f"ReAuth Required To Perform This Action {query.get('operationName', '')}",
            extension={'code': 'REAUTH'},
        )
    return None


def validate_and_block_if_maintenance_window(query, groups, blocked_for_mode_enum=None):
    """
    When the maintenance module is set to active, checks
    - If the maintenance mode is enabled
    - Based on the maintenance mode, actions which can be taken by user can be modified
        - READ-ONLY -> Block all mutation calls and allow query graphql calls
        - NO-ACCESS -> Block all graphql calls irrespective of type, except the
          operations listed in MAINTENANCE_ALLOWED_OPERATIONS_WHEN_NO_ACCESS
    - Check if the user is a tenant admin (admins are never blocked)
    @param query: graphql query dict containing operationName, query, variables
    @param groups: user groups
    @param blocked_for_mode_enum: sets the mode for blocking only specific modes. When set to None, blocking is
        applied for both modes. When a specific mode is set, blocking will only occur for that mode
    @return: error response if maintenance window is blocking gql calls else None
    @raise GraphQLSyntaxError: if the query document cannot be parsed while in READ-ONLY mode
    """
    if not config.get_property('modules.maintenance.active'):
        return None

    maintenance_mode = MaintenanceService._get_maintenance_window_mode(engine=ENGINE)
    maintenance_status = MaintenanceService.get_maintenance_window_status().status
    is_admin = TenantPolicyValidationService.is_tenant_admin(groups)

    # Compare by value: `is not` on strings tests object identity, which is not
    # guaranteed to hold for equal strings coming from the database.
    if maintenance_status == MaintenanceStatus.INACTIVE.value or is_admin:
        return None

    blocked_message = (
        'Access Restricted: data.all is currently undergoing maintenance, and your actions are temporarily blocked.'
    )
    # operationName may be missing or explicitly null in the request body.
    operation_name = query.get('operationName') or ''

    if maintenance_mode == MaintenanceModes.NOACCESS.value:
        if blocked_for_mode_enum is not None and blocked_for_mode_enum != MaintenanceModes.NOACCESS:
            return None
        if operation_name.casefold() not in MAINTENANCE_ALLOWED_OPERATIONS_WHEN_NO_ACCESS:
            return send_unauthorized_response(
                operation=query.get('operationName', 'operation'),
                message=blocked_message,
            )
        return None

    if maintenance_mode == MaintenanceModes.READONLY.value:
        if blocked_for_mode_enum is not None and blocked_for_mode_enum != MaintenanceModes.READONLY:
            return None
        # If its mutation then block and return
        try:
            parsed_query_document = parse(query.get('query') or '')
        except GraphQLSyntaxError as e:
            log.error(
                f'Error occured while parsing query when validating for {maintenance_mode} maintenance mode due to - {e}'
            )
            raise
        # Resolve the operation that will actually be executed. Without the name,
        # get_operation_ast returns None for documents holding several operations.
        operation_ast = utilities.get_operation_ast(parsed_query_document, operation_name or None)
        # Fail closed: if the operation cannot be identified, treat it as blocked
        # rather than crashing on a None attribute access.
        if operation_ast is None or operation_ast.operation == OperationType.MUTATION:
            return send_unauthorized_response(
                operation=query.get('operationName', 'operation'),
                message=blocked_message,
            )

    return None
7 changes: 5 additions & 2 deletions backend/dataall/core/stacks/aws/ecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,13 @@ def run_ecs_task(
raise e

@staticmethod
def is_task_running(cluster_name, started_by):
def is_task_running(cluster_name, started_by=None):
try:
client = boto3.client('ecs')
running_tasks = client.list_tasks(cluster=cluster_name, startedBy=started_by, desiredStatus='RUNNING')
if started_by is None:
running_tasks = client.list_tasks(cluster=cluster_name, desiredStatus='RUNNING')
else:
running_tasks = client.list_tasks(cluster=cluster_name, startedBy=started_by, desiredStatus='RUNNING')
if running_tasks and running_tasks.get('taskArns'):
return True
return False
Expand Down
Loading
Loading