Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions service_configuration_lib/spark_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,7 @@ def _append_event_log_conf(
spark_opts['spark.eventLog.enabled'] = 'false'
return spark_opts

spark_opts['aws_account_id'] = account_id
for conf in self.spark_srv_conf.get('environments', {}).values():
if account_id == conf['account_id']:
spark_opts['spark.eventLog.enabled'] = 'true'
Expand Down Expand Up @@ -1262,6 +1263,27 @@ def get_spark_conf(
if aws_creds:
spark_conf = _append_aws_credentials_conf(spark_conf, *aws_creds, aws_region)

# gather all data needed by our internal Spark configuration service
scs_args = {
'cluster_manager': cluster_manager,
'spark_app_base_name': spark_app_base_name,
'docker_image': docker_img,
'user_spark_opts': user_spark_opts,
'paasta_cluster': paasta_cluster,
'paasta_pool': paasta_pool,
'paasta_service': paasta_service,
'paasta_instance': paasta_instance,
'extra_volumes': extra_volumes,
'force_spark_resource_configs': force_spark_resource_configs,
'k8s_server_address': k8s_server_address,
'jira_ticket': jira_ticket,
'service_account_name': service_account_name,
'ui_port': int(ui_port),
'user': os.environ.get('USER'),
'aws_account_id': spark_conf.pop('aws_account_id', None),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we extract the account-id code into a function and put an lru_cache (or something similar) on it, then we could avoid using spark_conf as a way to pass short-lived data to this function — but I guess this is fine if y'all are fine with it :p

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought of that, but felt the short-lived value is simpler and better than adding a function with an LRU cache.

}
spark_conf['scs_conf'] = json.dumps(scs_args, indent=4)

return spark_conf


Expand Down
6 changes: 4 additions & 2 deletions tests/spark_config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1333,7 +1333,8 @@ def test_leaders_get_spark_conf_kubernetes(
list(mock_append_spark_prometheus_conf.return_value.keys()) +
list(mock_append_event_log_conf.return_value.keys()) +
list(mock_append_aws_credentials_conf.return_value.keys()) +
list(mock_append_sql_partitions_conf.return_value.keys()),
list(mock_append_sql_partitions_conf.return_value.keys()) +
['scs_conf'], # services config key
)
assert set(output.keys()) == verified_keys
mock_adjust_spark_requested_resources_kubernetes.mocker.assert_called_once_with(
Expand Down Expand Up @@ -1464,7 +1465,8 @@ def test_local_spark(
list(mock_adjust_spark_requested_resources_kubernetes.return_value.keys()) +
list(mock_get_dra_configs.return_value.keys()) +
list(mock_append_aws_credentials_conf.return_value.keys()) +
list(mock_append_sql_partitions_conf.return_value.keys()),
list(mock_append_sql_partitions_conf.return_value.keys()) +
['scs_conf'], # services config key
)
assert set(output.keys()) == verified_keys
mock_append_event_log_conf.mocker.assert_called_once_with(
Expand Down